From 7886cbc392658546faf7f220fccdaf52e986ba7f Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Fri, 16 Jun 2023 15:34:03 +0200 Subject: [PATCH 01/25] First implementation of the summary stat. --- .../datalore/plot/base/stat/SummaryStat.kt | 111 +++++++++++++ .../plot/base/stat/SummaryStatUtil.kt | 146 ++++++++++++++++++ .../jetbrains/datalore/plot/config/Option.kt | 9 ++ .../datalore/plot/config/OptionsAccessor.kt | 9 ++ .../datalore/plot/config/StatKind.kt | 3 +- .../datalore/plot/config/StatProto.kt | 39 +++++ .../plotDemo/model/plotConfig/Summary.kt | 77 +++++++++ .../plotDemo/plotConfig/SummaryBatik.kt | 18 +++ python-package/lets_plot/plot/__init__.py | 2 + python-package/lets_plot/plot/stat.py | 31 ++++ 10 files changed, 444 insertions(+), 1 deletion(-) create mode 100644 plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt create mode 100644 plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt create mode 100644 plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt create mode 100644 plot-demo/src/jvmBatikMain/kotlin/plotDemo/plotConfig/SummaryBatik.kt create mode 100644 python-package/lets_plot/plot/stat.py diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt new file mode 100644 index 00000000000..926f98a16bd --- /dev/null +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2023. JetBrains s.r.o. + * Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
+ */ + +package jetbrains.datalore.plot.base.stat + +import jetbrains.datalore.plot.base.Aes +import jetbrains.datalore.plot.base.DataFrame +import jetbrains.datalore.plot.base.StatContext +import jetbrains.datalore.plot.base.data.TransformVar +import jetbrains.datalore.plot.common.data.SeriesUtil + +class SummaryStat( + private val yAgg: (SummaryStatUtil.SummaryCalculator) -> Double, + private val minAgg: (SummaryStatUtil.SummaryCalculator) -> Double, + private val maxAgg: (SummaryStatUtil.SummaryCalculator) -> Double, + private val middleAgg: (SummaryStatUtil.SummaryCalculator) -> Double, + private val lowerAgg: (SummaryStatUtil.SummaryCalculator) -> Double, + private val upperAgg: (SummaryStatUtil.SummaryCalculator) -> Double +) : BaseStat(DEF_MAPPING) { + + override fun consumes(): List> { + return listOf(Aes.X, Aes.Y) + } + + override fun apply(data: DataFrame, statCtx: StatContext, messageConsumer: (s: String) -> Unit): DataFrame { + if (!hasRequiredValues(data, Aes.Y)) { + return withEmptyStatValues() + } + + val ys = data.getNumeric(TransformVar.Y) + val xs = if (data.has(TransformVar.X)) { + data.getNumeric(TransformVar.X) + } else { + List(ys.size) { 0.0 } + } + + val statData = buildStat(xs, ys) + + val builder = DataFrame.Builder() + for ((variable, series) in statData) { + builder.putNumeric(variable, series) + } + return builder.build() + } + + private fun buildStat( + xs: List, + ys: List + ): MutableMap> { + val xyPairs = SeriesUtil.filterFinite(xs, ys) + .let { (xs, ys) -> xs zip ys } + if (xyPairs.isEmpty()) { + return mutableMapOf() + } + + val binnedData: MutableMap> = HashMap() + for ((x, y) in xyPairs) { + binnedData.getOrPut(x) { ArrayList() }.add(y) + } + + val statX = ArrayList() + val statY = ArrayList() + val statMin = ArrayList() + val statMax = ArrayList() + val statMiddle = ArrayList() + val statLower = ArrayList() + val statUpper = ArrayList() + + for ((x, bin) in binnedData) { + val calc = SummaryStatUtil.SummaryCalculator(bin) + 
statX.add(x) + statY.add(yAgg(calc)) + statMin.add(minAgg(calc)) + statMax.add(maxAgg(calc)) + statMiddle.add(middleAgg(calc)) + statLower.add(lowerAgg(calc)) + statUpper.add(upperAgg(calc)) + } + + return mutableMapOf( + Stats.X to statX, + Stats.Y to statY, + Stats.Y_MIN to statMin, + Stats.Y_MAX to statMax, + Stats.MIDDLE to statMiddle, + Stats.LOWER to statLower, + Stats.UPPER to statUpper, + ) + } + + companion object { + const val DEF_Y_AGG_FUN = "mean" + const val DEF_MIN_AGG_FUN = "min" + const val DEF_MAX_AGG_FUN = "max" + const val DEF_MIDDLE_AGG_FUN = "nan" + const val DEF_LOWER_AGG_FUN = "nan" + const val DEF_UPPER_AGG_FUN = "nan" + + private val DEF_MAPPING: Map, DataFrame.Variable> = mapOf( + Aes.X to Stats.X, + Aes.Y to Stats.Y, + Aes.YMIN to Stats.Y_MIN, + Aes.YMAX to Stats.Y_MAX, + Aes.MIDDLE to Stats.MIDDLE, + Aes.LOWER to Stats.LOWER, + Aes.UPPER to Stats.UPPER + ) + } +} \ No newline at end of file diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt new file mode 100644 index 00000000000..64e4de73330 --- /dev/null +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2023. JetBrains s.r.o. + * Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
+ */ + +package jetbrains.datalore.plot.base.stat + +import jetbrains.datalore.base.gcommon.collect.Ordering +import kotlin.math.ceil +import kotlin.math.floor +import kotlin.math.round + +object SummaryStatUtil { + fun getStandardAggFun(aggFunName: AggFun): (SummaryCalculator) -> Double { + return when (aggFunName) { + AggFun.NAN -> { calc -> calc.nan() } + AggFun.COUNT -> { calc -> calc.count() } + AggFun.SUM -> { calc -> calc.sum() } + AggFun.MEAN -> { calc -> calc.mean() } + AggFun.MEDIAN -> { calc -> calc.median() } + AggFun.MIN -> { calc -> calc.min() } + AggFun.MAX -> { calc -> calc.max() } + AggFun.Q1 -> { calc -> calc.q1() } + AggFun.Q3 -> { calc -> calc.q3() } + } + } + + fun getQuantileAggFun(p: Double): (SummaryCalculator) -> Double { + return { calc -> calc.quantile(p) } + } + + class SummaryCalculator(values: List) { + private val sortedValues: List = Ordering.natural().sortedCopy(values) + + private var count: Double? = null + private var sum: Double? = null + private var mean: Double? = null + private var median: Double? = null + private var min: Double? = null + private var max: Double? = null + private var q1: Double? = null + private var q3: Double? = null + + fun nan(): Double { + return Double.NaN + } + + fun count(): Double { + if (count == null) { + count = sortedValues.size.toDouble() + } + return count!! + } + + fun sum(): Double { + if (sum == null) { + sum = sortedValues.sum() + } + return sum!! + } + + fun mean(): Double { + if (mean == null) { + mean = if (sortedValues.isEmpty()) { + Double.NaN + } else if (sortedValues.size == 1) { + sortedValues.first() + } else { + sum() / sortedValues.size + } + } + return mean!! + } + + fun median(): Double { + if (median == null) { + median = quantile(0.5) + } + return median!! + } + + fun min(): Double { + if (min == null) { + min = if (sortedValues.isEmpty()) { + Double.NaN + } else { + sortedValues.first() + } + } + return min!! 
+ } + + fun max(): Double { + if (max == null) { + max = if (sortedValues.isEmpty()) { + Double.NaN + } else { + sortedValues.last() + } + } + return max!! + } + + fun q1(): Double { + if (q1 == null) { + q1 = quantile(0.25) + } + return q1!! + } + + fun q3(): Double { + if (q3 == null) { + q3 = quantile(0.75) + } + return q3!! + } + + fun quantile(p: Double): Double { + if (sortedValues.isEmpty()) { + return Double.NaN + } + if (sortedValues.size == 1) { + return sortedValues.first() + } + val place = p * (sortedValues.size - 1) + val i = round(place) + return if (place == i) { + sortedValues[place.toInt()] + } else { + (sortedValues[ceil(place).toInt()] + sortedValues[floor(place).toInt()]) / 2.0 + } + } + } + + enum class AggFun { + NAN, + COUNT, + SUM, + MEAN, + MEDIAN, + MIN, + MAX, + Q1, + Q3, + } +} \ No newline at end of file diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/Option.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/Option.kt index b7ec0b3a26b..d16c295514b 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/Option.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/Option.kt @@ -429,6 +429,15 @@ object Option { const val DISTRIBUTION_PARAMETERS = "dparams" const val LINE_QUANTILES = "quantiles" } + + object Summary { + const val FUN = "fun" + const val FUN_MIN = "fun_min" + const val FUN_MAX = "fun_max" + const val FUN_MIDDLE = "fun_middle" + const val FUN_LOWER = "fun_lower" + const val FUN_UPPER = "fun_upper" + } } object Pos { diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/OptionsAccessor.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/OptionsAccessor.kt index c5efe20e84f..721ac21f95c 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/OptionsAccessor.kt +++ 
b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/OptionsAccessor.kt @@ -35,6 +35,11 @@ open class OptionsAccessor( return options[option] != null } + fun isNumber(option: String): Boolean { + val v = get(option) ?: return false + return v is Number + } + operator fun get(option: String): Any? { return if (hasOwn(option)) { options[option] @@ -51,6 +56,10 @@ open class OptionsAccessor( return get(option)?.toString() } + fun getStringDef(option: String, def: String): String { + return getString(option) ?: def + } + fun getStringSafe(option: String): String { return getString(option) ?: throw IllegalArgumentException("Can't get string value: option '$option' is not present.") diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatKind.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatKind.kt index f18e925ab36..18495a02b09 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatKind.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatKind.kt @@ -28,7 +28,8 @@ enum class StatKind { QQ, QQ2, QQ_LINE, - QQ2_LINE; + QQ2_LINE, + SUMMARY; companion object { diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 22aac78f89e..2516f8c5269 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -19,6 +19,7 @@ import jetbrains.datalore.plot.config.Option.Stat.DensityRidges import jetbrains.datalore.plot.config.Option.Stat.YDensity import jetbrains.datalore.plot.config.Option.Stat.QQ import jetbrains.datalore.plot.config.Option.Stat.QQLine +import jetbrains.datalore.plot.config.Option.Stat.Summary object StatProto { @@ -115,6 +116,8 @@ object 
StatProto { StatKind.QQ2_LINE -> return configureQQ2LineStat(options) + StatKind.SUMMARY -> return configureSummaryStat(options) + else -> throw IllegalArgumentException("Unknown stat: '$statKind'") } } @@ -401,4 +404,40 @@ object StatProto { return Stats.qq2line(lineQuantiles ?: QQLineStat.DEF_LINE_QUANTILES) } + + private fun configureSummaryStat(options: OptionsAccessor): SummaryStat { + val getAggFun: (String, String) -> (SummaryStatUtil.SummaryCalculator) -> Double = { option, default -> + if (options.isNumber(option)) { + SummaryStatUtil.getQuantileAggFun(options.getDouble(option)!!) + } else { + val aggFunName = options.getStringDef(option, default).let { + when (it.lowercase()) { + "nan" -> SummaryStatUtil.AggFun.NAN + "count" -> SummaryStatUtil.AggFun.COUNT + "sum" -> SummaryStatUtil.AggFun.SUM + "mean" -> SummaryStatUtil.AggFun.MEAN + "median" -> SummaryStatUtil.AggFun.MEDIAN + "min" -> SummaryStatUtil.AggFun.MIN + "max" -> SummaryStatUtil.AggFun.MAX + "q1" -> SummaryStatUtil.AggFun.Q1 + "q3" -> SummaryStatUtil.AggFun.Q3 + else -> throw IllegalArgumentException( + "Unsupported function name: '$it'\n" + + "Use one of: nan, count, sum, mean, median, min, max, q1, q3." 
+ ) + } + } + SummaryStatUtil.getStandardAggFun(aggFunName) + } + } + + val yAgg = getAggFun(Summary.FUN, SummaryStat.DEF_Y_AGG_FUN) + val minAgg = getAggFun(Summary.FUN_MIN, SummaryStat.DEF_MIN_AGG_FUN) + val maxAgg = getAggFun(Summary.FUN_MAX, SummaryStat.DEF_MAX_AGG_FUN) + val middleAgg = getAggFun(Summary.FUN_MIDDLE, SummaryStat.DEF_MIDDLE_AGG_FUN) + val lowerAgg = getAggFun(Summary.FUN_LOWER, SummaryStat.DEF_LOWER_AGG_FUN) + val upperAgg = getAggFun(Summary.FUN_UPPER, SummaryStat.DEF_UPPER_AGG_FUN) + + return SummaryStat(yAgg, minAgg, maxAgg, middleAgg, lowerAgg, upperAgg) + } } diff --git a/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt b/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt new file mode 100644 index 00000000000..ff25d4ed27a --- /dev/null +++ b/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2023. JetBrains s.r.o. + * Use of this source code is governed by the MIT license that can be found in the LICENSE file. + */ + +package jetbrains.datalore.plotDemo.model.plotConfig + +import jetbrains.datalore.plot.parsePlotSpec +import jetbrains.datalore.plotDemo.data.Iris + +class Summary { + fun plotSpecList(): List> { + return listOf( + basic(), + vsBoxplot(), + ) + } + + private fun basic(): MutableMap { + val spec = """ + { + 'kind': 'plot', + 'mapping': { + 'x': 'target', + 'y': 'sepal length (cm)' + }, + 'ggtitle': { + 'text': 'Basic demo' + }, + 'layers': [ + { + 'geom': 'pointrange', + 'stat': 'summary' + } + ] + } + """.trimIndent() + + val plotSpec = HashMap(parsePlotSpec(spec)) + plotSpec["data"] = Iris.df + return plotSpec + + } + + private fun vsBoxplot(): MutableMap { + val spec = """ + { + 'kind': 'plot', + 'mapping': { + 'x': 'target', + 'y': 'sepal length (cm)' + }, + 'ggtitle': { + 'text': 'Summary vs. 
Boxplot' + }, + 'layers': [ + { + 'geom': 'boxplot' + }, + { + 'geom': 'pointrange', + 'stat': 'summary', + 'fun': 'median', + 'fun_min': 'q1', + 'fun_max': 0.75, + 'color': 'red' + } + ] + } + """.trimIndent() + + val plotSpec = HashMap(parsePlotSpec(spec)) + plotSpec["data"] = Iris.df + return plotSpec + + } +} \ No newline at end of file diff --git a/plot-demo/src/jvmBatikMain/kotlin/plotDemo/plotConfig/SummaryBatik.kt b/plot-demo/src/jvmBatikMain/kotlin/plotDemo/plotConfig/SummaryBatik.kt new file mode 100644 index 00000000000..ddc79c21970 --- /dev/null +++ b/plot-demo/src/jvmBatikMain/kotlin/plotDemo/plotConfig/SummaryBatik.kt @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2023. JetBrains s.r.o. + * Use of this source code is governed by the MIT license that can be found in the LICENSE file. + */ + +package jetbrains.datalore.plotDemo.plotConfig + +import jetbrains.datalore.plotDemo.model.plotConfig.Summary +import jetbrains.datalore.vis.demoUtils.PlotSpecsDemoWindowBatik + +fun main() { + with(Summary()) { + PlotSpecsDemoWindowBatik( + "Summary stat plot", + plotSpecList() + ).open() + } +} \ No newline at end of file diff --git a/python-package/lets_plot/plot/__init__.py b/python-package/lets_plot/plot/__init__.py index 603897a8ebe..e64359aacae 100644 --- a/python-package/lets_plot/plot/__init__.py +++ b/python-package/lets_plot/plot/__init__.py @@ -22,6 +22,7 @@ from .scale_convenience import * from .scale_identity_ import * from .scale_position import * +from .stat import * from .theme_ import * from .theme_set import * from .tooltip import * @@ -42,6 +43,7 @@ scale_convenience.__all__ + scale_identity_.__all__ + scale_position.__all__ + + stat.__all__ + theme_.__all__ + theme_set.__all__ + tooltip.__all__ + diff --git a/python-package/lets_plot/plot/stat.py b/python-package/lets_plot/plot/stat.py new file mode 100644 index 00000000000..0401334f36a --- /dev/null +++ b/python-package/lets_plot/plot/stat.py @@ -0,0 +1,31 @@ +# Copyright (c) 2023. JetBrains s.r.o. 
+# Use of this source code is governed by the MIT license that can be found in the LICENSE file. + +from .geom import _geom + + +# +# Stats - functions, drawing attention to the statistical transformation rather than the visual appearance. +# +__all__ = ['stat_summary'] + + +def stat_summary(mapping=None, *, data=None, geom='pointrange', + position=None, show_legend=None, sampling=None, tooltips=None, + orientation=None, + fun=None, fun_min=None, fun_max=None, fun_middle=None, fun_lower=None, fun_upper=None, + color_by=None, fill_by=None, + **other_args): + return _geom(geom, + mapping=mapping, + data=data, + stat='summary', + position=position, + show_legend=show_legend, + sampling=sampling, + tooltips=tooltips, + orientation=orientation, + fun=fun, fun_min=fun_min, fun_max=fun_max, + fun_middle=fun_middle, fun_lower=fun_lower, fun_upper=fun_upper, + color_by=color_by, fill_by=fill_by, + **other_args) From d95a062b7591c929dbc305df29acb5413821a514 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Tue, 20 Jun 2023 18:26:52 +0200 Subject: [PATCH 02/25] Update API for the stat_summary() function. 
--- .../jetbrains/datalore/plot/base/Aes.kt | 4 ++ .../datalore/plot/base/stat/SummaryStat.kt | 49 ++++++------------- .../jetbrains/datalore/plot/config/Option.kt | 4 +- .../datalore/plot/config/StatProto.kt | 35 ++++++++----- .../plotDemo/model/plotConfig/Summary.kt | 28 +++++++++++ python-package/lets_plot/plot/stat.py | 5 +- 6 files changed, 73 insertions(+), 52 deletions(-) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/Aes.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/Aes.kt index cf8a28de542..ebb4a8be76d 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/Aes.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/Aes.kt @@ -202,5 +202,9 @@ class Aes private constructor(val name: String, val isNumeric: Boolean = true @Suppress("UNCHECKED_CAST") return values.filter { isPositional(it) } as List> } + + fun byName(aesName: String): Aes<*>? { + return values.firstOrNull { it.name == aesName } + } } } diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index 926f98a16bd..96d1c24f913 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -12,12 +12,7 @@ import jetbrains.datalore.plot.base.data.TransformVar import jetbrains.datalore.plot.common.data.SeriesUtil class SummaryStat( - private val yAgg: (SummaryStatUtil.SummaryCalculator) -> Double, - private val minAgg: (SummaryStatUtil.SummaryCalculator) -> Double, - private val maxAgg: (SummaryStatUtil.SummaryCalculator) -> Double, - private val middleAgg: (SummaryStatUtil.SummaryCalculator) -> Double, - private val lowerAgg: (SummaryStatUtil.SummaryCalculator) -> Double, - private val upperAgg: 
(SummaryStatUtil.SummaryCalculator) -> Double + private val aggFunctionsMap: Map, (SummaryStatUtil.SummaryCalculator) -> Double> ) : BaseStat(DEF_MAPPING) { override fun consumes(): List> { @@ -48,7 +43,7 @@ class SummaryStat( private fun buildStat( xs: List, ys: List - ): MutableMap> { + ): Map> { val xyPairs = SeriesUtil.filterFinite(xs, ys) .let { (xs, ys) -> xs zip ys } if (xyPairs.isEmpty()) { @@ -60,43 +55,27 @@ class SummaryStat( binnedData.getOrPut(x) { ArrayList() }.add(y) } - val statX = ArrayList() - val statY = ArrayList() - val statMin = ArrayList() - val statMax = ArrayList() - val statMiddle = ArrayList() - val statLower = ArrayList() - val statUpper = ArrayList() - + val statValues: Map, MutableList> = DEF_MAPPING.keys.associateWith { mutableListOf() } + val defaultAggFun = SummaryStatUtil.getStandardAggFun(SummaryStatUtil.AggFun.NAN) for ((x, bin) in binnedData) { val calc = SummaryStatUtil.SummaryCalculator(bin) - statX.add(x) - statY.add(yAgg(calc)) - statMin.add(minAgg(calc)) - statMax.add(maxAgg(calc)) - statMiddle.add(middleAgg(calc)) - statLower.add(lowerAgg(calc)) - statUpper.add(upperAgg(calc)) + for (aes in statValues.keys) { + if (aes == Aes.X) { + statValues[aes]!!.add(x) + } else { + statValues[aes]!!.add(aggFunctionsMap.getOrElse(aes) { defaultAggFun }(calc)) + } + } } - return mutableMapOf( - Stats.X to statX, - Stats.Y to statY, - Stats.Y_MIN to statMin, - Stats.Y_MAX to statMax, - Stats.MIDDLE to statMiddle, - Stats.LOWER to statLower, - Stats.UPPER to statUpper, - ) + return statValues.map { (aes, values) -> Pair(DEF_MAPPING[aes]!!, values) }.toMap() } companion object { const val DEF_Y_AGG_FUN = "mean" const val DEF_MIN_AGG_FUN = "min" const val DEF_MAX_AGG_FUN = "max" - const val DEF_MIDDLE_AGG_FUN = "nan" - const val DEF_LOWER_AGG_FUN = "nan" - const val DEF_UPPER_AGG_FUN = "nan" + const val DEF_AGG_FUN = "nan" private val DEF_MAPPING: Map, DataFrame.Variable> = mapOf( Aes.X to Stats.X, @@ -105,7 +84,7 @@ class SummaryStat( 
Aes.YMAX to Stats.Y_MAX, Aes.MIDDLE to Stats.MIDDLE, Aes.LOWER to Stats.LOWER, - Aes.UPPER to Stats.UPPER + Aes.UPPER to Stats.UPPER, ) } } \ No newline at end of file diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/Option.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/Option.kt index d16c295514b..db3d7b92883 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/Option.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/Option.kt @@ -434,9 +434,7 @@ object Option { const val FUN = "fun" const val FUN_MIN = "fun_min" const val FUN_MAX = "fun_max" - const val FUN_MIDDLE = "fun_middle" - const val FUN_LOWER = "fun_lower" - const val FUN_UPPER = "fun_upper" + const val FUN_MAP = "fun_map" } } diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 2516f8c5269..4ee1e106940 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -5,6 +5,7 @@ package jetbrains.datalore.plot.config +import jetbrains.datalore.plot.base.Aes import jetbrains.datalore.plot.base.GeomKind import jetbrains.datalore.plot.base.Stat import jetbrains.datalore.plot.base.stat.* @@ -406,11 +407,11 @@ object StatProto { } private fun configureSummaryStat(options: OptionsAccessor): SummaryStat { - val getAggFun: (String, String) -> (SummaryStatUtil.SummaryCalculator) -> Double = { option, default -> - if (options.isNumber(option)) { - SummaryStatUtil.getQuantileAggFun(options.getDouble(option)!!) 
+ fun getAggFunction(opts: OptionsAccessor, option: String, default: String): (SummaryStatUtil.SummaryCalculator) -> Double { + return if (opts.isNumber(option)) { + SummaryStatUtil.getQuantileAggFun(opts.getDouble(option)!!) } else { - val aggFunName = options.getStringDef(option, default).let { + val aggFunName = opts.getStringDef(option, default).let { when (it.lowercase()) { "nan" -> SummaryStatUtil.AggFun.NAN "count" -> SummaryStatUtil.AggFun.COUNT @@ -431,13 +432,25 @@ object StatProto { } } - val yAgg = getAggFun(Summary.FUN, SummaryStat.DEF_Y_AGG_FUN) - val minAgg = getAggFun(Summary.FUN_MIN, SummaryStat.DEF_MIN_AGG_FUN) - val maxAgg = getAggFun(Summary.FUN_MAX, SummaryStat.DEF_MAX_AGG_FUN) - val middleAgg = getAggFun(Summary.FUN_MIDDLE, SummaryStat.DEF_MIDDLE_AGG_FUN) - val lowerAgg = getAggFun(Summary.FUN_LOWER, SummaryStat.DEF_LOWER_AGG_FUN) - val upperAgg = getAggFun(Summary.FUN_UPPER, SummaryStat.DEF_UPPER_AGG_FUN) + val standardAggFunctions = mapOf( + Aes.Y to getAggFunction(options, Summary.FUN, SummaryStat.DEF_Y_AGG_FUN), + Aes.YMIN to getAggFunction(options, Summary.FUN_MIN, SummaryStat.DEF_MIN_AGG_FUN), + Aes.YMAX to getAggFunction(options, Summary.FUN_MAX, SummaryStat.DEF_MAX_AGG_FUN) + ) + + val additionalAggFunctions: MutableMap, (SummaryStatUtil.SummaryCalculator) -> Double> = mutableMapOf() + val funMap: Map = if (options.hasOwn(Summary.FUN_MAP)) { + options.getMap(Summary.FUN_MAP) + } else { + emptyMap() + } + val funOptions = OptionsAccessor(funMap) + for (aesName in funMap.keys) { + Aes.byName(aesName.lowercase())?.let { aes -> + additionalAggFunctions[aes] = getAggFunction(funOptions, aesName, SummaryStat.DEF_AGG_FUN) + } + } - return SummaryStat(yAgg, minAgg, maxAgg, middleAgg, lowerAgg, upperAgg) + return SummaryStat(standardAggFunctions + additionalAggFunctions) } } diff --git a/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt 
b/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt index ff25d4ed27a..4c77000825c 100644 --- a/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt +++ b/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt @@ -13,6 +13,7 @@ class Summary { return listOf( basic(), vsBoxplot(), + crossbarGeom(), ) } @@ -74,4 +75,31 @@ class Summary { return plotSpec } + + private fun crossbarGeom(): MutableMap { + val spec = """ + { + 'kind': 'plot', + 'mapping': { + 'x': 'target', + 'y': 'sepal length (cm)' + }, + 'ggtitle': { + 'text': 'Basic demo' + }, + 'layers': [ + { + 'geom': 'crossbar', + 'stat': 'summary', + 'fun_map': {'middle': 'median'} + } + ] + } + """.trimIndent() + + val plotSpec = HashMap(parsePlotSpec(spec)) + plotSpec["data"] = Iris.df + return plotSpec + + } } \ No newline at end of file diff --git a/python-package/lets_plot/plot/stat.py b/python-package/lets_plot/plot/stat.py index 0401334f36a..cf07325bd50 100644 --- a/python-package/lets_plot/plot/stat.py +++ b/python-package/lets_plot/plot/stat.py @@ -13,7 +13,7 @@ def stat_summary(mapping=None, *, data=None, geom='pointrange', position=None, show_legend=None, sampling=None, tooltips=None, orientation=None, - fun=None, fun_min=None, fun_max=None, fun_middle=None, fun_lower=None, fun_upper=None, + fun=None, fun_min=None, fun_max=None, fun_map=None, color_by=None, fill_by=None, **other_args): return _geom(geom, @@ -25,7 +25,6 @@ def stat_summary(mapping=None, *, data=None, geom='pointrange', sampling=sampling, tooltips=tooltips, orientation=orientation, - fun=fun, fun_min=fun_min, fun_max=fun_max, - fun_middle=fun_middle, fun_lower=fun_lower, fun_upper=fun_upper, + fun=fun, fun_min=fun_min, fun_max=fun_max, fun_map=fun_map, color_by=color_by, fill_by=fill_by, **other_args) From b28bf7f314c0ba6fe2b39357b6eca6f399c2f227 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Wed, 21 Jun 2023 
13:52:14 +0200 Subject: [PATCH 03/25] Small refactor in SummaryStat. --- .../jetbrains/datalore/plot/base/Aes.kt | 4 -- .../datalore/plot/base/stat/SummaryStat.kt | 18 ++++----- .../plot/base/stat/SummaryStatUtil.kt | 38 ++++++------------- .../datalore/plot/config/StatProto.kt | 28 +++++++------- 4 files changed, 33 insertions(+), 55 deletions(-) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/Aes.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/Aes.kt index ebb4a8be76d..cf8a28de542 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/Aes.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/Aes.kt @@ -202,9 +202,5 @@ class Aes private constructor(val name: String, val isNumeric: Boolean = true @Suppress("UNCHECKED_CAST") return values.filter { isPositional(it) } as List> } - - fun byName(aesName: String): Aes<*>? { - return values.firstOrNull { it.name == aesName } - } } } diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index 96d1c24f913..230ce5950a7 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -12,7 +12,7 @@ import jetbrains.datalore.plot.base.data.TransformVar import jetbrains.datalore.plot.common.data.SeriesUtil class SummaryStat( - private val aggFunctionsMap: Map, (SummaryStatUtil.SummaryCalculator) -> Double> + private val aggFunctionsMap: Map, (SummaryStatUtil.Calculator) -> Double> ) : BaseStat(DEF_MAPPING) { override fun consumes(): List> { @@ -44,21 +44,17 @@ class SummaryStat( xs: List, ys: List ): Map> { - val xyPairs = SeriesUtil.filterFinite(xs, ys) + val binnedData = SeriesUtil.filterFinite(xs, ys) .let { (xs, ys) -> xs zip ys } - if 
(xyPairs.isEmpty()) { - return mutableMapOf() - } - - val binnedData: MutableMap> = HashMap() - for ((x, y) in xyPairs) { - binnedData.getOrPut(x) { ArrayList() }.add(y) + .groupBy(keySelector = { it.first }, valueTransform = { it.second }) + if (binnedData.isEmpty()) { + return emptyMap() } val statValues: Map, MutableList> = DEF_MAPPING.keys.associateWith { mutableListOf() } - val defaultAggFun = SummaryStatUtil.getStandardAggFun(SummaryStatUtil.AggFun.NAN) + val defaultAggFun = SummaryStatUtil.AggFun.NAN.aggFun for ((x, bin) in binnedData) { - val calc = SummaryStatUtil.SummaryCalculator(bin) + val calc = SummaryStatUtil.Calculator(bin) for (aes in statValues.keys) { if (aes == Aes.X) { statValues[aes]!!.add(x) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt index 64e4de73330..08d3211d701 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt @@ -11,25 +11,11 @@ import kotlin.math.floor import kotlin.math.round object SummaryStatUtil { - fun getStandardAggFun(aggFunName: AggFun): (SummaryCalculator) -> Double { - return when (aggFunName) { - AggFun.NAN -> { calc -> calc.nan() } - AggFun.COUNT -> { calc -> calc.count() } - AggFun.SUM -> { calc -> calc.sum() } - AggFun.MEAN -> { calc -> calc.mean() } - AggFun.MEDIAN -> { calc -> calc.median() } - AggFun.MIN -> { calc -> calc.min() } - AggFun.MAX -> { calc -> calc.max() } - AggFun.Q1 -> { calc -> calc.q1() } - AggFun.Q3 -> { calc -> calc.q3() } - } - } - - fun getQuantileAggFun(p: Double): (SummaryCalculator) -> Double { + fun getQuantileAggFun(p: Double): (Calculator) -> Double { return { calc -> calc.quantile(p) } } - class SummaryCalculator(values: List) { + class Calculator(values: List) { private val 
sortedValues: List = Ordering.natural().sortedCopy(values) private var count: Double? = null @@ -132,15 +118,15 @@ object SummaryStatUtil { } } - enum class AggFun { - NAN, - COUNT, - SUM, - MEAN, - MEDIAN, - MIN, - MAX, - Q1, - Q3, + enum class AggFun(val aggFun: (Calculator) -> Double) { + NAN({ calc -> calc.nan() }), + COUNT({ calc -> calc.count() }), + SUM({ calc -> calc.sum() }), + MEAN({ calc -> calc.mean() }), + MEDIAN({ calc -> calc.median() }), + MIN({ calc -> calc.min() }), + MAX({ calc -> calc.max() }), + Q1({ calc -> calc.q1() }), + Q3({ calc -> calc.q3() }), } } \ No newline at end of file diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 4ee1e106940..172db0a84cb 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -9,6 +9,7 @@ import jetbrains.datalore.plot.base.Aes import jetbrains.datalore.plot.base.GeomKind import jetbrains.datalore.plot.base.Stat import jetbrains.datalore.plot.base.stat.* +import jetbrains.datalore.plot.config.Option.Mapping import jetbrains.datalore.plot.config.Option.Stat.Bin import jetbrains.datalore.plot.config.Option.Stat.Bin2d import jetbrains.datalore.plot.config.Option.Stat.Boxplot @@ -407,28 +408,27 @@ object StatProto { } private fun configureSummaryStat(options: OptionsAccessor): SummaryStat { - fun getAggFunction(opts: OptionsAccessor, option: String, default: String): (SummaryStatUtil.SummaryCalculator) -> Double { + fun getAggFunction(opts: OptionsAccessor, option: String, default: String): (SummaryStatUtil.Calculator) -> Double { return if (opts.isNumber(option)) { SummaryStatUtil.getQuantileAggFun(opts.getDouble(option)!!) 
} else { - val aggFunName = opts.getStringDef(option, default).let { + opts.getStringDef(option, default).let { when (it.lowercase()) { - "nan" -> SummaryStatUtil.AggFun.NAN - "count" -> SummaryStatUtil.AggFun.COUNT - "sum" -> SummaryStatUtil.AggFun.SUM - "mean" -> SummaryStatUtil.AggFun.MEAN - "median" -> SummaryStatUtil.AggFun.MEDIAN - "min" -> SummaryStatUtil.AggFun.MIN - "max" -> SummaryStatUtil.AggFun.MAX - "q1" -> SummaryStatUtil.AggFun.Q1 - "q3" -> SummaryStatUtil.AggFun.Q3 + "nan" -> SummaryStatUtil.AggFun.NAN.aggFun + "count" -> SummaryStatUtil.AggFun.COUNT.aggFun + "sum" -> SummaryStatUtil.AggFun.SUM.aggFun + "mean" -> SummaryStatUtil.AggFun.MEAN.aggFun + "median" -> SummaryStatUtil.AggFun.MEDIAN.aggFun + "min" -> SummaryStatUtil.AggFun.MIN.aggFun + "max" -> SummaryStatUtil.AggFun.MAX.aggFun + "q1" -> SummaryStatUtil.AggFun.Q1.aggFun + "q3" -> SummaryStatUtil.AggFun.Q3.aggFun else -> throw IllegalArgumentException( "Unsupported function name: '$it'\n" + "Use one of: nan, count, sum, mean, median, min, max, q1, q3." 
) } } - SummaryStatUtil.getStandardAggFun(aggFunName) } } @@ -438,7 +438,7 @@ object StatProto { Aes.YMAX to getAggFunction(options, Summary.FUN_MAX, SummaryStat.DEF_MAX_AGG_FUN) ) - val additionalAggFunctions: MutableMap, (SummaryStatUtil.SummaryCalculator) -> Double> = mutableMapOf() + val additionalAggFunctions: MutableMap, (SummaryStatUtil.Calculator) -> Double> = mutableMapOf() val funMap: Map = if (options.hasOwn(Summary.FUN_MAP)) { options.getMap(Summary.FUN_MAP) } else { @@ -446,7 +446,7 @@ object StatProto { } val funOptions = OptionsAccessor(funMap) for (aesName in funMap.keys) { - Aes.byName(aesName.lowercase())?.let { aes -> + Mapping.toAes(aesName.lowercase()).let { aes -> additionalAggFunctions[aes] = getAggFunction(funOptions, aesName, SummaryStat.DEF_AGG_FUN) } } From 6ce963b4e9937f3655f99298b8cf79b399f33061 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Wed, 21 Jun 2023 15:12:47 +0200 Subject: [PATCH 04/25] Remove extra enum class from the SummaryStatUtil. --- .../datalore/plot/base/stat/SummaryStat.kt | 2 +- .../plot/base/stat/SummaryStatUtil.kt | 111 ++++-------------- .../datalore/plot/config/StatProto.kt | 18 +-- 3 files changed, 33 insertions(+), 98 deletions(-) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index 230ce5950a7..ddfc937e145 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -52,7 +52,7 @@ class SummaryStat( } val statValues: Map, MutableList> = DEF_MAPPING.keys.associateWith { mutableListOf() } - val defaultAggFun = SummaryStatUtil.AggFun.NAN.aggFun + val defaultAggFun: (SummaryStatUtil.Calculator) -> Double = { calc -> calc.nan } for ((x, bin) in binnedData) { val calc = SummaryStatUtil.Calculator(bin) for (aes in statValues.keys) 
{ diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt index 08d3211d701..01388481a6f 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt @@ -18,88 +18,35 @@ object SummaryStatUtil { class Calculator(values: List) { private val sortedValues: List = Ordering.natural().sortedCopy(values) - private var count: Double? = null - private var sum: Double? = null - private var mean: Double? = null - private var median: Double? = null - private var min: Double? = null - private var max: Double? = null - private var q1: Double? = null - private var q3: Double? = null - - fun nan(): Double { - return Double.NaN - } - - fun count(): Double { - if (count == null) { - count = sortedValues.size.toDouble() - } - return count!! - } - - fun sum(): Double { - if (sum == null) { - sum = sortedValues.sum() - } - return sum!! - } - - fun mean(): Double { - if (mean == null) { - mean = if (sortedValues.isEmpty()) { - Double.NaN - } else if (sortedValues.size == 1) { - sortedValues.first() - } else { - sum() / sortedValues.size - } - } - return mean!! - } - - fun median(): Double { - if (median == null) { - median = quantile(0.5) - } - return median!! - } - - fun min(): Double { - if (min == null) { - min = if (sortedValues.isEmpty()) { - Double.NaN - } else { - sortedValues.first() - } - } - return min!! 
- } - - fun max(): Double { - if (max == null) { - max = if (sortedValues.isEmpty()) { - Double.NaN - } else { - sortedValues.last() - } + val nan = Double.NaN + val count by lazy { sortedValues.size.toDouble() } + val sum by lazy { sortedValues.sum() } + val mean by lazy { + if (sortedValues.isEmpty()) { + Double.NaN + } else if (sortedValues.size == 1) { + sortedValues.first() + } else { + sum / sortedValues.size } - return max!! } - - fun q1(): Double { - if (q1 == null) { - q1 = quantile(0.25) + val median by lazy { quantile(0.5) } + val min by lazy { + if (sortedValues.isEmpty()) { + Double.NaN + } else { + sortedValues.first() } - return q1!! } - - fun q3(): Double { - if (q3 == null) { - q3 = quantile(0.75) + val max by lazy { + if (sortedValues.isEmpty()) { + Double.NaN + } else { + sortedValues.last() } - return q3!! } + val q1 by lazy { quantile(0.25) } + val q3 by lazy { quantile(0.75) } fun quantile(p: Double): Double { if (sortedValues.isEmpty()) { @@ -117,16 +64,4 @@ object SummaryStatUtil { } } } - - enum class AggFun(val aggFun: (Calculator) -> Double) { - NAN({ calc -> calc.nan() }), - COUNT({ calc -> calc.count() }), - SUM({ calc -> calc.sum() }), - MEAN({ calc -> calc.mean() }), - MEDIAN({ calc -> calc.median() }), - MIN({ calc -> calc.min() }), - MAX({ calc -> calc.max() }), - Q1({ calc -> calc.q1() }), - Q3({ calc -> calc.q3() }), - } } \ No newline at end of file diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 172db0a84cb..957987b7c7f 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -414,15 +414,15 @@ object StatProto { } else { opts.getStringDef(option, default).let { when (it.lowercase()) { - "nan" -> SummaryStatUtil.AggFun.NAN.aggFun - "count" -> 
SummaryStatUtil.AggFun.COUNT.aggFun - "sum" -> SummaryStatUtil.AggFun.SUM.aggFun - "mean" -> SummaryStatUtil.AggFun.MEAN.aggFun - "median" -> SummaryStatUtil.AggFun.MEDIAN.aggFun - "min" -> SummaryStatUtil.AggFun.MIN.aggFun - "max" -> SummaryStatUtil.AggFun.MAX.aggFun - "q1" -> SummaryStatUtil.AggFun.Q1.aggFun - "q3" -> SummaryStatUtil.AggFun.Q3.aggFun + "nan" -> { calc -> calc.nan } + "count" -> { calc -> calc.count } + "sum" -> { calc -> calc.sum } + "mean" -> { calc -> calc.mean } + "median" -> { calc -> calc.median } + "min" -> { calc -> calc.min } + "max" -> { calc -> calc.max } + "q1" -> { calc -> calc.q1 } + "q3" -> { calc -> calc.q3 } else -> throw IllegalArgumentException( "Unsupported function name: '$it'\n" + "Use one of: nan, count, sum, mean, median, min, max, q1, q3." From b7a8f33fb936c2f02c73c297c3a7baca5f643692 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Wed, 21 Jun 2023 15:28:12 +0200 Subject: [PATCH 05/25] Remove SummaryStatUtil. --- .../plot/base/stat/SummaryCalculator.kt | 61 +++++++++++++++++ .../datalore/plot/base/stat/SummaryStat.kt | 6 +- .../plot/base/stat/SummaryStatUtil.kt | 67 ------------------- .../datalore/plot/config/StatProto.kt | 7 +- 4 files changed, 68 insertions(+), 73 deletions(-) create mode 100644 plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryCalculator.kt delete mode 100644 plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryCalculator.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryCalculator.kt new file mode 100644 index 00000000000..4cccd7f8277 --- /dev/null +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryCalculator.kt @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2023. JetBrains s.r.o. 
+ * Use of this source code is governed by the MIT license that can be found in the LICENSE file. + */ + +package jetbrains.datalore.plot.base.stat + +import jetbrains.datalore.base.gcommon.collect.Ordering +import kotlin.math.ceil +import kotlin.math.floor +import kotlin.math.round + +class SummaryCalculator(values: List) { + private val sortedValues: List = Ordering.natural().sortedCopy(values) + + val nan = Double.NaN + val count by lazy { sortedValues.size.toDouble() } + val sum by lazy { sortedValues.sum() } + val mean by lazy { + if (sortedValues.isEmpty()) { + Double.NaN + } else if (sortedValues.size == 1) { + sortedValues.first() + } else { + sum / sortedValues.size + } + } + val median by lazy { quantile(0.5) } + val min by lazy { + if (sortedValues.isEmpty()) { + Double.NaN + } else { + sortedValues.first() + } + } + val max by lazy { + if (sortedValues.isEmpty()) { + Double.NaN + } else { + sortedValues.last() + } + } + val q1 by lazy { quantile(0.25) } + val q3 by lazy { quantile(0.75) } + + fun quantile(p: Double): Double { + if (sortedValues.isEmpty()) { + return Double.NaN + } + if (sortedValues.size == 1) { + return sortedValues.first() + } + val place = p * (sortedValues.size - 1) + val i = round(place) + return if (place == i) { + sortedValues[place.toInt()] + } else { + (sortedValues[ceil(place).toInt()] + sortedValues[floor(place).toInt()]) / 2.0 + } + } +} \ No newline at end of file diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index ddfc937e145..08a0e4fc2ae 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -12,7 +12,7 @@ import jetbrains.datalore.plot.base.data.TransformVar import jetbrains.datalore.plot.common.data.SeriesUtil class SummaryStat( 
- private val aggFunctionsMap: Map, (SummaryStatUtil.Calculator) -> Double> + private val aggFunctionsMap: Map, (SummaryCalculator) -> Double> ) : BaseStat(DEF_MAPPING) { override fun consumes(): List> { @@ -52,9 +52,9 @@ class SummaryStat( } val statValues: Map, MutableList> = DEF_MAPPING.keys.associateWith { mutableListOf() } - val defaultAggFun: (SummaryStatUtil.Calculator) -> Double = { calc -> calc.nan } + val defaultAggFun: (SummaryCalculator) -> Double = { calc -> calc.nan } for ((x, bin) in binnedData) { - val calc = SummaryStatUtil.Calculator(bin) + val calc = SummaryCalculator(bin) for (aes in statValues.keys) { if (aes == Aes.X) { statValues[aes]!!.add(x) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt deleted file mode 100644 index 01388481a6f..00000000000 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2023. JetBrains s.r.o. - * Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
- */ - -package jetbrains.datalore.plot.base.stat - -import jetbrains.datalore.base.gcommon.collect.Ordering -import kotlin.math.ceil -import kotlin.math.floor -import kotlin.math.round - -object SummaryStatUtil { - fun getQuantileAggFun(p: Double): (Calculator) -> Double { - return { calc -> calc.quantile(p) } - } - - class Calculator(values: List) { - private val sortedValues: List = Ordering.natural().sortedCopy(values) - - val nan = Double.NaN - val count by lazy { sortedValues.size.toDouble() } - val sum by lazy { sortedValues.sum() } - val mean by lazy { - if (sortedValues.isEmpty()) { - Double.NaN - } else if (sortedValues.size == 1) { - sortedValues.first() - } else { - sum / sortedValues.size - } - } - val median by lazy { quantile(0.5) } - val min by lazy { - if (sortedValues.isEmpty()) { - Double.NaN - } else { - sortedValues.first() - } - } - val max by lazy { - if (sortedValues.isEmpty()) { - Double.NaN - } else { - sortedValues.last() - } - } - val q1 by lazy { quantile(0.25) } - val q3 by lazy { quantile(0.75) } - - fun quantile(p: Double): Double { - if (sortedValues.isEmpty()) { - return Double.NaN - } - if (sortedValues.size == 1) { - return sortedValues.first() - } - val place = p * (sortedValues.size - 1) - val i = round(place) - return if (place == i) { - sortedValues[place.toInt()] - } else { - (sortedValues[ceil(place).toInt()] + sortedValues[floor(place).toInt()]) / 2.0 - } - } - } -} \ No newline at end of file diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 957987b7c7f..d497270e2fd 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -408,9 +408,10 @@ object StatProto { } private fun configureSummaryStat(options: OptionsAccessor): SummaryStat { - fun 
getAggFunction(opts: OptionsAccessor, option: String, default: String): (SummaryStatUtil.Calculator) -> Double { + fun getAggFunction(opts: OptionsAccessor, option: String, default: String): (SummaryCalculator) -> Double { return if (opts.isNumber(option)) { - SummaryStatUtil.getQuantileAggFun(opts.getDouble(option)!!) + val p = opts.getDouble(option)!! + { calc -> calc.quantile(p) } } else { opts.getStringDef(option, default).let { when (it.lowercase()) { @@ -438,7 +439,7 @@ object StatProto { Aes.YMAX to getAggFunction(options, Summary.FUN_MAX, SummaryStat.DEF_MAX_AGG_FUN) ) - val additionalAggFunctions: MutableMap, (SummaryStatUtil.Calculator) -> Double> = mutableMapOf() + val additionalAggFunctions: MutableMap, (SummaryCalculator) -> Double> = mutableMapOf() val funMap: Map = if (options.hasOwn(Summary.FUN_MAP)) { options.getMap(Summary.FUN_MAP) } else { From 05345eed93dc0ad596ce5ca93fefb3e46a38e499 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Wed, 21 Jun 2023 16:04:19 +0200 Subject: [PATCH 06/25] Use references instead of lambdas for the SummaryCalculator. 
--- .../datalore/plot/base/stat/SummaryStat.kt | 3 +-- .../datalore/plot/config/StatProto.kt | 18 +++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index 08a0e4fc2ae..d0a2e4b0946 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -52,14 +52,13 @@ class SummaryStat( } val statValues: Map, MutableList> = DEF_MAPPING.keys.associateWith { mutableListOf() } - val defaultAggFun: (SummaryCalculator) -> Double = { calc -> calc.nan } for ((x, bin) in binnedData) { val calc = SummaryCalculator(bin) for (aes in statValues.keys) { if (aes == Aes.X) { statValues[aes]!!.add(x) } else { - statValues[aes]!!.add(aggFunctionsMap.getOrElse(aes) { defaultAggFun }(calc)) + statValues[aes]!!.add(aggFunctionsMap.getOrElse(aes) { SummaryCalculator::nan }(calc)) } } } diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index d497270e2fd..2d81638518d 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -415,15 +415,15 @@ object StatProto { } else { opts.getStringDef(option, default).let { when (it.lowercase()) { - "nan" -> { calc -> calc.nan } - "count" -> { calc -> calc.count } - "sum" -> { calc -> calc.sum } - "mean" -> { calc -> calc.mean } - "median" -> { calc -> calc.median } - "min" -> { calc -> calc.min } - "max" -> { calc -> calc.max } - "q1" -> { calc -> calc.q1 } - "q3" -> { calc -> calc.q3 } + "nan" -> SummaryCalculator::nan + "count" -> 
SummaryCalculator::count + "sum" -> SummaryCalculator::sum + "mean" -> SummaryCalculator::mean + "median" -> SummaryCalculator::median + "min" -> SummaryCalculator::min + "max" -> SummaryCalculator::max + "q1" -> SummaryCalculator::q1 + "q3" -> SummaryCalculator::q3 else -> throw IllegalArgumentException( "Unsupported function name: '$it'\n" + "Use one of: nan, count, sum, mean, median, min, max, q1, q3." From 756b626a054b991a09ca4231c15ceacf805f8bd9 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Wed, 21 Jun 2023 16:58:59 +0200 Subject: [PATCH 07/25] Replace SummaryCalculator by the SummaryStatUtil. --- .../plot/base/stat/SummaryCalculator.kt | 61 --------------- .../datalore/plot/base/stat/SummaryStat.kt | 8 +- .../plot/base/stat/SummaryStatUtil.kt | 78 +++++++++++++++++++ .../datalore/plot/config/StatProto.kt | 24 +++--- 4 files changed, 95 insertions(+), 76 deletions(-) delete mode 100644 plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryCalculator.kt create mode 100644 plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryCalculator.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryCalculator.kt deleted file mode 100644 index 4cccd7f8277..00000000000 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryCalculator.kt +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2023. JetBrains s.r.o. - * Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
- */ - -package jetbrains.datalore.plot.base.stat - -import jetbrains.datalore.base.gcommon.collect.Ordering -import kotlin.math.ceil -import kotlin.math.floor -import kotlin.math.round - -class SummaryCalculator(values: List) { - private val sortedValues: List = Ordering.natural().sortedCopy(values) - - val nan = Double.NaN - val count by lazy { sortedValues.size.toDouble() } - val sum by lazy { sortedValues.sum() } - val mean by lazy { - if (sortedValues.isEmpty()) { - Double.NaN - } else if (sortedValues.size == 1) { - sortedValues.first() - } else { - sum / sortedValues.size - } - } - val median by lazy { quantile(0.5) } - val min by lazy { - if (sortedValues.isEmpty()) { - Double.NaN - } else { - sortedValues.first() - } - } - val max by lazy { - if (sortedValues.isEmpty()) { - Double.NaN - } else { - sortedValues.last() - } - } - val q1 by lazy { quantile(0.25) } - val q3 by lazy { quantile(0.75) } - - fun quantile(p: Double): Double { - if (sortedValues.isEmpty()) { - return Double.NaN - } - if (sortedValues.size == 1) { - return sortedValues.first() - } - val place = p * (sortedValues.size - 1) - val i = round(place) - return if (place == i) { - sortedValues[place.toInt()] - } else { - (sortedValues[ceil(place).toInt()] + sortedValues[floor(place).toInt()]) / 2.0 - } - } -} \ No newline at end of file diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index d0a2e4b0946..2c60d8cc721 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -5,6 +5,7 @@ package jetbrains.datalore.plot.base.stat +import jetbrains.datalore.base.gcommon.collect.Ordering import jetbrains.datalore.plot.base.Aes import jetbrains.datalore.plot.base.DataFrame import 
jetbrains.datalore.plot.base.StatContext @@ -12,7 +13,7 @@ import jetbrains.datalore.plot.base.data.TransformVar import jetbrains.datalore.plot.common.data.SeriesUtil class SummaryStat( - private val aggFunctionsMap: Map, (SummaryCalculator) -> Double> + private val aggFunctionsMap: Map, (List) -> Double> ) : BaseStat(DEF_MAPPING) { override fun consumes(): List> { @@ -53,12 +54,13 @@ class SummaryStat( val statValues: Map, MutableList> = DEF_MAPPING.keys.associateWith { mutableListOf() } for ((x, bin) in binnedData) { - val calc = SummaryCalculator(bin) + val sortedBin = Ordering.natural().sortedCopy(bin) for (aes in statValues.keys) { if (aes == Aes.X) { statValues[aes]!!.add(x) } else { - statValues[aes]!!.add(aggFunctionsMap.getOrElse(aes) { SummaryCalculator::nan }(calc)) + val aggFunction = aggFunctionsMap.getOrElse(aes) { SummaryStatUtil.nan } + statValues[aes]!!.add(aggFunction(sortedBin)) } } } diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt new file mode 100644 index 00000000000..3b99927a2f2 --- /dev/null +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2023. JetBrains s.r.o. + * Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
+ */ + +package jetbrains.datalore.plot.base.stat + +import kotlin.math.ceil +import kotlin.math.floor +import kotlin.math.round + +object SummaryStatUtil { + val nan: (List) -> Double = { Double.NaN } + + fun count(sortedValues: List): Double { + return sortedValues.size.toDouble() + } + + fun sum(sortedValues: List): Double { + return sortedValues.sum() + } + + fun mean(sortedValues: List): Double { + return if (sortedValues.isEmpty()) { + Double.NaN + } else if (sortedValues.size == 1) { + sortedValues.first() + } else { + sum(sortedValues) / sortedValues.size + } + } + + fun median(sortedValues: List): Double { + return quantile(0.5)(sortedValues) + } + + fun min(sortedValues: List): Double { + return if (sortedValues.isEmpty()) { + Double.NaN + } else { + sortedValues.first() + } + } + + fun max(sortedValues: List): Double { + return if (sortedValues.isEmpty()) { + Double.NaN + } else { + sortedValues.last() + } + } + + fun q1(sortedValues: List): Double { + return quantile(0.25)(sortedValues) + } + + fun q3(sortedValues: List): Double { + return quantile(0.75)(sortedValues) + } + + fun quantile(p: Double): (List) -> Double { + return { sortedValues -> + if (sortedValues.isEmpty()) { + Double.NaN + } else if (sortedValues.size == 1) { + sortedValues.first() + } else { + val place = p * (sortedValues.size - 1) + val i = round(place) + if (place == i) { + sortedValues[place.toInt()] + } else { + (sortedValues[ceil(place).toInt()] + sortedValues[floor(place).toInt()]) / 2.0 + } + } + } + } +} \ No newline at end of file diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 2d81638518d..895f26d9611 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -408,22 +408,22 @@ object StatProto 
{ } private fun configureSummaryStat(options: OptionsAccessor): SummaryStat { - fun getAggFunction(opts: OptionsAccessor, option: String, default: String): (SummaryCalculator) -> Double { + fun getAggFunction(opts: OptionsAccessor, option: String, default: String): (List) -> Double { return if (opts.isNumber(option)) { val p = opts.getDouble(option)!! - { calc -> calc.quantile(p) } + SummaryStatUtil.quantile(p) } else { opts.getStringDef(option, default).let { when (it.lowercase()) { - "nan" -> SummaryCalculator::nan - "count" -> SummaryCalculator::count - "sum" -> SummaryCalculator::sum - "mean" -> SummaryCalculator::mean - "median" -> SummaryCalculator::median - "min" -> SummaryCalculator::min - "max" -> SummaryCalculator::max - "q1" -> SummaryCalculator::q1 - "q3" -> SummaryCalculator::q3 + "nan" -> SummaryStatUtil.nan + "count" -> SummaryStatUtil::count + "sum" -> SummaryStatUtil::sum + "mean" -> SummaryStatUtil::mean + "median" -> SummaryStatUtil::median + "min" -> SummaryStatUtil::min + "max" -> SummaryStatUtil::max + "q1" -> SummaryStatUtil::q1 + "q3" -> SummaryStatUtil::q3 else -> throw IllegalArgumentException( "Unsupported function name: '$it'\n" + "Use one of: nan, count, sum, mean, median, min, max, q1, q3." @@ -439,7 +439,7 @@ object StatProto { Aes.YMAX to getAggFunction(options, Summary.FUN_MAX, SummaryStat.DEF_MAX_AGG_FUN) ) - val additionalAggFunctions: MutableMap, (SummaryCalculator) -> Double> = mutableMapOf() + val additionalAggFunctions: MutableMap, (List) -> Double> = mutableMapOf() val funMap: Map = if (options.hasOwn(Summary.FUN_MAP)) { options.getMap(Summary.FUN_MAP) } else { From 3b2bca7c82d2edf68f309da299f0efa0bf3606b4 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Thu, 22 Jun 2023 11:39:26 +0200 Subject: [PATCH 08/25] Refactor functions in SummaryStatUtil. 
--- .../datalore/plot/base/stat/SummaryStat.kt | 2 +- .../plot/base/stat/SummaryStatUtil.kt | 44 +++++++------------ .../datalore/plot/config/StatProto.kt | 4 +- 3 files changed, 18 insertions(+), 32 deletions(-) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index 2c60d8cc721..0e3df5a1b4a 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -59,7 +59,7 @@ class SummaryStat( if (aes == Aes.X) { statValues[aes]!!.add(x) } else { - val aggFunction = aggFunctionsMap.getOrElse(aes) { SummaryStatUtil.nan } + val aggFunction = aggFunctionsMap.getOrElse(aes) { SummaryStatUtil::nan } statValues[aes]!!.add(aggFunction(sortedBin)) } } diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt index 3b99927a2f2..61b7b7aee13 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt @@ -10,29 +10,21 @@ import kotlin.math.floor import kotlin.math.round object SummaryStatUtil { - val nan: (List) -> Double = { Double.NaN } + fun nan(sortedValues: List): Double = Double.NaN - fun count(sortedValues: List): Double { - return sortedValues.size.toDouble() - } + fun count(sortedValues: List): Double = sortedValues.size.toDouble() - fun sum(sortedValues: List): Double { - return sortedValues.sum() - } + fun sum(sortedValues: List): Double = sortedValues.sum() fun mean(sortedValues: List): Double { - return if (sortedValues.isEmpty()) { - Double.NaN - } else if (sortedValues.size == 1) { - 
sortedValues.first() - } else { - sum(sortedValues) / sortedValues.size + return when (sortedValues.size) { + 0 -> Double.NaN + 1 -> sortedValues.first() + else -> sum(sortedValues) / sortedValues.size } } - fun median(sortedValues: List): Double { - return quantile(0.5)(sortedValues) - } + fun median(sortedValues: List): Double = quantile(sortedValues, 0.5) fun min(sortedValues: List): Double { return if (sortedValues.isEmpty()) { @@ -50,21 +42,15 @@ object SummaryStatUtil { } } - fun q1(sortedValues: List): Double { - return quantile(0.25)(sortedValues) - } + fun q1(sortedValues: List): Double = quantile(sortedValues, 0.25) - fun q3(sortedValues: List): Double { - return quantile(0.75)(sortedValues) - } + fun q3(sortedValues: List): Double = quantile(sortedValues, 0.75) - fun quantile(p: Double): (List) -> Double { - return { sortedValues -> - if (sortedValues.isEmpty()) { - Double.NaN - } else if (sortedValues.size == 1) { - sortedValues.first() - } else { + fun quantile(sortedValues: List, p: Double): Double { + return when (sortedValues.size) { + 0 -> Double.NaN + 1 -> sortedValues.first() + else -> { val place = p * (sortedValues.size - 1) val i = round(place) if (place == i) { diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 895f26d9611..80c12eb723b 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -411,11 +411,11 @@ object StatProto { fun getAggFunction(opts: OptionsAccessor, option: String, default: String): (List) -> Double { return if (opts.isNumber(option)) { val p = opts.getDouble(option)!! 
- SummaryStatUtil.quantile(p) + { values -> SummaryStatUtil.quantile(values, p) } } else { opts.getStringDef(option, default).let { when (it.lowercase()) { - "nan" -> SummaryStatUtil.nan + "nan" -> SummaryStatUtil::nan "count" -> SummaryStatUtil::count "sum" -> SummaryStatUtil::sum "mean" -> SummaryStatUtil::mean From 1f03da4efc2cbab68168275f324fa24fb64d961f Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Thu, 22 Jun 2023 13:45:15 +0200 Subject: [PATCH 09/25] Refactor summary stat options in StatProto. --- .../datalore/plot/base/stat/SummaryStat.kt | 7 +- .../{SummaryStatUtil.kt => SummaryUtil.kt} | 2 +- .../datalore/plot/config/OptionsAccessor.kt | 4 - .../datalore/plot/config/StatProto.kt | 77 +++++++++++-------- 4 files changed, 45 insertions(+), 45 deletions(-) rename plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/{SummaryStatUtil.kt => SummaryUtil.kt} (98%) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index 0e3df5a1b4a..224dc6f0a3b 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -59,7 +59,7 @@ class SummaryStat( if (aes == Aes.X) { statValues[aes]!!.add(x) } else { - val aggFunction = aggFunctionsMap.getOrElse(aes) { SummaryStatUtil::nan } + val aggFunction = aggFunctionsMap.getOrElse(aes) { SummaryUtil::nan } statValues[aes]!!.add(aggFunction(sortedBin)) } } @@ -69,11 +69,6 @@ class SummaryStat( } companion object { - const val DEF_Y_AGG_FUN = "mean" - const val DEF_MIN_AGG_FUN = "min" - const val DEF_MAX_AGG_FUN = "max" - const val DEF_AGG_FUN = "nan" - private val DEF_MAPPING: Map, DataFrame.Variable> = mapOf( Aes.X to Stats.X, Aes.Y to Stats.Y, diff --git 
a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt similarity index 98% rename from plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt rename to plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt index 61b7b7aee13..f5c8371f84e 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStatUtil.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt @@ -9,7 +9,7 @@ import kotlin.math.ceil import kotlin.math.floor import kotlin.math.round -object SummaryStatUtil { +object SummaryUtil { fun nan(sortedValues: List): Double = Double.NaN fun count(sortedValues: List): Double = sortedValues.size.toDouble() diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/OptionsAccessor.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/OptionsAccessor.kt index 721ac21f95c..8367af49658 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/OptionsAccessor.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/OptionsAccessor.kt @@ -56,10 +56,6 @@ open class OptionsAccessor( return get(option)?.toString() } - fun getStringDef(option: String, def: String): String { - return getString(option) ?: def - } - fun getStringSafe(option: String): String { return getString(option) ?: throw IllegalArgumentException("Can't get string value: option '$option' is not present.") diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 80c12eb723b..78fde513a9f 100644 --- 
a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -408,50 +408,59 @@ object StatProto { } private fun configureSummaryStat(options: OptionsAccessor): SummaryStat { - fun getAggFunction(opts: OptionsAccessor, option: String, default: String): (List) -> Double { - return if (opts.isNumber(option)) { - val p = opts.getDouble(option)!! - { values -> SummaryStatUtil.quantile(values, p) } - } else { - opts.getStringDef(option, default).let { - when (it.lowercase()) { - "nan" -> SummaryStatUtil::nan - "count" -> SummaryStatUtil::count - "sum" -> SummaryStatUtil::sum - "mean" -> SummaryStatUtil::mean - "median" -> SummaryStatUtil::median - "min" -> SummaryStatUtil::min - "max" -> SummaryStatUtil::max - "q1" -> SummaryStatUtil::q1 - "q3" -> SummaryStatUtil::q3 - else -> throw IllegalArgumentException( - "Unsupported function name: '$it'\n" + - "Use one of: nan, count, sum, mean, median, min, max, q1, q3." 
- ) - } - } - } - } - val standardAggFunctions = mapOf( - Aes.Y to getAggFunction(options, Summary.FUN, SummaryStat.DEF_Y_AGG_FUN), - Aes.YMIN to getAggFunction(options, Summary.FUN_MIN, SummaryStat.DEF_MIN_AGG_FUN), - Aes.YMAX to getAggFunction(options, Summary.FUN_MAX, SummaryStat.DEF_MAX_AGG_FUN) + Aes.Y to getAggFunction(options, Summary.FUN, SummaryUtil::mean), + Aes.YMIN to getAggFunction(options, Summary.FUN_MIN, SummaryUtil::min), + Aes.YMAX to getAggFunction(options, Summary.FUN_MAX, SummaryUtil::max) ) - val additionalAggFunctions: MutableMap, (List) -> Double> = mutableMapOf() - val funMap: Map = if (options.hasOwn(Summary.FUN_MAP)) { + val additionalAggFunctions = if (options.hasOwn(Summary.FUN_MAP)) { options.getMap(Summary.FUN_MAP) } else { emptyMap() + }.let { funMap -> + configureFunMap(OptionsAccessor(funMap)) } - val funOptions = OptionsAccessor(funMap) - for (aesName in funMap.keys) { + + return SummaryStat(standardAggFunctions + additionalAggFunctions) + } + + private fun configureFunMap(options: OptionsAccessor): Map, (List) -> Double> { + val additionalAggFunctions: MutableMap, (List) -> Double> = mutableMapOf() + for (aesName in options.toMap().keys) { Mapping.toAes(aesName.lowercase()).let { aes -> - additionalAggFunctions[aes] = getAggFunction(funOptions, aesName, SummaryStat.DEF_AGG_FUN) + additionalAggFunctions[aes] = getAggFunction(options, aesName, SummaryUtil::nan) } } + return additionalAggFunctions + } - return SummaryStat(standardAggFunctions + additionalAggFunctions) + private fun getAggFunction( + options: OptionsAccessor, + option: String, + defaultAggFunction: (List) -> Double + ): (List) -> Double { + return if (options.isNumber(option)) { + val p = options.getDouble(option)!! 
+ { values -> SummaryUtil.quantile(values, p) } + } else { + options.getString(option)?.let { + when (it.lowercase()) { + "nan" -> SummaryUtil::nan + "count" -> SummaryUtil::count + "sum" -> SummaryUtil::sum + "mean" -> SummaryUtil::mean + "median" -> SummaryUtil::median + "min" -> SummaryUtil::min + "max" -> SummaryUtil::max + "q1" -> SummaryUtil::q1 + "q3" -> SummaryUtil::q3 + else -> throw IllegalArgumentException( + "Unsupported function name: '$it'\n" + + "Use one of: nan, count, sum, mean, median, min, max, q1, q3." + ) + } + } ?: defaultAggFunction + } } } From baccb495387d908df1d8719e62bf7df1c49595f1 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Thu, 22 Jun 2023 14:29:05 +0200 Subject: [PATCH 10/25] Fix statData emptiness case in the SummaryStat. --- .../kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index 224dc6f0a3b..1118d27241e 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -33,6 +33,9 @@ class SummaryStat( } val statData = buildStat(xs, ys) + if (statData.isEmpty()) { + return withEmptyStatValues() + } val builder = DataFrame.Builder() for ((variable, series) in statData) { From c9e154cd94e7b074463df866d194138a08fc26ac Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Thu, 22 Jun 2023 18:21:11 +0200 Subject: [PATCH 11/25] Further code refactoring. 
--- .../datalore/plot/base/stat/SummaryStat.kt | 21 +++-- .../datalore/plot/base/stat/SummaryUtil.kt | 16 +--- .../datalore/plot/config/StatProto.kt | 79 +++++++++---------- 3 files changed, 48 insertions(+), 68 deletions(-) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index 1118d27241e..e6d733feb3c 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -51,29 +51,27 @@ class SummaryStat( val binnedData = SeriesUtil.filterFinite(xs, ys) .let { (xs, ys) -> xs zip ys } .groupBy(keySelector = { it.first }, valueTransform = { it.second }) + if (binnedData.isEmpty()) { return emptyMap() } - val statValues: Map, MutableList> = DEF_MAPPING.keys.associateWith { mutableListOf() } + val statX = ArrayList() + val statValues: Map, MutableList> = AGG_MAPPING.keys.associateWith { mutableListOf() } for ((x, bin) in binnedData) { + statX.add(x) val sortedBin = Ordering.natural().sortedCopy(bin) - for (aes in statValues.keys) { - if (aes == Aes.X) { - statValues[aes]!!.add(x) - } else { - val aggFunction = aggFunctionsMap.getOrElse(aes) { SummaryUtil::nan } - statValues[aes]!!.add(aggFunction(sortedBin)) - } + for ((aes, values) in statValues) { + val aggFunction = aggFunctionsMap[aes] ?: SummaryUtil::nan + values.add(aggFunction(sortedBin)) } } - return statValues.map { (aes, values) -> Pair(DEF_MAPPING[aes]!!, values) }.toMap() + return mapOf(Stats.X to statX) + statValues.map { (aes, values) -> Pair(AGG_MAPPING[aes]!!, values) }.toMap() } companion object { - private val DEF_MAPPING: Map, DataFrame.Variable> = mapOf( - Aes.X to Stats.X, + private val AGG_MAPPING: Map, DataFrame.Variable> = mapOf( Aes.Y to Stats.Y, Aes.YMIN to Stats.Y_MIN, Aes.YMAX to Stats.Y_MAX, @@ -81,5 +79,6 @@ class 
SummaryStat( Aes.LOWER to Stats.LOWER, Aes.UPPER to Stats.UPPER, ) + private val DEF_MAPPING: Map, DataFrame.Variable> = mapOf(Aes.X to Stats.X) + AGG_MAPPING } } \ No newline at end of file diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt index f5c8371f84e..4d3366e02f1 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt @@ -26,21 +26,9 @@ object SummaryUtil { fun median(sortedValues: List): Double = quantile(sortedValues, 0.5) - fun min(sortedValues: List): Double { - return if (sortedValues.isEmpty()) { - Double.NaN - } else { - sortedValues.first() - } - } + fun min(sortedValues: List): Double = sortedValues.firstOrNull() ?: Double.NaN - fun max(sortedValues: List): Double { - return if (sortedValues.isEmpty()) { - Double.NaN - } else { - sortedValues.last() - } - } + fun max(sortedValues: List): Double = sortedValues.lastOrNull() ?: Double.NaN fun q1(sortedValues: List): Double = quantile(sortedValues, 0.25) diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 78fde513a9f..1f251ddf300 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -408,59 +408,52 @@ object StatProto { } private fun configureSummaryStat(options: OptionsAccessor): SummaryStat { - val standardAggFunctions = mapOf( - Aes.Y to getAggFunction(options, Summary.FUN, SummaryUtil::mean), - Aes.YMIN to getAggFunction(options, Summary.FUN_MIN, SummaryUtil::min), - Aes.YMAX to getAggFunction(options, Summary.FUN_MAX, SummaryUtil::max) 
+ val defaultAggFunctions = mapOf( + Aes.Y to (getAggFunction(options, Summary.FUN) ?: SummaryUtil::mean), + Aes.YMIN to (getAggFunction(options, Summary.FUN_MIN) ?: SummaryUtil::min), + Aes.YMAX to (getAggFunction(options, Summary.FUN_MAX) ?: SummaryUtil::max) ) - val additionalAggFunctions = if (options.hasOwn(Summary.FUN_MAP)) { - options.getMap(Summary.FUN_MAP) - } else { - emptyMap() - }.let { funMap -> - configureFunMap(OptionsAccessor(funMap)) - } + val additionalAggFunctions = configureAggFunMap(options.getMap(Summary.FUN_MAP)) - return SummaryStat(standardAggFunctions + additionalAggFunctions) + return SummaryStat(defaultAggFunctions + additionalAggFunctions) } - private fun configureFunMap(options: OptionsAccessor): Map, (List) -> Double> { - val additionalAggFunctions: MutableMap, (List) -> Double> = mutableMapOf() - for (aesName in options.toMap().keys) { - Mapping.toAes(aesName.lowercase()).let { aes -> - additionalAggFunctions[aes] = getAggFunction(options, aesName, SummaryUtil::nan) - } + private fun configureAggFunMap(aggFunMap: Map): Map, (List) -> Double> { + val aggFunOptions = OptionsAccessor(aggFunMap) + return aggFunMap.keys.associate { aesName -> + Pair( + Mapping.toAes(aesName), + getAggFunction(aggFunOptions, aesName) ?: SummaryUtil::nan + ) } - return additionalAggFunctions } private fun getAggFunction( options: OptionsAccessor, - option: String, - defaultAggFunction: (List) -> Double - ): (List) -> Double { - return if (options.isNumber(option)) { - val p = options.getDouble(option)!! 
- { values -> SummaryUtil.quantile(values, p) } - } else { - options.getString(option)?.let { - when (it.lowercase()) { - "nan" -> SummaryUtil::nan - "count" -> SummaryUtil::count - "sum" -> SummaryUtil::sum - "mean" -> SummaryUtil::mean - "median" -> SummaryUtil::median - "min" -> SummaryUtil::min - "max" -> SummaryUtil::max - "q1" -> SummaryUtil::q1 - "q3" -> SummaryUtil::q3 - else -> throw IllegalArgumentException( - "Unsupported function name: '$it'\n" + - "Use one of: nan, count, sum, mean, median, min, max, q1, q3." - ) - } - } ?: defaultAggFunction + option: String + ): ((List) -> Double)? { + if (options.isNumber(option)) { + return options.getDouble(option)?.let { p: Double -> + { values: List -> SummaryUtil.quantile(values, p) } + } + } + return options.getString(option)?.let { + when (it.lowercase()) { + "nan" -> SummaryUtil::nan + "count" -> SummaryUtil::count + "sum" -> SummaryUtil::sum + "mean" -> SummaryUtil::mean + "median" -> SummaryUtil::median + "min" -> SummaryUtil::min + "max" -> SummaryUtil::max + "q1" -> SummaryUtil::q1 + "q3" -> SummaryUtil::q3 + else -> throw IllegalArgumentException( + "Unsupported function name: '$it'\n" + + "Use one of: nan, count, sum, mean, median, min, max, q1, q3." + ) + } } } } From 0082f7429933547ea45cd87f32a7586fc7c22355 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Thu, 22 Jun 2023 19:00:28 +0200 Subject: [PATCH 12/25] Small fixes. 
--- .../datalore/plot/base/stat/SummaryStat.kt | 8 +++--- .../datalore/plot/base/stat/SummaryUtil.kt | 26 +++++++------------ .../datalore/plot/config/StatProto.kt | 7 ++--- 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index e6d733feb3c..ab42cf4580e 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -57,17 +57,17 @@ class SummaryStat( } val statX = ArrayList() - val statValues: Map, MutableList> = AGG_MAPPING.keys.associateWith { mutableListOf() } + val statAggValues: Map, MutableList> = AGG_MAPPING.keys.associateWith { mutableListOf() } for ((x, bin) in binnedData) { statX.add(x) val sortedBin = Ordering.natural().sortedCopy(bin) - for ((aes, values) in statValues) { + for ((aes, aggValues) in statAggValues) { val aggFunction = aggFunctionsMap[aes] ?: SummaryUtil::nan - values.add(aggFunction(sortedBin)) + aggValues.add(aggFunction(sortedBin)) } } - return mapOf(Stats.X to statX) + statValues.map { (aes, values) -> Pair(AGG_MAPPING[aes]!!, values) }.toMap() + return mapOf(Stats.X to statX) + statAggValues.map { (aes, aggValues) -> Pair(AGG_MAPPING[aes]!!, aggValues) }.toMap() } companion object { diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt index 4d3366e02f1..b4e39ae8326 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt @@ -19,8 +19,7 @@ object SummaryUtil { fun mean(sortedValues: List): Double { return when 
(sortedValues.size) { 0 -> Double.NaN - 1 -> sortedValues.first() - else -> sum(sortedValues) / sortedValues.size + else -> sum(sortedValues) / count(sortedValues) } } @@ -30,23 +29,18 @@ object SummaryUtil { fun max(sortedValues: List): Double = sortedValues.lastOrNull() ?: Double.NaN - fun q1(sortedValues: List): Double = quantile(sortedValues, 0.25) + fun firstQuartile(sortedValues: List): Double = quantile(sortedValues, 0.25) - fun q3(sortedValues: List): Double = quantile(sortedValues, 0.75) + fun thirdQuartile(sortedValues: List): Double = quantile(sortedValues, 0.75) fun quantile(sortedValues: List, p: Double): Double { - return when (sortedValues.size) { - 0 -> Double.NaN - 1 -> sortedValues.first() - else -> { - val place = p * (sortedValues.size - 1) - val i = round(place) - if (place == i) { - sortedValues[place.toInt()] - } else { - (sortedValues[ceil(place).toInt()] + sortedValues[floor(place).toInt()]) / 2.0 - } - } + if (sortedValues.isEmpty()) { + return Double.NaN + } + val place = p * (sortedValues.size - 1) + return when (round(place)) { + place -> sortedValues[place.toInt()] + else -> (sortedValues[ceil(place).toInt()] + sortedValues[floor(place).toInt()]) / 2.0 } } } \ No newline at end of file diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 1f251ddf300..4ba2d4fc21b 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -447,11 +447,12 @@ object StatProto { "median" -> SummaryUtil::median "min" -> SummaryUtil::min "max" -> SummaryUtil::max - "q1" -> SummaryUtil::q1 - "q3" -> SummaryUtil::q3 + "q1" -> SummaryUtil::firstQuartile + "q2" -> SummaryUtil::median + "q3" -> SummaryUtil::thirdQuartile else -> throw IllegalArgumentException( "Unsupported function name: 
'$it'\n" + - "Use one of: nan, count, sum, mean, median, min, max, q1, q3." + "Use one of: nan, count, sum, mean, median, min, max, q1, q2, q3." ) } } From cbabbe3ee888525936080bd3591addfff4038c33 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Fri, 23 Jun 2023 16:56:36 +0200 Subject: [PATCH 13/25] Add new stat variables and use them in the SummaryStat. --- .../datalore/plot/base/stat/Stats.kt | 10 +++++++ .../datalore/plot/base/stat/SummaryStat.kt | 19 +++++------- .../datalore/plot/config/StatProto.kt | 17 +++++------ .../plotDemo/model/plotConfig/Summary.kt | 3 +- python-package/lets_plot/plot/stat.py | 30 ++++++++++++++++--- 5 files changed, 54 insertions(+), 25 deletions(-) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/Stats.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/Stats.kt index d27c5461abb..ac455294206 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/Stats.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/Stats.kt @@ -23,7 +23,12 @@ object Stats { val THEORETICAL = DataFrame.Variable("..theoretical..", STAT, "theoretical") val SE = DataFrame.Variable("..se..", STAT, "standard error") val LEVEL = DataFrame.Variable("..level..", STAT, "level") + val MEAN = DataFrame.Variable("..mean..", STAT, "mean") + val MEDIAN = DataFrame.Variable("..median..", STAT, "median") val QUANTILE = DataFrame.Variable("..quantile..", STAT, "quantile") + val QUANTILE_A = DataFrame.Variable("..qa..", STAT, "quantile a") + val QUANTILE_B = DataFrame.Variable("..qb..", STAT, "quantile b") + val QUANTILE_C = DataFrame.Variable("..qc..", STAT, "quantile c") val LOWER = DataFrame.Variable("..lower..", STAT, "lower") val MIDDLE = DataFrame.Variable("..middle..", STAT, "middle") val UPPER = DataFrame.Variable("..upper..", STAT, "upper") @@ -53,7 +58,12 @@ object Stats { THEORETICAL, SE, LEVEL, + MEAN, + MEDIAN, QUANTILE, + QUANTILE_A, + 
QUANTILE_B, + QUANTILE_C, LOWER, MIDDLE, UPPER, diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index ab42cf4580e..1379570419d 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -13,7 +13,7 @@ import jetbrains.datalore.plot.base.data.TransformVar import jetbrains.datalore.plot.common.data.SeriesUtil class SummaryStat( - private val aggFunctionsMap: Map, (List) -> Double> + private val aggFunctionsMap: Map) -> Double> ) : BaseStat(DEF_MAPPING) { override fun consumes(): List> { @@ -57,28 +57,25 @@ class SummaryStat( } val statX = ArrayList() - val statAggValues: Map, MutableList> = AGG_MAPPING.keys.associateWith { mutableListOf() } + val statAggValues: Map> = aggFunctionsMap.keys.associateWith { mutableListOf() } for ((x, bin) in binnedData) { statX.add(x) val sortedBin = Ordering.natural().sortedCopy(bin) - for ((aes, aggValues) in statAggValues) { - val aggFunction = aggFunctionsMap[aes] ?: SummaryUtil::nan + for ((statVar, aggValues) in statAggValues) { + val aggFunction = aggFunctionsMap[statVar] ?: SummaryUtil::nan aggValues.add(aggFunction(sortedBin)) } } - return mapOf(Stats.X to statX) + statAggValues.map { (aes, aggValues) -> Pair(AGG_MAPPING[aes]!!, aggValues) }.toMap() + return mapOf(Stats.X to statX) + statAggValues } companion object { - private val AGG_MAPPING: Map, DataFrame.Variable> = mapOf( + private val DEF_MAPPING: Map, DataFrame.Variable> = mapOf( + Aes.X to Stats.X, Aes.Y to Stats.Y, Aes.YMIN to Stats.Y_MIN, - Aes.YMAX to Stats.Y_MAX, - Aes.MIDDLE to Stats.MIDDLE, - Aes.LOWER to Stats.LOWER, - Aes.UPPER to Stats.UPPER, + Aes.YMAX to Stats.Y_MAX ) - private val DEF_MAPPING: Map, DataFrame.Variable> = mapOf(Aes.X to Stats.X) + AGG_MAPPING } } \ No 
newline at end of file diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 4ba2d4fc21b..8145335b437 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -5,11 +5,10 @@ package jetbrains.datalore.plot.config -import jetbrains.datalore.plot.base.Aes +import jetbrains.datalore.plot.base.DataFrame import jetbrains.datalore.plot.base.GeomKind import jetbrains.datalore.plot.base.Stat import jetbrains.datalore.plot.base.stat.* -import jetbrains.datalore.plot.config.Option.Mapping import jetbrains.datalore.plot.config.Option.Stat.Bin import jetbrains.datalore.plot.config.Option.Stat.Bin2d import jetbrains.datalore.plot.config.Option.Stat.Boxplot @@ -409,9 +408,9 @@ object StatProto { private fun configureSummaryStat(options: OptionsAccessor): SummaryStat { val defaultAggFunctions = mapOf( - Aes.Y to (getAggFunction(options, Summary.FUN) ?: SummaryUtil::mean), - Aes.YMIN to (getAggFunction(options, Summary.FUN_MIN) ?: SummaryUtil::min), - Aes.YMAX to (getAggFunction(options, Summary.FUN_MAX) ?: SummaryUtil::max) + Stats.Y to (getAggFunction(options, Summary.FUN) ?: SummaryUtil::mean), + Stats.Y_MIN to (getAggFunction(options, Summary.FUN_MIN) ?: SummaryUtil::min), + Stats.Y_MAX to (getAggFunction(options, Summary.FUN_MAX) ?: SummaryUtil::max) ) val additionalAggFunctions = configureAggFunMap(options.getMap(Summary.FUN_MAP)) @@ -419,12 +418,12 @@ object StatProto { return SummaryStat(defaultAggFunctions + additionalAggFunctions) } - private fun configureAggFunMap(aggFunMap: Map): Map, (List) -> Double> { + private fun configureAggFunMap(aggFunMap: Map): Map) -> Double> { val aggFunOptions = OptionsAccessor(aggFunMap) - return aggFunMap.keys.associate { aesName -> + return 
aggFunMap.keys.associate { statVarName -> Pair( - Mapping.toAes(aesName), - getAggFunction(aggFunOptions, aesName) ?: SummaryUtil::nan + Stats.statVar("..$statVarName.."), + getAggFunction(aggFunOptions, statVarName) ?: SummaryUtil::nan ) } } diff --git a/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt b/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt index 4c77000825c..f58ce030755 100644 --- a/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt +++ b/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt @@ -90,8 +90,9 @@ class Summary { 'layers': [ { 'geom': 'crossbar', + 'mapping': {'middle': '..median..'}, 'stat': 'summary', - 'fun_map': {'middle': 'median'} + 'fun_map': {'median': 'median'} } ] } diff --git a/python-package/lets_plot/plot/stat.py b/python-package/lets_plot/plot/stat.py index cf07325bd50..adde5545f0e 100644 --- a/python-package/lets_plot/plot/stat.py +++ b/python-package/lets_plot/plot/stat.py @@ -1,9 +1,8 @@ # Copyright (c) 2023. JetBrains s.r.o. # Use of this source code is governed by the MIT license that can be found in the LICENSE file. - +from .core import aes from .geom import _geom - # # Stats - functions, drawing attention to the statistical transformation rather than the visual appearance. 
# @@ -16,8 +15,31 @@ def stat_summary(mapping=None, *, data=None, geom='pointrange', fun=None, fun_min=None, fun_max=None, fun_map=None, color_by=None, fill_by=None, **other_args): + mapping_dict = mapping.as_dict() if mapping is not None else {} + + quantile_agg_functions = {"qa": None, "qb": None, "qc": None} + + def get_stat_name(agg_fun): + if isinstance(agg_fun, str) and agg_fun not in ["q1", "q2", "q3"]: + return agg_fun + else: + name = next((q for (q, f) in quantile_agg_functions.items() if f is None or f == agg_fun), None) + if name is None: + raise Exception("No more than three different quantiles can be used in fun_map parameter") + quantile_agg_functions[name] = agg_fun + return name + + inner_fun_map = {} + fun_mapping_dict = {} + for aes_name, fun_name in (fun_map or {}).items(): + stat_name = get_stat_name(fun_name) + inner_fun_map[stat_name] = fun_name + fun_mapping_dict[aes_name] = "..{0}..".format(stat_name) + inner_mapping_dict = {**fun_mapping_dict, **mapping_dict} + inner_mapping = aes(**inner_mapping_dict) if len(inner_mapping_dict.keys()) > 0 else None + return _geom(geom, - mapping=mapping, + mapping=inner_mapping, data=data, stat='summary', position=position, @@ -25,6 +47,6 @@ def stat_summary(mapping=None, *, data=None, geom='pointrange', sampling=sampling, tooltips=tooltips, orientation=orientation, - fun=fun, fun_min=fun_min, fun_max=fun_max, fun_map=fun_map, + fun=fun, fun_min=fun_min, fun_max=fun_max, fun_map=inner_fun_map, color_by=color_by, fill_by=fill_by, **other_args) From 7fea03b4414f3f07d198181d93c6923c73eaefb9 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Fri, 23 Jun 2023 17:48:05 +0200 Subject: [PATCH 14/25] Add prefix to min/max stats in stat_summary(). 
--- python-package/lets_plot/plot/stat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python-package/lets_plot/plot/stat.py b/python-package/lets_plot/plot/stat.py index adde5545f0e..233cdbb6b81 100644 --- a/python-package/lets_plot/plot/stat.py +++ b/python-package/lets_plot/plot/stat.py @@ -21,7 +21,8 @@ def stat_summary(mapping=None, *, data=None, geom='pointrange', def get_stat_name(agg_fun): if isinstance(agg_fun, str) and agg_fun not in ["q1", "q2", "q3"]: - return agg_fun + prefix = "" if agg_fun not in ["min", "max"] else "y" + return prefix + agg_fun else: name = next((q for (q, f) in quantile_agg_functions.items() if f is None or f == agg_fun), None) if name is None: From 0ecc6ec92301f69d40a281316016aa991d353a17 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Mon, 26 Jun 2023 16:39:44 +0200 Subject: [PATCH 15/25] Change API of the summary_stat() - add 'quantiles' parameter. --- .../datalore/plot/base/stat/Stats.kt | 12 +-- .../datalore/plot/base/stat/SummaryStat.kt | 2 + .../datalore/plot/base/stat/SummaryUtil.kt | 4 - .../jetbrains/datalore/plot/config/Option.kt | 1 + .../datalore/plot/config/OptionsAccessor.kt | 5 -- .../datalore/plot/config/StatProto.kt | 80 +++++++++++-------- .../plotDemo/model/plotConfig/Summary.kt | 8 +- python-package/lets_plot/plot/stat.py | 29 ++----- 8 files changed, 67 insertions(+), 74 deletions(-) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/Stats.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/Stats.kt index ac455294206..52c892b72a2 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/Stats.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/Stats.kt @@ -26,9 +26,9 @@ object Stats { val MEAN = DataFrame.Variable("..mean..", STAT, "mean") val MEDIAN = DataFrame.Variable("..median..", STAT, "median") val QUANTILE = DataFrame.Variable("..quantile..", STAT, 
"quantile") - val QUANTILE_A = DataFrame.Variable("..qa..", STAT, "quantile a") - val QUANTILE_B = DataFrame.Variable("..qb..", STAT, "quantile b") - val QUANTILE_C = DataFrame.Variable("..qc..", STAT, "quantile c") + val LOWER_QUANTILE = DataFrame.Variable("..lq..", STAT, "lower quantile") + val MIDDLE_QUANTILE = DataFrame.Variable("..mq..", STAT, "middle quantile") + val UPPER_QUANTILE = DataFrame.Variable("..uq..", STAT, "upper quantile") val LOWER = DataFrame.Variable("..lower..", STAT, "lower") val MIDDLE = DataFrame.Variable("..middle..", STAT, "middle") val UPPER = DataFrame.Variable("..upper..", STAT, "upper") @@ -61,9 +61,9 @@ object Stats { MEAN, MEDIAN, QUANTILE, - QUANTILE_A, - QUANTILE_B, - QUANTILE_C, + LOWER_QUANTILE, + MIDDLE_QUANTILE, + UPPER_QUANTILE, LOWER, MIDDLE, UPPER, diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index 1379570419d..a1ad106498f 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -71,6 +71,8 @@ class SummaryStat( } companion object { + val DEF_QUANTILES = Triple(0.25, 0.5, 0.75) + private val DEF_MAPPING: Map, DataFrame.Variable> = mapOf( Aes.X to Stats.X, Aes.Y to Stats.Y, diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt index b4e39ae8326..0c108af5b8e 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt @@ -29,10 +29,6 @@ object SummaryUtil { fun max(sortedValues: List): Double = sortedValues.lastOrNull() ?: Double.NaN - fun 
firstQuartile(sortedValues: List): Double = quantile(sortedValues, 0.25) - - fun thirdQuartile(sortedValues: List): Double = quantile(sortedValues, 0.75) - fun quantile(sortedValues: List, p: Double): Double { if (sortedValues.isEmpty()) { return Double.NaN diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/Option.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/Option.kt index db3d7b92883..a9561cb3935 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/Option.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/Option.kt @@ -431,6 +431,7 @@ object Option { } object Summary { + const val QUANTILES = "quantiles" const val FUN = "fun" const val FUN_MIN = "fun_min" const val FUN_MAX = "fun_max" diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/OptionsAccessor.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/OptionsAccessor.kt index 8367af49658..c5efe20e84f 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/OptionsAccessor.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/OptionsAccessor.kt @@ -35,11 +35,6 @@ open class OptionsAccessor( return options[option] != null } - fun isNumber(option: String): Boolean { - val v = get(option) ?: return false - return v is Number - } - operator fun get(option: String): Any? 
{ return if (hasOwn(option)) { options[option] diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 8145335b437..06586162fdc 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -407,53 +407,63 @@ object StatProto { } private fun configureSummaryStat(options: OptionsAccessor): SummaryStat { + val sortedQuantiles: Triple = if (options.hasOwn(Summary.QUANTILES)) { + options.getBoundedDoubleList(Summary.QUANTILES, 0.0, 1.0).let { quantiles -> + if (quantiles.size != 3) error("Parameter 'quantiles' should contains 3 values") + quantiles.sorted().let { Triple(it[0], it[1], it[2]) } + } + } else { + SummaryStat.DEF_QUANTILES + } + val defaultAggFunctions = mapOf( - Stats.Y to (getAggFunction(options, Summary.FUN) ?: SummaryUtil::mean), - Stats.Y_MIN to (getAggFunction(options, Summary.FUN_MIN) ?: SummaryUtil::min), - Stats.Y_MAX to (getAggFunction(options, Summary.FUN_MAX) ?: SummaryUtil::max) + Stats.Y to (options.getString(Summary.FUN)?.lowercase()?.let { getAggFunction(it, sortedQuantiles) } ?: SummaryUtil::mean), + Stats.Y_MIN to (options.getString(Summary.FUN_MIN)?.lowercase()?.let { getAggFunction(it, sortedQuantiles) } ?: SummaryUtil::min), + Stats.Y_MAX to (options.getString(Summary.FUN_MAX)?.lowercase()?.let { getAggFunction(it, sortedQuantiles) } ?: SummaryUtil::max) ) - val additionalAggFunctions = configureAggFunMap(options.getMap(Summary.FUN_MAP)) + val additionalAggFunctions = configureAggFunMap(options.getMap(Summary.FUN_MAP), sortedQuantiles) return SummaryStat(defaultAggFunctions + additionalAggFunctions) } - private fun configureAggFunMap(aggFunMap: Map): Map) -> Double> { + private fun configureAggFunMap( + aggFunMap: Map, + sortedQuantiles: Triple + ): Map) -> Double> { 
val aggFunOptions = OptionsAccessor(aggFunMap) - return aggFunMap.keys.associate { statVarName -> - Pair( - Stats.statVar("..$statVarName.."), - getAggFunction(aggFunOptions, statVarName) ?: SummaryUtil::nan - ) - } + return aggFunMap.keys.mapNotNull { option -> + aggFunOptions.getString(option)?.lowercase()?.let { aggFunName -> + val statVar = when (aggFunName) { + "min", "max" -> "..y$aggFunName.." + else -> "..$aggFunName.." + }.let { Stats.statVar(it) } + Pair(statVar, getAggFunction(aggFunName, sortedQuantiles)) + } + }.toMap() } private fun getAggFunction( - options: OptionsAccessor, - option: String - ): ((List) -> Double)? { - if (options.isNumber(option)) { - return options.getDouble(option)?.let { p: Double -> - { values: List -> SummaryUtil.quantile(values, p) } - } + aggFunName: String, + sortedQuantiles: Triple + ): ((List) -> Double) { + val quantileAggFunction: (Double) -> (List) -> Double = { p -> + { values: List -> SummaryUtil.quantile(values, p) } } - return options.getString(option)?.let { - when (it.lowercase()) { - "nan" -> SummaryUtil::nan - "count" -> SummaryUtil::count - "sum" -> SummaryUtil::sum - "mean" -> SummaryUtil::mean - "median" -> SummaryUtil::median - "min" -> SummaryUtil::min - "max" -> SummaryUtil::max - "q1" -> SummaryUtil::firstQuartile - "q2" -> SummaryUtil::median - "q3" -> SummaryUtil::thirdQuartile - else -> throw IllegalArgumentException( - "Unsupported function name: '$it'\n" + - "Use one of: nan, count, sum, mean, median, min, max, q1, q2, q3." 
- ) - } + return when (aggFunName) { + "count" -> SummaryUtil::count + "sum" -> SummaryUtil::sum + "mean" -> SummaryUtil::mean + "median" -> SummaryUtil::median + "min" -> SummaryUtil::min + "max" -> SummaryUtil::max + "lq" -> quantileAggFunction(sortedQuantiles.first) + "mq" -> quantileAggFunction(sortedQuantiles.second) + "uq" -> quantileAggFunction(sortedQuantiles.third) + else -> throw IllegalArgumentException( + "Unsupported function name: '$aggFunName'\n" + + "Use one of: count, sum, mean, median, min, max, lq, mq, uq." + ) } } } diff --git a/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt b/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt index f58ce030755..f44f707aff5 100644 --- a/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt +++ b/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt @@ -62,8 +62,8 @@ class Summary { 'geom': 'pointrange', 'stat': 'summary', 'fun': 'median', - 'fun_min': 'q1', - 'fun_max': 0.75, + 'fun_min': 'lq', + 'fun_max': 'uq', 'color': 'red' } ] @@ -92,7 +92,9 @@ class Summary { 'geom': 'crossbar', 'mapping': {'middle': '..median..'}, 'stat': 'summary', - 'fun_map': {'median': 'median'} + 'fun_min': 'lq', + 'fun_map': {'middle': 'median'}, + 'quantiles': [0.45, 0.5, 0.55] } ] } diff --git a/python-package/lets_plot/plot/stat.py b/python-package/lets_plot/plot/stat.py index 233cdbb6b81..4a73edf8483 100644 --- a/python-package/lets_plot/plot/stat.py +++ b/python-package/lets_plot/plot/stat.py @@ -13,29 +13,15 @@ def stat_summary(mapping=None, *, data=None, geom='pointrange', position=None, show_legend=None, sampling=None, tooltips=None, orientation=None, fun=None, fun_min=None, fun_max=None, fun_map=None, + quantiles=None, color_by=None, fill_by=None, **other_args): - mapping_dict = mapping.as_dict() if mapping is not None else {} - - quantile_agg_functions 
= {"qa": None, "qb": None, "qc": None} + def fun_to_stat(fun_name): + prefix = "y" if fun_name in ["min", "max"] else "" + return "..{0}{1}..".format(prefix, fun_name) - def get_stat_name(agg_fun): - if isinstance(agg_fun, str) and agg_fun not in ["q1", "q2", "q3"]: - prefix = "" if agg_fun not in ["min", "max"] else "y" - return prefix + agg_fun - else: - name = next((q for (q, f) in quantile_agg_functions.items() if f is None or f == agg_fun), None) - if name is None: - raise Exception("No more than three different quantiles can be used in fun_map parameter") - quantile_agg_functions[name] = agg_fun - return name - - inner_fun_map = {} - fun_mapping_dict = {} - for aes_name, fun_name in (fun_map or {}).items(): - stat_name = get_stat_name(fun_name) - inner_fun_map[stat_name] = fun_name - fun_mapping_dict[aes_name] = "..{0}..".format(stat_name) + mapping_dict = mapping.as_dict() if mapping is not None else {} + fun_mapping_dict = {aes_name: fun_to_stat(fun_name) for aes_name, fun_name in (fun_map or {}).items()} inner_mapping_dict = {**fun_mapping_dict, **mapping_dict} inner_mapping = aes(**inner_mapping_dict) if len(inner_mapping_dict.keys()) > 0 else None @@ -48,6 +34,7 @@ def get_stat_name(agg_fun): sampling=sampling, tooltips=tooltips, orientation=orientation, - fun=fun, fun_min=fun_min, fun_max=fun_max, fun_map=inner_fun_map, + fun=fun, fun_min=fun_min, fun_max=fun_max, fun_map=fun_map, + quantiles=quantiles, color_by=color_by, fill_by=fill_by, **other_args) From 722b43aac7e68feeeb89673863a78bc32577b744 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Tue, 27 Jun 2023 10:28:13 +0200 Subject: [PATCH 16/25] Tiny refactor in SummaryStat and AggregateFunctions. 
--- .../{SummaryUtil.kt => AggregateFunctions.kt} | 4 +-- .../datalore/plot/base/stat/SummaryStat.kt | 4 +-- .../datalore/plot/config/StatProto.kt | 35 +++++++++---------- 3 files changed, 19 insertions(+), 24 deletions(-) rename plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/{SummaryUtil.kt => AggregateFunctions.kt} (93%) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctions.kt similarity index 93% rename from plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt rename to plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctions.kt index 0c108af5b8e..fa4405cd934 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryUtil.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctions.kt @@ -9,9 +9,7 @@ import kotlin.math.ceil import kotlin.math.floor import kotlin.math.round -object SummaryUtil { - fun nan(sortedValues: List): Double = Double.NaN - +object AggregateFunctions { fun count(sortedValues: List): Double = sortedValues.size.toDouble() fun sum(sortedValues: List): Double = sortedValues.sum() diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index a1ad106498f..866f227c763 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -62,7 +62,7 @@ class SummaryStat( statX.add(x) val sortedBin = Ordering.natural().sortedCopy(bin) for ((statVar, aggValues) in statAggValues) { - val aggFunction = aggFunctionsMap[statVar] ?: SummaryUtil::nan + val aggFunction = 
aggFunctionsMap[statVar] ?: { Double.NaN } aggValues.add(aggFunction(sortedBin)) } } @@ -71,7 +71,7 @@ class SummaryStat( } companion object { - val DEF_QUANTILES = Triple(0.25, 0.5, 0.75) + val DEF_QUANTILES = listOf(0.25, 0.5, 0.75) private val DEF_MAPPING: Map, DataFrame.Variable> = mapOf( Aes.X to Stats.X, diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 06586162fdc..1cf0b13ae71 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -407,19 +407,19 @@ object StatProto { } private fun configureSummaryStat(options: OptionsAccessor): SummaryStat { - val sortedQuantiles: Triple = if (options.hasOwn(Summary.QUANTILES)) { + val sortedQuantiles: List = if (options.hasOwn(Summary.QUANTILES)) { options.getBoundedDoubleList(Summary.QUANTILES, 0.0, 1.0).let { quantiles -> if (quantiles.size != 3) error("Parameter 'quantiles' should contains 3 values") - quantiles.sorted().let { Triple(it[0], it[1], it[2]) } + quantiles.sorted() } } else { SummaryStat.DEF_QUANTILES } val defaultAggFunctions = mapOf( - Stats.Y to (options.getString(Summary.FUN)?.lowercase()?.let { getAggFunction(it, sortedQuantiles) } ?: SummaryUtil::mean), - Stats.Y_MIN to (options.getString(Summary.FUN_MIN)?.lowercase()?.let { getAggFunction(it, sortedQuantiles) } ?: SummaryUtil::min), - Stats.Y_MAX to (options.getString(Summary.FUN_MAX)?.lowercase()?.let { getAggFunction(it, sortedQuantiles) } ?: SummaryUtil::max) + Stats.Y to (options.getString(Summary.FUN)?.lowercase()?.let { getAggFunction(it, sortedQuantiles) } ?: AggregateFunctions::mean), + Stats.Y_MIN to (options.getString(Summary.FUN_MIN)?.lowercase()?.let { getAggFunction(it, sortedQuantiles) } ?: AggregateFunctions::min), + Stats.Y_MAX to 
(options.getString(Summary.FUN_MAX)?.lowercase()?.let { getAggFunction(it, sortedQuantiles) } ?: AggregateFunctions::max) ) val additionalAggFunctions = configureAggFunMap(options.getMap(Summary.FUN_MAP), sortedQuantiles) @@ -429,7 +429,7 @@ object StatProto { private fun configureAggFunMap( aggFunMap: Map, - sortedQuantiles: Triple + sortedQuantiles: List ): Map) -> Double> { val aggFunOptions = OptionsAccessor(aggFunMap) return aggFunMap.keys.mapNotNull { option -> @@ -445,21 +445,18 @@ object StatProto { private fun getAggFunction( aggFunName: String, - sortedQuantiles: Triple + sortedQuantiles: List ): ((List) -> Double) { - val quantileAggFunction: (Double) -> (List) -> Double = { p -> - { values: List -> SummaryUtil.quantile(values, p) } - } return when (aggFunName) { - "count" -> SummaryUtil::count - "sum" -> SummaryUtil::sum - "mean" -> SummaryUtil::mean - "median" -> SummaryUtil::median - "min" -> SummaryUtil::min - "max" -> SummaryUtil::max - "lq" -> quantileAggFunction(sortedQuantiles.first) - "mq" -> quantileAggFunction(sortedQuantiles.second) - "uq" -> quantileAggFunction(sortedQuantiles.third) + "count" -> AggregateFunctions::count + "sum" -> AggregateFunctions::sum + "mean" -> AggregateFunctions::mean + "median" -> AggregateFunctions::median + "min" -> AggregateFunctions::min + "max" -> AggregateFunctions::max + "lq" -> { values -> AggregateFunctions.quantile(values, sortedQuantiles[0]) } + "mq" -> { values -> AggregateFunctions.quantile(values, sortedQuantiles[1]) } + "uq" -> { values -> AggregateFunctions.quantile(values, sortedQuantiles[2]) } else -> throw IllegalArgumentException( "Unsupported function name: '$aggFunName'\n" + "Use one of: count, sum, mean, median, min, max, lq, mq, uq." From e30b93bd7514702365cefd943c0a3b9e293e192c Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Tue, 27 Jun 2023 10:42:06 +0200 Subject: [PATCH 17/25] Use AggregateFunctions in the FiveNumberSummary. 
--- .../plot/base/stat/FiveNumberSummary.kt | 36 +++---------------- 1 file changed, 5 insertions(+), 31 deletions(-) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/FiveNumberSummary.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/FiveNumberSummary.kt index 71530154201..e4507eab7c2 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/FiveNumberSummary.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/FiveNumberSummary.kt @@ -6,9 +6,6 @@ package jetbrains.datalore.plot.base.stat import jetbrains.datalore.base.gcommon.collect.Ordering -import kotlin.math.ceil -import kotlin.math.floor -import kotlin.math.round /** * For a set of data, the minimum, first quartile, median, third quartile, and maximum. @@ -23,36 +20,13 @@ internal class FiveNumberSummary { // 25 % val thirdQuartile: Double // 75 % - private fun medianAtPointer(l: List, pointer: Double): Double { - val rint = round(pointer) - return if (pointer == rint) { - l[pointer.toInt()] - } else (l[ceil(pointer).toInt()] + l[floor(pointer).toInt()]) / 2.0 - } - constructor(data: List) { val sorted = Ordering.natural().sortedCopy(data) - if (sorted.isEmpty()) { - thirdQuartile = Double.NaN - firstQuartile = thirdQuartile - median = firstQuartile - max = median - min = max - } else if (sorted.size == 1) { - thirdQuartile = sorted.get(0) - firstQuartile = thirdQuartile - median = firstQuartile - max = median - min = max - } else { - val maxIndex = sorted.size - 1 - - min = sorted.get(0) - max = sorted.get(maxIndex) - median = medianAtPointer(sorted, maxIndex * .5) - firstQuartile = medianAtPointer(sorted, maxIndex * .25) - thirdQuartile = medianAtPointer(sorted, maxIndex * .75) - } + min = AggregateFunctions.min(sorted) + max = AggregateFunctions.max(sorted) + median = AggregateFunctions.median(sorted) + firstQuartile = AggregateFunctions.quantile(sorted, 0.25) + thirdQuartile = 
AggregateFunctions.quantile(sorted, 0.75) } constructor(min: Double, max: Double, median: Double, firstQuartile: Double, thirdQuartile: Double) { From f076938f13ccfd5e6015458d41fdb6e27f275394 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Tue, 27 Jun 2023 17:56:07 +0200 Subject: [PATCH 18/25] Add tests for AggregateFunctions. --- .../plot/base/stat/AggregateFunctions.kt | 7 +- .../plot/base/stat/AggregateFunctionsTest.kt | 80 +++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctionsTest.kt diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctions.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctions.kt index fa4405cd934..10cb3ebc825 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctions.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctions.kt @@ -12,7 +12,12 @@ import kotlin.math.round object AggregateFunctions { fun count(sortedValues: List): Double = sortedValues.size.toDouble() - fun sum(sortedValues: List): Double = sortedValues.sum() + fun sum(sortedValues: List): Double { + return when (sortedValues.size) { + 0 -> Double.NaN + else -> sortedValues.sum() + } + } fun mean(sortedValues: List): Double { return when (sortedValues.size) { diff --git a/plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctionsTest.kt b/plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctionsTest.kt new file mode 100644 index 00000000000..94eaa86b927 --- /dev/null +++ b/plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctionsTest.kt @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2023. JetBrains s.r.o. 
+ * Use of this source code is governed by the MIT license that can be found in the LICENSE file. + */ + +package jetbrains.datalore.plot.base.stat + +import kotlin.test.Test +import kotlin.test.assertEquals + +class AggregateFunctionsTest { + @Test + fun emptyData() { + val values: List = emptyList() + assertEquals(0.0, AggregateFunctions.count(values)) + assertEquals(Double.NaN, AggregateFunctions.sum(values)) + assertEquals(Double.NaN, AggregateFunctions.mean(values)) + assertEquals(Double.NaN, AggregateFunctions.median(values)) + assertEquals(Double.NaN, AggregateFunctions.min(values)) + assertEquals(Double.NaN, AggregateFunctions.max(values)) + assertEquals(Double.NaN, AggregateFunctions.quantile(values, 0.25)) + } + + @Test + fun oneElementData() { + val value = 1.0 + val values = listOf(value) + assertEquals(1.0, AggregateFunctions.count(values)) + assertEquals(value, AggregateFunctions.sum(values)) + assertEquals(value, AggregateFunctions.mean(values)) + assertEquals(value, AggregateFunctions.median(values)) + assertEquals(value, AggregateFunctions.min(values)) + assertEquals(value, AggregateFunctions.max(values)) + assertEquals(value, AggregateFunctions.quantile(values, 0.25)) + } + + @Test + fun checkCountFunction() { + assertEquals(4.0, AggregateFunctions.count(listOf(-1.0, -1.0, 1.0, 3.0))) + } + + @Test + fun checkSumFunction() { + assertEquals(2.0, AggregateFunctions.sum(listOf(-1.0, -1.0, 1.0, 3.0))) + } + + @Test + fun checkMeanFunction() { + assertEquals(0.5, AggregateFunctions.mean(listOf(-1.0, -1.0, 1.0, 3.0))) + assertEquals(2.0, AggregateFunctions.mean(listOf(-2.0, 3.0, 5.0))) + } + + @Test + fun checkMedianFunction() { + assertEquals(0.0, AggregateFunctions.median(listOf(-1.0, -1.0, 1.0, 3.0))) + assertEquals(3.0, AggregateFunctions.median(listOf(-2.0, 3.0, 5.0))) + } + + @Test + fun checkMinFunction() { + assertEquals(-1.0, AggregateFunctions.min(listOf(-1.0, -1.0, 1.0, 3.0))) + } + + @Test + fun checkMaxFunction() { + assertEquals(3.0, 
AggregateFunctions.max(listOf(-1.0, -1.0, 1.0, 3.0))) + } + + @Test + fun checkQuantileFunction() { + val sortedValues = listOf(-1.0, -1.0, 1.0, 3.0) + assertEquals(-1.0, AggregateFunctions.quantile(sortedValues, 0.0)) + assertEquals(-1.0, AggregateFunctions.quantile(sortedValues, 0.25)) + assertEquals(-1.0, AggregateFunctions.quantile(sortedValues, 1.0 / 3.0)) + assertEquals(0.0, AggregateFunctions.quantile(sortedValues, 0.5)) + assertEquals(1.0, AggregateFunctions.quantile(sortedValues, 2.0 / 3.0)) + assertEquals(2.0, AggregateFunctions.quantile(sortedValues, 0.75)) + assertEquals(3.0, AggregateFunctions.quantile(sortedValues, 1.0)) + } +} \ No newline at end of file From 231387297093c98f5a02dad9297f4965dd474e62 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Thu, 29 Jun 2023 13:00:47 +0200 Subject: [PATCH 19/25] Replace parameter fun_map by usual aesthetics list for the stat_summary(). --- .../datalore/plot/base/StatContext.kt | 6 ++ .../plot/base/stat/SimpleStatContext.kt | 5 ++ .../datalore/plot/base/stat/SummaryStat.kt | 47 +++++++++++-- .../datalore/plot/config/StatProto.kt | 67 +++++++------------ .../server/config/ConfiguredStatContext.kt | 8 ++- .../server/config/PlotConfigServerSide.kt | 4 +- .../plotDemo/model/plotConfig/Summary.kt | 1 - python-package/lets_plot/plot/stat.py | 16 +---- 8 files changed, 86 insertions(+), 68 deletions(-) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt index 98c3f8f1bc0..3c21e0a148a 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt @@ -12,6 +12,8 @@ interface StatContext { fun overallYRange(): DoubleSpan? 
+ fun getMapping(): Map, DataFrame.Variable> + fun getFlipped(): StatContext { return Flipped(this) } @@ -25,6 +27,10 @@ interface StatContext { return orig.overallXRange() } + override fun getMapping(): Map, DataFrame.Variable> { + return orig.getMapping() + } + override fun getFlipped(): StatContext { return orig } diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SimpleStatContext.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SimpleStatContext.kt index 40d1fef5123..68c06b749c0 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SimpleStatContext.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SimpleStatContext.kt @@ -6,6 +6,7 @@ package jetbrains.datalore.plot.base.stat import jetbrains.datalore.base.interval.DoubleSpan +import jetbrains.datalore.plot.base.Aes import jetbrains.datalore.plot.base.DataFrame import jetbrains.datalore.plot.base.StatContext import jetbrains.datalore.plot.base.data.TransformVar @@ -20,4 +21,8 @@ class SimpleStatContext(private val myDataFrame: DataFrame) : override fun overallYRange(): DoubleSpan? 
{ return myDataFrame.range(TransformVar.Y) } + + override fun getMapping(): Map, DataFrame.Variable> { + return emptyMap() + } } diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index 866f227c763..2d302a923ea 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -13,7 +13,10 @@ import jetbrains.datalore.plot.base.data.TransformVar import jetbrains.datalore.plot.common.data.SeriesUtil class SummaryStat( - private val aggFunctionsMap: Map) -> Double> + private val yAggFunction: (List) -> Double, + private val yMinAggFunction: (List) -> Double, + private val yMaxAggFunction: (List) -> Double, + private val sortedQuantiles: List ) : BaseStat(DEF_MAPPING) { override fun consumes(): List> { @@ -32,7 +35,7 @@ class SummaryStat( List(ys.size) { 0.0 } } - val statData = buildStat(xs, ys) + val statData = buildStat(xs, ys, statCtx) if (statData.isEmpty()) { return withEmptyStatValues() } @@ -46,7 +49,8 @@ class SummaryStat( private fun buildStat( xs: List, - ys: List + ys: List, + statCtx: StatContext ): Map> { val binnedData = SeriesUtil.filterFinite(xs, ys) .let { (xs, ys) -> xs zip ys } @@ -57,17 +61,46 @@ class SummaryStat( } val statX = ArrayList() - val statAggValues: Map> = aggFunctionsMap.keys.associateWith { mutableListOf() } + val statY = ArrayList() + val statYMin = ArrayList() + val statYMax = ArrayList() + val statAggValues: Map> = statCtx.getMapping().values.filter { it.isStat }.associateWith { mutableListOf() } for ((x, bin) in binnedData) { - statX.add(x) val sortedBin = Ordering.natural().sortedCopy(bin) + statX.add(x) + statY.add(yAggFunction(sortedBin)) + statYMin.add(yMinAggFunction(sortedBin)) + statYMax.add(yMaxAggFunction(sortedBin)) for ((statVar, aggValues) in 
statAggValues) { - val aggFunction = aggFunctionsMap[statVar] ?: { Double.NaN } + val aggFunction = aggFunctionByStat(statVar) aggValues.add(aggFunction(sortedBin)) } } - return mapOf(Stats.X to statX) + statAggValues + return mapOf( + Stats.X to statX, + Stats.Y to statY, + Stats.Y_MIN to statYMin, + Stats.Y_MAX to statYMax, + ) + statAggValues + } + + private fun aggFunctionByStat(statVar: DataFrame.Variable): (List) -> Double { + return when (statVar) { + Stats.COUNT -> AggregateFunctions::count + Stats.SUM -> AggregateFunctions::sum + Stats.MEAN -> AggregateFunctions::mean + Stats.MEDIAN -> AggregateFunctions::median + Stats.Y_MIN -> AggregateFunctions::min + Stats.Y_MAX -> AggregateFunctions::max + Stats.LOWER_QUANTILE -> { values -> AggregateFunctions.quantile(values, sortedQuantiles[0]) } + Stats.MIDDLE_QUANTILE -> { values -> AggregateFunctions.quantile(values, sortedQuantiles[1]) } + Stats.UPPER_QUANTILE -> { values -> AggregateFunctions.quantile(values, sortedQuantiles[2]) } + else -> throw IllegalStateException( + "Unsupported stat variable: '${statVar.name}'\n" + + "Use one of: ..count.., ..sum.., ..mean.., ..median.., ..ymin.., ..ymax.., ..lq.., ..mq.., ..uq.." 
+ ) + } } companion object { diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 1cf0b13ae71..1e723ef619d 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -416,51 +416,30 @@ object StatProto { SummaryStat.DEF_QUANTILES } - val defaultAggFunctions = mapOf( - Stats.Y to (options.getString(Summary.FUN)?.lowercase()?.let { getAggFunction(it, sortedQuantiles) } ?: AggregateFunctions::mean), - Stats.Y_MIN to (options.getString(Summary.FUN_MIN)?.lowercase()?.let { getAggFunction(it, sortedQuantiles) } ?: AggregateFunctions::min), - Stats.Y_MAX to (options.getString(Summary.FUN_MAX)?.lowercase()?.let { getAggFunction(it, sortedQuantiles) } ?: AggregateFunctions::max) - ) - - val additionalAggFunctions = configureAggFunMap(options.getMap(Summary.FUN_MAP), sortedQuantiles) - - return SummaryStat(defaultAggFunctions + additionalAggFunctions) - } - - private fun configureAggFunMap( - aggFunMap: Map, - sortedQuantiles: List - ): Map) -> Double> { - val aggFunOptions = OptionsAccessor(aggFunMap) - return aggFunMap.keys.mapNotNull { option -> - aggFunOptions.getString(option)?.lowercase()?.let { aggFunName -> - val statVar = when (aggFunName) { - "min", "max" -> "..y$aggFunName.." - else -> "..$aggFunName.." - }.let { Stats.statVar(it) } - Pair(statVar, getAggFunction(aggFunName, sortedQuantiles)) + fun getAggFunction(option: String): ((List) -> Double)? 
{ + return options.getString(option)?.let { + when (it.lowercase()) { + "count" -> AggregateFunctions::count + "sum" -> AggregateFunctions::sum + "mean" -> AggregateFunctions::mean + "median" -> AggregateFunctions::median + "min" -> AggregateFunctions::min + "max" -> AggregateFunctions::max + "lq" -> { values -> AggregateFunctions.quantile(values, sortedQuantiles[0]) } + "mq" -> { values -> AggregateFunctions.quantile(values, sortedQuantiles[1]) } + "uq" -> { values -> AggregateFunctions.quantile(values, sortedQuantiles[2]) } + else -> throw IllegalArgumentException( + "Unsupported function name: '$it'\n" + + "Use one of: count, sum, mean, median, min, max, lq, mq, uq." + ) + } } - }.toMap() - } - - private fun getAggFunction( - aggFunName: String, - sortedQuantiles: List - ): ((List) -> Double) { - return when (aggFunName) { - "count" -> AggregateFunctions::count - "sum" -> AggregateFunctions::sum - "mean" -> AggregateFunctions::mean - "median" -> AggregateFunctions::median - "min" -> AggregateFunctions::min - "max" -> AggregateFunctions::max - "lq" -> { values -> AggregateFunctions.quantile(values, sortedQuantiles[0]) } - "mq" -> { values -> AggregateFunctions.quantile(values, sortedQuantiles[1]) } - "uq" -> { values -> AggregateFunctions.quantile(values, sortedQuantiles[2]) } - else -> throw IllegalArgumentException( - "Unsupported function name: '$aggFunName'\n" + - "Use one of: count, sum, mean, median, min, max, lq, mq, uq." 
- ) } + + val yAggFunction = getAggFunction(Summary.FUN) ?: AggregateFunctions::mean + val yMinAggFunction = getAggFunction(Summary.FUN_MIN) ?: AggregateFunctions::min + val yMaxAggFunction = getAggFunction(Summary.FUN_MAX) ?: AggregateFunctions::max + + return SummaryStat(yAggFunction, yMinAggFunction, yMaxAggFunction, sortedQuantiles) } } diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/ConfiguredStatContext.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/ConfiguredStatContext.kt index 844299bd6dd..3be3d38d928 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/ConfiguredStatContext.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/ConfiguredStatContext.kt @@ -9,11 +9,13 @@ import jetbrains.datalore.base.interval.DoubleSpan import jetbrains.datalore.plot.base.* import jetbrains.datalore.plot.base.data.DataFrameUtil import jetbrains.datalore.plot.base.scale.ScaleUtil +import jetbrains.datalore.plot.builder.VarBinding import jetbrains.datalore.plot.common.data.SeriesUtil internal class ConfiguredStatContext( private val dataFrames: List, - private val transformByAes: Map, Transform> + private val transformByAes: Map, Transform>, + private val varBindings: List ) : StatContext { private fun overallRange(variable: DataFrame.Variable, dataFrames: List): DoubleSpan? { @@ -34,6 +36,10 @@ internal class ConfiguredStatContext( return overallRange(Aes.Y) } + override fun getMapping(): Map, DataFrame.Variable> { + return varBindings.associate { it.aes to it.variable } + } + private fun overallRange(aes: Aes<*>): DoubleSpan? 
{ val transformVar = DataFrameUtil.transformVarFor(aes) diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/PlotConfigServerSide.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/PlotConfigServerSide.kt index 1d60997fdfb..f61d49aa446 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/PlotConfigServerSide.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/PlotConfigServerSide.kt @@ -19,6 +19,7 @@ import jetbrains.datalore.plot.config.* import jetbrains.datalore.plot.config.Option.Meta.DATA_META import jetbrains.datalore.plot.config.Option.Meta.GeoDataFrame.GDF import jetbrains.datalore.plot.config.Option.Meta.GeoDataFrame.GEOMETRY +import jetbrains.datalore.plot.server.config.transform.bistro.util.layer open class PlotConfigServerSide( opts: Map @@ -118,13 +119,12 @@ open class PlotConfigServerSide( val dataByLayer: List = layerConfigs.map { layer -> DataProcessing.transformOriginals(layer.combinedData, layer.varBindings, transformByAes) } - val statCtx = ConfiguredStatContext(dataByLayer, transformByAes) return layerConfigs.mapIndexed { layerIndex, layerConfig -> applyLayerStatistic( layerConfig, layerData = dataByLayer[layerIndex], - statCtx, + ConfiguredStatContext(dataByLayer, transformByAes, layerConfig.varBindings), ) { message -> layerMessageHandler(layerIndex, message) } diff --git a/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt b/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt index f44f707aff5..91946b43427 100644 --- a/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt +++ b/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Summary.kt @@ -93,7 +93,6 @@ class Summary { 'mapping': {'middle': '..median..'}, 'stat': 'summary', 
'fun_min': 'lq', - 'fun_map': {'middle': 'median'}, 'quantiles': [0.45, 0.5, 0.55] } ] diff --git a/python-package/lets_plot/plot/stat.py b/python-package/lets_plot/plot/stat.py index 4a73edf8483..cb4c9563fd8 100644 --- a/python-package/lets_plot/plot/stat.py +++ b/python-package/lets_plot/plot/stat.py @@ -1,6 +1,5 @@ # Copyright (c) 2023. JetBrains s.r.o. # Use of this source code is governed by the MIT license that can be found in the LICENSE file. -from .core import aes from .geom import _geom # @@ -12,21 +11,12 @@ def stat_summary(mapping=None, *, data=None, geom='pointrange', position=None, show_legend=None, sampling=None, tooltips=None, orientation=None, - fun=None, fun_min=None, fun_max=None, fun_map=None, + fun=None, fun_min=None, fun_max=None, quantiles=None, color_by=None, fill_by=None, **other_args): - def fun_to_stat(fun_name): - prefix = "y" if fun_name in ["min", "max"] else "" - return "..{0}{1}..".format(prefix, fun_name) - - mapping_dict = mapping.as_dict() if mapping is not None else {} - fun_mapping_dict = {aes_name: fun_to_stat(fun_name) for aes_name, fun_name in (fun_map or {}).items()} - inner_mapping_dict = {**fun_mapping_dict, **mapping_dict} - inner_mapping = aes(**inner_mapping_dict) if len(inner_mapping_dict.keys()) > 0 else None - return _geom(geom, - mapping=inner_mapping, + mapping=mapping, data=data, stat='summary', position=position, @@ -34,7 +24,7 @@ def fun_to_stat(fun_name): sampling=sampling, tooltips=tooltips, orientation=orientation, - fun=fun, fun_min=fun_min, fun_max=fun_max, fun_map=fun_map, + fun=fun, fun_min=fun_min, fun_max=fun_max, quantiles=quantiles, color_by=color_by, fill_by=fill_by, **other_args) From 4bba4139aecab3b35fbad5f920f301dfafd7a7ff Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Thu, 29 Jun 2023 14:44:38 +0200 Subject: [PATCH 20/25] Small fixes in code for summary stat. 
--- .../jetbrains/datalore/plot/base/StatContext.kt | 6 +----- .../datalore/plot/base/stat/AggregateFunctions.kt | 14 +++++++------- .../datalore/plot/base/stat/SimpleStatContext.kt | 4 ---- .../datalore/plot/base/stat/SummaryStat.kt | 3 ++- .../jetbrains/datalore/plot/config/StatProto.kt | 7 +++---- 5 files changed, 13 insertions(+), 21 deletions(-) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt index 3c21e0a148a..d35c59156db 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt @@ -12,7 +12,7 @@ interface StatContext { fun overallYRange(): DoubleSpan? - fun getMapping(): Map, DataFrame.Variable> + fun getMapping(): Map, DataFrame.Variable> = throw IllegalStateException("Not implemented") fun getFlipped(): StatContext { return Flipped(this) @@ -27,10 +27,6 @@ interface StatContext { return orig.overallXRange() } - override fun getMapping(): Map, DataFrame.Variable> { - return orig.getMapping() - } - override fun getFlipped(): StatContext { return orig } diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctions.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctions.kt index 10cb3ebc825..918dd779e08 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctions.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/AggregateFunctions.kt @@ -10,19 +10,19 @@ import kotlin.math.floor import kotlin.math.round object AggregateFunctions { - fun count(sortedValues: List): Double = sortedValues.size.toDouble() + fun count(values: List): Double = values.size.toDouble() - fun sum(sortedValues: List): Double { - return when (sortedValues.size) { 
+ fun sum(values: List): Double { + return when (values.size) { 0 -> Double.NaN - else -> sortedValues.sum() + else -> values.sum() } } - fun mean(sortedValues: List): Double { - return when (sortedValues.size) { + fun mean(values: List): Double { + return when (values.size) { 0 -> Double.NaN - else -> sum(sortedValues) / count(sortedValues) + else -> sum(values) / count(values) } } diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SimpleStatContext.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SimpleStatContext.kt index 68c06b749c0..86b2d1a338e 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SimpleStatContext.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SimpleStatContext.kt @@ -21,8 +21,4 @@ class SimpleStatContext(private val myDataFrame: DataFrame) : override fun overallYRange(): DoubleSpan? { return myDataFrame.range(TransformVar.Y) } - - override fun getMapping(): Map, DataFrame.Variable> { - return emptyMap() - } } diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index 2d302a923ea..103bba82409 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -64,7 +64,8 @@ class SummaryStat( val statY = ArrayList() val statYMin = ArrayList() val statYMax = ArrayList() - val statAggValues: Map> = statCtx.getMapping().values.filter { it.isStat }.associateWith { mutableListOf() } + val statAggValues: Map> = statCtx.getMapping().values + .filter(DataFrame.Variable::isStat).associateWith { mutableListOf() } for ((x, bin) in binnedData) { val sortedBin = Ordering.natural().sortedCopy(bin) statX.add(x) diff --git 
a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt index 1e723ef619d..af98604d40d 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/StatProto.kt @@ -408,10 +408,9 @@ object StatProto { private fun configureSummaryStat(options: OptionsAccessor): SummaryStat { val sortedQuantiles: List = if (options.hasOwn(Summary.QUANTILES)) { - options.getBoundedDoubleList(Summary.QUANTILES, 0.0, 1.0).let { quantiles -> - if (quantiles.size != 3) error("Parameter 'quantiles' should contains 3 values") - quantiles.sorted() - } + val quantiles = options.getBoundedDoubleList(Summary.QUANTILES, 0.0, 1.0) + require(quantiles.size == 3) { "Parameter 'quantiles' should contains 3 values" } + quantiles.sorted() } else { SummaryStat.DEF_QUANTILES } From 458645166d0eb023ecbbad2785a558c2a644330f Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Thu, 29 Jun 2023 19:24:16 +0200 Subject: [PATCH 21/25] Add getMapping() method to the Flipped stat context. 
--- .../kotlin/jetbrains/datalore/plot/base/StatContext.kt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt index d35c59156db..e6478ee16f6 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt @@ -27,6 +27,16 @@ interface StatContext { return orig.overallXRange() } + override fun getMapping(): Map, DataFrame.Variable> { + return orig.getMapping().map { (aes, variable) -> + when (aes) { + Aes.X -> Aes.Y to variable + Aes.Y -> Aes.X to variable + else -> aes to variable + } + }.toMap() + } + override fun getFlipped(): StatContext { return orig } From 9dcdd79401d2930e1e25dcc8a13b73185df13af4 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Thu, 29 Jun 2023 19:24:47 +0200 Subject: [PATCH 22/25] Add docstrings to the stat_summary() function. --- python-package/lets_plot/plot/stat.py | 137 ++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/python-package/lets_plot/plot/stat.py b/python-package/lets_plot/plot/stat.py index cb4c9563fd8..32b7c349889 100644 --- a/python-package/lets_plot/plot/stat.py +++ b/python-package/lets_plot/plot/stat.py @@ -15,6 +15,143 @@ def stat_summary(mapping=None, *, data=None, geom='pointrange', quantiles=None, color_by=None, fill_by=None, **other_args): + """ + Visualise the aggregated values of a single continuous variable grouped along the x axis. + + Parameters + ---------- + mapping : `FeatureSpec` + Set of aesthetic mappings created by `aes()` function. + Aesthetic mappings describe the way that variables in the data are + mapped to plot "aesthetics". + data : dict or `DataFrame` or `polars.DataFrame` + The data to be displayed in this layer. 
If None, the default, the data + is inherited from the plot data as specified in the call to ggplot. + geom : str, default='pointrange' + The geometry to display the summary stat for this layer, as a string. + position : str or `FeatureSpec`, default='dodge' + Position adjustment, either as a string ('identity', 'stack', 'dodge', ...), + or the result of a call to a position adjustment function. + show_legend : bool, default=True + False - do not show legend for this layer. + sampling : `FeatureSpec` + Result of the call to the `sampling_xxx()` function. + To prevent any sampling for this layer pass value "none" (string "none"). + tooltips : `layer_tooltips` + Result of the call to the `layer_tooltips()` function. + Specify appearance, style and content. + orientation : str, default='x' + Specify the axis that the layer's stat and geom should run along. + Possible values: 'x', 'y'. + fun : {'count', 'sum', 'mean', 'median', 'min', 'max', 'lq', 'mq', 'uq'}, default='mean' + A name of a function that gets a vector of values and should return a single number. + Values 'lq', 'mq', 'uq' correspond to lower, middle and upper quantiles, + which are determined by the probabilities passed in the `quantiles` parameter. + fun_min : {'count', 'sum', 'mean', 'median', 'min', 'max', 'lq', 'mq', 'uq'}, default='min' + A name of a function that gets a vector of values and should return a single number. + Values 'lq', 'mq', 'uq' correspond to lower, middle and upper quantiles, + which are determined by the probabilities passed in the `quantiles` parameter. + fun_max : {'count', 'sum', 'mean', 'median', 'min', 'max', 'lq', 'mq', 'uq'}, default='max' + A name of a function that gets a vector of values and should return a single number. + Values 'lq', 'mq', 'uq' correspond to lower, middle and upper quantiles, + which are determined by the probabilities passed in the `quantiles` parameter. 
+ quantiles : list of float, default=[0.25, 0.5, 0.75] + The list of probabilities defining the quantile functions 'lq', 'mq' and 'uq'. + Must contain exactly 3 values between 0 and 1. + color_by : {'fill', 'color', 'paint_a', 'paint_b', 'paint_c'}, default='color' + Define the color aesthetic for the geometry. + fill_by : {'fill', 'color', 'paint_a', 'paint_b', 'paint_c'}, default='fill' + Define the fill aesthetic for the geometry. + other_args + Other arguments passed on to the layer. + These are often aesthetics settings used to set an aesthetic to a fixed value, + like color='red', fill='blue', size=3 or shape=21. + They may also be parameters to the paired geom/stat. + + Returns + ------- + `LayerSpec` + Geom object specification. + + Notes + ----- + Computed variables: + + - ..ymin.. : smallest observation. + - ..ymax.. : largest observation. + + `stat_summary()` understands the following aesthetics mappings: + + - x : x-axis coordinates. + - y : mean. + - ymin : lower bound. + - ymax : upper bound. + + In addition, you can use any aesthetics available for the geometry defined by the `geom` parameter. + They can be mapped to the following variables, which will be immediately computed: + + - ..count.. : number of observations. + - ..sum.. : sum of observations. + - ..mean.. : mean of observations. + - ..median.. : median of observations. + - ..ymin.. : smallest observation. + - ..ymax.. : largest observation. + - ..lq.. : lower quantile defined by the first element of the `quantiles` parameter. + - ..mq.. : middle quantile defined by the second element of the `quantiles` parameter. + - ..uq.. : upper quantile defined by the third element of the `quantiles` parameter. + + Examples + -------- + .. 
jupyter-execute:: + :linenos: + :emphasize-lines: 9 + + import numpy as np + from lets_plot import * + LetsPlot.setup_html() + n = 100 + np.random.seed(42) + x = np.random.choice(['a', 'b', 'c'], size=n) + y = np.random.normal(size=n) + ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\ + stat_summary() + + | + + .. jupyter-execute:: + :linenos: + :emphasize-lines: 9-10 + + import numpy as np + from lets_plot import * + LetsPlot.setup_html() + n = 100 + np.random.seed(42) + x = np.random.choice(['a', 'b', 'b', 'c'], size=n) + y = np.random.normal(size=n) + ggplot({'x': x, 'y': y}, aes(x='x', y='y', fill='x')) + \\ + stat_summary(aes(lower='..lq..', middle='..mq..', upper='..uq..'), \\ + geom='boxplot', fatten=5) + + | + + .. jupyter-execute:: + :linenos: + :emphasize-lines: 9-11 + + import numpy as np + from lets_plot import * + LetsPlot.setup_html() + n = 100 + np.random.seed(42) + x = np.random.choice(['a', 'b', 'c'], size=n) + y = np.random.normal(size=n) + ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\ + stat_summary(position=position_nudge(x=-.1), color="red") + \\ + stat_summary(fun_min='lq', fun_max='uq', quantiles=[.1, .5, .9], \\ + position=position_nudge(x=.1), color="blue") + + """ return _geom(geom, mapping=mapping, data=data, From 37526ebf5bfad3224c4df13b9d0582541eda0bb0 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Thu, 29 Jun 2023 19:25:05 +0200 Subject: [PATCH 23/25] Add demo notebook for stat_summary(). 
--- docs/f-23c/stat_summary.ipynb | 565 ++++++++++++++++++++++++++++++++++ 1 file changed, 565 insertions(+) create mode 100644 docs/f-23c/stat_summary.ipynb diff --git a/docs/f-23c/stat_summary.ipynb b/docs/f-23c/stat_summary.ipynb new file mode 100644 index 00000000000..82621800d8b --- /dev/null +++ b/docs/f-23c/stat_summary.ipynb @@ -0,0 +1,565 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4eef5872", + "metadata": {}, + "source": [ + "# `stat_summary()`" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "aed63373", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from lets_plot import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "455dd67d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "LetsPlot.setup_html()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "272ea5ad", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(234, 12)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0manufacturermodeldisplyearcyltransdrvctyhwyflclass
01audia41.819994auto(l5)f1829pcompact
12audia41.819994manual(m5)f2129pcompact
23audia42.020084manual(m6)f2031pcompact
34audia42.020084auto(av)f2130pcompact
45audia42.819996auto(l5)f1626pcompact
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 manufacturer model displ year cyl trans drv cty hwy \\\n", + "0 1 audi a4 1.8 1999 4 auto(l5) f 18 29 \n", + "1 2 audi a4 1.8 1999 4 manual(m5) f 21 29 \n", + "2 3 audi a4 2.0 2008 4 manual(m6) f 20 31 \n", + "3 4 audi a4 2.0 2008 4 auto(av) f 21 30 \n", + "4 5 audi a4 2.8 1999 6 auto(l5) f 16 26 \n", + "\n", + " fl class \n", + "0 p compact \n", + "1 p compact \n", + "2 p compact \n", + "3 p compact \n", + "4 p compact " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv\")\n", + "print(df.shape)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "79bc52ff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ggplot(df, aes(\"drv\", \"hwy\")) + stat_summary()" + ] + }, + { + "cell_type": "markdown", + "id": "90568a62", + "metadata": {}, + "source": [ + "### 1. `geom` parameter" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b0984209", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ggplot(df, aes(\"cty\", \"hwy\")) + stat_summary(geom='smooth')" + ] + }, + { + "cell_type": "markdown", + "id": "c575a3aa", + "metadata": {}, + "source": [ + "### 2. `fun` parameter" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "9a57e5f7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ggplot(df, aes(\"drv\", \"hwy\")) + stat_summary(geom='bar', fun='count')" + ] + }, + { + "cell_type": "markdown", + "id": "87d33124", + "metadata": {}, + "source": [ + "### 3. `quantiles` parameter" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2d291d7e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ggplot(df, aes(\"drv\", \"hwy\")) + stat_summary(fun_min='lq', fun_max='uq', quantiles=[.45, .5, .55])" + ] + }, + { + "cell_type": "markdown", + "id": "9c85639e", + "metadata": {}, + "source": [ + "### 4. Custom Calculations in Aesthetics" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8f98aa25", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ggplot(df, aes(\"drv\", \"hwy\")) + stat_summary(aes(lower='..lq..', middle='..mq..', upper='..uq..'), geom='boxplot')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 0a1d636ef65ca0670ee49aeefcd4d651931dbd57 Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Thu, 29 Jun 2023 19:25:32 +0200 Subject: [PATCH 24/25] Mention stat_summary() in the future_changes. --- future_changes.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/future_changes.md b/future_changes.md index 4a6bf9212c4..512a05f4980 100644 --- a/future_changes.md +++ b/future_changes.md @@ -2,6 +2,11 @@ ### Added +- New layer `stat_summary()`. + + See: [example notebook](https://nbviewer.org/github/JetBrains/lets-plot/blob/master/docs/f-23c/stat_summary.ipynb). + + - Tooltips for `geom_step()`. See: [example notebook](https://nbviewer.org/github/JetBrains/lets-plot/blob/master/docs/f-23c/geom_step_tooltips.ipynb). From a392ba1aa66ead2ffd9b571faacf02863fb9c3ec Mon Sep 17 00:00:00 2001 From: Artem Smirnov Date: Fri, 30 Jun 2023 15:27:29 +0200 Subject: [PATCH 25/25] Refactor StatContext. 
--- .../jetbrains/datalore/plot/base/StatContext.kt | 12 +++--------- .../datalore/plot/base/stat/SimpleStatContext.kt | 7 +++++-- .../jetbrains/datalore/plot/base/stat/SummaryStat.kt | 4 ++-- .../datalore/plot/base/stat/BaseStatTest.kt | 2 +- .../datalore/plot/base/stat/BoxplotStatTest.kt | 2 +- .../jvmTest/kotlin/plot/base/stat/Bin2dStatTest.kt | 2 +- .../src/jvmTest/kotlin/plot/base/stat/BinStatTest.kt | 2 +- .../jvmTest/kotlin/plot/base/stat/DensityStatTest.kt | 2 +- .../plot/builder/assemble/GeomLayerBuilder.kt | 3 ++- .../plot/server/config/ConfiguredStatContext.kt | 7 +++---- .../plot/server/config/PlotConfigServerSide.kt | 4 ++-- .../datalore/plotDemo/model/stat/BinDemo.kt | 2 +- 12 files changed, 23 insertions(+), 26 deletions(-) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt index e6478ee16f6..005edbdd2a8 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/StatContext.kt @@ -12,7 +12,7 @@ interface StatContext { fun overallYRange(): DoubleSpan? 
- fun getMapping(): Map, DataFrame.Variable> = throw IllegalStateException("Not implemented") + fun mappedStatVariables(): List fun getFlipped(): StatContext { return Flipped(this) @@ -27,14 +27,8 @@ interface StatContext { return orig.overallXRange() } - override fun getMapping(): Map, DataFrame.Variable> { - return orig.getMapping().map { (aes, variable) -> - when (aes) { - Aes.X -> Aes.Y to variable - Aes.Y -> Aes.X to variable - else -> aes to variable - } - }.toMap() + override fun mappedStatVariables(): List { + return orig.mappedStatVariables() } override fun getFlipped(): StatContext { diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SimpleStatContext.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SimpleStatContext.kt index 86b2d1a338e..96ff2e9c321 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SimpleStatContext.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SimpleStatContext.kt @@ -6,12 +6,11 @@ package jetbrains.datalore.plot.base.stat import jetbrains.datalore.base.interval.DoubleSpan -import jetbrains.datalore.plot.base.Aes import jetbrains.datalore.plot.base.DataFrame import jetbrains.datalore.plot.base.StatContext import jetbrains.datalore.plot.base.data.TransformVar -class SimpleStatContext(private val myDataFrame: DataFrame) : +class SimpleStatContext(private val myDataFrame: DataFrame, private val mappedStatVariables: List) : StatContext { override fun overallXRange(): DoubleSpan? { @@ -21,4 +20,8 @@ class SimpleStatContext(private val myDataFrame: DataFrame) : override fun overallYRange(): DoubleSpan? 
{ return myDataFrame.range(TransformVar.Y) } + + override fun mappedStatVariables(): List { + return mappedStatVariables + } } diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt index 103bba82409..58003ce7c1b 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/stat/SummaryStat.kt @@ -64,8 +64,8 @@ class SummaryStat( val statY = ArrayList() val statYMin = ArrayList() val statYMax = ArrayList() - val statAggValues: Map> = statCtx.getMapping().values - .filter(DataFrame.Variable::isStat).associateWith { mutableListOf() } + val statAggValues: Map> = statCtx.mappedStatVariables() + .associateWith { mutableListOf() } for ((x, bin) in binnedData) { val sortedBin = Ordering.natural().sortedCopy(bin) statX.add(x) diff --git a/plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/BaseStatTest.kt b/plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/BaseStatTest.kt index cf405cc7687..60e4f3bca43 100644 --- a/plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/BaseStatTest.kt +++ b/plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/BaseStatTest.kt @@ -15,7 +15,7 @@ import kotlin.test.assertTrue open class BaseStatTest { protected fun statContext(d: DataFrame): StatContext { - return SimpleStatContext(d) + return SimpleStatContext(d, emptyList()) } protected fun dataFrame(dataMap: Map>): DataFrame { diff --git a/plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/BoxplotStatTest.kt b/plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/BoxplotStatTest.kt index cc8606d2925..495761f2051 100644 --- 
a/plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/BoxplotStatTest.kt +++ b/plot-base-portable/src/commonTest/kotlin/jetbrains/datalore/plot/base/stat/BoxplotStatTest.kt @@ -16,7 +16,7 @@ import kotlin.test.assertTrue class BoxplotStatTest { private fun statContext(d: DataFrame): StatContext { - return SimpleStatContext(d) + return SimpleStatContext(d, emptyList()) } private fun df(m: Map>): DataFrame { diff --git a/plot-base-portable/src/jvmTest/kotlin/plot/base/stat/Bin2dStatTest.kt b/plot-base-portable/src/jvmTest/kotlin/plot/base/stat/Bin2dStatTest.kt index 97a8666af24..c502dd7f25a 100644 --- a/plot-base-portable/src/jvmTest/kotlin/plot/base/stat/Bin2dStatTest.kt +++ b/plot-base-portable/src/jvmTest/kotlin/plot/base/stat/Bin2dStatTest.kt @@ -158,7 +158,7 @@ class Bin2dStatTest { null, drop = drop ) - val statCtx = SimpleStatContext(df) + val statCtx = SimpleStatContext(df, emptyList()) val statDf = stat.apply(df, statCtx) val expectedSize = if (drop) { diff --git a/plot-base-portable/src/jvmTest/kotlin/plot/base/stat/BinStatTest.kt b/plot-base-portable/src/jvmTest/kotlin/plot/base/stat/BinStatTest.kt index dc3a8f77c84..7564d53ef07 100644 --- a/plot-base-portable/src/jvmTest/kotlin/plot/base/stat/BinStatTest.kt +++ b/plot-base-portable/src/jvmTest/kotlin/plot/base/stat/BinStatTest.kt @@ -22,7 +22,7 @@ class BinStatTest { BinStat.XPosKind.NONE, 0.0 ) - val statDf = stat.apply(df, SimpleStatContext(df)) + val statDf = stat.apply(df, SimpleStatContext(df, emptyList())) DataFrameAssert.assertHasVars(statDf, listOf(Stats.X, Stats.COUNT, Stats.DENSITY), binCount) return statDf } diff --git a/plot-base-portable/src/jvmTest/kotlin/plot/base/stat/DensityStatTest.kt b/plot-base-portable/src/jvmTest/kotlin/plot/base/stat/DensityStatTest.kt index 75d42c8801d..37c9730bc17 100644 --- a/plot-base-portable/src/jvmTest/kotlin/plot/base/stat/DensityStatTest.kt +++ b/plot-base-portable/src/jvmTest/kotlin/plot/base/stat/DensityStatTest.kt @@ -20,7 +20,7 @@ 
import kotlin.test.assertTrue class DensityStatTest { private fun statContext(d: DataFrame): StatContext { - return SimpleStatContext(d) + return SimpleStatContext(d, emptyList()) } private fun generateNormalDatapointsWithFixedEnds( diff --git a/plot-builder-portable/src/commonMain/kotlin/jetbrains/datalore/plot/builder/assemble/GeomLayerBuilder.kt b/plot-builder-portable/src/commonMain/kotlin/jetbrains/datalore/plot/builder/assemble/GeomLayerBuilder.kt index 6a60fbd2614..044dd316f90 100644 --- a/plot-builder-portable/src/commonMain/kotlin/jetbrains/datalore/plot/builder/assemble/GeomLayerBuilder.kt +++ b/plot-builder-portable/src/commonMain/kotlin/jetbrains/datalore/plot/builder/assemble/GeomLayerBuilder.kt @@ -371,7 +371,8 @@ class GeomLayerBuilder( when (builder.stat) { Stats.IDENTITY -> transformedData else -> { - val statCtx = SimpleStatContext(transformedData) + val mappedStatVariables = builder.myBindings.map(VarBinding::variable).filter(DataFrame.Variable::isStat) + val statCtx = SimpleStatContext(transformedData, mappedStatVariables) val groupingVariables = DataProcessing.defaultGroupingVariables( data, builder.myBindings, diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/ConfiguredStatContext.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/ConfiguredStatContext.kt index 3be3d38d928..a23c6b4067b 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/ConfiguredStatContext.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/ConfiguredStatContext.kt @@ -9,13 +9,12 @@ import jetbrains.datalore.base.interval.DoubleSpan import jetbrains.datalore.plot.base.* import jetbrains.datalore.plot.base.data.DataFrameUtil import jetbrains.datalore.plot.base.scale.ScaleUtil -import jetbrains.datalore.plot.builder.VarBinding import jetbrains.datalore.plot.common.data.SeriesUtil internal class ConfiguredStatContext( 
private val dataFrames: List, private val transformByAes: Map, Transform>, - private val varBindings: List + private val mappedStatVariables: List ) : StatContext { private fun overallRange(variable: DataFrame.Variable, dataFrames: List): DoubleSpan? { @@ -36,8 +35,8 @@ internal class ConfiguredStatContext( return overallRange(Aes.Y) } - override fun getMapping(): Map, DataFrame.Variable> { - return varBindings.associate { it.aes to it.variable } + override fun mappedStatVariables(): List { + return mappedStatVariables } private fun overallRange(aes: Aes<*>): DoubleSpan? { diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/PlotConfigServerSide.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/PlotConfigServerSide.kt index f61d49aa446..6e4403f4ddc 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/PlotConfigServerSide.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/PlotConfigServerSide.kt @@ -19,7 +19,6 @@ import jetbrains.datalore.plot.config.* import jetbrains.datalore.plot.config.Option.Meta.DATA_META import jetbrains.datalore.plot.config.Option.Meta.GeoDataFrame.GDF import jetbrains.datalore.plot.config.Option.Meta.GeoDataFrame.GEOMETRY -import jetbrains.datalore.plot.server.config.transform.bistro.util.layer open class PlotConfigServerSide( opts: Map @@ -121,10 +120,11 @@ open class PlotConfigServerSide( } return layerConfigs.mapIndexed { layerIndex, layerConfig -> + val mappedStatVariables = layerConfig.varBindings.map(VarBinding::variable).filter(DataFrame.Variable::isStat) applyLayerStatistic( layerConfig, layerData = dataByLayer[layerIndex], - ConfiguredStatContext(dataByLayer, transformByAes, layerConfig.varBindings), + ConfiguredStatContext(dataByLayer, transformByAes, mappedStatVariables), ) { message -> layerMessageHandler(layerIndex, message) } diff --git 
a/plot-demo/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/stat/BinDemo.kt b/plot-demo/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/stat/BinDemo.kt index 410de1e8b77..da94741fcec 100644 --- a/plot-demo/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/stat/BinDemo.kt +++ b/plot-demo/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/stat/BinDemo.kt @@ -92,7 +92,7 @@ open class BinDemo : SimpleDemoBase() { val stat = Stats.bin( binCount = binCount, ) - data = stat.apply(data, SimpleStatContext(data)) + data = stat.apply(data, SimpleStatContext(data, emptyList())) val statX = data.getNumeric(Stats.X) val statY = data.getNumeric(Stats.COUNT)