Skip to content

Commit

Permalink
ECDF Stat (#832)
Browse files Browse the repository at this point in the history
* Basic version of the eCDF stat.

* Refactor ECDFStat.

* Add an interpolation to ECDF.

* Add autotests for the ECDFStat.

* Fix ECDFStat (the case when a group contains only NaNs).

* Add 'pad' parameter to the StepGeom (for the eCDF).

* Add stat_ecdf() function. Tiny fixes in other stat functions.

* Specify limits for the stat_ecdf() function.

* Add docstrings for stat_ecdf().

* Small fixes in StepGeom::getPads because of sampling.

* Move scaling of the Y axis inside the step geometry.

* Add demo notebook for the stat_ecdf().

* Mention stat_ecdf() in the future_changes.md file.

* Small refactor in the StepGeom.

* Add pads to the ECDFStat.

* Add one more demo case for the eCDF.
  • Loading branch information
ASmirnov-HORIS committed Aug 2, 2023
1 parent 8689b7c commit 7ec91aa
Show file tree
Hide file tree
Showing 12 changed files with 884 additions and 7 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Copyright (c) 2023. JetBrains s.r.o.
* Use of this source code is governed by the MIT license that can be found in the LICENSE file.
*/

package demo.plot.common.model.plotConfig

import demo.plot.common.data.Iris
import demoAndTestShared.parsePlotSpec

/**
 * Demo plot specifications that combine the 'step' geom with the 'ecdf' stat.
 *
 * Every specification is parsed from an inline spec string and gets [Iris.df]
 * attached under the "data" key.
 */
class ECDF {
    /** Returns all demo specifications, in the order they are meant to be shown. */
    fun plotSpecList(): List<MutableMap<String, Any>> = listOf(
        basic(),
        withInterpolation(),
        withGrouping(),
        withOrientationChange(),
    )

    // NOTE: the lines inside the raw strings below share one indentation level on purpose,
    // so that trimIndent() produces the same text regardless of how this file is formatted.

    /** Default 'ecdf' stat over the 'sepal length (cm)' column. */
    private fun basic(): MutableMap<String, Any> = irisPlot(
        """
        {
        'kind': 'plot',
        'mapping': {
        'x': 'sepal length (cm)'
        },
        'ggtitle': {
        'text': 'Basic demo'
        },
        'layers': [
        {
        'geom': 'step',
        'stat': 'ecdf',
        'pad': true
        }
        ]
        }
        """.trimIndent()
    )

    /** Same as the basic demo, but with the 'n' option of the layer set to 10. */
    private fun withInterpolation(): MutableMap<String, Any> = irisPlot(
        """
        {
        'kind': 'plot',
        'mapping': {
        'x': 'sepal length (cm)'
        },
        'ggtitle': {
        'text': 'Interpolation'
        },
        'layers': [
        {
        'geom': 'step',
        'stat': 'ecdf',
        'n': 10,
        'pad': true
        }
        ]
        }
        """.trimIndent()
    )

    /** Adds a 'color' mapping on the 'target' column. */
    private fun withGrouping(): MutableMap<String, Any> = irisPlot(
        """
        {
        'kind': 'plot',
        'mapping': {
        'x': 'sepal length (cm)',
        'color': 'target'
        },
        'ggtitle': {
        'text': 'With additional grouping'
        },
        'layers': [
        {
        'geom': 'step',
        'stat': 'ecdf',
        'pad': true
        }
        ]
        }
        """.trimIndent()
    )

    /** Maps the column to 'y' and sets the layer 'orientation' to 'y'. */
    private fun withOrientationChange(): MutableMap<String, Any> = irisPlot(
        """
        {
        'kind': 'plot',
        'mapping': {
        'y': 'sepal length (cm)'
        },
        'ggtitle': {
        'text': 'Orientation changed'
        },
        'layers': [
        {
        'geom': 'step',
        'stat': 'ecdf',
        'pad': true,
        'orientation': 'y'
        }
        ]
        }
        """.trimIndent()
    )

    /** Parses [spec] into a fresh mutable map and stores [Iris.df] under the "data" key. */
    private fun irisPlot(spec: String): MutableMap<String, Any> {
        val result = HashMap(parsePlotSpec(spec))
        result["data"] = Iris.df
        return result
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*
* Copyright (c) 2023. JetBrains s.r.o.
* Use of this source code is governed by the MIT license that can be found in the LICENSE file.
*/

package demo.plot.batik.plotConfig

import demo.plot.common.model.plotConfig.ECDF
import demo.common.batik.demoUtils.PlotSpecsDemoWindowBatik

/**
 * Demo entry point: opens a Batik window titled "ECDF plot" that shows
 * every plot specification returned by [ECDF.plotSpecList].
 */
fun main() {
    val specs = ECDF().plotSpecList()
    PlotSpecsDemoWindowBatik("ECDF plot", specs).open()
}
369 changes: 369 additions & 0 deletions docs/f-23c/stat_ecdf.ipynb

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions future_changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
See: [example notebook](https://nbviewer.org/github/JetBrains/lets-plot/blob/master/docs/f-23c/stat_summary_bin.ipynb).


- New layer `stat_ecdf()`.

See: [example notebook](https://nbviewer.org/github/JetBrains/lets-plot/blob/master/docs/f-23c/stat_ecdf.ipynb).


- New layer `geom_function()`.

See: [example notebook](https://nbviewer.org/github/JetBrains/lets-plot/blob/master/docs/f-23c/geom_function.ipynb).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

package org.jetbrains.letsPlot.core.plot.base.geom

import org.jetbrains.letsPlot.commons.geometry.DoubleRectangle
import org.jetbrains.letsPlot.commons.geometry.DoubleVector
import org.jetbrains.letsPlot.core.commons.data.SeriesUtil
import org.jetbrains.letsPlot.core.plot.base.*
import org.jetbrains.letsPlot.core.plot.base.geom.util.GeomUtil
import org.jetbrains.letsPlot.core.plot.base.geom.util.LinesHelper
Expand All @@ -13,6 +16,7 @@ import org.jetbrains.letsPlot.core.plot.base.render.SvgRoot

class StepGeom : LineGeom() {
private var myDirection = DEF_DIRECTION
var padded = DEF_PADDED

fun setDirection(dir: String) {
myDirection = Direction.toDirection(dir)
Expand All @@ -25,10 +29,10 @@ class StepGeom : LineGeom() {
coord: CoordinateSystem,
ctx: GeomContext
) {
val dataPoints = dataPoints(aesthetics)
val dataPoints = GeomUtil.ordered_X(aesthetics.dataPoints())
val linesHelper = LinesHelper(pos, coord, ctx)

val pathDataList = linesHelper.createPathDataByGroup(dataPoints, GeomUtil.TO_LOCATION_X_Y)
val pathDataList = linesHelper.createPathDataByGroup(dataPoints, toLocationFor(overallAesBounds(ctx)))
val linePaths = linesHelper.createSteps(pathDataList, myDirection)

root.appendNodes(linePaths)
Expand All @@ -37,6 +41,20 @@ class StepGeom : LineGeom() {
targetCollectorHelper.addPaths(pathDataList)
}

/**
 * Creates the function that converts a data point into its (x, y) location.
 *
 * A point with a finite x and a finite y is returned as is. A point whose y is not finite
 * yields `null`. When [padded] is on, an x equal to negative / positive infinity is replaced by
 * the left / right edge of [viewPort]; any other non-finite x yields `null`.
 *
 * [padded] is read each time the returned function is invoked, not when it is created.
 */
private fun toLocationFor(viewPort: DoubleRectangle): (DataPointAesthetics) -> DoubleVector? {
    return fun(p: DataPointAesthetics): DoubleVector? {
        val x = p.x()
        val y = p.y()

        // No usable y: the point can't be placed, padded or not.
        if (!SeriesUtil.isFinite(y)) return null

        // Regular point. The '!!' follow the isFinite() checks, as in the rest of this geom
        // (presumably isFinite() is false for null -- confirm in SeriesUtil).
        if (SeriesUtil.isFinite(x)) return DoubleVector(x!!, y!!)

        // From here on x is not finite; only the padding points are allowed through.
        if (!padded) return null
        if (x == Double.NEGATIVE_INFINITY) return DoubleVector(viewPort.left, y!!)
        if (x == Double.POSITIVE_INFINITY) return DoubleVector(viewPort.right, y!!)
        return null
    }
}

enum class Direction {
HV, VH;

Expand All @@ -55,6 +73,7 @@ class StepGeom : LineGeom() {
// Defaults and flags of the step geom.
companion object {
// default
// Initial step direction; it stays in effect until setDirection() is called.
val DEF_DIRECTION = Direction.HV
// Initial value of 'padded': when false, points with an infinite x are not
// moved to the view-port edges and are dropped instead.
const val DEF_PADDED = false

// Takes the same value as the corresponding flag of LineGeom.
const val HANDLES_GROUPS = LineGeom.HANDLES_GROUPS
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright (c) 2023. JetBrains s.r.o.
* Use of this source code is governed by the MIT license that can be found in the LICENSE file.
*/

package org.jetbrains.letsPlot.core.plot.base.stat

import org.jetbrains.letsPlot.core.plot.base.Aes
import org.jetbrains.letsPlot.core.plot.base.DataFrame
import org.jetbrains.letsPlot.core.plot.base.StatContext
import org.jetbrains.letsPlot.core.plot.base.data.TransformVar

/**
 * Empirical cumulative distribution function (eCDF) stat.
 *
 * For every output x the stat produces y = (number of finite input values <= x) / (number of finite
 * input values).
 *
 * @param n When `null`, the eCDF is evaluated at each distinct finite input value (in order of first
 *          appearance). Otherwise it is evaluated at [n] evenly spaced points between the minimum and
 *          the maximum of the finite input values.
 * @param padded When `true`, two extra points are appended after the regular ones:
 *          (-Infinity, 0.0) and (+Infinity, 1.0).
 */
class ECDFStat(
    private val n: Int?,
    private val padded: Boolean
) : BaseStat(DEF_MAPPING) {

    override fun consumes(): List<Aes<*>> {
        return listOf(Aes.X)
    }

    override fun apply(data: DataFrame, statCtx: StatContext, messageConsumer: (s: String) -> Unit): DataFrame {
        if (!hasRequiredValues(data, Aes.X)) {
            return withEmptyStatValues()
        }

        val statData = buildStat(data.getNumeric(TransformVar.X))

        return DataFrame.Builder()
            .putNumeric(Stats.X, statData.getValue(Stats.X))
            .putNumeric(Stats.Y, statData.getValue(Stats.Y))
            .build()
    }

    /**
     * Computes the stat series for the given x values.
     *
     * Nulls and non-finite values are ignored. If nothing is left, both series are empty
     * (no padding points are produced in this case either).
     */
    private fun buildStat(
        xs: List<Double?>
    ): Map<DataFrame.Variable, List<Double>> {
        val xValues = xs.filterNotNull().filter { it.isFinite() }
        if (xValues.isEmpty()) {
            return mapOf(
                Stats.X to emptyList<Double>(),
                Stats.Y to emptyList<Double>(),
            )
        }

        // Sort once, so that each eCDF evaluation is a binary search rather than a full scan.
        val sortedValues = xValues.sorted()
        val ecdf: (Double) -> Double = { t -> countNotGreater(sortedValues, t).toDouble() / sortedValues.size }

        val statX = if (n == null) {
            xValues.distinct()
        } else {
            linspace(xValues.min(), xValues.max(), n)
        }
        val statY = statX.map(ecdf)

        // The padding points go after the regular ones.
        val padX = if (padded) listOf(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY) else emptyList()
        val padY = if (padded) listOf(0.0, 1.0) else emptyList()

        return mapOf(
            Stats.X to statX + padX,
            Stats.Y to statY + padY,
        )
    }

    /**
     * Returns the number of elements of [sorted] (ascending) that are `<= t`.
     */
    private fun countNotGreater(sorted: List<Double>, t: Double): Int {
        // Invariant: every element before 'lo' is <= t, every element from 'hi' on is > t.
        var lo = 0
        var hi = sorted.size
        while (lo < hi) {
            val mid = (lo + hi) ushr 1
            if (sorted[mid] <= t) {
                lo = mid + 1
            } else {
                hi = mid
            }
        }
        return lo
    }

    /**
     * Returns [num] evenly spaced values from [start] to [stop], both ends included.
     *
     * Returns an empty list for `num <= 0` and just [start] for `num == 1`.
     */
    private fun linspace(start: Double, stop: Double, num: Int): List<Double> {
        if (num <= 0) return emptyList()
        if (num == 1) return listOf(start)
        val step = (stop - start) / (num - 1)
        val lastIndex = num - 1
        // The last sample is taken as 'stop' itself: 'start + lastIndex * step' may round to a value
        // slightly below 'stop', and the eCDF evaluated there would miss the largest observations.
        return List(num) { i -> if (i == lastIndex) stop else start + i * step }
    }

    companion object {
        const val DEF_PADDED = true

        private val DEF_MAPPING: Map<Aes<*>, DataFrame.Variable> = mapOf(
            Aes.X to Stats.X,
            Aes.Y to Stats.Y
        )
    }
}
Loading

0 comments on commit 7ec91aa

Please sign in to comment.