Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #936: Way to calculate proportion of points with same coordinate #940

Merged
merged 7 commits into from
Nov 22, 2023
550 changes: 550 additions & 0 deletions docs/f-23f/new_stat_count_vars.ipynb

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions future_changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

### Added

- New variables computed by `'count'` and `'count2d'` statistics: `'..sumprop..'`, `'..sumpct..'`.

See: [example notebook](https://nbviewer.jupyter.org/github/JetBrains/lets-plot/blob/master/docs/f-23f/new_stat_count_vars.ipynb).


### Changed

### Fixed
Expand All @@ -11,3 +16,4 @@
- geom_livemap: freeze at zoom 10 [[#892](https://github.com/JetBrains/lets-plot/issues/892)].
- Enormous CPU / Time / Memory consumption on some data [[#932](https://github.com/JetBrains/lets-plot/issues/932)].
- scale_x_log2(), scale_y_log2() as a shortcut for trans='log2' [[#922](https://github.com/JetBrains/lets-plot/issues/922)].
- How to calculate proportion of points with same coordinate [[#936](https://github.com/JetBrains/lets-plot/issues/936)].
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,19 @@ abstract class AbstractCountStat(
// compute total location weights on the whole data
val summary = groupAndSum(locations, weights)
val totalWeights = locations.map { summary[it]!! }
val totalWeightsSum = summary.values.sum()

val prop = weights.zip(totalWeights).map { (groupWeight, totalWeight) -> groupWeight / totalWeight }
val propPercent = prop.map { it * 100 }
val sumProp = totalWeights.map { it / totalWeightsSum }
val sumPropPercent = sumProp.map { it * 100 }

val statDf = dataAfterStat.builder()
statDf.putNumeric(Stats.SUM, totalWeights)
statDf.putNumeric(Stats.PROP, prop)
statDf.putNumeric(Stats.PROPPCT, propPercent)
statDf.putNumeric(Stats.SUMPROP, sumProp)
statDf.putNumeric(Stats.SUMPCT, sumPropPercent)
return statDf.build()
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ object Stats {
val SUM = DataFrame.Variable("..sum..", STAT, "sum")
val PROP = DataFrame.Variable("..prop..", STAT, "prop")
val PROPPCT = DataFrame.Variable("..proppct..", STAT, "proppct")
val SUMPROP = DataFrame.Variable("..sumprop..", STAT, "sumprop")
val SUMPCT = DataFrame.Variable("..sumpct..", STAT, "sumpct")

val SCALED = DataFrame.Variable("..scaled..", STAT, "scaled")

Expand Down Expand Up @@ -66,6 +68,8 @@ object Stats {
SUM,
PROP,
PROPPCT,
SUMPROP,
SUMPCT,
SCALED,
GROUP,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@ internal object TooltipFormatting {

/**
 * Creates a tooltip value formatter for the given stat [variable].
 *
 * - [Stats.PROP] and [Stats.SUMPROP] are formatted with the `".2f"` pattern.
 * - [Stats.PROPPCT] and [Stats.SUMPCT] are formatted with the `"{.1f} %"` pattern.
 * - Any other variable falls back to the value's `toString()`.
 *
 * @param variable the variable whose values will be formatted.
 * @return a function converting a value of [variable] to its tooltip text.
 */
fun createFormatter(variable: DataFrame.Variable): (Any) -> String {
    return when (variable) {
        // Each variable is listed exactly once: proportions share one pattern,
        // percentages share another.
        Stats.PROP,
        Stats.SUMPROP -> StringFormat.forOneArg(".2f", formatFor = variable.name)::format

        Stats.PROPPCT,
        Stats.SUMPCT -> StringFormat.forOneArg("{.1f} %", formatFor = variable.name)::format

        else -> { value -> value.toString() }
    }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class Count2dStatTest {
val statDf = dataProcessor.applyStat(Stats.count2d())

assertThat(statDf.variables())
.containsExactlyInAnyOrder(x, y, Stats.X, Stats.Y, Stats.SUM, Stats.COUNT, Stats.PROP, Stats.PROPPCT)
.containsExactlyInAnyOrder(x, y, Stats.X, Stats.Y, Stats.SUM, Stats.COUNT, Stats.PROP, Stats.PROPPCT, Stats.SUMPROP, Stats.SUMPCT)

assertThat(statDf.rowCount()).isZero()
}
Expand All @@ -34,14 +34,16 @@ class Count2dStatTest {
val statDf = dataProcessor.applyStat(Stats.count2d())

assertThat(statDf.variables())
.containsExactlyInAnyOrder(x, y, Stats.X, Stats.Y, Stats.SUM, Stats.COUNT, Stats.PROP, Stats.PROPPCT)
.containsExactlyInAnyOrder(x, y, Stats.X, Stats.Y, Stats.SUM, Stats.COUNT, Stats.PROP, Stats.PROPPCT, Stats.SUMPROP, Stats.SUMPCT)

assertThat(statDf[Stats.X]).containsExactly("0")
assertThat(statDf[Stats.Y]).containsExactly("0")
assertThat(statDf[Stats.SUM]).containsExactly(1.0)
assertThat(statDf[Stats.COUNT]).containsExactly(1.0)
assertThat(statDf[Stats.PROP]).containsExactly(1.0)
assertThat(statDf[Stats.PROPPCT]).containsExactly(100.0)
assertThat(statDf[Stats.SUMPROP]).containsExactly(1.0)
assertThat(statDf[Stats.SUMPCT]).containsExactly(100.0)
}

@Test
Expand All @@ -55,6 +57,7 @@ class Count2dStatTest {
assertThat(statDf[Stats.SUM]).containsExactly(1.0, 1.0)
assertThat(statDf[Stats.COUNT]).containsExactly(1.0, 1.0)
assertThat(statDf[Stats.PROP]).containsExactly(1.0, 1.0)
assertThat(statDf[Stats.SUMPROP]).containsExactly(0.5, 0.5)
}

@Test
Expand All @@ -68,6 +71,7 @@ class Count2dStatTest {
assertThat(statDf[Stats.SUM]).containsExactly(2.0)
assertThat(statDf[Stats.COUNT]).containsExactly(2.0)
assertThat(statDf[Stats.PROP]).containsExactly(1.0)
assertThat(statDf[Stats.SUMPROP]).containsExactly(1.0)
}

@Test
Expand All @@ -82,6 +86,7 @@ class Count2dStatTest {
assertThat(statDf[Stats.SUM]).containsExactly(4.0)
assertThat(statDf[Stats.COUNT]).containsExactly(4.0)
assertThat(statDf[Stats.PROP]).containsExactly(1.0)
assertThat(statDf[Stats.SUMPROP]).containsExactly(1.0)
}

@Test
Expand All @@ -98,6 +103,7 @@ class Count2dStatTest {
assertThat(statDf[Stats.SUM]).containsExactly(4.0, 4.0)
assertThat(statDf[Stats.COUNT]).containsExactly(1.0, 3.0)
assertThat(statDf[Stats.PROP]).containsExactly(0.25, 0.75)
assertThat(statDf[Stats.SUMPROP]).containsExactly(1.0, 1.0)
}

@Test
Expand All @@ -113,13 +119,15 @@ class Count2dStatTest {
assertThat(statDf[Stats.SUM]).containsExactly(4.0)
assertThat(statDf[Stats.COUNT]).containsExactly(4.0)
assertThat(statDf[Stats.PROP]).containsExactly(1.0)
assertThat(statDf[Stats.SUMPROP]).containsExactly(1.0)
}

dataProcessor.groupingVarName = "g"
dataProcessor.applyStat(Stats.count2d()).let { statDf ->
assertThat(statDf[Stats.SUM]).containsExactly(4.0, 4.0)
assertThat(statDf[Stats.COUNT]).containsExactly(1.0, 3.0)
assertThat(statDf[Stats.PROP]).containsExactly(0.25, 0.75)
assertThat(statDf[Stats.SUMPROP]).containsExactly(1.0, 1.0)
}
}
}
4 changes: 4 additions & 0 deletions python-package/lets_plot/plot/geom.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,8 @@ def geom_bar(mapping=None, *, data=None, stat=None, position=None, show_legend=N
- ..sum.. : total number of points with same x-axis coordinate.
- ..prop.. : groupwise proportion.
- ..proppct.. : groupwise proportion in percent.
- ..sumprop.. : total proportion, i.e. the ratio of the number of points with the same x-axis coordinate to the total number of points in the data.
- ..sumpct.. : total proportion in percent.

`geom_bar()` understands the following aesthetics mappings:

Expand Down Expand Up @@ -6172,6 +6174,8 @@ def geom_pie(mapping=None, *, data=None, stat=None, position=None, show_legend=N
- ..sum.. : total number of points with same (x,y) coordinate.
- ..prop.. : groupwise proportion.
- ..proppct.. : groupwise proportion in percent.
- ..sumprop.. : total proportion, i.e. the ratio of the number of points with the same (x,y) coordinate to the total number of points in the data.
- ..sumpct.. : total proportion in percent.

`geom_pie()` understands the following aesthetics mappings:

Expand Down