Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove map_id, add sym_x and sym_y #148

Merged
merged 11 commits into from
Jun 15, 2020
Prev Previous commit
Next Next commit
Remove map_id from implementation, move data and map join implementation from SpecChange to LayerConfig
  • Loading branch information
IKupriyanov-HORIS committed Jun 9, 2020
commit 48278c3e1e1072df6bfc7934c6806b985420bfa3
177 changes: 166 additions & 11 deletions docs/examples/jupyter-notebooks-dev/geodataframe_and_geoms.ipynb

Large diffs are not rendered by default.

807 changes: 735 additions & 72 deletions docs/examples/jupyter-notebooks-dev/geopandas_GeoDataFrame.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ object DataFrameUtil {
}

fun variables(df: DataFrame): Map<String, DataFrame.Variable> {
return df.variables().associateBy { it.name }
return df.variables().associateBy(DataFrame.Variable::name)
}

fun appendReplace(df0: DataFrame, df1: DataFrame): DataFrame {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class GeomLayerBuilder {
private lateinit var myPosProvider: PosProvider
private lateinit var myGeomProvider: GeomProvider
private var myGroupingVarName: String? = null
private var myPathIdVarName: String? = null
private val myScaleProviderByAes = HashMap<Aes<*>, ScaleProvider<*>>()

private var myDataPreprocessor: ((DataFrame) -> DataFrame)? = null
Expand Down Expand Up @@ -74,6 +75,11 @@ class GeomLayerBuilder {
return this
}

/**
 * Stores the name of the path id variable in this builder.
 *
 * @param v name of the variable to use as the path id.
 * @return this builder, to allow call chaining.
 */
fun pathIdVarName(v: String): GeomLayerBuilder = apply { myPathIdVarName = v }

fun <T> addConstantAes(aes: Aes<T>, v: T): GeomLayerBuilder {
myConstantByAes.put(aes, v)
return this
Expand Down Expand Up @@ -148,7 +154,7 @@ class GeomLayerBuilder {
myPosProvider,
// handledAes(),
myGeomProvider.renders(),
GroupingContext(data, myBindings, myGroupingVarName, handlesGroups()).groupMapper,
GroupingContext(data, myBindings, myGroupingVarName, myPathIdVarName, handlesGroups()).groupMapper,
replacementBindings.values,
myConstantByAes,
dataAccess,
Expand Down Expand Up @@ -276,6 +282,7 @@ class GeomLayerBuilder {
transformedData,
builder.myBindings,
builder.myGroupingVarName,
builder.myPathIdVarName,
true
)
val dataAndGroupingContext = DataProcessing.buildStatData(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -304,8 +304,8 @@ object DataProcessing {
return inverseTransformedStatSeries
}

internal fun computeGroups(data: DataFrame, bindings: List<VarBinding>, groupingVar: Variable?): (Int) -> Int {
val groupingVariables = getGroupingVariables(data, bindings, groupingVar)
internal fun computeGroups(data: DataFrame, bindings: List<VarBinding>, groupingVar: Variable?, pathIdVar: Variable?): (Int) -> Int {
val groupingVariables = getGroupingVariables(data, bindings, groupingVar) + listOfNotNull(pathIdVar)

var currentGroups: List<Int>? = null
if (groupingVar != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@ class GroupingContext(
private val myData: DataFrame,
bindings: List<VarBinding>,
groupingVarName: String?,
pathIdVarName: String?,
private val myExpectMultiple: Boolean
) {

private val myBindings: List<VarBinding>
internal val optionalGroupingVar: Variable?
private val myBindings: List<VarBinding> = ArrayList(bindings)
internal val optionalGroupingVar: Variable? = findOptionalVariable(myData, groupingVarName)
private val pathIdVar: Variable? = findOptionalVariable(myData, pathIdVarName)

private var myGroupSizeList: List<Int>? = null
private var myGroupMapper: ((Int) -> Int)? = null
Expand All @@ -33,11 +35,6 @@ class GroupingContext(
myGroupMapper!!(index)
}

init {
myBindings = ArrayList(bindings)
optionalGroupingVar = findOptionalVariable(myData, groupingVarName)
}

private fun computeGroups(): (Int) -> Int {
if (myData.has(Stats.GROUP)) {
val list = myData.getNumeric(Stats.GROUP)
Expand All @@ -54,15 +51,16 @@ class GroupingContext(
return DataProcessing.computeGroups(
myData,
myBindings,
optionalGroupingVar
optionalGroupingVar,
pathIdVar
)
}
return GroupUtil.SINGLE_GROUP
}

companion object {
internal fun withOrderedGroups(data: DataFrame, groupSizeList: List<Int>): GroupingContext {
val groupingContext = GroupingContext(data, emptyList(), null, false)
val groupingContext = GroupingContext(data, emptyList(), null, null, false)
groupingContext.myGroupSizeList = ArrayList(groupSizeList)
return groupingContext
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ object GeoPositionField {
// fixed columns in 'boundaries' or 'centroids' data frames
const val POINT_X = "lon"
const val POINT_X1 = "longitude"
const val POINT_X2 = "long"
const val POINT_Y = "lat"
const val POINT_Y1 = "latitude"

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,282 @@
/*
* Copyright (c) 2020. JetBrains s.r.o.
* Use of this source code is governed by the MIT license that can be found in the LICENSE file.
*/

package jetbrains.datalore.plot.config

import jetbrains.datalore.base.spatial.*
import jetbrains.datalore.base.typedGeometry.*
import jetbrains.datalore.plot.base.Aes
import jetbrains.datalore.plot.base.DataFrame
import jetbrains.datalore.plot.base.GeomKind
import jetbrains.datalore.plot.base.GeomKind.*
import jetbrains.datalore.plot.base.data.DataFrameUtil.variables
import jetbrains.datalore.plot.config.ConfigUtil.createAesMapping
import jetbrains.datalore.plot.config.ConfigUtil.createDataFrame
import jetbrains.datalore.plot.config.ConfigUtil.rightJoin
import jetbrains.datalore.plot.config.CoordinatesBuilder.Companion.createCoordinateBuilder
import jetbrains.datalore.plot.config.Option.Geom.Choropleth.GEO_POSITIONS
import jetbrains.datalore.plot.config.Option.Layer.MAP_JOIN
import jetbrains.datalore.plot.config.Option.Meta.DATA_META
import jetbrains.datalore.plot.config.Option.Meta.GeoDataFrame.GDF
import jetbrains.datalore.plot.config.Option.Meta.GeoDataFrame.GEOMETRY
import jetbrains.datalore.plot.config.Option.Meta.MAP_DATA_META
import jetbrains.datalore.plot.config.Option.PlotBase.DATA

/**
 * Joins layer data with coordinates extracted from the GeoJSON geometries of a GeoDataFrame
 * and prepares the aesthetic mappings for the resulting data frame.
 *
 * The GeoDataFrame is read either from the `map` option ([GEO_POSITIONS]) or from the layer
 * `data` option ([DATA]), depending on where the geometry meta-information is present.
 *
 * Supported combinations of options:
 *  - `map=gdf` - geometries only, rows are joined by a generated id column;
 *  - `data=data, map=gdf, map_join=(dataColumn, mapColumn)` - rows are joined by the given columns;
 *  - `data=gdf` - geometries are taken from the data itself, rows are joined by a generated id column.
 *
 * @param geomKind selects the [CoordinatesBuilder] used to convert geometries into coordinate columns.
 * @param data layer data frame.
 * @param layerOptions layer specification options.
 * @param mappingOptions aesthetic mapping options of the layer.
 * @throws IllegalStateException when the options do not describe a usable GeoDataFrame
 *         (missing geometry column, missing or malformed `map_join`, no matching geometries).
 */
class GeoConfig(
    geomKind: GeomKind,
    data: DataFrame,
    layerOptions: Map<*, *>,
    mappingOptions: Map<*, *>
) {
    /** Layer data right-joined with the coordinate columns built from the geometries. */
    val dataAndCoordinates: DataFrame

    /**
     * Mappings created from `mappingOptions` plus the automatic mappings of coordinate columns.
     * On a key conflict the automatic coordinate mapping replaces the user mapping.
     */
    val mappings: Map<Aes<*>, DataFrame.Variable>

    init {
        // Returns the GeoJSON strings stored in the geometry column of the GeoDataFrame
        // located under the given layer option (GEO_POSITIONS or DATA).
        fun getGeoJson(gdfLocation: String): List<String> {
            val geoColumn: String
            val geoDataFrame: Map<String, Any>
            when (gdfLocation) {
                GEO_POSITIONS -> {
                    geoDataFrame = layerOptions.getMap(GEO_POSITIONS) ?: error("require 'map' parameter")
                    geoColumn = layerOptions.getString(MAP_DATA_META, GDF, GEOMETRY) ?: error("Geometry column not set")
                }
                DATA -> {
                    geoDataFrame = layerOptions.getMap(DATA) ?: error("require 'data' parameter")
                    geoColumn = layerOptions.getString(DATA_META, GDF, GEOMETRY) ?: error("Geometry column not set")
                }
                else -> error("Unknown gdf location: $gdfLocation")
            }
            return geoDataFrame.getList(geoColumn)?.map { it as String }
                ?: error("$geoColumn not found in $gdfLocation")
        }

        val joinIds: List<Any>
        val dataJoinColumn: String
        val mapJoinColumn: String
        val geoJson: List<String>
        val dataFrame: DataFrame
        // Name of the generated id column used when no explicit join columns are given.
        val autoId = "__gdf_id__"

        when {
            // (aes(color='cyl'), data=data, map=gdf) - how to join without `map_join`?
            with(layerOptions) { has(GEO_POSITIONS) && !has(MAP_JOIN) && !data.isEmpty && mappingOptions.isNotEmpty() } -> {
                error(MAP_JOIN_REQUIRED_MESSAGE)
            }

            // (map=gdf) - simple geometry
            with(layerOptions) { has(GEO_POSITIONS) && !has(MAP_JOIN) && has(MAP_DATA_META, GDF, GEOMETRY) } -> {
                geoJson = getGeoJson(GEO_POSITIONS)

                dataJoinColumn = autoId
                mapJoinColumn = autoId
                // Each geometry gets its own index as the id.
                joinIds = geoJson.indices.map(Int::toString)
                dataFrame = DataFrame.Builder(data).put(DataFrame.Variable(dataJoinColumn), joinIds).build()
            }

            // (data=data, map=gdf, map_join=('id', 'city'))
            with(layerOptions) { has(GEO_POSITIONS) && has(MAP_DATA_META, GDF, GEOMETRY) && has(MAP_JOIN) } -> {
                geoJson = getGeoJson(GEO_POSITIONS)

                val mapJoin = layerOptions.getList(MAP_JOIN) ?: error("require map_join parameter")
                // Fail with a readable message instead of IndexOutOfBounds/ClassCast on malformed map_join.
                dataJoinColumn = mapJoin.getOrNull(0) as? String
                    ?: error("map_join should contain two column names: data column and map column")
                mapJoinColumn = mapJoin.getOrNull(1) as? String
                    ?: error("map_join should contain two column names: data column and map column")
                joinIds = layerOptions.getMap(GEO_POSITIONS)?.getList(mapJoinColumn)?.requireNoNulls()
                    ?: error("MapJoinColumn '$mapJoinColumn' is not found")
                dataFrame = data
            }

            // (data=gdf)
            with(layerOptions) { !has(GEO_POSITIONS) && has(DATA_META, GDF, GEOMETRY) } -> {
                geoJson = getGeoJson(DATA)

                dataJoinColumn = autoId
                mapJoinColumn = autoId
                // Each geometry gets its own index as the id.
                joinIds = geoJson.indices.map(Int::toString)
                dataFrame = DataFrame.Builder(data).put(DataFrame.Variable(dataJoinColumn), joinIds).build()
            }

            else -> error("GeoDataFrame not found in data or map")
        }

        val coordinatesBuilder = createCoordinateBuilder(geomKind)
            .append(geoJson)
            .setIdColumn(columnName = mapJoinColumn, values = joinIds)

        dataAndCoordinates = rightJoin(
            left = dataFrame,
            leftKey = dataJoinColumn,
            right = createDataFrame(coordinatesBuilder.build()),
            rightKey = mapJoinColumn
        )

        // Look the variables up once instead of rebuilding the name -> variable map per column.
        val joinedVariables = variables(dataAndCoordinates)
        val coordinatesAutoMapping = coordinatesBuilder.columns
            .filterKeys { coordName -> coordName in joinedVariables }
            .map { (coordName, aes) -> aes to joinedVariables.getValue(coordName) }
            .toMap()
        mappings = createAesMapping(dataAndCoordinates, mappingOptions) + coordinatesAutoMapping
    }

    companion object {
        const val MAP_JOIN_REQUIRED_MESSAGE = "map_join is required when both data and map parameters used"

        /** Returns true when the layer options carry GeoDataFrame geometry meta-information for `map` or `data`. */
        fun isApplicable(layerOptions: Map<*, *>): Boolean {
            return layerOptions.has(MAP_DATA_META, GDF, GEOMETRY) ||
                    layerOptions.has(DATA_META, GDF, GEOMETRY)
        }
    }
}

// Names of the coordinate columns produced by CoordinatesBuilder.
// POINT_* columns are mapped to Aes.X / Aes.Y (see CoordinatesBuilder.POINT_COLUMNS).
const val POINT_X = "__gdf_x__"
const val POINT_Y = "__gdf_y__"
// RECT_* columns are mapped to Aes.XMIN / YMIN / XMAX / YMAX (see CoordinatesBuilder.RECT_COLUMNS).
const val RECT_XMIN = "__gdf_xmin__"
const val RECT_YMIN = "__gdf_ymin__"
const val RECT_XMAX = "__gdf_xmax__"
const val RECT_YMAX = "__gdf_ymax__"

/**
 * Accumulates coordinate columns from GeoJSON strings.
 *
 * Every call to [append] parses the given GeoJSON strings with the subclass-specific
 * [geoJsonConsumer] and remembers how many rows each string produced. [build] returns the
 * collected columns, optionally extended with an id column whose values are repeated
 * once per produced row.
 *
 * @property columns coordinate column names and the aesthetics they correspond to.
 */
internal abstract class CoordinatesBuilder(
    val columns: Map<String, Aes<*>>
) {
    companion object {

        /**
         * Creates the builder matching the given geom kind.
         *
         * @throws IllegalStateException for geom kinds without a coordinates builder.
         */
        fun createCoordinateBuilder(geomKind: GeomKind): CoordinatesBuilder {
            return when (geomKind) {
                MAP, POLYGON -> BoundaryCoordinatesBuilder()
                POINT, TEXT -> PointCoordinatesBuilder()
                RECT -> BboxCoordinatesBuilder()
                PATH -> PathCoordinatesBuilder()
                else -> error("Unsupported geom: $geomKind")
            }
        }

        val POINT_COLUMNS = mapOf(
            POINT_X to Aes.X,
            POINT_Y to Aes.Y
        )

        val RECT_COLUMNS = mapOf(
            RECT_XMIN to Aes.XMIN,
            RECT_YMIN to Aes.YMIN,
            RECT_XMAX to Aes.XMAX,
            RECT_YMAX to Aes.YMAX
        )

        /** Appends the point as one row of the POINT_X / POINT_Y columns. */
        internal fun Map<String, MutableList<Any>>.append(p: Vec<LonLat>) {
            append(POINT_X, p.x)
            append(POINT_Y, p.y)
        }

        /** Appends the rectangle as one row: left/right go to XMIN/XMAX, top/bottom go to YMIN/YMAX. */
        internal fun Map<String, MutableList<Any>>.append(rect: Rect<LonLat>) {
            append(RECT_XMIN, rect.left)
            append(RECT_XMAX, rect.right)
            append(RECT_YMIN, rect.top)
            append(RECT_YMAX, rect.bottom)
        }

        // Adds a value to an existing column; a missing column is treated as a programming error.
        private fun Map<String, MutableList<Any>>.append(key: String, value: Double) {
            get(key)?.add(value) ?: error("$key is not found")
        }
    }

    private var idColumnName: String? = null
    private var ids: List<Any>? = null

    // Number of rows produced by each appended GeoJSON string, in append order.
    private val groupLengths = mutableListOf<Int>()
    protected val coordinates: Map<String, MutableList<Any>> = columns.keys.associateBy({ it }) { mutableListOf<Any>() }
    protected abstract val geoJsonConsumer: SimpleFeature.Consumer<LonLat>

    /** GeoJSON geometry type names handled by this builder; used in the error message of [build]. */
    protected abstract val supportedFeatures: List<String>

    /**
     * Parses every GeoJSON string and appends the resulting coordinates.
     *
     * @return this builder, to allow call chaining.
     */
    fun append(geoJsons: List<String>): CoordinatesBuilder {
        geoJsons.forEach { geoJson ->
            val rowCountBefore = coordinates.rowCount
            GeoJson.parse(geoJson, geoJsonConsumer)
            groupLengths += coordinates.rowCount - rowCountBefore
        }
        return this
    }

    /**
     * Sets the id column to add in [build]. `values` are expected to contain one id per
     * appended GeoJSON string (checked in [build]).
     *
     * @return this builder, to allow call chaining.
     */
    fun setIdColumn(columnName: String, values: List<Any>): CoordinatesBuilder {
        idColumnName = columnName
        ids = values
        return this
    }

    /**
     * Returns the collected coordinate columns, plus the id column when it was set.
     *
     * @throws IllegalStateException when no coordinates were collected.
     * @throws IllegalArgumentException when the number of ids differs from the number of appended GeoJSON strings.
     */
    fun build(): Map<String, MutableList<Any>> {
        if (coordinates.rowCount == 0) {
            error("Geometries are empty or no matching types. Expected: $supportedFeatures")
        }

        // Local snapshots allow smart casts instead of `!!` on mutable properties.
        val columnName = idColumnName
        val idValues = ids

        return when {
            columnName == null && idValues == null -> coordinates

            columnName != null && idValues != null -> {
                require(groupLengths.size == idValues.size) { "Groups and ids should have same size" }

                // (['a', 'b'], [2, 3]) => ['a', 'a', 'b', 'b', 'b']
                val repeatedIds = mutableListOf<Any>()
                idValues.zip(groupLengths).forEach { (id, count) ->
                    repeat(count) { repeatedIds += id }
                }

                coordinates + (columnName to repeatedIds)
            }

            else -> error("idColumnName and idValues should be both null or not null")
        }
    }

    /** Creates a consumer that ignores every geometry type, then applies `config` to override handlers. */
    internal fun defaultConsumer(config: SimpleFeature.Consumer<LonLat>.() -> Unit) =
        SimpleFeature.Consumer<LonLat>(
            onPoint = {},
            onMultiPoint = {},
            onLineString = {},
            onMultiLineString = {},
            onPolygon = {},
            onMultiPolygon = {}
        ).apply(config)

    // Row count is taken from the first column.
    private val <K, V : List<Any>> Map<K, V>.rowCount get() = values.firstOrNull()?.size ?: 0

    /** Collects x/y rows from Point and MultiPoint geometries. */
    class PointCoordinatesBuilder : CoordinatesBuilder(POINT_COLUMNS) {
        override val supportedFeatures = listOf("Point", "MultiPoint")
        override val geoJsonConsumer: SimpleFeature.Consumer<LonLat> = defaultConsumer {
            onPoint = { p -> coordinates.append(p) }
            onMultiPoint = { it.forEach { p -> coordinates.append(p) } }
        }
    }

    /** Collects x/y rows from LineString and MultiLineString geometries. */
    class PathCoordinatesBuilder : CoordinatesBuilder(POINT_COLUMNS) {
        override val supportedFeatures = listOf("LineString", "MultiLineString")
        override val geoJsonConsumer: SimpleFeature.Consumer<LonLat> = defaultConsumer {
            onLineString = { it.forEach { p -> coordinates.append(p) } }
            onMultiLineString = { it.asSequence().flatten().forEach { p -> coordinates.append(p) } }
        }
    }

    /** Collects x/y rows from Polygon and MultiPolygon geometries. */
    class BoundaryCoordinatesBuilder : CoordinatesBuilder(POINT_COLUMNS) {
        override val supportedFeatures = listOf("Polygon", "MultiPolygon")
        override val geoJsonConsumer: SimpleFeature.Consumer<LonLat> = defaultConsumer {
            onPolygon = { it.asSequence().flatten().forEach { p -> coordinates.append(p) } }
            onMultiPolygon = { it.asSequence().flatten().flatten().forEach { p -> coordinates.append(p) } }
        }
    }

    /** Collects bounding-box rows (xmin/ymin/xmax/ymax) for every supported geometry except a single Point. */
    class BboxCoordinatesBuilder : CoordinatesBuilder(RECT_COLUMNS) {
        override val supportedFeatures = listOf("MultiPoint", "LineString", "MultiLineString", "Polygon", "MultiPolygon")
        override val geoJsonConsumer: SimpleFeature.Consumer<LonLat> = defaultConsumer {
            // Unions the boxes, converts the result to a geo rectangle and appends one row
            // per part returned by splitByAntiMeridian.
            fun insert(bboxes: List<Rect<LonLat>>) =
                bboxes
                    .run(BBOX_CALCULATOR::union)
                    .run(::convertToGeoRectangle)
                    .run(GeoRectangle::splitByAntiMeridian)
                    .forEach { r -> coordinates.append(r) }

            fun insert(bbox: Rect<LonLat>) = insert(listOf(bbox))

            onMultiPoint = { insert(it.boundingBox()) }
            onLineString = { insert(it.boundingBox()) }
            onMultiLineString = { insert(it.flatten().boundingBox()) }
            onPolygon = { insert(it.limit()) }
            onMultiPolygon = { insert(it.limit()) }
        }
    }
}


/**
 * Returns the first element of the `map_join` option (the data-side join column name),
 * or null when the option is absent, empty, or its first element is not a string.
 */
fun Map<*, *>.dataJoinVariable(): String? = getList(MAP_JOIN)?.firstOrNull() as? String
Loading