diff --git a/docs/examples/jupyter-notebooks-dev/livemap_tile_provider_config.ipynb b/docs/examples/jupyter-notebooks-dev/livemap_tile_provider_config.ipynb index 9cb4d5221f6..b5e60168736 100644 --- a/docs/examples/jupyter-notebooks-dev/livemap_tile_provider_config.ipynb +++ b/docs/examples/jupyter-notebooks-dev/livemap_tile_provider_config.ipynb @@ -14,29 +14,9 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "LetsPlot.setup_html()" + "LetsPlot.setup_html(isolated_frame=True, offline=False)" ] }, { @@ -47,10 +27,14 @@ { "data": { "text/html": [ - "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", " " + " var plotContainer = document.getElementById(\"lbq8RU\");\n", + " LetsPlot.buildPlotFromProcessedSpecs(plotSpec, -1, -1, plotContainer);\n", + " \n", + " \n", + "" ], "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -104,10 +88,14 @@ { "data": { "text/html": [ - "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", " " + " var plotContainer = document.getElementById(\"T9Vk0l\");\n", + " LetsPlot.buildPlotFromProcessedSpecs(plotSpec, -1, -1, plotContainer);\n", + " \n", + " \n", + "" ], "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -163,10 +150,14 @@ { "data": { "text/html": [ - "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", " " + " var plotContainer = document.getElementById(\"FXfWMC\");\n", + " LetsPlot.buildPlotFromProcessedSpecs(plotSpec, -1, -1, plotContainer);\n", + " \n", + " \n", + "" ], "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -222,10 +212,14 @@ { "data": { "text/html": [ - "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", " " + " var plotContainer = document.getElementById(\"v3Xk18\");\n", + " LetsPlot.buildPlotFromProcessedSpecs(plotSpec, -1, -1, plotContainer);\n", + " \n", + " \n", + "" ], "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -289,10 +282,14 @@ { "data": { "text/html": [ - "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", " " + " var plotContainer = document.getElementById(\"68kpYp\");\n", + " LetsPlot.buildPlotFromProcessedSpecs(plotSpec, -1, -1, plotContainer);\n", + " \n", + " \n", + "" ], "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -347,10 +343,14 @@ { "data": { "text/html": [ - "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", " " + " var plotContainer = document.getElementById(\"qBL9lP\");\n", + " LetsPlot.buildPlotFromProcessedSpecs(plotSpec, -1, -1, plotContainer);\n", + " \n", + " \n", + "" ], "text/plain": [ - "" + "" ] }, "execution_count": 9, @@ -406,10 +405,14 @@ { "data": { "text/html": [ - "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", " " + " var plotContainer = document.getElementById(\"YapJIa\");\n", + " LetsPlot.buildPlotFromProcessedSpecs(plotSpec, -1, -1, plotContainer);\n", + " \n", + " \n", + "" ], "text/plain": [ - "" + "" ] }, "execution_count": 10, @@ -453,6 +455,64 @@ "# raster tiles config directly to the livemap while vector tiles are in global settings\n", "ggplot() + geom_livemap(tiles='https://a.tile.openstreetmap.org/{z}/{x}/{y}.png')" ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ggplot() + geom_livemap(tiles='http://tile.stamen.com/terrain/{z}/{x}/{y}.png')" + ] } ], "metadata": { diff --git a/docs/examples/jupyter-notebooks-dev/map_US_household_income_new.ipynb b/docs/examples/jupyter-notebooks-dev/map_US_household_income_new.ipynb index c3d2897366c..a5af8175076 100644 --- a/docs/examples/jupyter-notebooks-dev/map_US_household_income_new.ipynb +++ b/docs/examples/jupyter-notebooks-dev/map_US_household_income_new.ipynb @@ -1942,7 +1942,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.8" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/docs/examples/jupyter-notebooks-dev/map_US_household_income_new_ik.ipynb b/docs/examples/jupyter-notebooks-dev/map_US_household_income_new_ik.ipynb index 9b8226b3c33..dac31ba96dd 100644 --- a/docs/examples/jupyter-notebooks-dev/map_US_household_income_new_ik.ipynb +++ b/docs/examples/jupyter-notebooks-dev/map_US_household_income_new_ik.ipynb @@ -2,13 +2,43 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "from lets_plot import *\n", "from lets_plot.geo_data import *\n", "\n", + "from lets_plot.settings_utils import geocoding_service\n", + "#LetsPlot.set(geocoding_service(url='http://3.86.228.157:3025'))\n", + "\n", "import pandas as pd\n", "\n", "LetsPlot.setup_html()" @@ -16,9 +46,144 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 
2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idState_CodeState_NameState_abCountyCityPlaceTypePrimaryZip_CodeArea_CodeALandAWaterLatLonMeanMedianStdevsum_w
010110001AlabamaALMobile CountyChickasawChickasaw cityCityplace366112511089495290915630.771450-88.0796973877330506331011638.260513
110110101AlabamaALBarbour CountyLouisvilleClio cityCityplace36048334260703252325431.708516-85.611039377251952843789258.017685
210110201AlabamaALShelby CountyColumbianaColumbiana cityCityplace350512054483527426103433.191452-86.615618546063193057348926.031000
\n", + "
" + ], + "text/plain": [ + " id State_Code State_Name State_ab County City \\\n", + "0 1011000 1 Alabama AL Mobile County Chickasaw \n", + "1 1011010 1 Alabama AL Barbour County Louisville \n", + "2 1011020 1 Alabama AL Shelby County Columbiana \n", + "\n", + " Place Type Primary Zip_Code Area_Code ALand AWater \\\n", + "0 Chickasaw city City place 36611 251 10894952 909156 \n", + "1 Clio city City place 36048 334 26070325 23254 \n", + "2 Columbiana city City place 35051 205 44835274 261034 \n", + "\n", + " Lat Lon Mean Median Stdev sum_w \n", + "0 30.771450 -88.079697 38773 30506 33101 1638.260513 \n", + "1 31.708516 -85.611039 37725 19528 43789 258.017685 \n", + "2 33.191452 -86.615618 54606 31930 57348 926.031000 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "income_all = pd.read_csv('../data/US_household_income_2017.csv', encoding='latin-1')\n", "income_all.head(3)" @@ -26,9 +191,66 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
State_NameMean
0Alabama53612.925856
1Alaska77670.209524
2Arizona62578.071313
\n", + "
" + ], + "text/plain": [ + " State_Name Mean\n", + "0 Alabama 53612.925856\n", + "1 Alaska 77670.209524\n", + "2 Arizona 62578.071313" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "income_by_state = income_all.groupby(\"State_Name\", as_index=False)[\"Mean\"].mean()\n", "income_by_state.head(3)" @@ -36,170 +258,1372 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
State_NameCountyMean
0AlabamaAutauga County53735.557235
1AlabamaBarbour County37725.000000
2AlabamaBlount County55127.000000
\n", + "
" + ], + "text/plain": [ + " State_Name County Mean\n", + "0 Alabama Autauga County 53735.557235\n", + "1 Alabama Barbour County 37725.000000\n", + "2 Alabama Blount County 55127.000000" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# load coordinates of US states in low resolution\n", - "states = regions_state('US-48').boundaries(resolution=4)\n", - "states.head(3)" + "income_by_county = income_all.groupby([\"State_Name\",\"County\"], as_index=False)[\"Mean\"].mean()\n", + "income_by_county.head(3)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 5, "metadata": {}, + "outputs": [], "source": [ - "#### Blank map" + "us48 = regions_state('us-48').to_data_frame()['found name'].tolist()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1629\n" + ] + } + ], "source": [ - "map_theme = theme(axis_line=\"blank\", axis_text=\"blank\", axis_title=\"blank\", axis_ticks=\"blank\") + ggsize(900, 400)\n", - "ggplot() + geom_map(map=states) + map_theme" + "data = income_by_county\n", + "data = data[data.State_Name.isin(us48)]\n", + "row_count, _ = data.shape\n", + "print(row_count)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "ggplot(income_by_state) + geom_map(aes(fill=\"Mean\"), map=states, map_join=[\"State_Name\", \"request\"]) + map_theme \\\n", - " + scale_fill_gradient(low=\"#007BCD\", high=\"#FE0968\", name=\"Mean income\")" + "counties = regions_builder2('county', \n", + " names=data[\"County\"].tolist(), \n", + " states=data[\"State_Name\"].tolist())\\\n", + " .drop_not_matched()\\\n", + " .build()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + 
"
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idrequestfound namestate
03697517Autauga CountyAutauga CountyAlabama
13701595Barbour CountyBarbour CountyAlabama
23697523Blount CountyBlount CountyAlabama
33697525Butler CountyButler CountyAlabama
43701599Chambers CountyChambers CountyAlabama
...............
1612578649Platte CountyPlatte CountyWyoming
1613577321Sheridan CountySheridan CountyWyoming
16142822805Sweetwater CountySweetwater CountyWyoming
1615578695Uinta CountyUinta CountyWyoming
1616578671Weston CountyWeston CountyWyoming
\n", + "

1617 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " id request found name state\n", + "0 3697517 Autauga County Autauga County Alabama\n", + "1 3701595 Barbour County Barbour County Alabama\n", + "2 3697523 Blount County Blount County Alabama\n", + "3 3697525 Butler County Butler County Alabama\n", + "4 3701599 Chambers County Chambers County Alabama\n", + "... ... ... ... ...\n", + "1612 578649 Platte County Platte County Wyoming\n", + "1613 577321 Sheridan County Sheridan County Wyoming\n", + "1614 2822805 Sweetwater County Sweetwater County Wyoming\n", + "1615 578695 Uinta County Uinta County Wyoming\n", + "1616 578671 Weston County Weston County Wyoming\n", + "\n", + "[1617 rows x 4 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "scale_fill_gradient2?" + "counties.to_data_frame()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
requestfound namestategeometry
0Autauga CountyAutauga CountyAlabamaPOINT (-86.65117 32.50771)
1Barbour CountyBarbour CountyAlabamaPOINT (-85.39351 31.88341)
2Blount CountyBlount CountyAlabamaPOINT (-86.53304 34.01333)
3Butler CountyButler CountyAlabamaPOINT (-86.67532 31.73537)
4Chambers CountyChambers CountyAlabamaPOINT (-85.39419 32.92209)
...............
1612Platte CountyPlatte CountyWyomingPOINT (-104.96764 42.12731)
1613Sheridan CountySheridan CountyWyomingPOINT (-106.90375 44.77929)
1614Sweetwater CountySweetwater CountyWyomingPOINT (-108.98868 41.63776)
1615Uinta CountyUinta CountyWyomingPOINT (-110.54782 41.28135)
1616Weston CountyWeston CountyWyomingPOINT (-104.56841 43.84001)
\n", + "

1617 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " request found name state \\\n", + "0 Autauga County Autauga County Alabama \n", + "1 Barbour County Barbour County Alabama \n", + "2 Blount County Blount County Alabama \n", + "3 Butler County Butler County Alabama \n", + "4 Chambers County Chambers County Alabama \n", + "... ... ... ... \n", + "1612 Platte County Platte County Wyoming \n", + "1613 Sheridan County Sheridan County Wyoming \n", + "1614 Sweetwater County Sweetwater County Wyoming \n", + "1615 Uinta County Uinta County Wyoming \n", + "1616 Weston County Weston County Wyoming \n", + "\n", + " geometry \n", + "0 POINT (-86.65117 32.50771) \n", + "1 POINT (-85.39351 31.88341) \n", + "2 POINT (-86.53304 34.01333) \n", + "3 POINT (-86.67532 31.73537) \n", + "4 POINT (-85.39419 32.92209) \n", + "... ... \n", + "1612 POINT (-104.96764 42.12731) \n", + "1613 POINT (-106.90375 44.77929) \n", + "1614 POINT (-108.98868 41.63776) \n", + "1615 POINT (-110.54782 41.28135) \n", + "1616 POINT (-104.56841 43.84001) \n", + "\n", + "[1617 rows x 4 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Issue: 'request' in the result is empty.\n", - "counties = regions_county(within=\"US-48\").boundaries(resolution=4)\n", - "counties.head(3)" + "centroids=counties.centroids()\n", + "centroids" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
requestfound namestategeometryState_NameCountyMean
0Autauga CountyAutauga CountyAlabamaPOINT (-86.65117 32.50771)AlabamaAutauga County53735.557235
1Barbour CountyBarbour CountyAlabamaPOINT (-85.39351 31.88341)AlabamaBarbour County37725.000000
2Blount CountyBlount CountyAlabamaPOINT (-86.53304 34.01333)AlabamaBlount County55127.000000
3Butler CountyButler CountyAlabamaPOINT (-86.67532 31.73537)AlabamaButler County27993.000000
4Chambers CountyChambers CountyAlabamaPOINT (-85.39419 32.92209)AlabamaChambers County45107.000000
........................
1612Platte CountyPlatte CountyWyomingPOINT (-104.96764 42.12731)WyomingPlatte County127999.000000
1613Sheridan CountySheridan CountyWyomingPOINT (-106.90375 44.77929)WyomingSheridan County68733.000000
1614Sweetwater CountySweetwater CountyWyomingPOINT (-108.98868 41.63776)WyomingSweetwater County0.000000
1615Uinta CountyUinta CountyWyomingPOINT (-110.54782 41.28135)WyomingUinta County89130.000000
1616Weston CountyWeston CountyWyomingPOINT (-104.56841 43.84001)WyomingWeston County69215.000000
\n", + "

1617 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " request found name state \\\n", + "0 Autauga County Autauga County Alabama \n", + "1 Barbour County Barbour County Alabama \n", + "2 Blount County Blount County Alabama \n", + "3 Butler County Butler County Alabama \n", + "4 Chambers County Chambers County Alabama \n", + "... ... ... ... \n", + "1612 Platte County Platte County Wyoming \n", + "1613 Sheridan County Sheridan County Wyoming \n", + "1614 Sweetwater County Sweetwater County Wyoming \n", + "1615 Uinta County Uinta County Wyoming \n", + "1616 Weston County Weston County Wyoming \n", + "\n", + " geometry State_Name County Mean \n", + "0 POINT (-86.65117 32.50771) Alabama Autauga County 53735.557235 \n", + "1 POINT (-85.39351 31.88341) Alabama Barbour County 37725.000000 \n", + "2 POINT (-86.53304 34.01333) Alabama Blount County 55127.000000 \n", + "3 POINT (-86.67532 31.73537) Alabama Butler County 27993.000000 \n", + "4 POINT (-85.39419 32.92209) Alabama Chambers County 45107.000000 \n", + "... ... ... ... ... \n", + "1612 POINT (-104.96764 42.12731) Wyoming Platte County 127999.000000 \n", + "1613 POINT (-106.90375 44.77929) Wyoming Sheridan County 68733.000000 \n", + "1614 POINT (-108.98868 41.63776) Wyoming Sweetwater County 0.000000 \n", + "1615 POINT (-110.54782 41.28135) Wyoming Uinta County 89130.000000 \n", + "1616 POINT (-104.56841 43.84001) Wyoming Weston County 69215.000000 \n", + "\n", + "[1617 rows x 7 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "ggplot() + geom_map(map=counties) + map_theme" + "# map_join is lacking multi-key support, so we use pandas.merge\n", + "data_with_geometry = centroids.merge(data, left_on=['request', 'state'], right_on=['County', 'State_Name'])\n", + "data_with_geometry" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "income_by_county = income_all.groupby([\"State_Name\",\"County\"], as_index=False)[\"Mean\"].mean()\n", - "income_by_county.head(3)" + "ggplot() + geom_point(aes(color='Mean'), data_with_geometry)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
requestfound namestategeometry
0Autauga CountyAutauga CountyAlabamaMULTIPOLYGON (((-86.83594 32.39852, -86.83594 ...
1Barbour CountyBarbour CountyAlabamaMULTIPOLYGON (((-85.78125 31.65338, -85.60547 ...
2Blount CountyBlount CountyAlabamaMULTIPOLYGON (((-86.48438 34.16182, -86.30859 ...
3Butler CountyButler CountyAlabamaMULTIPOLYGON (((-86.83594 31.95216, -86.83594 ...
4Chambers CountyChambers CountyAlabamaMULTIPOLYGON (((-85.07812 32.84267, -85.07812 ...
...............
1612Platte CountyPlatte CountyWyomingMULTIPOLYGON (((-105.29297 42.55308, -105.2929...
1613Sheridan CountySheridan CountyWyomingMULTIPOLYGON (((-107.92969 44.96480, -107.9296...
1614Sweetwater CountySweetwater CountyWyomingMULTIPOLYGON (((-110.03906 42.03297, -110.0390...
1615Uinta CountyUinta CountyWyomingMULTIPOLYGON (((-111.09375 41.24477, -111.0937...
1616Weston CountyWeston CountyWyomingMULTIPOLYGON (((-105.11719 43.58039, -105.1171...
\n", + "

1617 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " request found name state \\\n", + "0 Autauga County Autauga County Alabama \n", + "1 Barbour County Barbour County Alabama \n", + "2 Blount County Blount County Alabama \n", + "3 Butler County Butler County Alabama \n", + "4 Chambers County Chambers County Alabama \n", + "... ... ... ... \n", + "1612 Platte County Platte County Wyoming \n", + "1613 Sheridan County Sheridan County Wyoming \n", + "1614 Sweetwater County Sweetwater County Wyoming \n", + "1615 Uinta County Uinta County Wyoming \n", + "1616 Weston County Weston County Wyoming \n", + "\n", + " geometry \n", + "0 MULTIPOLYGON (((-86.83594 32.39852, -86.83594 ... \n", + "1 MULTIPOLYGON (((-85.78125 31.65338, -85.60547 ... \n", + "2 MULTIPOLYGON (((-86.48438 34.16182, -86.30859 ... \n", + "3 MULTIPOLYGON (((-86.83594 31.95216, -86.83594 ... \n", + "4 MULTIPOLYGON (((-85.07812 32.84267, -85.07812 ... \n", + "... ... \n", + "1612 MULTIPOLYGON (((-105.29297 42.55308, -105.2929... \n", + "1613 MULTIPOLYGON (((-107.92969 44.96480, -107.9296... \n", + "1614 MULTIPOLYGON (((-110.03906 42.03297, -110.0390... \n", + "1615 MULTIPOLYGON (((-111.09375 41.24477, -111.0937... \n", + "1616 MULTIPOLYGON (((-105.11719 43.58039, -105.1171... \n", + "\n", + "[1617 rows x 4 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Issue: 'Internal error', message uninformative\n", - "ggplot(income_by_county) + geom_map(aes(fill=\"Mean\"), map=counties, map_join=[\"County\", \"request\"]) + map_theme \\\n", - " + scale_fill_gradient(low=\"#007BCD\", high=\"#FE0968\", name=\"Mean income\", na_value=\"white\")" + "boundaries=counties.boundaries()\n", + "boundaries" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
requestfound namestategeometryState_NameCountyMean
0Autauga CountyAutauga CountyAlabamaMULTIPOLYGON (((-86.83594 32.39852, -86.83594 ...AlabamaAutauga County53735.557235
1Barbour CountyBarbour CountyAlabamaMULTIPOLYGON (((-85.78125 31.65338, -85.60547 ...AlabamaBarbour County37725.000000
2Blount CountyBlount CountyAlabamaMULTIPOLYGON (((-86.48438 34.16182, -86.30859 ...AlabamaBlount County55127.000000
3Butler CountyButler CountyAlabamaMULTIPOLYGON (((-86.83594 31.95216, -86.83594 ...AlabamaButler County27993.000000
4Chambers CountyChambers CountyAlabamaMULTIPOLYGON (((-85.07812 32.84267, -85.07812 ...AlabamaChambers County45107.000000
........................
1612Platte CountyPlatte CountyWyomingMULTIPOLYGON (((-105.29297 42.55308, -105.2929...WyomingPlatte County127999.000000
1613Sheridan CountySheridan CountyWyomingMULTIPOLYGON (((-107.92969 44.96480, -107.9296...WyomingSheridan County68733.000000
1614Sweetwater CountySweetwater CountyWyomingMULTIPOLYGON (((-110.03906 42.03297, -110.0390...WyomingSweetwater County0.000000
1615Uinta CountyUinta CountyWyomingMULTIPOLYGON (((-111.09375 41.24477, -111.0937...WyomingUinta County89130.000000
1616Weston CountyWeston CountyWyomingMULTIPOLYGON (((-105.11719 43.58039, -105.1171...WyomingWeston County69215.000000
\n", + "

1617 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " request found name state \\\n", + "0 Autauga County Autauga County Alabama \n", + "1 Barbour County Barbour County Alabama \n", + "2 Blount County Blount County Alabama \n", + "3 Butler County Butler County Alabama \n", + "4 Chambers County Chambers County Alabama \n", + "... ... ... ... \n", + "1612 Platte County Platte County Wyoming \n", + "1613 Sheridan County Sheridan County Wyoming \n", + "1614 Sweetwater County Sweetwater County Wyoming \n", + "1615 Uinta County Uinta County Wyoming \n", + "1616 Weston County Weston County Wyoming \n", + "\n", + " geometry State_Name \\\n", + "0 MULTIPOLYGON (((-86.83594 32.39852, -86.83594 ... Alabama \n", + "1 MULTIPOLYGON (((-85.78125 31.65338, -85.60547 ... Alabama \n", + "2 MULTIPOLYGON (((-86.48438 34.16182, -86.30859 ... Alabama \n", + "3 MULTIPOLYGON (((-86.83594 31.95216, -86.83594 ... Alabama \n", + "4 MULTIPOLYGON (((-85.07812 32.84267, -85.07812 ... Alabama \n", + "... ... ... \n", + "1612 MULTIPOLYGON (((-105.29297 42.55308, -105.2929... Wyoming \n", + "1613 MULTIPOLYGON (((-107.92969 44.96480, -107.9296... Wyoming \n", + "1614 MULTIPOLYGON (((-110.03906 42.03297, -110.0390... Wyoming \n", + "1615 MULTIPOLYGON (((-111.09375 41.24477, -111.0937... Wyoming \n", + "1616 MULTIPOLYGON (((-105.11719 43.58039, -105.1171... Wyoming \n", + "\n", + " County Mean \n", + "0 Autauga County 53735.557235 \n", + "1 Barbour County 37725.000000 \n", + "2 Blount County 55127.000000 \n", + "3 Butler County 27993.000000 \n", + "4 Chambers County 45107.000000 \n", + "... ... ... \n", + "1612 Platte County 127999.000000 \n", + "1613 Sheridan County 68733.000000 \n", + "1614 Sweetwater County 0.000000 \n", + "1615 Uinta County 89130.000000 \n", + "1616 Weston County 69215.000000 \n", + "\n", + "[1617 rows x 7 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Issue: 'map_join' can't join tables just by county name, without state name. 
\n", - "ggplot(income_by_county) + geom_map(aes(fill=\"Mean\"), map=counties, map_join=[\"County\", \"found name\"]) + map_theme \\\n", - " + scale_fill_gradient(low=\"#007BCD\", high=\"#FE0968\", name=\"Mean income\", na_value=\"white\")" + "# map_join is lacking multi-key support, so we use pandas.merge\n", + "data_with_boundaries = boundaries.merge(data, left_on=['request', 'state'], right_on=['County', 'State_Name'])\n", + "data_with_boundaries\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "map_theme = theme(axis_line=\"blank\", axis_text=\"blank\", axis_title=\"blank\", axis_ticks=\"blank\") + ggsize(900, 400)\n", + "ggplot() + geom_map(aes(fill='Mean'), data_with_boundaries) + scale_fill_gradient(low=\"#007BCD\", high=\"#FE0968\", name=\"Mean income\") + map_theme" + ] + }, + { + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# Issue: batch query takes a lot of time and results with an error. The message is misleading:\n", - "# ValueError: Error: Service is down for maintenance\n", - "#regions_county(income_by_county[\"County\"].tolist(), within=income_by_county[\"State_Name\"].tolist())" + "Issues" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + " id request found name state country\n", + "0 3676279 Wayne County Wayne County New York usa\n", + "1 5057345 Anson County Anson County North Carolina usa" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Geocode USA only once\n", - "usa = regions_country('usa')" + "# drop_not_found breaks parents - these columns are missing\n", + "regions_builder2('county', \n", + " names=['Wayne County', 'Not existing County', 'Anson County'],\n", + " states=['New York', 'New York', 'North Carolina'],\n", + " countries=['usa', 'usa', 'usa'])\\\n", + " .drop_not_found()\\\n", + " .build()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + " id request found name state country\n", + "0 3676279 Wayne County Wayne County New York usa\n", + "1 5068353 Essex County Essex County Virginia usa" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": 
"execute_result" + } + ], "source": [ - "def display_progress(i, n):\n", - " def display(s):\n", - " from IPython.display import display, clear_output, HTML\n", - " if s:\n", - " clear_output(wait=True)\n", - " display(HTML(\"{}\".format(s)))\n", - " else:\n", - " clear_output()\n", - " \n", - " if i != n:\n", - " display('Geocoding progress: {}%'.format(round(i / n * 100, 1)))\n", - " if i == n:\n", - " display(None)" + "# issue with parents geocoding - unexpected ranking behaviour results in broken responses.\n", + "# When mulitply object found by one request ambiguous response is generated without use of ranking by weight. \n", + "# Ambiguous response is also borken - it returns success response with first namesake object ¯\\_(ツ)_/¯\n", + "regions_builder2('county', \n", + " names=['Wayne County', 'Essex County'],\n", + " states=['New York', 'Virginia'],\n", + " countries=['usa', 'usa'])\\\n", + " .build()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ValueError", + "evalue": "Countries count(1) != names count(2)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Wayne County'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Essex County'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mstates\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'New York'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Virginia'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m 
countries=['usa'])\\\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mbuild\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/lib/python3.7/site-packages/lets_plot/geo_data/new_api.py\u001b[0m in \u001b[0;36mregions_builder2\u001b[0;34m(level, names, scope, countries, states, counties, highlights)\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0mcounties\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcounties\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 89\u001b[0m \u001b[0mnew_api\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 90\u001b[0;31m new_scope=new_scope)\n\u001b[0m", + "\u001b[0;32m~/miniconda3/lib/python3.7/site-packages/lets_plot/geo_data/regions_builder.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, level, request, scope, highlights, allow_ambiguous, countries, states, counties, new_api, new_scope)\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnew_api\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_scope\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mMapRegion\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_scope\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 180\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_queries\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mRegionQuery\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_create_new_queries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_default_ambiguity_resolver\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcountries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstates\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcounties\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 181\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_scope\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mMapRegion\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/lib/python3.7/site-packages/lets_plot/geo_data/regions_builder.py\u001b[0m in \u001b[0;36m_create_new_queries\u001b[0;34m(request, ambiguity_resovler, countries, states, counties)\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcountries\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcountries\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequests\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 79\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Countries count({}) != names count({})'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcountries\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequests\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 80\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mstates\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstates\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequests\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Countries count(1) != names count(2)" + ] + } + ], "source": [ - "# Search states once for faster counties geocoding. \n", - "# States are duplicated but geocoding at level 'state' is pretty fast - dedup gives only 2x speed-up.\n", - "states = regions_builder('state', income_by_county[\"State_Name\"].tolist(), within=usa)\\\n", - " .chunk_request(display_progress) \\\n", + "# not informative error message\n", + "regions_builder2('county', \n", + " names=['Wayne County', 'Essex County'],\n", + " states=['New York', 'Virginia'],\n", + " countries=['usa'])\\\n", " .build()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idrequestfound namestate
03697517Autauga CountyAutauga CountyAlabama
13701595Barbour CountyBarbour CountyAlabama
23697523Blount CountyBlount CountyAlabama
33697525Butler CountyButler CountyAlabama
43701599Chambers CountyChambers CountyAlabama
...............
1612578649Platte CountyPlatte CountyWyoming
1613577321Sheridan CountySheridan CountyWyoming
16142822805Sweetwater CountySweetwater CountyWyoming
1615578695Uinta CountyUinta CountyWyoming
1616578671Weston CountyWeston CountyWyoming
\n", + "

1617 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " id request found name state\n", + "0 3697517 Autauga County Autauga County Alabama\n", + "1 3701595 Barbour County Barbour County Alabama\n", + "2 3697523 Blount County Blount County Alabama\n", + "3 3697525 Butler County Butler County Alabama\n", + "4 3701599 Chambers County Chambers County Alabama\n", + "... ... ... ... ...\n", + "1612 578649 Platte County Platte County Wyoming\n", + "1613 577321 Sheridan County Sheridan County Wyoming\n", + "1614 2822805 Sweetwater County Sweetwater County Wyoming\n", + "1615 578695 Uinta County Uinta County Wyoming\n", + "1616 578671 Weston County Weston County Wyoming\n", + "\n", + "[1617 rows x 4 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Geocode counties with already geocoded states for better performance.\n", - "counties = regions_builder('county', income_by_county[\"County\"].tolist(), within=states)\\\n", - " .chunk_request(display_progress) \\\n", - " .build()" + "# regions in parent is not yet supported\n", + "state_regions = regions_builder2('state', names=data[\"State_Name\"].tolist(), countries=['uSa'] * row_count).build()\n", + "counties_via_regions = regions_builder2('county', \n", + " names=data[\"County\"].tolist(), \n", + " states=state_regions)\\\n", + " .drop_not_matched()\\\n", + " .build()\n", + "counties_via_regions.to_data_frame()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + " id request found name\n", + "0 3270329 florida Florida" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "regions_builder2('state', names=['florida'], scope='Uruguay').build()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + " id request found name country\n", + "0 324101 florida Florida usa\n", + "1 
3270329 florida Florida Uruguay" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "regions_builder2('state', names=['florida', 'florida'], countries=['usa', 'Uruguay']).build()" ] } ], diff --git a/docs/geocoding.md b/docs/geocoding.md index 77764dce46d..629fe28345b 100644 --- a/docs/geocoding.md +++ b/docs/geocoding.md @@ -11,60 +11,69 @@ Geocoding is the process of converting names of places into geographic coordinat *Lets-Plot* geocoding API allows a user to execute a single and batch geocoding queries, and handle possible names ambiguity. -Relatively simple geocoding queries are executed using the `regions_xxx()` functions family. For example: +The core class is `Geocoder`. There is a family of functions for constructing the `Geocoder` object - `geocode_cities()`, `geocode_counties()`, `geocode_states()`, `geocode_countries()` and `geocode()`. For example: ```python from lets_plot.geo_data import * -regions_country(['usa', 'canada']) +countries = geocode_countries(['usa', 'canada']) ``` -returns the `Regions` object containing internal IDs for Canada and the US: +Notice that actual geocoding process is not happening here, it starts when any `get_xxx()` function gets called. For this document I will use function `get_geocodes()` that returns `DataFrame` with metadata. It is useful for testing. 
+ +Lets geocode countries: +```python +countries.get_geocodes() +``` +returns the `DataFrame` object containing internal IDs for Canada and the US: ``` - request id found name -0 usa 297677 United States of America -1 canada 2856251 Canada + |id |request |found name +---------------------------------- +0 |297677 |usa |United States +1 |2856251 |canada |Canada ``` -More complex geocoding queries can be created with the help of the `regions_builder()` function that -returns the `RegionsBuilder` object and allows chaining its various methods in order to specify +More complex geocoding queries can be created with the help of the `Geocoder` object by chaining its various methods in order to specify how to handle geocoding ambiguities. For example: ```python -regions_builder(request='warwick', level='city') \ +geocode_cities('warwick') \ .allow_ambiguous() \ - .build() + .get_geocodes() ``` -This sample returns the `Regions` object containing IDs of all cities matching "warwick": -``` - request id found name -0 warwick 785807 Warwick -1 warwick 363189 Warwick -2 warwick 352173 Warwick -3 warwick 15994531 Warwick -4 warwick 368499 Warwick -5 warwick 239553 Warwick -6 warwick 352897 Warwick -7 warwick 3679247 Warwick -8 warwick 8144841 Warwick -9 warwick 382429 West Warwick -10 warwick 7042961 Warwick Township -11 warwick 6098747 Warwick Township -12 warwick 15994533 Sainte-Élizabeth-de-Warwick +This sample returns the `DataFrame` object containing IDs of all cities matching "warwick": +``` + + |id |request |found name +---------------------------------------------------- +0 |239553 |warwick |Warwick +1 |352173 |warwick |Warwick +2 |352897 |warwick |Warwick +3 |363189 |warwick |Warwick +4 |368499 |warwick |Warwick +5 |785807 |warwick |Warwick +6 |3679247 |warwick |Warwick +7 |8144841 |warwick |Warwick +8 |15994531 |warwick |Warwick +9 |382429 |warwick |West Warwick +10 |6098747 |warwick |Warwick Township +11 |7042961 |warwick |Warwick Township +12 |18489127 |warwick 
|Warwick Mountain +13 |15994533 |warwick |Sainte-Élizabeth-de-Warwick ``` ```python -boston_us = regions(request='boston', within='us') -regions_builder(request='warwick', level='city') \ - .where('warwick', near=boston_us) \ - .build() +boston_us = geocode_cities('boston').scope('us') +geocode_cities('warwick') \ + .where('warwick', closest_to=boston_us) \ + .get_geocodes() ``` -This example returns the `Regions` object containing the ID of one particular "warwick" near Boston (US): +This example returns the `DataFrame` object containing the ID of one particular "warwick" closest to Boston (US): ``` - request id found name -0 warwick 785807 Warwick + |id |request |found name +------------------------------ +0 |785807 |warwick |Warwick ``` -Once the `Regions` object is available, it can be passed to any *Lets-Plot* geom +Once the `Geocoder` object is available, it can be passed to any *Lets-Plot* geom supporting the `map` parameter. -If necessary, the `Regions` object can be transformed into a regular pandas `DataFrame` using `to_data_frame()` method -or to a geopandas `GeoDataFrame` using one of `centroids()`, `boundaries()`, or `limits()` methods. +If necessary, the `Geocoder` object can be transformed to a geopandas `GeoDataFrame` using one of `get_centroids()`, `get_boundaries()`, or `get_limits()` methods. All coordinates are in the EPSG:4326 coordinate reference system (CRS). @@ -96,4 +105,288 @@ Examples:
-Couldn't load map_airports.png \ No newline at end of file +Couldn't load map_airports.png + +## Reference + +#### Levels +Geocoding supports 4 administrative levels: +- city +- county +- state +- country + + +Function `geocode()` with `level=None` can try to detect level automatically - it enumerates all levels from country to city and selects best matching level (result without ambiguity and unknown names). For example: +```python +geocode(names=['florida', 'tx']).get_geocodes() +``` + +``` + |id |request |found name +------------------------------ +0 |324101 |florida |Florida +1 |229381 |tx |Texas +``` +While it is useful, it works slower and is not recommended for use on large data sets. + + +Functions `geocode_cities()`, `geocode_counties()`, `geocode_states()`, `geocode_countries()` or `geocode(level=xxx)` search names only at a given level or return an error. +```python +geocode_states(['florida', 'tx']).get_geocodes() +``` + + + +#### Parents +`Geocoder` class provides functions for defining parents at a given administrative level - `counties()`, `states()`, `countries()`. Functions can handle single or multiple values of types string or `Geocoder`. Number of values must match number of names in `Geocoder` so they form a table, i.e. every name is associated by index with its corresponding parent. Parents will be present in result `DataFrame` to make it possible to join data and geometry via `map_join`. 
+ +```python +geocode_cities(['warwick', 'worcester'])\ + .counties(['Worth County', 'worcester county'])\ + .states(['georgia', 'massachusetts'])\ + .get_geocodes() +``` +``` + |id | request |found name |county |state +-------------------------------------------------------------- +0 |239553 | warwick |Warwick |Worth County |georgia +1 |3688419 | worcester |Worcester |worcester county |massachusetts +``` + +Parents can contain `None` values, e.g., countries having different administrative division: +```python +geocode_cities(['warwick', 'worcester'])\ + .states(['Georgia', None])\ + .countries(['USA', 'United Kingdom'])\ + .get_geocodes() +``` +``` + + |id |request |found name |state |country +-------------------------------------------------------------- +0 |239553 |warwick |Warwick |Georgia |USA +1 |3750683 |worcester |Worcester |None |United Kingdom +``` + +Parent can be `Geocoder` object. This allows resolving parent's ambiguity: +```python + +s = geocode_states(['vermont', 'georgia']).scope('usa') +geocode_cities(['worcester', 'warwick']).states(s).get_geocodes() +``` +``` + |id |request |found name |state +------------------------------------------- +0 |17796275 |worcester |Worcester |vermont +1 |239553 |warwick |Warwick |georgia +``` + +##### Scope +`scope()` is a special kind of parent. `scope()` can handle a `string` or a single entry `Geocoder` object. `scope()` is not associated with any administrative level, it acts as parent for any other parents (or names if no other parents set). `scope()` can't be used with `countries()` - countries don't have parents. Typical use-case is when all names belong to the same parent - you don't need to generate list with required length to pass it as a parent, just use the `scope()` with single value. 
+ +```python +geocode_counties(['Dakota County', 'Nevada County']).states(['NE', 'AR']).scope('USA').get_geocodes() +``` +``` + |id |request |found name |state +------------------------------------------------ +0 |2850895 |Dakota County |Dakota County |NE +1 |3653651 |Nevada County |Nevada County |AR +``` + +Parents can be modified between searches: + +```python +florida = geocode_states('florida') + +display(florida.countries('usa').get_geocodes()) +display(florida.countries('uruguay').get_geocodes()) +display(florida.countries(None).get_geocodes()) +``` + +``` +id |request |found name |country +------------------------------------ +324101 |florida |Florida |usa + +id |request |found name |country +------------------------------------ +3270329|florida |Florida |uruguay + +id |request |found name +--------------------------- +324101 |florida |Florida +``` +##### Fetch all + +It is possible to fetch all objects within parent - just don't set the `names` parameter. + +```python +geocode_counties().states('massachusetts').get_geocodes() +``` + +``` + |id |request |found name |state +------------------------------------------------------------- +0 |2363239 |Hampden County |Hampden County |massachusetts +1 |122643 |Berkshire County |Berkshire County |massachusetts +2 |180869 |Essex County |Essex County |massachusetts +3 |3677609 |Hampshire County |Hampshire County |massachusetts +4 |3677611 |Worcester County |Worcester County |massachusetts +... +``` + +##### US-48 (CONUS) +Geocoding supports a special name - `us-48` also known as CONUS. This name can be used as name or parent. +```python +geocode_states('us-48').get_geocodes() +``` +``` + |id |request |found name +--------------------------------------- +0 |121519 |Vermont |Vermont +1 |122631 |Massachusetts |Massachusetts +2 |122641 |New York |New York +3 |127025 |Maine |Maine +4 |134427 |New Hampshire |New Hampshire +... 
+``` + +#### Ambiguity +Often geocoding can find multiple objects for a name or find nothing at all. In this case an error will be generated: + ```python +geocode_cities(['warwick', 'worcester']).get_geocodes() +``` +``` +Multiple objects (14) were found for warwick: + +- Warwick (United States, Georgia, Worth County) +- Warwick (United States, New York, Orange County) +- Warwick (United Kingdom, England, West Midlands, Warwickshire) +- Warwick (United States, North Dakota, Benson County) +- Warwick (United States, Oklahoma, Lincoln County) +- Warwick (United States, Rhode Island, Kent County) +- Warwick (United States, Massachusetts, Franklin County) +- Warwick (Canada, Ontario, Southwestern Ontario, Lambton County) +- Warwick (Canada, Québec, Centre-du-Québec, Arthabaska) +- West Warwick (United States, Rhode Island, Kent County) Multiple objects (4) were found for worcester: +- Worcester (United States, Massachusetts, Worcester County) +- Worcester (United Kingdom, England, West Midlands, Worcestershire) +- Worcester (United States, Vermont, Washington County) +- Worcester Township (United States, Pennsylvania, Montgomery County) +``` + +The ambiguity can be resolved in different ways. + +##### `allow_ambiguous()` + +The best way is to find the object that we are searching for and use its parents. Function `allow_ambiguous()` converts error result into success result that can be rendered on a map or verified manually in another way. + +```python +geocode_cities(['warwick', 'worcester']).allow_ambiguous().get_geocodes() +``` +``` + |id |request |found name +------------------------------ +0 |239553 |warwick |Warwick +1 |352173 |warwick |Warwick +2 |352897 |warwick |Warwick +3 |363189 |warwick |Warwick +4 |368499 |warwick |Warwick +``` + +##### `drop_not_found()` +The function `drop_not_found()` removes unknown names from result. 
+```python +geocode_cities(['paris', 'foo']).drop_not_found().get_geocodes() +``` + +``` + |id |request |found name +----------------------------- +0 |14889 |paris |Paris +``` + +##### `drop_not_matched()` +If request contains both unknown and ambiguous names then `drop_not_matched()` function can be used to remove them all from result. +```python +geocode_cities(['paris', 'worcester', 'foo']).drop_not_matched().get_geocodes() +``` +``` + |id |request |found name +----------------------------- +0 |14889 |paris |Paris +``` + +##### `where()` +For resolving an ambiguity geocoding provides a function that can configure names individually. +To configure a name the function `where(...)` should be called with the place name and all given parent names. Parents can't be changed via `where()` function call. If name and parents don't match with ones from the `where()` function an error will be generated. This is important for cases like this: +```python +geocode_counties(['Washington', 'Washington']).states(['oregon', 'utah']).get_geocodes() +``` +``` + |id |request |found name |state +------------------------------------------------- +0 |3674267 |Washington |Washington County |oregon +1 |3488745 |Washington |Washington County |utah +``` + +With parameter `closest_to` geocoding will take the only object that is closest to it. Parameter can be a single value `Geocoder`. +```python +boston = geocode_cities('boston') +geocode_cities('worcester').where('worcester', closest_to=boston).get_geocodes() +``` + +``` + |id |request |found name +--------------------------------- +0 |3688419 |worcester |Worcester +``` +Or parameter can be a `shapely.geometry.Point`. 
+```python +geocode_cities('worcester').where('worcester', closest_to=shapely.geometry.Point(-71.088, 42.311)).get_geocodes() +``` +``` + |id |request |found name +--------------------------------- +0 |3688419 |worcester |Worcester +``` + +With parameter `scope` a `shapely.geometry.Polygon` can be used for limiting an area of the search (coordinates should be in WGS84 coordinate system). Notice that bbox of the polygon will be used: +```python +geocode_cities('worcester')\ + .where('worcester', scope=shapely.geometry.box(-71.00, 42.00, -72.00, 43.00))\ + .get_geocodes() +``` +``` + |id |request |found name +--------------------------------- +0 |3688419 |worcester |Worcester +``` + +Also, `scope` can be a single value `Geocoder` object or a `string`: +```python +massachusetts = geocode_states('massachusetts') +geocode_cities('worcester').where('worcester', scope=massachusetts).get_geocodes() +``` + +`scope` doesn't change parents in a result `DataFrame`: +```python +worcester_county=geocode_counties('Worcester County').states('massachusetts').countries('usa') + +geocode_cities(['worcester', 'worcester'])\ + .countries(['USA', 'United Kingdom'])\ + .where('worcester', country='USA', scope=worcester_county)\ + .get_geocodes() +``` + +``` + |id |request |found name |country +------------------------------------------------- +0 |3688419 |worcester |Worcester |USA +1 |3750683 |worcester |Worcester |United Kingdom +``` + +## `map_join` +WIP \ No newline at end of file diff --git a/livemap-demo/src/commonMain/kotlin/jetbrains/livemap/plotDemo/LiveMap.kt b/livemap-demo/src/commonMain/kotlin/jetbrains/livemap/plotDemo/LiveMap.kt index 4b0a693a216..48246b0fb0e 100644 --- a/livemap-demo/src/commonMain/kotlin/jetbrains/livemap/plotDemo/LiveMap.kt +++ b/livemap-demo/src/commonMain/kotlin/jetbrains/livemap/plotDemo/LiveMap.kt @@ -12,8 +12,11 @@ import kotlin.random.Random class LiveMap : PlotConfigDemoBase() { fun plotSpecList(): List> { return listOf( - multiLayerTooltips(), - 
mapJoinBar() + barWithNanValuesInData(), + //pieWithNullValuesInData(), + //barWithNullValuesInData() +// multiLayerTooltips(), +// mapJoinBar() // antiMeridian() // tooltips() // symbol_point(), @@ -25,6 +28,216 @@ class LiveMap : PlotConfigDemoBase() { ) } + private fun pieWithNullValuesInData(): Map { + val spec = """ + { + "kind": "plot", + "layers": [ + { + "geom": "livemap", + "data": { + "States": [ + "Alabama", "Alabama", "Alabama", + "Alaska", "Alaska", "Alaska", + "Arizona", "Arizona", "Arizona", + "Arkansas", "Arkansas", "Arkansas" + ], + "Item": [ + "State Debt", "Local Debt", "Gross State Product", + "State Debt", "Local Debt", "Gross State Product", + "State Debt", "Local Debt", "Gross State Product", + "State Debt", "Local Debt", "Gross State Product" + ], + "Values": [ + 10.7, 26.1, 228.0, + 5.9, 3.5, 55.7, + 34.9, 23.5, 355.7, + 13.3, 30.5, 361.1 + ] + }, + "mapping": { + "sym_y": "Values", + "fill": "Item" + }, + "map_data_meta": { + "geodataframe": { + "geometry": "geometry" + } + }, + "map": { + "request": ["Alabama", "California", "Alaska", "Arizona", "Nevada"], + "found name": ["Alabama", "California", "Alaska", "Arizona", "Nevada"], + "geometry": [ + "{\"type\": \"Point\", \"coordinates\": [-86.7421099329499, 32.6446247845888]}", + "{\"type\": \"Point\", \"coordinates\": [-119.994112927034, 37.277335524559]}", + "{\"type\": \"Point\", \"coordinates\": [-152.012666774028, 63.0759818851948]}", + "{\"type\": \"Point\", \"coordinates\": [-111.665190827228, 34.1682100296021]}", + "{\"type\": \"Point\", \"coordinates\": [-116.666956541192, 38.5030842572451]}" + ] + }, + "map_join": [ + ["States"], + ["request"] + ], + "display_mode": "pie", + "tiles": { + "kind": "vector_lets_plot", + "url": "wss://tiles.datalore.jetbrains.com", + "theme": "color", + "attribution": "Map: \u00a9 Lets-Plot, map data: \u00a9 OpenStreetMap contributors." 
+ }, + "geocoding": { + "url": "http://172.31.52.145:3025" + }, + "map_join": ["States", "state"] + } + ] + } + """.trimIndent() + + return parsePlotSpec(spec) + } + + private fun pieWithNanValuesInData(): Map { + val spec = """{ + "kind": "plot", + "layers": [ + { + "geom": "livemap", + "data": { + "x": [0, 0, 0, 10, 10, 10, 20, 20, 20], + "y": [0, 0, 0, 10, 10, 10, 20, 20, 20], + "z": [1, 2, 4, 44, null, 30, 123, 543, 231], + "c": ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'] + }, + "mapping": { + "x": "x", + "y": "y", + "sym_y": "z", + "fill": "c" + }, + "display_mode": "pie", + "tiles": { + "kind": "vector_lets_plot", + "url": "ws://10.0.0.127:3933", + "theme": null, + "attribution": "Map: \u00a9 Lets-Plot, map data: \u00a9 OpenStreetMap contributors." + }, + "geocoding": { + "url": "http://localhost:3020" + } + } + ] +}""".trimIndent() + + return parsePlotSpec(spec) + } + + private fun barWithNanValuesInData(): Map { + val spec = """{ + "kind": "plot", + "layers": [ + { + "geom": "livemap", + "data": { + "x": [0, 0, 0, 10, 10, 10, 20, 20, 20], + "y": [0, 0, 0, 10, 10, 10, 20, 20, 20], + "z": [100, 200, 400, 144, null, 230, 123, 543, -231], + "c": ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'] + }, + "mapping": { + "x": "x", + "y": "y", + "sym_y": "z", + "fill": "c" + }, + "display_mode": "bar", + "tiles": { + "kind": "vector_lets_plot", + "url": "ws://10.0.0.127:3933", + "theme": null, + "attribution": "Map: \u00a9 Lets-Plot, map data: \u00a9 OpenStreetMap contributors." 
+ }, + "geocoding": { + "url": "http://localhost:3020" + } + } + ] +}""".trimIndent() + + return parsePlotSpec(spec) + } + + private fun barWithNullValuesInData(): Map { + val spec = """ + { + "kind": "plot", + "layers": [ + { + "geom": "livemap", + "data": { + "States": [ + "Alabama", "Alabama", "Alabama", + "Alaska", "Alaska", "Alaska", + "Arizona", "Arizona", "Arizona", + "Arkansas", "Arkansas", "Arkansas" + ], + "Item": [ + "State Debt", "Local Debt", "Gross State Product", + "State Debt", "Local Debt", "Gross State Product", + "State Debt", "Local Debt", "Gross State Product", + "State Debt", "Local Debt", "Gross State Product" + ], + "Values": [ + 10.7, 26.1, 228.0, + 5.9, 3.5, 55.7, + 34.9, 23.5, 355.7, + 13.3, 30.5, 361.1 + ] + }, + "mapping": { + "sym_y": "Values", + "fill": "Item" + }, + "map_data_meta": { + "geodataframe": { + "geometry": "geometry" + } + }, + "map": { + "request": ["Alabama", "California", "Alaska", "Arizona", "Nevada"], + "found name": ["Alabama", "California", "Alaska", "Arizona", "Nevada"], + "geometry": [ + "{\"type\": \"Point\", \"coordinates\": [-86.7421099329499, 32.6446247845888]}", + "{\"type\": \"Point\", \"coordinates\": [-119.994112927034, 37.277335524559]}", + "{\"type\": \"Point\", \"coordinates\": [-152.012666774028, 63.0759818851948]}", + "{\"type\": \"Point\", \"coordinates\": [-111.665190827228, 34.1682100296021]}", + "{\"type\": \"Point\", \"coordinates\": [-116.666956541192, 38.5030842572451]}" + ] + }, + "map_join": [ + ["States"], + ["request"] + ], + "display_mode": "bar", + "tiles": { + "kind": "vector_lets_plot", + "url": "wss://tiles.datalore.jetbrains.com", + "theme": "color", + "attribution": "Map: \u00a9 Lets-Plot, map data: \u00a9 OpenStreetMap contributors." 
+ }, + "geocoding": { + "url": "http://172.31.52.145:3025" + }, + "map_join": ["States", "state"] + } + ] + } + """.trimIndent() + + return parsePlotSpec(spec) + } + private fun multiLayerTooltips(): Map { val n = 10 val rnd = Random(0) diff --git a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/data/DataFrameUtil.kt b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/data/DataFrameUtil.kt index d8fbb3ab02c..d6241ddf885 100644 --- a/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/data/DataFrameUtil.kt +++ b/plot-base-portable/src/commonMain/kotlin/jetbrains/datalore/plot/base/data/DataFrameUtil.kt @@ -171,5 +171,3 @@ object DataFrameUtil { return b.build() } } - - diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/ConfigUtil.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/ConfigUtil.kt index 2f604207ca4..4ff50a1d743 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/ConfigUtil.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/ConfigUtil.kt @@ -9,6 +9,8 @@ import jetbrains.datalore.base.geometry.DoubleVector import jetbrains.datalore.plot.base.Aes import jetbrains.datalore.plot.base.DataFrame import jetbrains.datalore.plot.base.data.DataFrameUtil +import jetbrains.datalore.plot.base.data.DataFrameUtil.findVariableOrFail +import jetbrains.datalore.plot.base.data.DataFrameUtil.variables import jetbrains.datalore.plot.base.data.Dummies import jetbrains.datalore.plot.config.Option.Meta @@ -37,58 +39,54 @@ object ConfigUtil { return updateDataFrame(DataFrame.Builder.emptyFrame(), varNameMap) } - /** - * @return All rows from the right table, and the matched rows from the left table - */ - fun rightJoin(left: DataFrame, leftKey: String, right: DataFrame, rightKey: String): DataFrame { - val leftMap = DataFrameUtil.toMap(left) - if (!leftMap.containsKey(leftKey)) { - throw 
IllegalArgumentException("Can't join data: left key not found '$leftKey'") - } - val rightMap = DataFrameUtil.toMap(right) - if (!rightMap.containsKey(rightKey)) { - throw IllegalArgumentException("Can't join data: right key not found '$rightKey'") - } - val leftKeyValues = leftMap.getValue(leftKey) - val indexByKeyValueLeft = HashMap() - var index = 0 - for (keyValue in leftKeyValues) { - indexByKeyValueLeft[keyValue!!] = index++ + fun join(left: DataFrame, leftKeyVariableNames: List<*>, right: DataFrame, rightKeyVariableNames : List<*>): DataFrame { + require(rightKeyVariableNames.size == leftKeyVariableNames.size) { + "Keys count for merging should be equal, but was ${leftKeyVariableNames.size} and ${rightKeyVariableNames.size}" } - val jointMap = HashMap>() - for (key in leftMap.keys) { - jointMap[key] = ArrayList() + fun computeMultiKeys(dataFrame: DataFrame, keyVarNames: List<*>): List> { + val keyVars = keyVarNames.map { keyVarName -> findVariableOrFail(dataFrame, keyVarName as String)} + return (0 until dataFrame.rowCount()).map { rowIndex -> keyVars.map { dataFrame.get(it)[rowIndex] } } } - for (key in rightMap.keys) { - if (leftMap.containsKey(key)) { - continue - } + val leftMultiKeys = computeMultiKeys(left, leftKeyVariableNames) + val rightMultiKeys = computeMultiKeys(right, rightKeyVariableNames) + + fun List<*>.containsDuplicates(): Boolean = toSet().size < size + val restrictRightDuplicates = leftMultiKeys.containsDuplicates() && rightMultiKeys.containsDuplicates() + - val values = rightMap.getValue(key) - jointMap[key] = values + val jointMap = HashMap>() + right.variables().forEach { variable -> jointMap[variable] = mutableListOf() } + left.variables().forEach { variable -> jointMap[variable] = mutableListOf() } + + // return only first match if left and right contains duplicates to not generate m*n rows + fun List<*>.indicesOf(obj: Any?): List = when { + restrictRightDuplicates -> listOf(indexOf(obj)) + else -> mapIndexed { i, v -> i.takeIf { v 
== obj } }.filterNotNull() } - for (keyValue in rightMap.getValue(rightKey)) { - val leftIndex = indexByKeyValueLeft[keyValue] - for (key in leftMap.keys) { - val fillValue = if (leftIndex == null) - null - else - leftMap.getValue(key).get(leftIndex) - - val list = jointMap[key] - if (list is ArrayList) { - list.add(fillValue) - } else { - throw IllegalStateException("The list should be mutable") + val notMatchedRightMultiKeys = rightMultiKeys.toMutableSet() + leftMultiKeys.forEachIndexed { leftRowIndex, leftMultiKey -> + rightMultiKeys.indicesOf(leftMultiKey).forEach { rightRowIndex -> + if (rightRowIndex >= 0) { + notMatchedRightMultiKeys.remove(leftMultiKey) + right.variables().forEach { jointMap[it]!!.add(right.get(it)[rightRowIndex]) } + left.variables().forEach { jointMap[it]!!.add(left.get(it)[leftRowIndex]) } } } } - return createDataFrame(jointMap) + notMatchedRightMultiKeys.forEach { notMatchedRightKey -> + val rightRowIndices = rightMultiKeys.indicesOf(notMatchedRightKey) + rightRowIndices.forEach { rightRowIndex -> + right.variables().forEach { jointMap[it]!!.add(right.get(it)[rightRowIndex]) } + left.variables().forEach { jointMap[it]!!.add(null) } + } + } + + return jointMap.entries.fold(DataFrame.Builder()) { b, (variable, values) -> b.put(variable, values)}.build() } private fun asVarNameMap(data: Any?): Map> { @@ -138,7 +136,7 @@ object ConfigUtil { } private fun updateDataFrame(df: DataFrame, data: Map>): DataFrame { - val dfVars = DataFrameUtil.variables(df) + val dfVars = variables(df) val b = df.builder() for ((varName, values) in data) { val variable = dfVars[varName] ?: DataFrameUtil.createVariable(varName) @@ -161,7 +159,7 @@ object ConfigUtil { return emptyMap() } - val dfVariables = DataFrameUtil.variables(data) + val dfVariables = variables(data) val result = HashMap, DataFrame.Variable>() val options = Option.Mapping.REAL_AES_OPTION_NAMES diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/GeoConfig.kt 
b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/GeoConfig.kt index dbba5ddfdd6..de4a2dd97ea 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/GeoConfig.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/GeoConfig.kt @@ -5,18 +5,30 @@ package jetbrains.datalore.plot.config -import jetbrains.datalore.base.spatial.* -import jetbrains.datalore.base.typedGeometry.* +import jetbrains.datalore.base.spatial.BBOX_CALCULATOR +import jetbrains.datalore.base.spatial.GeoJson +import jetbrains.datalore.base.spatial.GeoRectangle +import jetbrains.datalore.base.spatial.LonLat +import jetbrains.datalore.base.spatial.SimpleFeature +import jetbrains.datalore.base.spatial.convertToGeoRectangle +import jetbrains.datalore.base.spatial.union +import jetbrains.datalore.base.typedGeometry.Rect +import jetbrains.datalore.base.typedGeometry.Vec +import jetbrains.datalore.base.typedGeometry.bottom +import jetbrains.datalore.base.typedGeometry.boundingBox +import jetbrains.datalore.base.typedGeometry.left +import jetbrains.datalore.base.typedGeometry.limit +import jetbrains.datalore.base.typedGeometry.right +import jetbrains.datalore.base.typedGeometry.top import jetbrains.datalore.plot.base.Aes import jetbrains.datalore.plot.base.DataFrame import jetbrains.datalore.plot.base.DataFrame.Variable import jetbrains.datalore.plot.base.GeomKind import jetbrains.datalore.plot.base.GeomKind.* +import jetbrains.datalore.plot.base.data.DataFrameUtil import jetbrains.datalore.plot.base.data.DataFrameUtil.findVariableOrFail -import jetbrains.datalore.plot.base.data.DataFrameUtil.variables import jetbrains.datalore.plot.config.ConfigUtil.createAesMapping -import jetbrains.datalore.plot.config.ConfigUtil.createDataFrame -import jetbrains.datalore.plot.config.ConfigUtil.rightJoin +import jetbrains.datalore.plot.config.ConfigUtil.join import jetbrains.datalore.plot.config.CoordinatesCollector.* import 
jetbrains.datalore.plot.config.GeoConfig.Companion.GEO_ID import jetbrains.datalore.plot.config.Option.Geom.Choropleth.GEO_POSITIONS @@ -38,39 +50,24 @@ class GeoConfig( val mappings: Map, Variable> init { - fun getGeoData(gdfLocation: String, keys: List?): List> { - val geoColumn: String - val geoDataFrame: Map - when(gdfLocation) { - GEO_POSITIONS -> { - geoDataFrame = layerOptions.getMap(GEO_POSITIONS) ?: error("require 'map' parameter") - geoColumn = layerOptions.getString(MAP_DATA_META, GDF, GEOMETRY) ?: error("Geometry column not set") - } - DATA -> { - geoDataFrame = layerOptions.getMap(DATA) ?: error("require 'data' parameter") - geoColumn = layerOptions.getString(DATA_META, GDF, GEOMETRY) ?: error("Geometry column not set") - } + fun getGeoDataFrame(gdfLocation: String): DataFrame { + val geoDataFrame: Map = when(gdfLocation) { + GEO_POSITIONS -> layerOptions.getMap(GEO_POSITIONS) ?: error("require 'map' parameter") + DATA -> layerOptions.getMap(DATA) ?: error("require 'data' parameter") else -> error("Unknown gdf location: $gdfLocation") } - // If no keys provided use indicies - val ids = keys ?: geoDataFrame.indicies?.map(Int::toString) ?: emptyList() - val geoJsons = geoDataFrame.getList(geoColumn)?.map { it as String } ?: error("$geoColumn not found in $gdfLocation") - - return ids.zip(geoJsons) + return DataFrameUtil.fromMap(geoDataFrame) } - fun appendGeoId( - data: DataFrame, - geoData: List>, - dataKeyColumn: String - ): DataFrame { - return DataFrame.Builder(data).put(Variable(dataKeyColumn), geoData.map { (key, _) -> key }).build() + fun getGeometryColumn(gdfLocation: String): String = when(gdfLocation) { + GEO_POSITIONS -> layerOptions.getString(MAP_DATA_META, GDF, GEOMETRY) ?: error("Geometry column not set") + DATA -> layerOptions.getString(DATA_META, GDF, GEOMETRY) ?: error("Geometry column not set") + else -> error("Unknown gdf location: $gdfLocation") } - val dataKeyColumn: String - val geoData: List> val dataFrame: DataFrame + val 
geometries: Variable when { // (aes(color='cyl'), data=data, map=gdf) - how to join without `map_join`? @@ -84,64 +81,44 @@ class GeoConfig( require(layerOptions.has(GEO_POSITIONS)) { "'map' parameter is mandatory with MAP_DATA_META" } val mapJoin = layerOptions.getList(MAP_JOIN) ?: error("require map_join parameter") - val geoKeyColumn = mapJoin[1] as String - val mapKeys = layerOptions - .getMap(GEO_POSITIONS) - ?.getList(geoKeyColumn) - ?.requireNoNulls() - ?: error("'$geoKeyColumn' is not found in map") - geoData = getGeoData(gdfLocation = GEO_POSITIONS, keys = mapKeys) - - dataFrame = data - dataKeyColumn = mapJoin[0] as String + dataFrame = join( + left = data, + leftKeyVariableNames = (mapJoin[0] as List<*>), + right = getGeoDataFrame(gdfLocation = GEO_POSITIONS), + rightKeyVariableNames = (mapJoin[1] as List<*>) + ) + + geometries = findVariableOrFail(dataFrame, getGeometryColumn(GEO_POSITIONS)) } // (map=gdf) - simple geometry with(layerOptions) { has(MAP_DATA_META, GDF, GEOMETRY) && !has(MAP_JOIN) } -> { require(layerOptions.has(GEO_POSITIONS)) { "'map' parameter is mandatory with MAP_DATA_META" } - geoData = getGeoData(gdfLocation = GEO_POSITIONS, keys = null) - - dataKeyColumn = GEO_ID - dataFrame = appendGeoId(data, geoData, dataKeyColumn) + dataFrame = getGeoDataFrame(gdfLocation = GEO_POSITIONS) + geometries = findVariableOrFail(dataFrame, getGeometryColumn(GEO_POSITIONS)) } // (data=gdf) with(layerOptions) { has(DATA_META, GDF, GEOMETRY) && !has(GEO_POSITIONS) && !has(MAP_JOIN) } -> { require(layerOptions.has(DATA)) { "'data' parameter is mandatory with DATA_META" } - geoData = getGeoData(gdfLocation = DATA, keys = null) - dataKeyColumn = GEO_ID - dataFrame = appendGeoId(data, geoData, dataKeyColumn) + dataFrame = data + geometries = findVariableOrFail(dataFrame, getGeometryColumn(DATA)) } else -> error("GeoDataFrame not found in data or map") } val coordinatesCollector = when(geomKind) { - MAP, POLYGON -> BoundaryCoordinatesCollector() - LIVE_MAP, 
POINT, TEXT -> PointCoordinatesCollector() - RECT -> BboxCoordinatesCollector() - PATH -> PathCoordinatesCollector() + MAP, POLYGON -> BoundaryCoordinatesCollector(dataFrame, geometries) + LIVE_MAP, POINT, TEXT -> PointCoordinatesCollector(dataFrame, geometries) + RECT -> BboxCoordinatesCollector(dataFrame, geometries) + PATH -> PathCoordinatesCollector(dataFrame, geometries) else -> error("Unsupported geom: $geomKind") } - val geoFrame = coordinatesCollector - .append(geoData) - .buildCoordinatesMap() - .let(::createDataFrame) - - dataAndCoordinates = rightJoin( - left = dataFrame, - leftKey = dataKeyColumn, - right = geoFrame, - rightKey = GEO_ID - ) - - val coordinatesAutoMapping = coordinatesCollector.mappings - .filterValues { coordName -> coordName in variables(dataAndCoordinates) } - .map { (aes, coordName) -> aes to variables(dataAndCoordinates).getValue(coordName) } - .toMap() - mappings = createAesMapping(dataAndCoordinates, mappingOptions) + coordinatesAutoMapping + dataAndCoordinates = coordinatesCollector.buildDataFrame() + mappings = createAesMapping(dataAndCoordinates, mappingOptions + coordinatesCollector.mappings) } companion object { @@ -170,38 +147,38 @@ class GeoConfig( } internal abstract class CoordinatesCollector( - val mappings: Map, String> + private val dataFrame: DataFrame, + private val geometries: Variable, + val mappings: Map ) { - private val groupKeys = mutableListOf() - private val groupLengths = mutableListOf() + private val dupCounter = mutableListOf() protected val coordinates: Map> = mappings.values.associateBy({ it }) { mutableListOf() } protected abstract val geoJsonConsumer: SimpleFeature.Consumer protected abstract val supportedFeatures: List - fun append(geoData: List>): CoordinatesCollector { - geoData.forEach { (key, geoJson) -> + // (['a', 'b'], [2, 3]) => ['a', 'a', 'b', 'b', 'b'] + private fun duplicate(values: List, frequencies: Collection) = + frequencies.mapIndexed { i, n -> MutableList(n) { values[i] } }.flatten() 
+ + fun buildDataFrame(): DataFrame { + for (geoJson in dataFrame.get(geometries)) { val oldRowCount = coordinates.rowCount - GeoJson.parse(geoJson, geoJsonConsumer) - groupLengths += coordinates.rowCount - oldRowCount - groupKeys += key + GeoJson.parse(geoJson as String, geoJsonConsumer) + dupCounter += coordinates.rowCount - oldRowCount } if (coordinates.rowCount == 0) { error("Geometries are empty or no matching types. Expected: " + supportedFeatures) } - return this - } - - fun buildCoordinatesMap(): Map> { - require(groupLengths.size == groupKeys.size) { "Groups and ids should have same size" } + val builder = DataFrame.Builder() + dataFrame.variables().forEach { variable -> builder.put(variable, duplicate(dataFrame.get(variable), dupCounter)) } + coordinates.entries.forEach { (name, values) -> builder.put(Variable(name), values) } - // (['a', 'b'], [2, 3]) => ['a', 'a', 'b', 'b', 'b'] - fun copies(values: Collection, count: Collection) = - values.asSequence().zip(count.asSequence()) - .fold(mutableListOf()) { acc, (value, count) -> repeat(count) { acc += value }; acc } + builder.put(Variable(GEO_ID), duplicate((0 until dataFrame.rowCount()).toList(), dupCounter)) + builder.remove(geometries) - return coordinates + (GEO_ID to copies(groupKeys, groupLengths)) + return builder.build() } internal fun defaultConsumer(config: SimpleFeature.Consumer.() -> Unit) = @@ -216,7 +193,7 @@ internal abstract class CoordinatesCollector( private val > Map.rowCount get() = values.firstOrNull()?.size ?: 0 - class PointCoordinatesCollector : CoordinatesCollector(POINT_COLUMNS) { + class PointCoordinatesCollector(dataFrame: DataFrame, geometries: Variable) : CoordinatesCollector(dataFrame, geometries, POINT_COLUMNS) { override val supportedFeatures = listOf("Point, MultiPoint") override val geoJsonConsumer: SimpleFeature.Consumer = defaultConsumer { onPoint = { p -> coordinates.append(p) } @@ -224,7 +201,7 @@ internal abstract class CoordinatesCollector( } } - class 
PathCoordinatesCollector : CoordinatesCollector(POINT_COLUMNS) { + class PathCoordinatesCollector(dataFrame: DataFrame, geometries: Variable) : CoordinatesCollector(dataFrame, geometries, POINT_COLUMNS) { override val supportedFeatures = listOf("LineString, MultiLineString") override val geoJsonConsumer: SimpleFeature.Consumer = defaultConsumer { onLineString = { it.forEach { p -> coordinates.append(p) } } @@ -232,7 +209,7 @@ internal abstract class CoordinatesCollector( } } - class BoundaryCoordinatesCollector : CoordinatesCollector(POINT_COLUMNS) { + class BoundaryCoordinatesCollector(dataFrame: DataFrame, geometries: Variable) : CoordinatesCollector(dataFrame, geometries, POINT_COLUMNS) { override val supportedFeatures = listOf("Polygon, MultiPolygon") override val geoJsonConsumer: SimpleFeature.Consumer = defaultConsumer { onPolygon = { it.asSequence().flatten().forEach { p -> coordinates.append(p) } } @@ -240,7 +217,7 @@ internal abstract class CoordinatesCollector( } } - class BboxCoordinatesCollector : CoordinatesCollector(RECT_MAPPINGS) { + class BboxCoordinatesCollector(dataFrame: DataFrame, geometries: Variable) : CoordinatesCollector(dataFrame, geometries, RECT_MAPPINGS) { override val supportedFeatures = listOf("MultiPoint, LineString, MultiLineString, Polygon, MultiPolygon") override val geoJsonConsumer: SimpleFeature.Consumer = defaultConsumer { fun insert(bboxes: List>) = @@ -262,16 +239,16 @@ internal abstract class CoordinatesCollector( companion object { - val POINT_COLUMNS = mapOf, String>( - Aes.X to GeoConfig.POINT_X, - Aes.Y to GeoConfig.POINT_Y + val POINT_COLUMNS = mapOf( + Aes.X.name to GeoConfig.POINT_X, + Aes.Y.name to GeoConfig.POINT_Y ) - val RECT_MAPPINGS = mapOf, String>( - Aes.XMIN to GeoConfig.RECT_XMIN, - Aes.YMIN to GeoConfig.RECT_YMIN, - Aes.XMAX to GeoConfig.RECT_XMAX, - Aes.YMAX to GeoConfig.RECT_YMAX + val RECT_MAPPINGS = mapOf( + Aes.XMIN.name to GeoConfig.RECT_XMIN, + Aes.YMIN.name to GeoConfig.RECT_YMIN, + Aes.XMAX.name to 
GeoConfig.RECT_XMAX, + Aes.YMAX.name to GeoConfig.RECT_YMAX ) internal fun Map>.append(p: Vec) { @@ -291,8 +268,3 @@ internal abstract class CoordinatesCollector( } } } - - -fun Map<*, *>.dataJoinVariable() = getList(MAP_JOIN)?.get(0) as? String -private fun DataFrame.getOrFail(varName: String) = this.get(findVariableOrFail(this, varName)) -private val Map.indicies get() = (values.firstOrNull() as? List<*>)?.indices diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/LayerConfig.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/LayerConfig.kt index 78d6ac8bea2..5aa2ef36055 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/LayerConfig.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/LayerConfig.kt @@ -225,7 +225,7 @@ class LayerConfig( return varBindings.find { it.aes == aes }?.variable } - fun getMapJoin(): Pair? { + fun getMapJoin(): Pair, List<*>>? { if (!hasOwn(MAP_JOIN)) { return null } @@ -234,7 +234,14 @@ class LayerConfig( require(mapJoin.size == 2) { "map_join require 2 parameters" } val (dataVar, mapVar) = mapJoin - require(dataVar is String && mapVar is String) { "map_join parameters type should be a String" } + require(dataVar != null) + require(mapVar != null) + require(dataVar is List<*>) { + "Wrong map_join parameter type: should be a list of strings, but was ${dataVar::class.simpleName}" + } + require(mapVar is List<*>) { + "Wrong map_join parameter type: should be a list of string, but was ${mapVar::class.simpleName}" + } return Pair(dataVar, mapVar) } diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/PlotConfigClientSideUtil.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/PlotConfigClientSideUtil.kt index 7218003410e..309c08610be 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/PlotConfigClientSideUtil.kt +++ 
b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/config/PlotConfigClientSideUtil.kt @@ -114,11 +114,6 @@ object PlotConfigClientSideUtil { layerBuilder.pathIdVarName(GeoConfig.GEO_ID) } - // with map_join use data variable to group values and geometries - layerConfig.mergedOptions.dataJoinVariable()?.let { - layerBuilder.pathIdVarName(it) - } - // variable bindings val bindings = layerConfig.varBindings for (binding in bindings) { diff --git a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/PlotConfigServerSide.kt b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/PlotConfigServerSide.kt index d6f5648906a..211b6fb98ed 100644 --- a/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/PlotConfigServerSide.kt +++ b/plot-config-portable/src/commonMain/kotlin/jetbrains/datalore/plot/server/config/PlotConfigServerSide.kt @@ -377,7 +377,7 @@ open class PlotConfigServerSide(opts: Map) : PlotConfig(opts) { varsToKeep.map(Variable::name) + Stats.GROUP.name + listOfNotNull(layerConfig.mergedOptions.getString(DATA_META, GDF, GEOMETRY)) + - listOfNotNull(layerConfig.getMapJoin()?.first) + + (layerConfig.getMapJoin()?.first?.map { it as String } ?: emptyList()) + facets.variables + listOfNotNull(layerConfig.explicitGroupingVarName) + layerConfig.tooltips.valueSources diff --git a/plot-config/src/jvmTest/kotlin/plot/config/ConfigUtilTest.kt b/plot-config/src/jvmTest/kotlin/plot/config/ConfigUtilTest.kt deleted file mode 100644 index 9ddf0cc4235..00000000000 --- a/plot-config/src/jvmTest/kotlin/plot/config/ConfigUtilTest.kt +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2019. JetBrains s.r.o. - * Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
- */ - -package jetbrains.datalore.plot.config - -import jetbrains.datalore.plot.base.DataFrame -import jetbrains.datalore.plot.base.DataFrame.Variable -import org.assertj.core.api.Assertions.assertThat -import kotlin.test.Test -import kotlin.test.assertEquals -import kotlin.test.assertNotNull - -class ConfigUtilTest { - - @Test - fun rightJoinShouldNotRewriteLeftColumns() { - val idList = listOf(0, 1, 2, 3) - val dataValues = listOf("a", "b", "c", "d") - - val data = DataFrame.Builder() - .put(Variable("id"), idList) - .put(Variable("foo"), dataValues) - .build() - - val map = DataFrame.Builder() - .put(Variable("id"), idList) - .put(Variable("lon"), listOf(13.0, 24.0, -65.0, 117.0)) - .put(Variable("lat"), listOf(42.0, 21.0, -12.0, 77.0)) - .build() - - val joinedDf = ConfigUtil.rightJoin(data, "id", map, "id") - - assertThat(joinedDf.variables().map { it.toString() }) - .containsExactlyInAnyOrder("id", "foo", "lon", "lat") - - var dataVar: Variable? = null - for (variable in joinedDf.variables()) { - if ("foo" == variable.name) { - dataVar = variable - break - } - } - - assertNotNull(dataVar) - assertEquals(dataValues, joinedDf[dataVar]) - } - - @Test - fun joinWithDuplicatedKeys() { - val items = listOf( - "State Debt", "Local Debt", "Gross State Product", - "State Debt", "Local Debt", "Gross State Product", - "State Debt", "Local Debt", "Gross State Product" - ) - - val state = listOf( - "Alabama", "Alabama", "Alabama", - "Alaska", "Alaska", "Alaska", - "Arizona", "Arizona", "Arizona" - ) - - val value = listOf( - 10.7, 26.1, 228.0, - 5.9, 3.5, 55.7, - 13.3, 30.5, 361.1 - ) - - val data = DataFrame.Builder() - .put(Variable("item"), items) - .put(Variable("state"), state) - .put(Variable("value"), value) - .build() - - - val y = listOf(32.806671, 61.370716, 33.729759) - val x = listOf(-86.79113000000001, -152.404419, -111.431221) - val geoId = listOf("Alabama", "Alaska", "Arizona") - - val geo = DataFrame.Builder() - .put(Variable("__x__"), x) - 
.put(Variable("__y__"), y) - .put(Variable("__geo_id__"), geoId) - .build() - - val res = ConfigUtil.rightJoin(data, "state", geo, "__geo_id__") - assertEquals(3, res.rowCount()) // TODO: should be 9, not 3 - } - -} \ No newline at end of file diff --git a/plot-config/src/jvmTest/kotlin/plot/config/DataJoinTest.kt b/plot-config/src/jvmTest/kotlin/plot/config/DataJoinTest.kt new file mode 100644 index 00000000000..d2199c1b1e4 --- /dev/null +++ b/plot-config/src/jvmTest/kotlin/plot/config/DataJoinTest.kt @@ -0,0 +1,541 @@ +/* + * Copyright (c) 2020. JetBrains s.r.o. + * Use of this source code is governed by the MIT license that can be found in the LICENSE file. + */ + +package jetbrains.datalore.plot.config + +import jetbrains.datalore.plot.base.DataFrame +import jetbrains.datalore.plot.base.DataFrame.Variable +import jetbrains.datalore.plot.base.data.DataFrameUtil.variables +import org.assertj.core.api.AbstractAssert +import org.assertj.core.api.Assertions +import org.junit.Ignore +import org.junit.Test + +fun variable(df: DataFrame, varName: String) = variables(df)[varName] ?: error("Variable $varName not found") +fun values(df: DataFrame, name: String) = df.get(variable(df, name)) + +class DataJoinTest { + @Test + fun singleKey_MatchingRows() { + // User searches names from data - same size, same order + // Data: [USA, RU, FR] + // Map: [USA, RU, FR] + // Result: [USA, RU, FR] + val data = DataFrame.Builder() + .put(Variable("Countries"), listOf("USA", "RU", "FR")) + .put(Variable("Values"), listOf(0.0, 1.0, 2.0)) + .build() + + val map = DataFrame.Builder() + .put(Variable("request"), listOf("USA", "RU", "FR")) + .put(Variable("found name"), listOf("United States of America", "Russia", "France")) + .put(Variable("geometry"), listOf("usa_geometry", "ru_geometry", "fr_geometry")) + .build() + + val jointDataFrame = ConfigUtil.join(data, listOf("Countries"), map, listOf("request")) + + assertThat(jointDataFrame) + .hasSerieFrom(data, "Countries") + 
.hasSerieFrom(data, "Values") + .hasSerieFrom(map, "request") + .hasSerieFrom(map, "found name") + .hasSerieFrom(map, "geometry") + } + + @Test + fun tripleKeys_MatchingRows() { + // User searches names from data - same size, same order + // Data: [Anderson, Clay, Alameda] + // Map: [Anderson, Clay, Alameda] + // Result: [Anderson, Clay, Alameda] + val data = DataFrame.Builder() + .put(Variable("Countries"), listOf("USA", "USA", "USA")) + .put(Variable("States"), listOf("TX", "AL", "CA")) + .put(Variable("Counties"), listOf("Anderson", "Clay", "Alameda")) + .put(Variable("Values"), listOf(0.0, 1.0, 2.0)) + .build() + + val map = DataFrame.Builder() + .put(Variable("request"), listOf("Anderson", "Clay", "Alameda")) + .put(Variable("state"), listOf("TX", "AL", "CA")) + .put(Variable("country"), listOf("USA", "USA", "USA")) + .put(Variable("found name"), listOf("Anderson County", "Clay County", "Alameda County")) + .put(Variable("geometry"), listOf("anderson_geometry", "clay_geometry", "alameda_geometry")) + .build() + + val jointDataFrame = ConfigUtil.join(data, listOf("Counties", "States", "Countries"), map, listOf("request", "state", "country")) + + assertThat(jointDataFrame) + .hasSerieFrom(data, "Countries") + .hasSerieFrom(data, "States") + .hasSerieFrom(data, "Counties") + .hasSerieFrom(data, "Values") + .hasSerieFrom(map, "request") + .hasSerieFrom(map, "state") + .hasSerieFrom(map, "country") + .hasSerieFrom(map, "found name") + .hasSerieFrom(map, "geometry") + } + + @Test + fun singleKey_extraMapRows() { + // Data: [USA, RU, FR] + // Map: [UA, USA, GER, FR, RU] + // Result: [USA, RU, FR, UA, GER] + val data = DataFrame.Builder() + .put(Variable("Countries"), listOf("USA", "RU", "FR")) + .put(Variable("Values"), listOf(0.0, 1.0, 2.0)) + .build() + + val map = DataFrame.Builder() + .put(Variable("request"), listOf("UA", "USA", "GER", "FR", "RU")) + .put(Variable("found name"), listOf("Ukraine", "United States of America", "Germany", "France", "Russia")) + 
.put(Variable("geometry"), listOf("ua_geometry", "usa_geometry", "ger_geometry", "fr_geometry", "ru_geometry")) + .build() + + val jointDataFrame = ConfigUtil.join(data, listOf("Countries"), map, listOf("request")) + + assertThat(jointDataFrame) + .hasSerie(variable(data, "Countries"), listOf("USA", "RU", "FR", null, null)) // nulls for UA and GER + .hasSerie(variable(data, "Values"), listOf(0.0, 1.0, 2.0, null, null)) // nulls for UA and GER + .hasSerie(variable(map, "request"), listOf("USA", "RU", "FR", "UA", "GER")) + .hasSerie(variable(map, "found name"), listOf("United States of America", "Russia", "France", "Ukraine", "Germany")) + .hasSerie(variable(map, "geometry"), listOf("usa_geometry", "ru_geometry", "fr_geometry", "ua_geometry", "ger_geometry")) + } + + + @Test + fun singleKey_MatchingDupsInMap() { + // Data: [Asia, Europe] + // Map: [Europe, Asia, Europe] + // Result: [Asia, Europe, Europe] + + val data = DataFrame.Builder() + .put(Variable("Continents"), listOf("Asia", "Europe")) + .put(Variable("Values"), listOf(1.0, 2.0)) + .build() + + val map = DataFrame.Builder() + .put(Variable("Country"), listOf("Germany", "Japan", "France")) + .put(Variable("Cont"), listOf("Europe", "Asia", "Europe")) + .put(Variable("geometry"), listOf("ger_geometry", "jap_geometry", "fr_geometry")) + .build() + + val jointDataFrame = ConfigUtil.join(data, listOf("Continents"), map, listOf("Cont")) + + assertThat(jointDataFrame) + .hasSerie(variable(data, "Continents"), listOf("Asia", "Europe", "Europe")) + .hasSerie(variable(data, "Values"), listOf(1.0, 2.0, 2.0)) + .hasSerie(variable(map, "Country"), listOf("Japan", "Germany", "France")) + .hasSerie(variable(map, "Cont"), listOf("Asia", "Europe", "Europe")) + .hasSerie(variable(map, "geometry"), listOf("jap_geometry", "ger_geometry", "fr_geometry")) + } + + + @Test + fun singleKey_MissingDupsInMap() { + // Data: [Asia] + // Map: [Europe, Asia, Europe] + // Result: [Asia, Europe, Europe] + + val data = DataFrame.Builder() + 
.put(Variable("Continents"), listOf("Asia")) + .put(Variable("Values"), listOf(1.0)) + .build() + + val map = DataFrame.Builder() + .put(Variable("Country"), listOf("Germany", "Japan", "France")) + .put(Variable("Cont"), listOf("Europe", "Asia", "Europe")) + .put(Variable("geometry"), listOf("ger_geometry", "jap_geometry", "fr_geometry")) + .build() + + val jointDataFrame = ConfigUtil.join(data, listOf("Continents"), map, listOf("Cont")) + + assertThat(jointDataFrame) + .hasSerie(variable(data, "Continents"), listOf("Asia", null, null)) + .hasSerie(variable(data, "Values"), listOf(1.0, null, null)) + .hasSerie(variable(map, "Country"), listOf("Japan", "Germany", "France")) + .hasSerie(variable(map, "Cont"), listOf("Asia", "Europe", "Europe")) + .hasSerie(variable(map, "geometry"), listOf("jap_geometry", "ger_geometry", "fr_geometry")) + } + + @Test + fun dupsInDataAndMap_takeOnlyFirstEntryFromMap() { + // Drops France - expected behaviour. We can't predict is there multiindex in map by a single key. 
+ // Data: [Asia, Asia] + // Map: [Europe, Asia, Europe] + // Result: [Asia, Asia, Europe] + + val data = DataFrame.Builder() + .put(Variable("Continents"), listOf("Asia", "Asia")) + .put(Variable("Values"), listOf(1.0, 2.0)) + .build() + + val map = DataFrame.Builder() + .put(Variable("Country"), listOf("Germany", "Japan", "France", "Japan")) + .put(Variable("Cont"), listOf("Europe", "Asia", "Europe", "Asia")) + .put(Variable("geometry"), listOf("ger_geometry", "jap_geometry", "fr_geometry", "jap_geometry")) + .build() + + val jointDataFrame = ConfigUtil.join(data, listOf("Continents"), map, listOf("Cont")) + + assertThat(jointDataFrame) + .hasSerie(variable(data, "Continents"), listOf("Asia", "Asia", null)) + .hasSerie(variable(data, "Values"), listOf(1.0, 2.0, null)) + .hasSerie(variable(map, "Country"), listOf("Japan", "Japan", "Germany")) + .hasSerie(variable(map, "Cont"), listOf("Asia", "Asia", "Europe")) + .hasSerie(variable(map, "geometry"), listOf("jap_geometry", "jap_geometry", "ger_geometry")) + } + + @Test + fun tripleKey_extraMapRows() { + // User searches names from data - same size, same order + // Data: [Anderson, Clay, Alameda] + // Map: [Carson, Anderson, Clay, Adams, Alameda] + // Result: [Anderson, Clay, Alameda, Carson, Adams] + val data = DataFrame.Builder() + .put(Variable("Countries"), listOf("USA", "USA", "USA")) + .put(Variable("States"), listOf("TX", "AL", "CA")) + .put(Variable("Counties"), listOf("Anderson", "Clay", "Alameda")) + .put(Variable("Values"), listOf(0.0, 1.0, 2.0)) + .build() + + val map = DataFrame.Builder() + .put(Variable("request"), listOf("Carson", "Anderson", "Clay", "Adams", "Alameda")) + .put(Variable("state"), listOf("NV", "TX", "AL", "CO", "CA")) + .put(Variable("country"), listOf("USA", "USA", "USA", "USA", "USA")) + .put(Variable("found name"), listOf("Carson County", "Anderson County", "Clay County", "Adams County", "Alameda County")) + .put(Variable("geometry"), listOf("carson_geometry", "anderson_geometry", 
"clay_geometry", "adams_geometry", "alameda_geometry")) + .build() + + val jointDataFrame = ConfigUtil.join(data, listOf("Counties", "States", "Countries"), map, listOf("request", "state", "country")) + + assertThat(jointDataFrame) + .hasSerie(variable(data, "Countries"), listOf("USA", "USA", "USA", null, null)) + .hasSerie(variable(data, "States"), listOf("TX", "AL", "CA", null, null)) + .hasSerie(variable(data, "Counties"), listOf("Anderson", "Clay", "Alameda", null, null)) + .hasSerie(variable(data, "Values"), listOf(0.0, 1.0, 2.0, null, null)) + .hasSerie(variable(map, "request"), listOf("Anderson", "Clay", "Alameda", "Carson", "Adams")) + .hasSerie(variable(map, "state"), listOf("TX", "AL", "CA", "NV", "CO")) + .hasSerie(variable(map, "country"), listOf("USA", "USA", "USA", "USA", "USA")) + .hasSerie(variable(map, "found name"), listOf("Anderson County", "Clay County", "Alameda County", "Carson County", "Adams County")) + .hasSerie(variable(map, "geometry"), listOf("anderson_geometry", "clay_geometry", "alameda_geometry", "carson_geometry", "adams_geometry")) + } + + + @Test + fun singleKey_extraDataRows() { + // Remove data rows that not matched to a map + // Data: [USA, RU, FR] + // Map: [FR, RU] + val data = DataFrame.Builder() + .put(Variable("Countries"), listOf("USA", "RU", "FR")) + .put(Variable("Values"), listOf(0.0, 1.0, 2.0)) + .build() + + val map = DataFrame.Builder() + .put(Variable("request"), listOf("FR", "RU")) + .put(Variable("found name"), listOf("France", "Russia")) + .put(Variable("geometry"), listOf("fr_geometry", "ru_geometry")) + .build() + + val jointDataFrame = ConfigUtil.join(data, listOf("Countries"), map, listOf("request")) + + // Should take variables from corresponding dataframes, not recreate them + assertThat(jointDataFrame) + .hasSerie(variable(data, "Countries"), listOf("RU", "FR")) + .hasSerie(variable(data, "Values"), listOf(1.0, 2.0)) + .hasSerie(variable(map, "request"), listOf("RU", "FR")) + .hasSerie(variable(map, "found 
name"), listOf("Russia", "France")) + .hasSerie(variable(map, "geometry"), listOf("ru_geometry", "fr_geometry")) + } + + + @Test + fun tripleKey_extraDataRows() { + // User searches names from data - same size, same order + val data = DataFrame.Builder() + .put(Variable("Countries"), listOf("USA", "USA", "USA")) + .put(Variable("States"), listOf("TX", "AL", "CA")) + .put(Variable("Counties"), listOf("Anderson", "Clay", "Alameda")) + .put(Variable("Values"), listOf(0.0, 1.0, 2.0)) + .build() + + val map = DataFrame.Builder() + .put(Variable("request"), listOf("Anderson", "Alameda")) + .put(Variable("state"), listOf("TX", "CA")) + .put(Variable("country"), listOf("USA", "USA")) + .put(Variable("found name"), listOf("Anderson County", "Alameda County")) + .put(Variable("geometry"), listOf("anderson_geometry", "alameda_geometry")) + .build() + + val jointDataFrame = ConfigUtil.join(data, listOf("Counties", "States", "Countries"), map, listOf("request", "state", "country")) + + // Should take variables from corresponding dataframes, not recreate them + assertThat(jointDataFrame) + .hasSerie(variable(data, "Countries"), listOf("USA", "USA")) + .hasSerie(variable(data, "States"), listOf("TX", "CA")) + .hasSerie(variable(data, "Counties"), listOf("Anderson", "Alameda")) + .hasSerie(variable(data, "Values"), listOf(0.0, 2.0)) + .hasSerieFrom(map, "request") + .hasSerieFrom(map, "state") + .hasSerieFrom(map, "country") + .hasSerieFrom(map, "found name") + .hasSerieFrom(map, "geometry") + } + + @Test + fun multiindex_singleKey() { + // Data: [USA, RU, FR] + // Map: [USA, FR, RU] + // Result: [USA, RU, FR] + val data = DataFrame.Builder() + .put(Variable("Country"), listOf("USA", "USA", "RU", "RU", "FR", "FR")) + .put(Variable("Category"), listOf("A", "B", "A", "B", "A", "B")) + .put(Variable("Value"), listOf(0.0, 1.0, 2.0, 3.0, 4.0, 5.0)) + .build() + + val map = DataFrame.Builder() + .put(Variable("request"), listOf("USA", "FR", "RU")) + .put(Variable("found name"), 
listOf("United States of America", "France", "Russia")) + .put(Variable("geometry"), listOf("usa_geometry", "fr_geometry", "ru_geometry")) + .build() + + val jointDataFrame = ConfigUtil.join(data, listOf("Country"), map, listOf("request")) + + assertThat(jointDataFrame) + .hasSerieFrom(data, "Country") + .hasSerieFrom(data, "Category") + .hasSerieFrom(data, "Value") + .hasSerie(variable(map, "request"), listOf("USA", "USA", "RU", "RU", "FR", "FR")) + .hasSerie(variable(map, "found name"), listOf("United States of America", "United States of America", "Russia", "Russia", "France", "France")) + .hasSerie(variable(map, "geometry"), listOf("usa_geometry", "usa_geometry", "ru_geometry", "ru_geometry", "fr_geometry", "fr_geometry")) + } + + @Test + fun multiIndex_singleKey_ExtraMapEntries() { + // Data: [USA, RU, FR] + // Map: [GER, USA, FR, RU] + // Result: [USA, RU, FR, GER] + val data = DataFrame.Builder() + .put(Variable("Country"), listOf("USA", "USA", "RU", "RU", "FR", "FR")) + .put(Variable("Category"), listOf("A", "B", "A", "B", "A", "B")) + .put(Variable("Value"), listOf(0.0, 1.0, 2.0, 3.0, 4.0, 5.0)) + .build() + + val map = DataFrame.Builder() + .put(Variable("request"), listOf("GER", "USA", "FR", "RU")) + .put(Variable("found name"), listOf("Germany", "United States of America", "France", "Russia")) + .put(Variable("geometry"), listOf("ger_geometry", "usa_geometry", "fr_geometry", "ru_geometry")) + .build() + + val jointDataFrame = ConfigUtil.join(data, listOf("Country"), map, listOf("request")) + + assertThat(jointDataFrame) + .hasSerie(variable(data, "Country"), listOf("USA", "USA", "RU", "RU", "FR", "FR", null)) + .hasSerie(variable(data, "Category"), listOf("A", "B", "A", "B", "A", "B", null)) + .hasSerie(variable(data, "Value"), listOf(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, null)) + .hasSerie(variable(map, "request"), listOf("USA", "USA", "RU", "RU", "FR", "FR", "GER")) + .hasSerie(variable(map, "found name"), listOf("United States of America", "United States of 
America", "Russia", "Russia", "France", "France", "Germany")) + .hasSerie(variable(map, "geometry"), listOf("usa_geometry", "usa_geometry", "ru_geometry", "ru_geometry", "fr_geometry", "fr_geometry", "ger_geometry")) + } + + + @Test + fun multiIndex_singleKey_MisingDataEntries() { + // Remove data rows that not matched to a map + + // Data: [USA, RU, FR] + // Map: [GER, USA, FR] + // Result: [USA, FR, GER] + val data = DataFrame.Builder() + .put(Variable("Country"), listOf("USA", "USA", "RU", "RU", "FR", "FR")) + .put(Variable("Category"), listOf("A", "B", "A", "B", "A", "B")) + .put(Variable("Value"), listOf(0.0, 1.0, 2.0, 3.0, 4.0, 5.0)) + .build() + + val map = DataFrame.Builder() + .put(Variable("request"), listOf("GER", "USA", "FR")) + .put(Variable("found name"), listOf("Germany", "United States of America", "France")) + .put(Variable("geometry"), listOf("ger_geometry", "usa_geometry", "fr_geometry")) + .build() + + val jointDataFrame = ConfigUtil.join(data, listOf("Country"), map, listOf("request")) + + assertThat(jointDataFrame) + .hasSerie(variable(data, "Country"), listOf( + "USA", "USA", + "FR", "FR", + null // GER + )) + .hasSerie(variable(data, "Category"), listOf( + "A", "B", // USA + "A", "B", // FR + null // GER + )) + .hasSerie(variable(data, "Value"), listOf( + 0.0, 1.0, // USA + 4.0, 5.0, // FR + null // GER + )) + .hasSerie(variable(map, "request"), listOf( + "USA", "USA", + "FR", "FR", + "GER" + )) + .hasSerie(variable(map, "found name"), listOf( + "United States of America", "United States of America", + "France", "France", + "Germany" + )) + .hasSerie(variable(map, "geometry"), listOf( + "usa_geometry", "usa_geometry", + "fr_geometry", "fr_geometry", + "ger_geometry" + )) + } + + @Test + fun multiIndex_singleKey_DuplicatedMapEntries() { + // Remove data rows that not matched to a map + + // Data: [USA, RU, FR] + // Map: [GER, FR, USA, FR] + // Result: [USA, FR, GER] + val data = DataFrame.Builder() + .put(Variable("Country"), listOf("USA", 
"USA", "RU", "RU", "FR", "FR")) + .put(Variable("Category"), listOf("A", "B", "A", "B", "A", "B")) + .put(Variable("Value"), listOf(0.0, 1.0, 2.0, 3.0, 4.0, 5.0)) + .build() + + val map = DataFrame.Builder() + .put(Variable("request"), listOf("GER", "FR", "USA", "FR")) + .put(Variable("found name"), listOf("Germany", "France", "United States of America", "France")) + .put(Variable("geometry"), listOf("ger_geometry", "fr_geometry", "usa_geometry", "fr_geometry")) + .build() + + val jointDataFrame = ConfigUtil.join(data, listOf("Country"), map, listOf("request")) + + assertThat(jointDataFrame) + .hasSerie(variable(data, "Country"), listOf( + "USA", "USA", + "FR", "FR", + null // GER + )) + .hasSerie(variable(data, "Category"), listOf( + "A", "B", // USA + "A", "B", // FR + null // GER + )) + .hasSerie(variable(data, "Value"), listOf( + 0.0, 1.0, // USA + 4.0, 5.0, // FR + null // GER + )) + .hasSerie(variable(map, "request"), listOf( + "USA", "USA", + "FR", "FR", + "GER" + )) + .hasSerie(variable(map, "found name"), listOf( + "United States of America", "United States of America", + "France", "France", + "Germany" + )) + .hasSerie(variable(map, "geometry"), listOf( + "usa_geometry", "usa_geometry", + "fr_geometry", "fr_geometry", + "ger_geometry" + )) + } + + + @Test + fun multiIndex_singleKey_DuplicatedMapEntriesNotMatchingToData() { + // Duplication in both data and map - map duplications will be removed + + // Data: [USA, RU, FR] + // Map: [GER, FR, GER, USA] + // Result: [USA, FR, GER] + val data = DataFrame.Builder() + .put(Variable("Country"), listOf("USA", "USA", "RU", "RU", "FR", "FR")) + .put(Variable("Category"), listOf("A", "B", "A", "B", "A", "B")) + .put(Variable("Value"), listOf(0.0, 1.0, 2.0, 3.0, 4.0, 5.0)) + .build() + + val map = DataFrame.Builder() + .put(Variable("request"), listOf("GER", "FR", "GER", "USA")) + .put(Variable("found name"), listOf("Germany", "France", "Germany", "United States of America")) + .put(Variable("geometry"), 
listOf("ger_geometry", "fr_geometry", "ger_geometry", "usa_geometry")) + .build() + + val jointDataFrame = ConfigUtil.join(data, listOf("Country"), map, listOf("request")) + + assertThat(jointDataFrame) + .hasSerie(variable(data, "Country"), listOf( + "USA", "USA", + "FR", "FR", + null // GER + )) + .hasSerie(variable(data, "Category"), listOf( + "A", "B", // USA + "A", "B", // FR + null // GER + )) + .hasSerie(variable(data, "Value"), listOf( + 0.0, 1.0, // USA + 4.0, 5.0, // FR + null // GER + )) + .hasSerie(variable(map, "request"), listOf( + "USA", "USA", + "FR", "FR", + "GER" + )) + .hasSerie(variable(map, "found name"), listOf( + "United States of America", "United States of America", + "France", "France", + "Germany" + )) + .hasSerie(variable(map, "geometry"), listOf( + "usa_geometry", "usa_geometry", + "fr_geometry", "fr_geometry", + "ger_geometry" + )) + } + + + class DataFrameAssert(actual: DataFrame?) : + AbstractAssert(actual, DataFrameAssert::class.java) { + + fun hasVariables(vararg names: String): DataFrameAssert { + Assertions.assertThat(actual.variables().map(Variable::name)) + .containsExactlyInAnyOrder(*names) + return this + } + + fun hasVariables(vararg variables: Variable): DataFrameAssert { + Assertions.assertThat(actual.variables()) + .containsExactlyInAnyOrder(*variables) + return this + } + + fun hasSerie(variable: Variable, values: List<*>): DataFrameAssert { + Assertions.assertThat(actual.get(variable)) + .containsExactlyElementsOf(values) + return this + } + + fun hasSerieFrom(df: DataFrame, name: String): DataFrameAssert { + hasSerie(variable(df, name), values(df, name)) + return this + } + } + + private fun assertThat(df: DataFrame): DataFrameAssert { + return DataFrameAssert(df) + } + +} diff --git a/plot-config/src/jvmTest/kotlin/plot/config/GeoConfigTest.kt b/plot-config/src/jvmTest/kotlin/plot/config/GeoConfigTest.kt index f15a09a2005..692a6adc986 100644 --- a/plot-config/src/jvmTest/kotlin/plot/config/GeoConfigTest.kt +++ 
b/plot-config/src/jvmTest/kotlin/plot/config/GeoConfigTest.kt @@ -5,6 +5,7 @@ package jetbrains.datalore.plot.config +import jetbrains.datalore.base.values.Color import jetbrains.datalore.plot.base.Aes import jetbrains.datalore.plot.base.DataPointAesthetics import jetbrains.datalore.plot.base.data.DataFrameUtil.findVariableOrFail @@ -73,8 +74,8 @@ class GeoConfigTest { |}""".trimMargin() - private fun polygonGroup(groupId: Int) = (0..14).map { groupId } - private fun multiPolygonGroup(groupId: Int) = (0..3).map { groupId } + private fun polygonSequence(groupId: T) = (0..14).map { groupId } + private fun multiPolygonSequence(groupId: T) = (0..3).map { groupId } private val gdf = """ |{ @@ -130,7 +131,8 @@ class GeoConfigTest { .assertBinding(Aes.X, POINT_X) .assertBinding(Aes.Y, POINT_Y) .assertBinding(Aes.COLOR, "kind") - .assertGroups(polygonGroup(0) + multiPolygonGroup(1)) + .assertGroups(polygonSequence(0) + multiPolygonSequence(1)) + .assertAes(Aes.COLOR, polygonSequence(Color(102,194,165)) + multiPolygonSequence(Color(252,141,98))) } @Test @@ -151,7 +153,7 @@ class GeoConfigTest { | "mapping": {"color": "value"}, | "map": $gdf, | "map_data_meta": {"geodataframe": {"geometry": "coord"}}, - | "map_join": ["fig", "kind"] + | "map_join": [["fig"], ["kind"]] | }] |} """.trimMargin() @@ -172,7 +174,7 @@ class GeoConfigTest { .assertBinding(Aes.X, POINT_X) .assertBinding(Aes.Y, POINT_Y) .assertBinding(Aes.COLOR, "value") - .assertGroups(polygonGroup(0) + multiPolygonGroup(1)) + .assertGroups(polygonSequence(0) + multiPolygonSequence(1)) } @@ -273,7 +275,7 @@ class GeoConfigTest { | "mapping": {"fill": "value"}, | "map": $gdf, | "map_data_meta": {"geodataframe": {"geometry": "coord"}}, - | "map_join": ["fig", "kind"] + | "map_join": [["fig"], ["kind"]] | }] |} """.trimMargin() @@ -294,11 +296,11 @@ class GeoConfigTest { | "mapping": {"fill": "value"}, | "map": $gdf, | "map_data_meta": {"geodataframe": {"geometry": "coord"}}, - | "map_join": ["fig", "kind"] + | 
"map_join": [["fig"], ["kind"]] | }] |} """.trimMargin() - ).assertGroups(polygonGroup(0) + multiPolygonGroup(1)) + ).assertGroups(polygonSequence(0) + multiPolygonSequence(1)) } @Test @@ -343,6 +345,8 @@ class GeoConfigTest { |}""".trimMargin() + val europe = Color(102, 194, 165) + val asia = Color(252, 141, 98) singleGeomLayer(""" |{ | "kind": "plot", @@ -359,7 +363,7 @@ class GeoConfigTest { | "coord": ["$foo1", "$foo2", "$bar1"] | }, | "map_data_meta": {"geodataframe": {"geometry": "coord"}}, - | "map_join": ["continent", "cont"] + | "map_join": [["continent"], ["cont"]] | }] |} """.trimMargin() @@ -368,8 +372,7 @@ class GeoConfigTest { .assertBinding(Aes.XMAX, RECT_XMAX) .assertBinding(Aes.YMIN, RECT_YMIN) .assertBinding(Aes.YMAX, RECT_YMAX) - .assertGroups(listOf(0, 0, 1)) // RECTs of Germany, France, China - + .assertAes(Aes.FILL, listOf(europe, europe, asia)) } @Ignore @@ -412,6 +415,51 @@ class GeoConfigTest { .assertValues("__y__", listOf(4.0, 4.0, 2.0, 2.0)) } + @Test + fun `color mapping to __geo_id__ with multikey and map_join to make colors unique`() { + val fooQux = """{\"type\": \"Point\", \"coordinates\": [1.0, 2.0]}""" + val barQux = """{\"type\": \"Point\", \"coordinates\": [3.0, 4.0]}""" + val bazQux = """{\"type\": \"Point\", \"coordinates\": [5.0, 6.0]}""" + + // county is not unique so to get unique color use special variable __geo_id__ + + singleGeomLayer( + """ + |{ + | "kind": "plot", + | "layers": [{ + | "geom": "point", + | "data": { + | "State": ["foo", "bar", "baz"], + | "County": ["qux", "qux", "qux"], + | "values": [100.0, 500.0, 42.42] + | }, + | "mapping": { + | "color": "__geo_id__" + | }, + | "map": { + | "state": ["foo", "bar", "baz"], + | "county": ["qux", "qux", "qux"], + | "name": ["Qux", "Qux", "Qux"], + | "coord": ["$fooQux", "$barQux", "$bazQux"] + | }, + | "map_data_meta": {"geodataframe": {"geometry": "coord"}}, + | "map_join": [["County", "State"], ["county", "state"]] + | }] + |} + """.trimMargin() + ) + 
.assertValues("County", listOf("qux", "qux", "qux")) + .assertValues("State", listOf("foo", "bar", "baz")) + .assertValues("name", listOf("Qux", "Qux", "Qux")) + .assertValues("county", listOf("qux", "qux", "qux")) + .assertValues("state", listOf("foo", "bar", "baz")) + .assertValues("values", listOf(100.0, 500.0, 42.42)) + .assertValues("lon", listOf(1.0, 3.0, 5.0)) + .assertValues("lat", listOf(2.0, 4.0, 6.0)) + .assertAes(Aes.COLOR, listOf(Color(102, 194, 165), Color(252, 141, 98), Color(141, 160, 203))) + } + @Test fun `should not trigger when positional mapping exist`() { singleGeomLayer(""" @@ -450,16 +498,22 @@ class GeoConfigTest { return this } - private fun GeomLayer.assertValues(variable: String, values: List<*>): GeomLayer { assertEquals(values, dataFrame.get(findVariableOrFail(dataFrame, variable))) return this } - - private fun GeomLayer.assertGroups(expected: Collection<*>) { + private fun GeomLayer.assertGroups(expected: Collection<*>): GeomLayer { val actualGroups = createLayerRendererData(this, emptyMap(), emptyMap()) .aesthetics.dataPoints().map(DataPointAesthetics::group) assertEquals(expected, actualGroups,"Aes valeus didn't match") + return this + } + + private fun GeomLayer.assertAes(aes: Aes<*>, expected: Collection<*>): GeomLayer { + val actualGroups = createLayerRendererData(this, emptyMap(), emptyMap()) + .aesthetics.dataPoints().map { it.get(aes) } + assertEquals(expected, actualGroups,"Aes valeus didn't match") + return this } } diff --git a/plot-config/src/jvmTest/kotlin/plot/server/config/DropUnusedDataTest.kt b/plot-config/src/jvmTest/kotlin/plot/server/config/DropUnusedDataTest.kt index 413815f5e8c..550fa8addcd 100644 --- a/plot-config/src/jvmTest/kotlin/plot/server/config/DropUnusedDataTest.kt +++ b/plot-config/src/jvmTest/kotlin/plot/server/config/DropUnusedDataTest.kt @@ -586,7 +586,7 @@ class DropUnusedDataTest { "{\"type\": \"MultiPolygon\", \"coordinates\": [[[[11.0, 12.0], [13.0, 14.0], [15.0, 13.0], [11.0, 12.0]]]]}" ] }, - 
"map_join": ["name", "id"] + "map_join": [["name"], ["id"]] } ] } @@ -627,7 +627,7 @@ class DropUnusedDataTest { "{\"type\": \"MultiPolygon\", \"coordinates\": [[[[11.0, 12.0], [13.0, 14.0], [15.0, 13.0], [11.0, 12.0]]]]}" ] }, - "map_join": ["name", "id"] + "map_join": [["name"], ["id"]] } ] } @@ -699,7 +699,7 @@ class DropUnusedDataTest { "lat": [ 51.030349, 51.797754, 53.94575, 54.561879, 55.193929, 53.816229, 52.924809, 52.525588, 51.113188, 51.030349, 53.294124, 54.049078, 53.60816, 51.305902, 50.221916, 48.679365, 48.007575, 49.485266, 50.024691, 51.552493, 53.294124, 48.095702, 50.586036, 48.795295, 46.365136, 44.169607, 43.663114, 43.088157, 43.631315, 46.51655, 48.095702], "country": [ "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", "Germany", "Germany", "Germany", "Germany", "Germany", "Germany", "Germany", "Germany", "Germany", "Germany", "Germany", "France", "France", "France", "France", "France", "France", "France", "France", "France", "France"] }, - "map_join": ["Country", "country"], + "map_join": [["Country"], ["country"]], "map_data_meta": {"geodict": {}}, "alpha": 0.3 } diff --git a/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Polygons.kt b/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Polygons.kt index a3e24e16b6c..fb791f65f8f 100644 --- a/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Polygons.kt +++ b/plot-demo-common/src/commonMain/kotlin/jetbrains/datalore/plotDemo/model/plotConfig/Polygons.kt @@ -12,7 +12,8 @@ import jetbrains.datalore.plotDemo.model.SharedPieces open class Polygons : PlotConfigDemoBase() { fun plotSpecList(): List> { return listOf( - basic() + basic(), + join() ) } @@ -41,5 +42,208 @@ open class Polygons : PlotConfigDemoBase() { plotSpec["data"] = SharedPieces.samplePolygons() return plotSpec } + + fun join(): Map { + + + val spec = """ +{ + "data": null, + "mapping": { + "x": null, + "y": null + }, 
+ "data_meta": {}, + "ggsize": { + "width": 800, + "height": 400 + }, + "theme": { + "axis_title": "blank", + "axis_title_x": null, + "axis_title_y": null, + "axis_text": "blank", + "axis_text_x": null, + "axis_text_y": null, + "axis_ticks": "blank", + "axis_ticks_x": null, + "axis_ticks_y": null, + "axis_line": "blank", + "axis_line_x": null, + "axis_line_y": null, + "legend_position": null, + "legend_justification": null, + "legend_direction": null, + "axis_tooltip": null, + "axis_tooltip_x": null, + "axis_tooltip_y": null + }, + "kind": "plot", + "scales": [ + { + "name": "Average t[C\u00b0]", + "aesthetic": "fill", + "breaks": null, + "labels": null, + "limits": null, + "expand": null, + "na_value": null, + "guide": null, + "trans": null, + "low": "light_blue", + "high": "dark_green", + "scale_mapper_kind": "color_gradient" + } + ], + "layers": [ + { + "geom": "rect", + "stat": null, + "data": { + "region": [ + "Europe", + "Asia", + "North America", + "Africa", + "Australia", + "Oceania" + ], + "avg_temp": [ + 8.6, + 16.6, + 11.7, + 21.9, + 14.9, + 23.9 + ] + }, + "mapping": { + "x": null, + "y": null, + "fill": "avg_temp" + }, + "position": null, + "show_legend": null, + "sampling": null, + "tooltips": { + "tooltip_formats": [], + "tooltip_lines": [ + "^fill C\u00b0" + ], + "tooltip_anchor": null, + "tooltip_min_width": null + }, + "data_meta": {}, + "map_data_meta": { + "geodataframe": { + "geometry": "geometry" + } + }, + "map": { + "pop_est": [ + 44293293, + 17789267, + 2931, + 3360148, + 207353391, + 11138234, + 31036656, + 47698524, + 31304016, + 737718, + 591919, + 16290913, + 6943739 + ], + "continent": [ + "South America", + "South America", + "South America", + "South America", + "South America", + "South America", + "South America", + "South America", + "South America", + "South America", + "South America", + "South America", + "South America" + ], + "name": [ + "Argentina", + "Chile", + "Falkland Is.", + "Uruguay", + "Brazil", + "Bolivia", + "Peru", 
+ "Colombia", + "Venezuela", + "Guyana", + "Suriname", + "Ecuador", + "Paraguay" + ], + "iso_a3": [ + "ARG", + "CHL", + "FLK", + "URY", + "BRA", + "BOL", + "PER", + "COL", + "VEN", + "GUY", + "SUR", + "ECU", + "PRY" + ], + "gdp_md_est": [ + 879400.0, + 436100.0, + 281.8, + 73250.0, + 3081000.0, + 78350.0, + 410400.0, + 688000.0, + 468600.0, + 6093.0, + 8547.0, + 182400.0, + 64670.0 + ], + "geometry": [ + "{\"type\": \"MultiPolygon\", \"coordinates\": [[[[-68.63401022758323, -52.63637045887449], [-68.25, -53.1], [-67.75, -53.85], [-66.45, -54.45], [-65.05, -54.699999999999996], [-65.5, -55.2], [-66.45, -55.25], [-66.95992000000001, -54.896810000000016], [-67.56244, -54.87001], [-68.63335000000001, -54.869499999999995], [-68.63401022758323, -52.63637045887449]]], [[[-57.62513342958296, -30.21629485445426], [-57.87493730328188, -31.016556084926208], [-58.14244035504076, -32.044503676076154], [-58.13264767112145, -33.040566908502015], [-58.349611172098875, -33.26318897881541], [-58.42707414410439, -33.909454441057576], [-58.49544206402655, -34.43148976007008], [-57.22582963726366, -35.28802662530788], [-57.36235877137878, -35.977390232081476], [-56.73748735210545, -36.41312590916655], [-56.78828528504836, -36.901571547189334], [-57.74915686708346, -38.18387053807989], [-59.23185706240189, -38.720220228837235], [-61.23744523786564, -38.9284245745412], [-62.33595699731013, -38.827707208004334], [-62.125763108962936, -39.42410491308485], [-62.330530971919494, -40.17258635840034], [-62.145994432205214, -40.67689666113672], [-62.745802781816984, -41.0287614886121], [-63.77049475773255, -41.16678923926369], [-64.73208980981973, -40.80267709733515], [-65.11803524439158, -41.06431487402891], [-64.97856055363582, -42.05800099056934], [-64.3034079657425, -42.35901620866951], [-63.75594784204239, -42.043686618824495], [-63.458059048095876, -42.563138116222405], [-64.37880388045633, -42.87355844499969], [-65.18180396183975, -43.495380954767796], [-65.32882341171013, 
-44.501366062193696], [-65.5652689276616, -45.036785577169795], [-66.50996578638934, -45.03962778094586], [-67.29379391139247, -45.55189625425519], [-67.58054643418008, -46.30177296324257], [-66.59706641301729, -47.033924655953825], [-65.64102657740149, -47.23613453551193], [-65.98508826360079, -48.133289076531135], [-67.16617896184769, -48.697337334996945], [-67.81608761256643, -49.86966887797038], [-68.72874508327321, -50.26421843851883], [-69.13853919134777, -50.732510267947795], [-68.81556148952356, -51.771104011594126], [-68.14999487982038, -52.34998340612768], [-68.57154537624133, -52.299443855346226], [-69.49836218939609, -52.14276091263727], [-71.91480383979638, -52.0090223058659], [-72.32940385607407, -51.42595631287243], [-72.30997351753234, -50.67700977966632], [-72.97574683296469, -50.741450290734285], [-73.32805091011453, -50.378785088909915], [-73.4154357571201, -49.31843637471297], [-72.64824744331494, -48.87861825947683], [-72.33116085477201, -48.2442383766618], [-72.44735531278027, -47.73853281025352], [-71.91725847033024, -46.88483814879177], [-71.55200944689128, -45.5607329241771], [-71.65931555854536, -44.973688653341426], [-71.22277889675976, -44.784242852559416], [-71.32980078803622, -44.407521661151655], [-71.79362260607193, -44.207172133156064], [-71.46405615913051, -43.787611179378345], [-71.91542395698389, -43.40856454851745], [-72.14889807807856, -42.254888197601375], [-71.7468037584155, -42.05138640723598], [-71.91573401557763, -40.83233936947069], [-71.68076127794649, -39.808164157878046], [-71.41351660834906, -38.91602223079114], [-70.81466427273469, -38.55299529394074], [-71.11862504747549, -37.57682748794724], [-71.12188066270987, -36.65812387466232], [-70.36476925320164, -36.00508879978992], [-70.38804948594913, -35.16968759535949], [-69.81730912950152, -34.1935714657983], [-69.81477698431922, -33.273886000299825], [-70.0743993801536, -33.09120981214805], [-70.53506893581951, -31.36501026787031], [-69.91900834825194, 
-30.33633920666828], [-70.01355038112992, -29.367922865518572], [-69.65613033718317, -28.459141127233686], [-69.00123491074825, -27.52121388113618], [-68.29554155137043, -26.89933969493578], [-68.59479977077268, -26.506908868111296], [-68.38600114609736, -26.185016371365215], [-68.41765296087614, -24.51855478281688], [-67.32844295924417, -24.02530323659095], [-66.9852339341777, -22.98634856536284], [-67.1066735500636, -22.735924574476417], [-66.27333940292485, -21.83231047942072], [-64.96489213729461, -22.075861504812327], [-64.37702104354226, -22.79809132252354], [-63.986838141522476, -21.99364430103595], [-62.84646847192156, -22.03498544686945], [-62.685057135657885, -22.249029229422387], [-60.846564704009914, -23.880712579038292], [-60.02896603050403, -24.032796319273274], [-58.80712846539498, -24.77145924245331], [-57.77721716981794, -25.16233977630904], [-57.63366004091113, -25.60365650808164], [-58.61817359071975, -27.123718763947096], [-57.60975969097614, -27.395898532828387], [-56.486701626192996, -27.548499037386293], [-55.69584550639816, -27.387837009390864], [-54.78879492859505, -26.621785577096134], [-54.625290696823576, -25.739255466415514], [-54.13004960795439, -25.547639255477254], [-53.628348965048744, -26.124865004177472], [-53.64873531758789, -26.92347258881609], [-54.490725267135524, -27.47475676850579], [-55.16228634298457, -27.881915378533463], [-56.29089962423908, -28.852760512000895], [-57.62513342958296, -30.21629485445426]]]]}", + "{\"type\": \"MultiPolygon\", \"coordinates\": [[[[-68.63401022758323, -52.63637045887449], [-68.63335000000001, -54.869499999999995], [-67.56244, -54.87001], [-66.95992000000001, -54.896810000000016], [-67.29102999999992, -55.30123999999995], [-68.14862999999991, -55.61183], [-68.63999081081187, -55.58001799908692], [-69.2321, -55.49905999999993], [-69.95808999999997, -55.19843000000003], [-71.00567999999998, -55.053830000000005], [-72.26390000000004, -54.49513999999999], [-73.28519999999997, -53.95751999999993], 
[-74.66253, -52.837489999999946], [-73.8381, -53.04743000000002], [-72.43417999999997, -53.71539999999999], [-71.10773, -54.07432999999992], [-70.59177999999986, -53.61582999999996], [-70.26747999999998, -52.93123000000003], [-69.34564999999992, -52.518299999999954], [-68.63401022758323, -52.63637045887449]]], [[[-69.59042375352405, -17.580011895419332], [-69.10024695501949, -18.260125420812678], [-68.96681840684187, -18.981683444904107], [-68.44222510443092, -19.40506845467143], [-68.75716712103375, -20.372657972904463], [-68.21991309271128, -21.494346612231865], [-67.82817989772273, -22.872918796482175], [-67.1066735500636, -22.735924574476417], [-66.9852339341777, -22.98634856536284], [-67.32844295924417, -24.02530323659095], [-68.41765296087614, -24.51855478281688], [-68.38600114609736, -26.185016371365215], [-68.59479977077268, -26.506908868111296], [-68.29554155137043, -26.89933969493578], [-69.00123491074825, -27.52121388113618], [-69.65613033718317, -28.459141127233686], [-70.01355038112992, -29.367922865518572], [-69.91900834825194, -30.33633920666828], [-70.53506893581951, -31.36501026787031], [-70.0743993801536, -33.09120981214805], [-69.81477698431922, -33.273886000299825], [-69.81730912950152, -34.1935714657983], [-70.38804948594913, -35.16968759535949], [-70.36476925320164, -36.00508879978992], [-71.12188066270987, -36.65812387466232], [-71.11862504747549, -37.57682748794724], [-70.81466427273469, -38.55299529394074], [-71.41351660834906, -38.91602223079114], [-71.68076127794649, -39.808164157878046], [-71.91573401557763, -40.83233936947069], [-71.7468037584155, -42.05138640723598], [-72.14889807807856, -42.254888197601375], [-71.91542395698389, -43.40856454851745], [-71.46405615913051, -43.787611179378345], [-71.79362260607193, -44.207172133156064], [-71.32980078803622, -44.407521661151655], [-71.22277889675976, -44.784242852559416], [-71.65931555854536, -44.973688653341426], [-71.55200944689128, -45.5607329241771], [-71.91725847033024, 
-46.88483814879177], [-72.44735531278027, -47.73853281025352], [-72.33116085477201, -48.2442383766618], [-72.64824744331494, -48.87861825947683], [-73.4154357571201, -49.31843637471297], [-73.32805091011453, -50.378785088909915], [-72.97574683296469, -50.741450290734285], [-72.30997351753234, -50.67700977966632], [-72.32940385607407, -51.42595631287243], [-71.91480383979638, -52.0090223058659], [-69.49836218939609, -52.14276091263727], [-68.57154537624133, -52.299443855346226], [-69.46128434922667, -52.29195077266391], [-69.9427795071062, -52.53793059037322], [-70.8451016913546, -52.89920052852571], [-71.00633216010525, -53.83325204220132], [-71.429794684521, -53.85645476030037], [-72.55794287788488, -53.53141000118449], [-73.7027567206629, -52.835069268607235], [-73.7027567206629, -52.835070076051494], [-74.94676347522517, -52.262753588419], [-75.2600260077785, -51.62935475037325], [-74.97663245308988, -51.0433956846157], [-75.47975419788355, -50.37837167745158], [-75.60801510283198, -48.67377288187184], [-75.18276974150216, -47.7119194476232], [-74.1265809801047, -46.93925343199511], [-75.64439531116545, -46.64764332457207], [-74.69215369332312, -45.76397633238103], [-74.35170935738425, -44.10304412208794], [-73.24035600451522, -44.454960625995604], [-72.7178039211798, -42.38335580827898], [-73.38889990913822, -42.117532240569574], [-73.70133561877488, -43.365776462579774], [-74.33194312203261, -43.22495818458442], [-74.0179571194272, -41.79481292090683], [-73.67709937202999, -39.94221282324317], [-73.21759253609065, -39.25868865331856], [-73.50555945503712, -38.282882582351114], [-73.58806087919109, -37.15628468195598], [-73.1667170884993, -37.12378020604439], [-72.55313696968174, -35.50884002049106], [-71.86173214383263, -33.90909270603153], [-71.4384504869299, -32.41889942803078], [-71.66872066922247, -30.920644626592495], [-71.37008256700773, -30.09568206148503], [-71.48989437527645, -28.861442152625923], [-70.90512386746161, -27.640379734001247], 
[-70.72495398627599, -25.705924167587256], [-70.40396582709502, -23.628996677344574], [-70.09124589708074, -21.39331918710126], [-70.16441972520605, -19.756468194256165], [-70.37257239447771, -18.34797535570887], [-69.85844356960587, -18.092693780187012], [-69.59042375352405, -17.580011895419332]]]]}", + "{\"type\": \"Polygon\", \"coordinates\": [[[-61.2, -51.85], [-60.0, -51.25], [-59.15, -51.5], [-58.550000000000004, -51.10000000000001], [-57.75, -51.55], [-58.050000000000004, -51.900000000000006], [-59.400000000000006, -52.199999999999996], [-59.85000000000001, -51.85], [-60.7, -52.300000000000004], [-61.2, -51.85]]]}", + "{\"type\": \"Polygon\", \"coordinates\": [[[-57.62513342958296, -30.21629485445426], [-56.976025763564735, -30.109686374636127], [-55.97324459494094, -30.883075860316303], [-55.601510179249345, -30.853878676071393], [-54.57245154480512, -31.494511407193748], [-53.78795162618219, -32.047242526987624], [-53.209588995971544, -32.727666110974724], [-53.6505439927181, -33.20200408298183], [-53.373661668498244, -33.768377780900764], [-53.806425950726535, -34.39681487400223], [-54.93586605489773, -34.952646579733624], [-55.67408972840329, -34.75265878676407], [-56.21529700379607, -34.85983570733742], [-57.1396850246331, -34.430456231424245], [-57.81786068381551, -34.4625472958775], [-58.42707414410439, -33.909454441057576], [-58.349611172098875, -33.26318897881541], [-58.13264767112145, -33.040566908502015], [-58.14244035504076, -32.044503676076154], [-57.87493730328188, -31.016556084926208], [-57.62513342958296, -30.21629485445426]]]}", + "{\"type\": \"Polygon\", \"coordinates\": [[[-53.373661668498244, -33.768377780900764], [-53.6505439927181, -33.20200408298183], [-53.209588995971544, -32.727666110974724], [-53.78795162618219, -32.047242526987624], [-54.57245154480512, -31.494511407193748], [-55.601510179249345, -30.853878676071393], [-55.97324459494094, -30.883075860316303], [-56.976025763564735, -30.109686374636127], [-57.62513342958296, 
-30.21629485445426], [-56.29089962423908, -28.852760512000895], [-55.16228634298457, -27.881915378533463], [-54.490725267135524, -27.47475676850579], [-53.64873531758789, -26.92347258881609], [-53.628348965048744, -26.124865004177472], [-54.13004960795439, -25.547639255477254], [-54.625290696823576, -25.739255466415514], [-54.42894609233059, -25.162184747012166], [-54.29347632507745, -24.570799655863965], [-54.29295956075452, -24.02101409271073], [-54.65283423523513, -23.83957813893396], [-55.02790178080955, -24.00127369557523], [-55.40074723979542, -23.956935316668805], [-55.517639329639636, -23.571997572526637], [-55.610682745981144, -22.655619398694846], [-55.79795813660691, -22.356929620047822], [-56.47331743022939, -22.086300144135283], [-56.8815095689029, -22.28215382252148], [-57.937155727761294, -22.090175876557172], [-57.8706739976178, -20.73268767668195], [-58.166392381408045, -20.176700941653678], [-57.85380164247451, -19.96999521248619], [-57.949997321185826, -19.40000416430682], [-57.67600887717431, -18.96183969490403], [-57.49837114117099, -18.174187513911292], [-57.734558274961, -17.55246835700777], [-58.28080400250225, -17.271710300366017], [-58.38805843772404, -16.877109063385276], [-58.24121985536668, -16.299573256091293], [-60.158389655179036, -16.258283786690086], [-60.54296566429515, -15.093910414289596], [-60.251148851142936, -15.07721892665932], [-60.26432634137737, -14.645979099183641], [-60.45919816755003, -14.354007256734555], [-60.503304002511136, -13.775954685117659], [-61.08412126325565, -13.479383640194598], [-61.71320431176078, -13.489202162330052], [-62.127080857986385, -13.198780612849724], [-62.803060268796386, -13.000653171442686], [-63.19649878605057, -12.627032565972435], [-64.3163529120316, -12.461978041232193], [-65.40228146021303, -11.566270440317155], [-65.32189876978302, -10.895872084194679], [-65.44483700220539, -10.511451104375432], [-65.33843522811642, -9.761987806846392], [-66.6469083319628, -9.931331475466862], 
[-67.17380123561074, -10.306812432499612], [-68.04819230820539, -10.712059014532485], [-68.27125362819326, -11.01452117273682], [-68.78615759954948, -11.03638030359628], [-69.52967810736496, -10.951734307502194], [-70.0937522040469, -11.123971856331012], [-70.54868567572841, -11.009146823778465], [-70.48189388699117, -9.490118096558845], [-71.30241227892154, -10.079436130415374], [-72.18489071316985, -10.053597914269432], [-72.56303300646564, -9.520193780152717], [-73.22671342639016, -9.462212823121234], [-73.01538265653255, -9.032833347208062], [-73.57105933296707, -8.424446709835834], [-73.98723548042966, -7.523829847853065], [-73.7234014553635, -7.340998630404414], [-73.72448666044164, -6.91859547285064], [-73.1200274319236, -6.629930922068239], [-73.21971126981461, -6.089188734566078], [-72.9645072089412, -5.7412513159448935], [-72.89192765978726, -5.274561455916981], [-71.74840572781655, -4.593982842633011], [-70.92884334988358, -4.401591485210368], [-70.7947688463023, -4.251264743673303], [-69.89363521999663, -4.2981869441943275], [-69.44410193548961, -1.5562871232198177], [-69.42048580593223, -1.1226185034264091], [-69.5770653957766, -0.549991957200163], [-70.02065589057005, -0.18515634521953928], [-70.01556576198931, 0.5414142928042054], [-69.45239600287246, 0.7061587589506929], [-69.25243404811906, 0.6026508650700748], [-69.21863766140018, 0.9856765812174331], [-69.80459672715773, 1.0890811222334662], [-69.81697323269162, 1.7148052026396243], [-67.86856502955884, 1.6924551456733923], [-67.5378100246747, 2.03716278727633], [-67.2599975246736, 1.7199986840849562], [-67.0650481838525, 1.130112209473225], [-66.87632585312258, 1.253360500489336], [-66.32576514348496, 0.7244522159820121], [-65.54826738143757, 0.7892544620760303], [-65.35471330428837, 1.0952822941085003], [-64.61101192895987, 1.3287305769870417], [-64.19930579289051, 1.49285492594602], [-64.08308549666609, 1.9163691267940803], [-63.368788011311665, 2.200899562993129], [-63.42286739770512, 
2.4110676131241746], [-64.2699991522658, 2.497005520025567], [-64.40882788761792, 3.126786200366624], [-64.3684944322141, 3.797210394705246], [-64.81606401229402, 4.056445217297423], [-64.62865943058755, 4.14848094320925], [-63.88834286157416, 4.020530096854571], [-63.093197597899106, 3.7705711938587854], [-62.804533047116706, 4.006965033377952], [-62.08542965355913, 4.162123521334308], [-60.96689327660154, 4.536467596856639], [-60.601179165271944, 4.91809804933213], [-60.73357418480372, 5.200277207861901], [-60.21368343773133, 5.244486395687602], [-59.980958624904886, 5.014061184098139], [-60.11100236676738, 4.574966538914083], [-59.767405768458715, 4.423502915866607], [-59.53803992373123, 3.9588025984819377], [-59.815413174057866, 3.6064985213320853], [-59.97452490908456, 2.755232652188056], [-59.71854570172675, 2.2496304386443597], [-59.64604366722126, 1.786893825686789], [-59.03086157900265, 1.3176976586927225], [-58.540012986878295, 1.2680882836925207], [-58.429477098205965, 1.4639419620787208], [-58.11344987652502, 1.5071951359070253], [-57.66097103537737, 1.6825849471056387], [-57.335822923396904, 1.9485377058957594], [-56.78270423036083, 1.8637108422886541], [-56.539385748914555, 1.8995226098669207], [-55.995698004771754, 1.8176671411166012], [-55.905600145070885, 2.0219957543986595], [-56.0733418442903, 2.2207949894254995], [-55.973322109589375, 2.510363877773017], [-55.569755011606, 2.4215062524471307], [-55.09758744975514, 2.5237480737366127], [-54.524754197799716, 2.3118488631237852], [-54.08806250671725, 2.105556545414629], [-53.77852067728892, 2.3767027856500818], [-53.554839240113544, 2.334896551925951], [-53.41846513529531, 2.0533891870159806], [-52.939657151894956, 2.1248576928756364], [-52.55642473001842, 2.504705308437053], [-52.249337531123956, 3.241094468596245], [-51.65779741067889, 4.156232408053029], [-51.31714636901086, 4.203490505383954], [-51.069771287629656, 3.650397650564031], [-50.508875291533656, 1.901563828942457], 
[-49.97407589374506, 1.736483465986069], [-49.94710079608871, 1.0461896834312228], [-50.699251268096916, 0.22298411702168153], [-50.38821082213214, -0.07844451253681939], [-48.62056677915632, -0.2354891902718208], [-48.58449662941659, -1.2378052710050014], [-47.824956427590635, -0.5816179337628], [-46.566583624851226, -0.941027520352776], [-44.905703090990414, -1.551739597178134], [-44.417619187993665, -2.137750339367976], [-44.58158850765578, -2.691308282078524], [-43.418791266440195, -2.383110039889793], [-41.47265682632825, -2.9120183243971165], [-39.97866533055404, -2.873054294449041], [-38.50038347019657, -3.7006523576033956], [-37.2232521225352, -4.820945733258917], [-36.45293738457639, -5.109403578312154], [-35.59779578301047, -5.149504489770649], [-35.23538896334756, -5.464937432480247], [-34.89602983248683, -6.738193047719711], [-34.729993455533034, -7.343220716992967], [-35.12821204277422, -8.996401462442286], [-35.636966518687714, -9.649281508017815], [-37.046518724097, -11.040721123908803], [-37.68361161960736, -12.171194756725823], [-38.42387651218844, -13.038118584854288], [-38.67388709161652, -13.057652276260619], [-38.953275722802545, -13.793369642800023], [-38.88229814304965, -15.667053724838768], [-39.16109249526431, -17.208406670808472], [-39.2673392400564, -17.867746270420483], [-39.58352149103423, -18.262295830968938], [-39.76082333022764, -19.59911345792741], [-40.77474077001034, -20.904511814052423], [-40.94475623225061, -21.93731698983781], [-41.754164191238225, -22.370675551037458], [-41.98828426773656, -22.970070489190896], [-43.07470374202475, -22.96769337330547], [-44.64781185563781, -23.351959323827842], [-45.35213578955992, -23.796841729428582], [-46.47209326840554, -24.088968601174543], [-47.64897233742066, -24.885199069927722], [-48.4954581365777, -25.877024834905654], [-48.64100480812774, -26.623697605090932], [-48.474735887228654, -27.17591196056189], [-48.661520351747626, -28.18613453543572], [-48.8884574041574, 
-28.674115085567884], [-49.587329474472675, -29.224469089476337], [-50.696874152211485, -30.98446502047296], [-51.576226162306156, -31.77769825615321], [-52.256081305538046, -32.24536996839467], [-52.712099982297694, -33.19657805759118], [-53.373661668498244, -33.768377780900764]]]}", + "{\"type\": \"Polygon\", \"coordinates\": [[[-69.52967810736496, -10.951734307502194], [-68.78615759954948, -11.03638030359628], [-68.27125362819326, -11.01452117273682], [-68.04819230820539, -10.712059014532485], [-67.17380123561074, -10.306812432499612], [-66.6469083319628, -9.931331475466862], [-65.33843522811642, -9.761987806846392], [-65.44483700220539, -10.511451104375432], [-65.32189876978302, -10.895872084194679], [-65.40228146021303, -11.566270440317155], [-64.3163529120316, -12.461978041232193], [-63.19649878605057, -12.627032565972435], [-62.803060268796386, -13.000653171442686], [-62.127080857986385, -13.198780612849724], [-61.71320431176078, -13.489202162330052], [-61.08412126325565, -13.479383640194598], [-60.503304002511136, -13.775954685117659], [-60.45919816755003, -14.354007256734555], [-60.26432634137737, -14.645979099183641], [-60.251148851142936, -15.07721892665932], [-60.54296566429515, -15.093910414289596], [-60.158389655179036, -16.258283786690086], [-58.24121985536668, -16.299573256091293], [-58.38805843772404, -16.877109063385276], [-58.28080400250225, -17.271710300366017], [-57.734558274961, -17.55246835700777], [-57.49837114117099, -18.174187513911292], [-57.67600887717431, -18.96183969490403], [-57.949997321185826, -19.40000416430682], [-57.85380164247451, -19.96999521248619], [-58.166392381408045, -20.176700941653678], [-58.183471442280506, -19.868399346600363], [-59.11504248720611, -19.3569060197754], [-60.04356462262649, -19.342746677327426], [-61.78632646345377, -19.633736667562964], [-62.2659612697708, -20.513734633061276], [-62.291179368729225, -21.051634616787393], [-62.685057135657885, -22.249029229422387], [-62.84646847192156, 
-22.03498544686945], [-63.986838141522476, -21.99364430103595], [-64.37702104354226, -22.79809132252354], [-64.96489213729461, -22.075861504812327], [-66.27333940292485, -21.83231047942072], [-67.1066735500636, -22.735924574476417], [-67.82817989772273, -22.872918796482175], [-68.21991309271128, -21.494346612231865], [-68.75716712103375, -20.372657972904463], [-68.44222510443092, -19.40506845467143], [-68.96681840684187, -18.981683444904107], [-69.10024695501949, -18.260125420812678], [-69.59042375352405, -17.580011895419332], [-68.9596353827533, -16.50069793057127], [-69.38976416693471, -15.660129082911652], [-69.16034664577495, -15.323973890853019], [-69.33953467474701, -14.953195489158832], [-68.9488866848366, -14.453639418193283], [-68.92922380234954, -13.602683607643009], [-68.88007951523997, -12.899729099176653], [-68.66507971868963, -12.561300144097173], [-69.52967810736496, -10.951734307502194]]]}", + "{\"type\": \"Polygon\", \"coordinates\": [[[-69.89363521999663, -4.2981869441943275], [-70.7947688463023, -4.251264743673303], [-70.92884334988358, -4.401591485210368], [-71.74840572781655, -4.593982842633011], [-72.89192765978726, -5.274561455916981], [-72.9645072089412, -5.7412513159448935], [-73.21971126981461, -6.089188734566078], [-73.1200274319236, -6.629930922068239], [-73.72448666044164, -6.91859547285064], [-73.7234014553635, -7.340998630404414], [-73.98723548042966, -7.523829847853065], [-73.57105933296707, -8.424446709835834], [-73.01538265653255, -9.032833347208062], [-73.22671342639016, -9.462212823121234], [-72.56303300646564, -9.520193780152717], [-72.18489071316985, -10.053597914269432], [-71.30241227892154, -10.079436130415374], [-70.48189388699117, -9.490118096558845], [-70.54868567572841, -11.009146823778465], [-70.0937522040469, -11.123971856331012], [-69.52967810736496, -10.951734307502194], [-68.66507971868963, -12.561300144097173], [-68.88007951523997, -12.899729099176653], [-68.92922380234954, -13.602683607643009], [-68.9488866848366, 
-14.453639418193283], [-69.33953467474701, -14.953195489158832], [-69.16034664577495, -15.323973890853019], [-69.38976416693471, -15.660129082911652], [-68.9596353827533, -16.50069793057127], [-69.59042375352405, -17.580011895419332], [-69.85844356960587, -18.092693780187012], [-70.37257239447771, -18.34797535570887], [-71.37525021023693, -17.773798516513857], [-71.46204077827113, -17.363487644116383], [-73.44452958850042, -16.359362888252996], [-75.23788265654144, -15.265682875227782], [-76.00920508492995, -14.649286390850321], [-76.42346920439775, -13.823186944232432], [-76.25924150257417, -13.535039157772943], [-77.10619238962184, -12.22271615972082], [-78.09215287953464, -10.377712497604065], [-79.03695309112695, -8.386567884965892], [-79.44592037628485, -7.93083342858386], [-79.76057817251005, -7.194340915560084], [-80.53748165558608, -6.541667575713717], [-81.24999630402642, -6.136834405139183], [-80.92634680858244, -5.690556735866565], [-81.41094255239946, -4.7367648250554595], [-81.09966956248937, -4.036394138203697], [-80.30256059438722, -3.4048564591647126], [-80.18401485870967, -3.8211617977080437], [-80.46929460317695, -4.0592867977089995], [-80.44224199087216, -4.425724379090674], [-80.02890804718561, -4.3460909969288934], [-79.62497921417618, -4.454198093283495], [-79.20528906931773, -4.959128513207389], [-78.63989722361234, -4.547784112164074], [-78.45068396677564, -3.873096612161376], [-77.83790483265861, -3.003020521663103], [-76.63539425322672, -2.6086776668438176], [-75.54499569365204, -1.5616097957458803], [-75.23372270374195, -0.9114169246495294], [-75.37322323271385, -0.1520317521204504], [-75.10662451852008, -0.05720549886486026], [-74.44160051135597, -0.5308200008198867], [-74.12239518908906, -1.002832533373848], [-73.6595035468346, -1.2604912247811342], [-73.07039221870724, -2.3089543595509525], [-72.32578650581365, -2.434218031426454], [-71.7747607082854, -2.169789727388938], [-71.41364579942979, -2.3428024227021282], [-70.81347571479196, 
-2.2568645158007428], [-70.04770850287485, -2.725156345229699], [-70.69268205430971, -3.742872002785859], [-70.39404395209499, -3.7665914852078255], [-69.89363521999663, -4.2981869441943275]]]}", + "{\"type\": \"Polygon\", \"coordinates\": [[[-66.87632585312258, 1.253360500489336], [-67.0650481838525, 1.130112209473225], [-67.2599975246736, 1.7199986840849562], [-67.5378100246747, 2.03716278727633], [-67.86856502955884, 1.6924551456733923], [-69.81697323269162, 1.7148052026396243], [-69.80459672715773, 1.0890811222334662], [-69.21863766140018, 0.9856765812174331], [-69.25243404811906, 0.6026508650700748], [-69.45239600287246, 0.7061587589506929], [-70.01556576198931, 0.5414142928042054], [-70.02065589057005, -0.18515634521953928], [-69.5770653957766, -0.549991957200163], [-69.42048580593223, -1.1226185034264091], [-69.44410193548961, -1.5562871232198177], [-69.89363521999663, -4.2981869441943275], [-70.39404395209499, -3.7665914852078255], [-70.69268205430971, -3.742872002785859], [-70.04770850287485, -2.725156345229699], [-70.81347571479196, -2.2568645158007428], [-71.41364579942979, -2.3428024227021282], [-71.7747607082854, -2.169789727388938], [-72.32578650581365, -2.434218031426454], [-73.07039221870724, -2.3089543595509525], [-73.6595035468346, -1.2604912247811342], [-74.12239518908906, -1.002832533373848], [-74.44160051135597, -0.5308200008198867], [-75.10662451852008, -0.05720549886486026], [-75.37322323271385, -0.1520317521204504], [-75.8014658271166, 0.08480133707320192], [-76.29231441924097, 0.4160472680641192], [-76.5763797675494, 0.256935533037435], [-77.4249843004304, 0.395686753741117], [-77.66861284047044, 0.8258930525709616], [-77.85506140817952, 0.8099250349927729], [-78.85525875518871, 1.380923773601822], [-78.99093522817104, 1.6913699405952514], [-78.61783138702371, 1.766404120283056], [-78.66211808949785, 2.2673554549204766], [-78.42761043975733, 2.629555568854215], [-77.93154252797149, 2.6966057397529255], [-77.51043128122501, 
3.325016994638247], [-77.12768978545526, 3.8496361352653565], [-77.49627193877703, 4.087606105969428], [-77.3076012844794, 4.6679841170394525], [-77.53322058786573, 5.582811997902497], [-77.31881507028675, 5.84535411216136], [-77.47666073272228, 6.691116441266303], [-77.88157141794525, 7.223771267114785], [-77.7534138658614, 7.709839789252143], [-77.43110795765699, 7.638061224798734], [-77.24256649444008, 7.935278225125444], [-77.47472286651133, 8.524286200388218], [-77.35336076527386, 8.67050466555807], [-76.83667395700357, 8.638749497914716], [-76.08638383655786, 9.336820583529487], [-75.67460018584006, 9.443248195834599], [-75.66470414905618, 9.774003200718738], [-75.48042599150335, 10.618990383339309], [-74.90689510771199, 11.083044745320322], [-74.27675269234489, 11.102035834187587], [-74.1972226630477, 11.310472723836867], [-73.41476396350029, 11.22701528568548], [-72.62783525255963, 11.731971543825523], [-72.23819495307892, 11.955549628136326], [-71.75409013536864, 12.437303168177309], [-71.3998223537917, 12.376040757695293], [-71.13746110704588, 12.112981879113505], [-71.3315836249503, 11.776284084515808], [-71.97392167833829, 11.60867157637712], [-72.22757544624294, 11.10870209395324], [-72.61465776232521, 10.821975409381778], [-72.9052860175347, 10.450344346554772], [-73.02760413276957, 9.736770331252444], [-73.30495154488005, 9.151999823437606], [-72.7887298245004, 9.085027167187334], [-72.6604947577681, 8.625287787302682], [-72.43986223009796, 8.405275376820029], [-72.36090064155597, 8.002638454617895], [-72.47967892117885, 7.632506008327354], [-72.44448727078807, 7.423784898300482], [-72.19835242378188, 7.340430813013683], [-71.96017574734864, 6.991614895043539], [-70.67423356798152, 7.087784735538719], [-70.09331295437242, 6.96037649172311], [-69.38947994655712, 6.0998605411988365], [-68.98531856960236, 6.206804917826858], [-68.26505245631823, 6.153268133972475], [-67.69508724635502, 6.267318020040647], [-67.34143958196557, 6.095468044454023], 
[-67.52153194850275, 5.556870428891969], [-67.74469662135522, 5.221128648291668], [-67.82301225449355, 4.503937282728899], [-67.62183590358129, 3.8394817163199946], [-67.33756384954368, 3.5423422306417223], [-67.30317318385345, 3.31845408773718], [-67.8099381171237, 2.820655015469569], [-67.44709204778631, 2.6002808699608693], [-67.18129431829307, 2.250638129074062], [-66.87632585312258, 1.253360500489336]]]}", + "{\"type\": \"Polygon\", \"coordinates\": [[[-60.73357418480372, 5.200277207861901], [-60.601179165271944, 4.91809804933213], [-60.96689327660154, 4.536467596856639], [-62.08542965355913, 4.162123521334308], [-62.804533047116706, 4.006965033377952], [-63.093197597899106, 3.7705711938587854], [-63.88834286157416, 4.020530096854571], [-64.62865943058755, 4.14848094320925], [-64.81606401229402, 4.056445217297423], [-64.3684944322141, 3.797210394705246], [-64.40882788761792, 3.126786200366624], [-64.2699991522658, 2.497005520025567], [-63.42286739770512, 2.4110676131241746], [-63.368788011311665, 2.200899562993129], [-64.08308549666609, 1.9163691267940803], [-64.19930579289051, 1.49285492594602], [-64.61101192895987, 1.3287305769870417], [-65.35471330428837, 1.0952822941085003], [-65.54826738143757, 0.7892544620760303], [-66.32576514348496, 0.7244522159820121], [-66.87632585312258, 1.253360500489336], [-67.18129431829307, 2.250638129074062], [-67.44709204778631, 2.6002808699608693], [-67.8099381171237, 2.820655015469569], [-67.30317318385345, 3.31845408773718], [-67.33756384954368, 3.5423422306417223], [-67.62183590358129, 3.8394817163199946], [-67.82301225449355, 4.503937282728899], [-67.74469662135522, 5.221128648291668], [-67.52153194850275, 5.556870428891969], [-67.34143958196557, 6.095468044454023], [-67.69508724635502, 6.267318020040647], [-68.26505245631823, 6.153268133972475], [-68.98531856960236, 6.206804917826858], [-69.38947994655712, 6.0998605411988365], [-70.09331295437242, 6.96037649172311], [-70.67423356798152, 7.087784735538719], 
[-71.96017574734864, 6.991614895043539], [-72.19835242378188, 7.340430813013683], [-72.44448727078807, 7.423784898300482], [-72.47967892117885, 7.632506008327354], [-72.36090064155597, 8.002638454617895], [-72.43986223009796, 8.405275376820029], [-72.6604947577681, 8.625287787302682], [-72.7887298245004, 9.085027167187334], [-73.30495154488005, 9.151999823437606], [-73.02760413276957, 9.736770331252444], [-72.9052860175347, 10.450344346554772], [-72.61465776232521, 10.821975409381778], [-72.22757544624294, 11.10870209395324], [-71.97392167833829, 11.60867157637712], [-71.3315836249503, 11.776284084515808], [-71.36000566271082, 11.539993597861212], [-71.94704993354651, 11.423282375530022], [-71.62086829292019, 10.969459947142795], [-71.63306393094109, 10.446494452349029], [-72.07417395698451, 9.865651353388373], [-71.69564409044654, 9.072263088411248], [-71.26455929226773, 9.137194525585983], [-71.03999935574339, 9.859992784052409], [-71.35008378771079, 10.211935126176215], [-71.40062333849224, 10.968969021036015], [-70.15529883490652, 11.37548167566004], [-70.29384334988103, 11.846822414594214], [-69.94324459499683, 12.162307033736099], [-69.58430009629747, 11.459610907431212], [-68.88299923366445, 11.443384507691563], [-68.23327145045873, 10.885744126829946], [-68.19412655299763, 10.554653225135922], [-67.29624854192633, 10.54586823164631], [-66.227864142508, 10.648626817258688], [-65.65523759628175, 10.200798855017323], [-64.89045223657817, 10.0772146671913], [-64.32947872583374, 10.38959870039568], [-64.31800655786495, 10.64141795495398], [-63.07932247582873, 10.7017243514386], [-61.880946010980196, 10.715625311725104], [-62.73011898461641, 10.420268662960906], [-62.388511928950976, 9.94820445397464], [-61.58876746280194, 9.873066921422264], [-60.83059668643172, 9.381339829948942], [-60.67125240745973, 8.580174261911878], [-60.15009558779618, 8.602756862823426], [-59.758284878159195, 8.367034816924047], [-60.5505879380582, 7.779602972846178], 
[-60.637972785063766, 7.4149999048108555], [-60.2956680975624, 7.043911444522919], [-60.54399919294099, 6.856584377464883], [-61.15933631045648, 6.696077378766319], [-61.13941504580795, 6.234296779806144], [-61.410302903881956, 5.959068101419618], [-60.73357418480372, 5.200277207861901]]]}", + "{\"type\": \"Polygon\", \"coordinates\": [[[-56.539385748914555, 1.8995226098669207], [-56.78270423036083, 1.8637108422886541], [-57.335822923396904, 1.9485377058957594], [-57.66097103537737, 1.6825849471056387], [-58.11344987652502, 1.5071951359070253], [-58.429477098205965, 1.4639419620787208], [-58.540012986878295, 1.2680882836925207], [-59.03086157900265, 1.3176976586927225], [-59.64604366722126, 1.786893825686789], [-59.71854570172675, 2.2496304386443597], [-59.97452490908456, 2.755232652188056], [-59.815413174057866, 3.6064985213320853], [-59.53803992373123, 3.9588025984819377], [-59.767405768458715, 4.423502915866607], [-60.11100236676738, 4.574966538914083], [-59.980958624904886, 5.014061184098139], [-60.21368343773133, 5.244486395687602], [-60.73357418480372, 5.200277207861901], [-61.410302903881956, 5.959068101419618], [-61.13941504580795, 6.234296779806144], [-61.15933631045648, 6.696077378766319], [-60.54399919294099, 6.856584377464883], [-60.2956680975624, 7.043911444522919], [-60.637972785063766, 7.4149999048108555], [-60.5505879380582, 7.779602972846178], [-59.758284878159195, 8.367034816924047], [-59.10168412945866, 7.999201971870492], [-58.48296220562806, 7.347691351750697], [-58.45487606467742, 6.832787380394464], [-58.078103196837375, 6.809093736188643], [-57.542218593970645, 6.321268215353356], [-57.14743648947689, 5.973149929219161], [-57.307245856339506, 5.073566595882227], [-57.91428890647214, 4.812626451024414], [-57.8602095200787, 4.57680105226045], [-58.04469438336068, 4.0608635522583825], [-57.60156897645787, 3.3346546492606848], [-57.28143347840971, 3.3334919295341194], [-57.15009782573991, 2.7689269067454063], [-56.539385748914555, 
1.8995226098669207]]]}", + "{\"type\": \"Polygon\", \"coordinates\": [[[-54.524754197799716, 2.3118488631237852], [-55.09758744975514, 2.5237480737366127], [-55.569755011606, 2.4215062524471307], [-55.973322109589375, 2.510363877773017], [-56.0733418442903, 2.2207949894254995], [-55.905600145070885, 2.0219957543986595], [-55.995698004771754, 1.8176671411166012], [-56.539385748914555, 1.8995226098669207], [-57.15009782573991, 2.7689269067454063], [-57.28143347840971, 3.3334919295341194], [-57.60156897645787, 3.3346546492606848], [-58.04469438336068, 4.0608635522583825], [-57.8602095200787, 4.57680105226045], [-57.91428890647214, 4.812626451024414], [-57.307245856339506, 5.073566595882227], [-57.14743648947689, 5.973149929219161], [-55.9493184067898, 5.772877915872002], [-55.841779751190415, 5.95312531170606], [-55.033250291551774, 6.025291449401664], [-53.9580446030709, 5.756548163267765], [-54.47863298197923, 4.896755682795586], [-54.399542202356514, 4.212611395683467], [-54.00693050801901, 3.6200377465925584], [-54.181726040246275, 3.1897797713304215], [-54.2697051662232, 2.7323916691150463], [-54.524754197799716, 2.3118488631237852]]]}", + "{\"type\": \"Polygon\", \"coordinates\": [[[-75.37322323271385, -0.1520317521204504], [-75.23372270374195, -0.9114169246495294], [-75.54499569365204, -1.5616097957458803], [-76.63539425322672, -2.6086776668438176], [-77.83790483265861, -3.003020521663103], [-78.45068396677564, -3.873096612161376], [-78.63989722361234, -4.547784112164074], [-79.20528906931773, -4.959128513207389], [-79.62497921417618, -4.454198093283495], [-80.02890804718561, -4.3460909969288934], [-80.44224199087216, -4.425724379090674], [-80.46929460317695, -4.0592867977089995], [-80.18401485870967, -3.8211617977080437], [-80.30256059438722, -3.4048564591647126], [-79.77029334178093, -2.65751189535964], [-79.98655921092242, -2.220794366061014], [-80.36878394236925, -2.6851587866357884], [-80.96776546906436, -2.246942640800704], [-80.76480628123804, 
-1.9650477026485331], [-80.93365902375172, -1.057454522306358], [-80.58337032746127, -0.9066626928786832], [-80.39932471385376, -0.28370330160014134], [-80.02089820018037, 0.3603400740534682], [-80.09060970734211, 0.7684288598623965], [-79.5427620103998, 0.982937730305963], [-78.85525875518871, 1.380923773601822], [-77.85506140817952, 0.8099250349927729], [-77.66861284047044, 0.8258930525709616], [-77.4249843004304, 0.395686753741117], [-76.5763797675494, 0.256935533037435], [-76.29231441924097, 0.4160472680641192], [-75.8014658271166, 0.08480133707320192], [-75.37322323271385, -0.1520317521204504]]]}", + "{\"type\": \"Polygon\", \"coordinates\": [[[-58.166392381408045, -20.176700941653678], [-57.8706739976178, -20.73268767668195], [-57.937155727761294, -22.090175876557172], [-56.8815095689029, -22.28215382252148], [-56.47331743022939, -22.086300144135283], [-55.79795813660691, -22.356929620047822], [-55.610682745981144, -22.655619398694846], [-55.517639329639636, -23.571997572526637], [-55.40074723979542, -23.956935316668805], [-55.02790178080955, -24.00127369557523], [-54.65283423523513, -23.83957813893396], [-54.29295956075452, -24.02101409271073], [-54.29347632507745, -24.570799655863965], [-54.42894609233059, -25.162184747012166], [-54.625290696823576, -25.739255466415514], [-54.78879492859505, -26.621785577096134], [-55.69584550639816, -27.387837009390864], [-56.486701626192996, -27.548499037386293], [-57.60975969097614, -27.395898532828387], [-58.61817359071975, -27.123718763947096], [-57.63366004091113, -25.60365650808164], [-57.77721716981794, -25.16233977630904], [-58.80712846539498, -24.77145924245331], [-60.02896603050403, -24.032796319273274], [-60.846564704009914, -23.880712579038292], [-62.685057135657885, -22.249029229422387], [-62.291179368729225, -21.051634616787393], [-62.2659612697708, -20.513734633061276], [-61.78632646345377, -19.633736667562964], [-60.04356462262649, -19.342746677327426], [-59.11504248720611, -19.3569060197754], 
[-58.183471442280506, -19.868399346600363], [-58.166392381408045, -20.176700941653678]]]}" + ] + }, + "map_join": [ + [ + "region" + ], + [ + "continent" + ] + ], + "color": "white" + } + ] +}""" + return parsePlotSpec(spec) + } } } diff --git a/plot-livemap/src/commonMain/kotlin/jetbrains/datalore/plot/livemap/MultiDataPointHelper.kt b/plot-livemap/src/commonMain/kotlin/jetbrains/datalore/plot/livemap/MultiDataPointHelper.kt index d8e8bdc9aa6..2e84f3d7383 100644 --- a/plot-livemap/src/commonMain/kotlin/jetbrains/datalore/plot/livemap/MultiDataPointHelper.kt +++ b/plot-livemap/src/commonMain/kotlin/jetbrains/datalore/plot/livemap/MultiDataPointHelper.kt @@ -21,11 +21,13 @@ internal class MultiDataPointHelper private constructor( fun fetchBuilder(p: DataPointAesthetics): MultiDataPointBuilder { val coord = explicitVec(p.x()!!, p.y()!!) - return builders.getOrPut(coord, { MultiDataPointBuilder(p, sortingMode) }) + return builders.getOrPut(coord) { MultiDataPointBuilder(p, sortingMode) } } - aesthetics.dataPoints().forEach { fetchBuilder(it).add(it) } - return builders.values.map { it.build() } + aesthetics.dataPoints() + .filter { it.symY() != null } + .forEach { p -> fetchBuilder(p).add(p) } + return builders.values.map(MultiDataPointBuilder::build) } } @@ -59,7 +61,7 @@ internal class MultiDataPointHelper private constructor( return MultiDataPoint( aes = myAes, indices = myPoints.map { it.index() }, - values = myPoints.map { it.symY()!! }, + values = myPoints.map { it.symY()!! }, // symY can't be null - pre-filtered in function getPoints() colors = myPoints.map { it.fill()!! 
} ) } @@ -88,5 +90,4 @@ internal class MultiDataPointHelper private constructor( val values: List, val colors: List ) - -} \ No newline at end of file +} diff --git a/python-package/lets_plot/_global_settings.py b/python-package/lets_plot/_global_settings.py index aef7fa86e2f..fed0af5cdef 100644 --- a/python-package/lets_plot/_global_settings.py +++ b/python-package/lets_plot/_global_settings.py @@ -57,7 +57,7 @@ _DATALORE_TILES_SERVICE = 'wss://tiles.datalore.jetbrains.com' _DATALORE_TILES_ATTRIBUTION = 'Map: \u00a9 Lets-Plot, map data: \u00a9 OpenStreetMap contributors.' _DATALORE_TILES_THEME = 'color' -_DATALORE_GEOCODING_SERVICE = 'https://geo.datalore.jetbrains.com' +_DATALORE_GEOCODING_SERVICE = 'http://3.86.228.157:3025' def _init_value(actual_name: str, def_val: Any) -> Any: diff --git a/python-package/lets_plot/_type_utils.py b/python-package/lets_plot/_type_utils.py index c101647228b..e89ec5659fa 100644 --- a/python-package/lets_plot/_type_utils.py +++ b/python-package/lets_plot/_type_utils.py @@ -4,7 +4,6 @@ # import json import math -from abc import abstractmethod from datetime import datetime from typing import Dict @@ -75,21 +74,13 @@ def _standardize_value(v): if (numpy and isinstance(v, numpy.ndarray)) or (pandas and isinstance(v, pandas.Series)): return _standardize_value(v.tolist()) if isinstance(v, datetime): - if (pandas and v is pandas.NaT): + if pandas and v is pandas.NaT: return None else: return v.timestamp() * 1000 # convert from second to millisecond - if isinstance(v, CanToDataFrame): - return standardize_dict(v.to_data_frame()) - if (shapely and isinstance(v, shapely.geometry.base.BaseGeometry)): + if shapely and isinstance(v, shapely.geometry.base.BaseGeometry): return json.dumps(shapely.geometry.mapping(v)) try: return repr(v) except Exception: raise Exception('Unsupported type: {0}({1})'.format(v, type(v))) - - -class CanToDataFrame: - @abstractmethod - def to_data_frame(self): # -> pandas.DataFrame - pass diff --git 
a/python-package/lets_plot/export/simple.py b/python-package/lets_plot/export/simple.py index 21be3ecacfc..d3f683cc94c 100644 --- a/python-package/lets_plot/export/simple.py +++ b/python-package/lets_plot/export/simple.py @@ -5,7 +5,6 @@ from os.path import abspath from typing import Union -from .. import _kbridge as kbr from .._global_settings import is_production from .._version import __version__ from ..plot.core import PlotSpec @@ -32,6 +31,8 @@ def export_svg(plot: Union[PlotSpec, GGBunch], filename: str) -> str: if not (isinstance(plot, PlotSpec) or isinstance(plot, GGBunch)): raise ValueError("PlotSpec or GGBunch expected but was: {}".format(type(plot))) + from .. import _kbridge as kbr + svg = kbr._generate_svg(plot.as_dict()) with io.open(filename, mode="w", encoding="utf-8") as f: f.write(svg) @@ -62,6 +63,9 @@ def export_html(plot: Union[PlotSpec, GGBunch], filename: str, iframe: bool = Fa raise ValueError("PlotSpec or GGBunch expected but was: {}".format(type(plot))) version = __version__ if is_production() else "latest" + + from .. 
import _kbridge as kbr + html_page = kbr._generate_static_html_page(plot.as_dict(), version, iframe) with io.open(filename, mode="w", encoding="utf-8") as f: f.write(html_page) diff --git a/python-package/lets_plot/geo_data/__init__.py b/python-package/lets_plot/geo_data/__init__.py index 6083ce2777f..a57f5eef4dc 100644 --- a/python-package/lets_plot/geo_data/__init__.py +++ b/python-package/lets_plot/geo_data/__init__.py @@ -1,8 +1,9 @@ from .core import * from .map_geometry import * -from .regions import * +from .geocoder import * +from .geocodes import * -__all__ = (core.__all__ + map_geometry.__all__) +__all__ = (core.__all__ + map_geometry.__all__ + geocoder.__all__) # print on the package import print("The geodata is provided by © OpenStreetMap contributors" diff --git a/python-package/lets_plot/geo_data/core.py b/python-package/lets_plot/geo_data/core.py index ac4aa2cd5ea..3f7e12313e6 100644 --- a/python-package/lets_plot/geo_data/core.py +++ b/python-package/lets_plot/geo_data/core.py @@ -1,15 +1,8 @@ -from typing import Any, Union, List, Optional +from typing import Any import numpy as np -from pandas import Series -from .gis.geocoding_service import GeocodingService -from .gis.geometry import GeoPoint -from .gis.request import RequestBuilder, RequestKind -from .gis.response import Response, SuccessResponse -from .regions import Regions, _raise_exception, _to_level_kind, _to_scope -from .regions_builder import RegionsBuilder -from .type_assertion import assert_list_type +from .geocoder import Geocoder __all__ = [ 'distance', @@ -35,43 +28,14 @@ } -def _to_coords(lon: Optional[Union[float, Series, List[float]]], lat: Optional[Union[float, Series, List[float]]]) -> List[GeoPoint]: - if type(lon) != type(lat): - raise ValueError('lon and lat have different types') +def regions_xy(lon, lat, level, within=None) -> Geocoder: + raise ValueError('Function `regions_xy(...)` is deprecated. 
Use new function `reverse_geocode(...)`.') - if isinstance(lon, float): - return [GeoPoint(lon, lat)] - if isinstance(lon, Series): - lon = lon.tolist() - lat = lat.tolist() - - if isinstance(lon, list): - assert_list_type(lon, float) - assert_list_type(lat, float) - return [GeoPoint(lo, la) for lo, la in zip(lon, lat)] - - -def regions_xy(lon, lat, level, within=None): - request = RequestBuilder() \ - .set_request_kind(RequestKind.reverse) \ - .set_reverse_coordinates(_to_coords(lon, lat)) \ - .set_level(_to_level_kind(level)) \ - .set_reverse_scope(_to_scope(within)) \ - .build() - - response: Response = GeocodingService().do_request(request) - - if not isinstance(response, SuccessResponse): - _raise_exception(response) - - return Regions(response.level, response.features, False) - - -def regions_builder(level=None, request=None, within=None, highlights=False) -> RegionsBuilder: +def regions_builder(level=None, request=None, within=None, highlights=False): """ Create a RegionBuilder class by level and request. Allows to refine ambiguous request with - where method. build() method creates Regions object or shows details for ambiguous result. + where method. build() method creates Geocoder object or shows details for ambiguous result. regions_builder(level, request, within) @@ -83,19 +47,19 @@ def regions_builder(level=None, request=None, within=None, highlights=False) -> Data can be filtered by full names at any level (only exact matching). For 'state' level: -'US-48' returns continental part of United States (48 states) in a compact form. - within : [array | string | Regions | None] + within : [array | string | Geocoder | None] Data can be filtered by within name. If within is array then request and within will be merged positionally (size should be equal). - If within is Regions then request will be searched in any of these regions. + If within is Geocoder then request will be searched in any of these regions. 
'US-48' includes continental part of United States (48 states). Returns ------- - RegionsBuilder object : + Geocoder object : Note ----- - regions_builder() allows to refine ambiguous request with where() method. Call build() method to create Regions object + regions_builder() allows to refine ambiguous request with where() method. Call build() method to create Geocoder object Examples --------- @@ -105,12 +69,13 @@ def regions_builder(level=None, request=None, within=None, highlights=False) -> >>> r = regions_builder(level='city', request=['moscow', 'york']).where('york', regions_state('New York')).build() >>> r """ - return RegionsBuilder(level, request, within, highlights) + raise ValueError('Function `regions_builder(...)` is deprecated. Use new function `geocode(...)`.') + #return Geocoder(level, request, within, highlights) -def regions(level=None, request=None, within=None) -> Regions: +def regions(level=None, request=None, within=None): """ - Create a Regions class by level and request. + Create a Geocoder class by level and request. regions(level, request, within) @@ -123,15 +88,15 @@ def regions(level=None, request=None, within=None) -> Regions: None with explicit level returns all corresponding regions, like all countries i.e. regions(level='country'). For 'state' level: -'US-48' returns continental part of United States (48 states) in a compact form. - within : [array | string | Regions| None] + within : [array | string | Geocoder| None] Data can be filtered by within name. If within is array then request and within will be merged positionally (size should be equal). - If within is Regions then request will be searched in any of these regions. + If within is Geocoder then request will be searched in any of these regions. 'US-48' includes continental part of United States (48 states). 
Returns ------- - Regions object : + Geocoder object : Note ----- @@ -146,12 +111,13 @@ def regions(level=None, request=None, within=None) -> Regions: >>> r = regions(level='country', request=['Germany', 'USA']) >>> r """ - return RegionsBuilder(level=level, request=request, scope=within).build() + raise ValueError('Function `regions(...)` is deprecated. Use new function `geocode(...)`.') + #return Geocoder(level=level, request=request, scope=within).build() def regions_country(request=None): """ - Create a Regions class for country level by request. + Create a Geocoder class for country level by request. regions_country(request) @@ -162,7 +128,7 @@ def regions_country(request=None): Returns ------- - Regions object : + Geocoder object : Note ----- @@ -178,12 +144,13 @@ def regions_country(request=None): >>> r_country = regions_country(request=['Germany', 'USA']) >>> r_country """ - return regions('country', request, None) + raise ValueError('Function `regions_country(...)` is deprecated. Use new function `geocode_countries(...)`.') + #return regions('country', request, None) def regions_state(request=None, within=None): """ - Create a Regions class for state level by request. + Create a Geocoder class for state level by request. regions_state(request, within) @@ -193,15 +160,15 @@ def regions_state(request=None, within=None): Data can be filtered by full names at any level (only exact matching). For 'state' level: -'US-48' returns continental part of United States (48 states) in a compact form. - within : [array | string | Regions| None] + within : [array | string | Geocoder| None] Data can be filtered by within name. If within is array then filter and within will be merged positionally (size should be equal). - If within is Regions then request will be searched in any of these regions. + If within is Geocoder then request will be searched in any of these regions. 'US-48' includes continental part of United States (48 states). 
Returns ------- - Regions object : + Geocoder object : Note ----- @@ -217,12 +184,13 @@ def regions_state(request=None, within=None): >>> r_state = regions_state(request=['Texas', 'Iowa'], within='USA') >>> r_state """ - return regions('state', request, within) + raise ValueError('Function `regions_state(...)` is deprecated. Use new function `geocode_states(...)`') + #return regions('state', request, within) def regions_county(request=None, within=None): """ - Create a Regions class for county level by request. + Create a Geocoder class for county level by request. regions_county(request, within) @@ -230,15 +198,15 @@ def regions_county(request=None, within=None): ---------- request : [array | string | None] Data can be filtered by full names at any level (only exact matching). - within : [array | string | Regions| None] + within : [array | string | Geocoder| None] Data can be filtered by within name. If within is array then request and within will be merged positionally (size should be equal). - If within is Regions then request will be searched in any of these regions. + If within is Geocoder then request will be searched in any of these regions. 'US-48' includes continental part of United States (48 states). Returns ------- - Regions object : + Geocoder object : Note ----- @@ -254,12 +222,13 @@ def regions_county(request=None, within=None): >>> r_county = regions_county(request=['Calhoun County', 'Howard County'], within='Texas') >>> r_county """ - return regions('county', request, within) + raise ValueError('Function `regions_county(...)` is deprecated. Use new function `geocode_counties(...)`') + #return regions('county', request, within) def regions_city(request=None, within=None): """ - Create a Regions class for city level by request. + Create a Geocoder class for city level by request. 
regions_city(request, within) @@ -267,15 +236,15 @@ def regions_city(request=None, within=None): ---------- request : [array | string | None] Data can be filtered by full names at any level (only exact matching). - within : [array | string | Regions| None] + within : [array | string | Geocoder| None] Data can be filtered by within name. If within is array then request and within will be merged positionally (size should be equal). - If within is Regions then request will be searched in any of these regions. + If within is Geocoder then request will be searched in any of these regions. 'US-48' includes continental part of United States (48 states). Returns ------- - Regions object : + Geocoder object : Note ----- @@ -291,7 +260,8 @@ def regions_city(request=None, within=None): >>> r_city = regions_city(request=['New York', 'Los Angeles']) >>> r_city """ - return regions('city', request, within) + raise ValueError('Function `regions_city(...)` is deprecated. Use new function `geocode_cities(...)`') + #return regions('city', request, within) def distance(lon0, lat0, lon1, lat1, units='km'): diff --git a/python-package/lets_plot/geo_data/geocoder.py b/python-package/lets_plot/geo_data/geocoder.py new file mode 100644 index 00000000000..aaf6dce831a --- /dev/null +++ b/python-package/lets_plot/geo_data/geocoder.py @@ -0,0 +1,646 @@ +# Copyright (c) 2020. JetBrains s.r.o. +# Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
+from collections import namedtuple, Iterable +from typing import Union, List, Optional, Dict + +from pandas import Series + +from .geocodes import _to_level_kind, request_types, Geocodes, _raise_exception, _ensure_is_list +from .gis.geocoding_service import GeocodingService +from .gis.geometry import GeoRect, GeoPoint +from .gis.request import RequestBuilder, GeocodingRequest, RequestKind, MapRegion, AmbiguityResolver, \ + RegionQuery, LevelKind, IgnoringStrategyKind, PayloadKind, ReverseGeocodingRequest +from .gis.response import Response, SuccessResponse +from .type_assertion import assert_list_type + +__all__ = [ + 'geocode', + 'geocode_cities', + 'geocode_counties', + 'geocode_states', + 'geocode_countries', + 'reverse_geocode' +] + +NAMESAKE_MAX_COUNT = 10 + +ShapelyPointType = 'shapely.geometry.Point' +ShapelyPolygonType = 'shapely.geometry.Polygon' + +QuerySpec = namedtuple('QuerySpec', 'name, county, state, country') +WhereSpec = namedtuple('WhereSpec', 'scope, ambiguity_resolver') + +parent_types = Optional[Union[str, Geocodes, 'Geocoder', MapRegion, List]] # list of same types +scope_types = Optional[Union[str, Geocodes, 'Geocoder', ShapelyPolygonType]] + + +def _to_scope(location: scope_types) -> Optional[Union[List[MapRegion], MapRegion]]: + if location is None: + return None + + def _make_region(obj: Union[str, Geocodes]) -> Optional[MapRegion]: + if isinstance(obj, Geocodes): + return MapRegion.scope(obj.unique_ids()) + + if isinstance(obj, str): + return MapRegion.with_name(obj) + + raise ValueError('Unsupported scope type. 
Expected Geocoder, str or list, but was `{}`'.format(type(obj))) + + if isinstance(location, list): + return [_make_region(obj) for obj in location] + + return _make_region(location) + + +class LazyShapely: + @staticmethod + def is_point(p) -> bool: + if not LazyShapely._is_shapely_available(): + return False + + from shapely.geometry import Point + return isinstance(p, Point) + + @staticmethod + def is_polygon(p): + if not LazyShapely._is_shapely_available(): + return False + + from shapely.geometry import Polygon + return isinstance(p, Polygon) + + @staticmethod + def _is_shapely_available(): + try: + import shapely + return True + except: + return False + + +def _make_ambiguity_resolver(ignoring_strategy: Optional[IgnoringStrategyKind] = None, + scope: Optional[ShapelyPolygonType] = None, + closest_object: Optional[Union[Geocodes, ShapelyPointType]] = None): + if LazyShapely.is_polygon(scope): + rect = GeoRect(min_lon=scope.bounds[0], min_lat=scope.bounds[1], max_lon=scope.bounds[2], max_lat=scope.bounds[3]) + elif scope is None: + rect = None + else: + assert scope is not None # else for empty scope - existing scope should be already handled + raise ValueError('Wrong type of parameter `scope` - expected `shapely.geometry.Polygon`, but was `{}`'.format(type(scope).__name__)) + + return AmbiguityResolver( + ignoring_strategy=ignoring_strategy, + closest_coord=_to_geo_point(closest_object), + box=rect + ) + + +def _to_geo_point(closest_place: Optional[Union[Geocodes, ShapelyPointType]]) -> Optional[GeoPoint]: + if closest_place is None: + return None + + if isinstance(closest_place, Geocoder): + closest_place = closest_place._geocode() + + if isinstance(closest_place, Geocodes): + closest_place_id = closest_place.as_list()[0].unique_ids() + assert len(closest_place_id) == 1 + + request = RequestBuilder() \ + .set_request_kind(RequestKind.explicit) \ + .set_requested_payload([PayloadKind.centroids]) \ + .set_ids(closest_place_id) \ + .build() + + response: Response 
= GeocodingService().do_request(request) + if isinstance(response, SuccessResponse): + assert len(response.features) == 1 + centroid = response.features[0].centroid + return GeoPoint(lon=centroid.lon, lat=centroid.lat) + else: + raise ValueError("Unexpected geocoding response for id " + str(closest_place_id[0])) + + if LazyShapely.is_point(closest_place): + return GeoPoint(lon=closest_place.x, lat=closest_place.y) + + raise ValueError('Not supported type: {}'.format(type(closest_place))) + + +def _get_or_none(list, index): + if index >= len(list): + return None + return list[index] + + +def _ensure_is_parent_list(obj): + if obj is None: + return None + + if isinstance(obj, Geocoder): + obj = obj._geocode() + + if isinstance(obj, Geocodes): + return obj.as_list() + + if isinstance(obj, Iterable) and not isinstance(obj, str): + return [v for v in obj] + + return [obj] + + +def _make_parents(values: parent_types) -> List[Optional[MapRegion]]: + values = _ensure_is_parent_list(values) + + if values is None: + return [] + + return list(map(lambda v: _make_parent_region(v) if values is not None else None, values)) + + +def _make_parent_region(place: parent_types) -> Optional[MapRegion]: + if place is None: + return None + + if isinstance(place, Geocoder): + place = place._geocode() + + if isinstance(place, str): + return MapRegion.with_name(place) + + if isinstance(place, Geocodes): + assert len(place.to_map_regions()) == 1, 'Region object used as parent should contain only single record' + return place.to_map_regions()[0] + + raise ValueError('Unsupported parent type: ' + str(type(place))) + + +class Geocoder: + def get_limits(self) -> 'GeoDataFrame': + return self._geocode().limits() + + def get_centroids(self) -> 'GeoDataFrame': + return self._geocode().centroids() + + def get_boundaries(self, resolution=None) -> 'GeoDataFrame': + return self._geocode().boundaries(resolution) + + def get_geocodes(self) -> 'DataFrame': + return self._geocode().to_data_frame() + + def 
_geocode(self) -> Geocodes: + raise ValueError('Abstract method') + + +def _to_coords(lon: Optional[Union[float, Series, List[float]]], lat: Optional[Union[float, Series, List[float]]]) -> List[GeoPoint]: + if type(lon) != type(lat): + raise ValueError('lon and lat have different types') + + if isinstance(lon, float): + return [GeoPoint(lon, lat)] + + if isinstance(lon, Series): + lon = lon.tolist() + lat = lat.tolist() + + if isinstance(lon, list): + assert_list_type(lon, float) + assert_list_type(lat, float) + return [GeoPoint(lo, la) for lo, la in zip(lon, lat)] + + +class ReverseGeocoder(Geocoder): + def __init__(self, lon, lat, level: Optional[Union[str, LevelKind]], scope=None): + self._geocodes: Optional[Geocodes] = None + self._request: ReverseGeocodingRequest = RequestBuilder() \ + .set_request_kind(RequestKind.reverse) \ + .set_reverse_coordinates(_to_coords(lon, lat)) \ + .set_level(_to_level_kind(level)) \ + .set_reverse_scope(_to_scope(scope)) \ + .build() + + def _geocode(self) -> Geocodes: + if self._geocodes is None: + response: Response = GeocodingService().do_request(self._request) + if not isinstance(response, SuccessResponse): + _raise_exception(response) + self._geocodes = Geocodes( + response.level, + response.answers, + [RegionQuery(request='[{}, {}]'.format(pt.lon, pt.lat)) for pt in self._request.coordinates], + highlights=False + ) + + return self._geocodes + + +class NamesGeocoder(Geocoder): + def __init__(self, + level: Optional[Union[str, LevelKind]] = None, + request: request_types = None + ): + self._geocodes: Optional[Geocodes] = None + self._scope: List[Optional[MapRegion]] = [] + self._level: Optional[LevelKind] = _to_level_kind(level) + self._default_ambiguity_resolver: AmbiguityResolver = AmbiguityResolver.empty() # TODO rename to geohint + self._highlights: bool = False + self._allow_ambiguous = False + self._countries: List[Optional[MapRegion]] = [] + self._states: List[Optional[MapRegion]] = [] + self._counties: 
List[Optional[MapRegion]] = [] + self._overridings: Dict[QuerySpec, WhereSpec] = {} # query to scope + + requests: Optional[List[str]] = _ensure_is_list(request) + if requests is not None: + self._names: List[Optional[str]] = list(map(lambda name: name if requests is not None else None, requests)) + else: + self._names = [] + + def scope(self, scope: scope_types) -> 'NamesGeocoder': + self._reset_geocodes() + self._scope = _prepare_new_scope(scope) + return self + + def highlights(self, v: bool) -> 'NamesGeocoder': + self._highlights = v + return self + + def countries(self, countries: parent_types) -> 'NamesGeocoder': + self._reset_geocodes() + self._countries = _make_parents(countries) + return self + + def states(self, states: parent_types) -> 'NamesGeocoder': + self._reset_geocodes() + self._states = _make_parents(states) + return self + + def counties(self, counties: parent_types) -> 'NamesGeocoder': + self._reset_geocodes() + self._counties = _make_parents(counties) + return self + + def drop_not_found(self) -> 'NamesGeocoder': + self._reset_geocodes() + self._default_ambiguity_resolver = AmbiguityResolver(IgnoringStrategyKind.skip_missing) + return self + + def drop_not_matched(self) -> 'NamesGeocoder': + self._reset_geocodes() + self._default_ambiguity_resolver = AmbiguityResolver(IgnoringStrategyKind.skip_all) + return self + + def allow_ambiguous(self) -> 'NamesGeocoder': + self._reset_geocodes() + self._default_ambiguity_resolver = AmbiguityResolver(IgnoringStrategyKind.take_namesakes) + self._allow_ambiguous = True + return self + + def where(self, name: str, + county: Optional[parent_types] = None, + state: Optional[parent_types] = None, + country: Optional[parent_types] = None, + scope: scope_types = None, + closest_to: Optional[Union[Geocodes, ShapelyPointType]] = None + ) -> 'NamesGeocoder': + """ + Allows to resolve ambiguity by setting up extra parameters. Combination of name, county, state, country + identifies a row with an ambiguity. 
+ If row with given names doesn't exist error will be generated. + + + Parameters + ---------- + name : string + Name in Geocoder that needs better qualification. + county : [string | None] + When Geocoder built with counties this field is used to identify a row for the name. + state : [string | None] + When Geocoder built with states this field is used to identify a row for the name. + country : [string | None] + When Geocoder built with countries this field is used to identify a row for the name. + scope : [string | Geocoder | shapely.Polygon | None] + Limits area of geocoding. If parent country is set then error will be generated. + If type is a string - geoobject should have geocoded scope in parents. + If type is a Geocoder - geoobject should have geocoded scope in parents. Scope should contain only one entry. + If type is a shapely.Polygon - geoobject centroid should fall into bbox of the polygon. + closest_to: [Geocoder | shapely.geometry.Point | None] + Resolve ambiguity by taking closest geoobject. 
+ + Returns + ------- + Geocoder object + """ + self._reset_geocodes() + query_spec = QuerySpec( + name, + _make_parent_region(county), + _make_parent_region(state), + _make_parent_region(country) + ) + + def query_exist(query): + for i in range(len(self._names)): + if query.name == self._names[i] and \ + query.country == _get_or_none(self._countries, i) and \ + query.state == _get_or_none(self._states, i) and \ + query.county == _get_or_none(self._counties, i): + return True + return False + + if not query_exist(query_spec): + parents: List[str] = [] + if query_spec.county is not None: + parents.append('county={}'.format(str(query_spec.county))) + + if query_spec.state is not None: + parents.append('state={}'.format(str(query_spec.state))) + + if query_spec.country is not None: + parents.append('country={}'.format(str(query_spec.country))) + + parents_str = ", ".join(parents) + if len(parents_str) == 0: + raise ValueError("{} is not found in names".format(name)) + else: + raise ValueError("{}({}) is not found in names".format(name, parents_str)) + + if scope is None: + new_scope = None + ambiguity_resolver = _make_ambiguity_resolver(scope=None, closest_object=closest_to) + else: + if LazyShapely.is_polygon(scope): + new_scope = None + ambiguity_resolver = _make_ambiguity_resolver(scope=scope, closest_object=closest_to) + else: + new_scope = _prepare_new_scope(scope)[0] + ambiguity_resolver = _make_ambiguity_resolver(scope=None, closest_object=closest_to) + + self._overridings[query_spec] = WhereSpec(new_scope, ambiguity_resolver) + return self + + + def _build_request(self) -> GeocodingRequest: + if len(self._names) == 0: + def to_scope(parents): + if len(parents) == 0: + return None + elif len(parents) == 1: + return parents[0] + else: + raise ValueError('Too many parent objects. Expcted single object instead of {}'.format(len(parents))) + + # all countries/states etc. 
We need one dummy query + queries = [ + RegionQuery( + request=None, + country=to_scope(self._countries), + state=to_scope(self._states), + county=to_scope(self._counties) + ) + ] + else: + def assert_parents_size(parents: List, parents_level: str): + if len(parents) == 0: + return + + if len(parents) != len(self._names): + raise ValueError('Invalid request: {} count({}) != names count({})'.format(parents_level, len(parents), len(self._names))) + + if len(self._countries) > 0 and len(self._scope) > 0: + raise ValueError("Invalid request: countries and scope can't be used simultaneously") + + assert_parents_size(self._countries, 'countries') + assert_parents_size(self._states, 'states') + assert_parents_size(self._counties, 'counties') + + queries = [] + for i in range(len(self._names)): + name = self._names[i] + country = _get_or_none(self._countries, i) + state = _get_or_none(self._states, i) + county = _get_or_none(self._counties, i) + + scope, ambiguity_resolver = self._overridings.get( + QuerySpec(name, county, state, country), + WhereSpec(None, self._default_ambiguity_resolver) + ) + + query = RegionQuery( + request=name, + country=country, + state=state, + county=county, + scope=scope, + ambiguity_resolver=ambiguity_resolver + ) + + queries.append(query) + + request = RequestBuilder() \ + .set_request_kind(RequestKind.geocoding) \ + .set_requested_payload([PayloadKind.highlights] if self._highlights else []) \ + .set_queries(queries) \ + .set_scope(self._scope) \ + .set_level(self._level) \ + .set_namesake_limit(NAMESAKE_MAX_COUNT) \ + .set_allow_ambiguous(self._allow_ambiguous) \ + .build() + + return request + + def _geocode(self) -> Geocodes: + if self._geocodes is None: + request: GeocodingRequest = self._build_request() + response: Response = GeocodingService().do_request(request) + if not isinstance(response, SuccessResponse): + _raise_exception(response) + self._geocodes = Geocodes(response.level, response.answers, request.region_queries, 
self._highlights) + + return self._geocodes + + def _reset_geocodes(self): + self._geocodes = None + + def __eq__(self, o): + return isinstance(o, NamesGeocoder) \ + and self._overridings == o._overridings + + def __ne__(self, o): + return not self == o + + +def _prepare_new_scope(scope: Optional[Union[str, Geocoder, Geocodes, MapRegion]]) -> List[MapRegion]: + """ + Return list of MapRegions. Every MapRegion object contains only one name or id. + """ + if scope is None: + return [] + + def assert_scope_length_(l): + if l != 1: + raise ValueError("'scope' has {} entries, but expected to have exactly 1".format(l)) + + if isinstance(scope, MapRegion): + assert_scope_length_(len(scope.values)) + return [scope] + + if isinstance(scope, str): + return [MapRegion.with_name(scope)] + + if isinstance(scope, Geocoder): + scope = scope._geocode() + + if isinstance(scope, Geocodes): + map_regions = scope.to_map_regions() + assert_scope_length_(len(map_regions)) + return map_regions + + raise ValueError("Unsupported 'scope' type. Expected 'str' or 'Geocoder' but was '{}'".format(type(scope).__name__)) + + +def geocode(level=None, names=None, countries=None, states=None, counties=None, scope=None) -> NamesGeocoder: + """ + Create a Geocoder. Allows to refine ambiguous request with where method, scope that limits area of geocoding + or with parents. + + Parameters + ---------- + level : ['country' | 'state' | 'county' | 'city' | None] + The level of administrative division. Autodetection by default. + names : [array | string | None] + Names of objects to be geocoded. + For 'state' level: + -'US-48' returns continental part of United States (48 states) in a compact form. + countries : [array | None] + Parent countries. Should have same size as names. Can contain strings or Geocoder objects. + states : [array | None] + Parent states. Should have same size as names. Can contain strings or Geocoder objects. + counties : [array | None] + Parent counties. 
Should have same size as names. Can contain strings or Geocoder objects. + scope : [string | Geocoder | None] + Limits area of geocoding. If parent country is set then error will be generated. + If type is a string - geoobject should have geocoded scope in parents. + If type is a Geocoder - geoobject should have geocoded scope in parents. Scope should contain only one entry. + """ + return NamesGeocoder(level, names) \ + .scope(scope) \ + .countries(countries) \ + .states(states) \ + .counties(counties) + + +def geocode_cities(names=None) -> NamesGeocoder: + """ + Create a Geocoder object for cities. Allows to refine ambiguous request with + where method, with a scope that limits area of geocoding or with parents. + + geocode_cities(names) + + Parameters + ---------- + names : [array | string | None] + Names of objects to be geocoded. + + Returns + ------- + Geocoder object : + + Note + ----- + Geocoder allows to refine ambiguous request with where() method. + + Examples + --------- + >>> from lets_plot.geo_data import * + >>> r = geocode_cities(['moscow', 'york']).where('york', scope=geocode_states('New York')).get_geocodes() + """ + return NamesGeocoder('city', names) + + +def geocode_counties(names=None) -> NamesGeocoder: + """ + Create a Geocoder object for counties. Allows to refine ambiguous request with + where method, with a scope that limits area of geocoding or with parents. + + geocode_counties(names) + + Parameters + ---------- + names : [array | string | None] + Names of objects to be geocoded. + + Returns + ------- + Geocoder object : + + Note + ----- + Geocoder allows to refine ambiguous request with where() method. + + Examples + --------- + >>> from lets_plot.geo_data import * + >>> r = geocode_counties('barnstable').get_geocodes() + """ + return NamesGeocoder('county', names) + + +def geocode_states(names=None) -> NamesGeocoder: + """ + Create a Geocoder object for states. 
Allows to refine ambiguous request with + where method, with a scope that limits area of geocoding or with parents. + + geocode_states(names) + + Parameters + ---------- + names : [array | string | None] + Names of objects to be geocoded. + + Returns + ------- + Geocoder object : + + Note + ----- + Geocoder allows to refine ambiguous request with where() method. + + Examples + --------- + >>> from lets_plot.geo_data import * + >>> r = geocode_states('texas').get_geocodes() + """ + return NamesGeocoder('state', names) + + +def geocode_countries(names=None) -> NamesGeocoder: + """ + Create a Geocoder object for countries. Allows to refine ambiguous request with + where method. + + geocode_countries(names) + + Parameters + ---------- + names : [array | string | None] + Names of objects to be geocoded. + + Returns + ------- + Geocoder object : + + Note + ----- + Geocoder allows to refine ambiguous request with where() method. + + Examples + --------- + >>> from lets_plot.geo_data import * + >>> r = geocode_countries('USA').get_geocodes() + """ + return NamesGeocoder('country', names) + +def reverse_geocode(lon, lat, level=None, scope=None) -> ReverseGeocoder: + return ReverseGeocoder(lon, lat, level, scope) \ No newline at end of file diff --git a/python-package/lets_plot/geo_data/regions.py b/python-package/lets_plot/geo_data/geocodes.py similarity index 66% rename from python-package/lets_plot/geo_data/regions.py rename to python-package/lets_plot/geo_data/geocodes.py index 086b4a48d71..dd3d02b939c 100644 --- a/python-package/lets_plot/geo_data/regions.py +++ b/python-package/lets_plot/geo_data/geocodes.py @@ -1,24 +1,26 @@ import enum from abc import abstractmethod -from typing import List, Dict, Optional, Union +from collections import Iterable +from typing import List, Optional, Union from pandas import DataFrame, Series from .gis.geocoding_service import GeocodingService -from .gis.request import PayloadKind, RequestBuilder, RequestKind, MapRegion -from 
.gis.response import GeocodedFeature, Namesake, AmbiguousFeature, LevelKind +from .gis.request import PayloadKind, RequestBuilder, RequestKind, MapRegion, RegionQuery +from .gis.response import Answer, GeocodedFeature, Namesake, AmbiguousFeature, LevelKind from .gis.response import SuccessResponse, Response, AmbiguousResponse, ErrorResponse -from .type_assertion import assert_type -from .._type_utils import CanToDataFrame +from .type_assertion import assert_type, assert_list_type NO_OBJECTS_FOUND_EXCEPTION_TEXT = 'No objects were found.' MULTIPLE_OBJECTS_FOUND_EXCEPTION_TEXT = "Multiple objects were found. Use all_result=True to see them." -DF_REQUEST = 'request' -DF_ID = 'id' -DF_FOUND_NAME = 'found name' -DF_HIGHLIGHTS = 'highlights' -DF_GROUP = 'group' +DF_COLUMN_ID = 'id' +DF_COLUMN_FOUND_NAME = 'found name' +DF_COLUMN_HIGHLIGHTS = 'highlights' +DF_COLUMN_CITY = 'city' +DF_COLUMN_COUNTRY = 'country' +DF_COLUMN_STATE = 'state' +DF_COLUMN_COUNTY = 'county' class Resolution(enum.Enum): @@ -39,36 +41,136 @@ class Resolution(enum.Enum): world_low = 1 -def select_not_empty_name(feature: GeocodedFeature) -> str: - return feature.name if feature.query is None or feature.query == '' else feature.query +def select_request_string(request: Optional[str], name: str) -> str: + if request is None: + return name + if len(request) == 0: + return name -class DataFrameProvider(): - def __init__(self): + if 'us-48' == request.lower(): + return name + + return request + + +def level_to_column_name(level_kind: LevelKind): + if level_kind == LevelKind.city: + return DF_COLUMN_CITY + elif level_kind == LevelKind.county: + return DF_COLUMN_COUNTY + elif level_kind == LevelKind.state: + return DF_COLUMN_STATE + elif level_kind == LevelKind.country: + return DF_COLUMN_COUNTRY + else: + raise ValueError('Unknown level kind: {}'.format(level_kind)) + + +def zip_answers(queries: List, answers: List): + if len(queries) > 0: + return zip(queries, answers) + else: + return zip([None] * 
len(answers), answers) + + +class PlacesDataFrameBuilder: + def __init__(self, level_kind: LevelKind): + self.level_kind: LevelKind = level_kind self._request: List[str] = [] self._found_name: List[str] = [] + self._county: List[Optional[str]] = [] + self._state: List[Optional[str]] = [] + self._country: List[Optional[str]] = [] + + def append_row(self, query: RegionQuery, feature: GeocodedFeature): + self._request.append(select_request_string(query.request, feature.name)) + self._found_name.append(feature.name) + + if query is None: + self._county.append(MapRegion.name_or_none(None)) + self._state.append(MapRegion.name_or_none(None)) + self._country.append(MapRegion.name_or_none(None)) + else: + self._county.append(MapRegion.name_or_none(query.county)) + self._state.append(MapRegion.name_or_none(query.state)) + self._country.append(MapRegion.name_or_none(query.country)) + + def build_dict(self): + def contains_values(column): + return any(v is not None for v in column) + + data = {} + + request_column = level_to_column_name(self.level_kind) + + data[request_column] = self._request + data[DF_COLUMN_FOUND_NAME] = self._found_name + + if contains_values(self._county): + data[DF_COLUMN_COUNTY] = self._county + + if contains_values(self._state): + data[DF_COLUMN_STATE] = self._state + + if contains_values(self._country): + data[DF_COLUMN_COUNTRY] = self._country + + return data @abstractmethod - def to_data_frame(self, features: List[GeocodedFeature]) -> DataFrame: + def to_data_frame(self, answers: List[Answer], queries: List[RegionQuery], level_kind: LevelKind) -> DataFrame: raise ValueError('Not implemented') -class Regions(CanToDataFrame): - def __init__(self, level_kind: LevelKind, features: List[GeocodedFeature], highlights: bool = False): +class Geocodes: + def __init__(self, level_kind: LevelKind, answers: List[Answer], queries: List[RegionQuery], + highlights: bool = False): + assert_list_type(answers, Answer) + assert_list_type(queries, RegionQuery) + + if 
len(answers) == 0: + assert len(queries) == 1 and queries[0].request is None # select all + else: + assert len(queries) == len(answers) # regular request - should have same size + try: import geopandas except: - raise ValueError('Module \'geopandas\'is required for using regions') from None + raise ValueError('Module \'geopandas\'is required for geocoding') from None self._level_kind: LevelKind = level_kind + self._answers: List[Answer] = answers + + features = [] + for answer in answers: + features.extend(answer.features) + self._geocoded_features: List[GeocodedFeature] = features self._highlights: bool = highlights + self._queries: List[RegionQuery] = queries def __repr__(self): return self.to_data_frame().to_string() - def as_list(self) -> List['Regions']: - return [Regions(self._level_kind, [feature], self._highlights) for feature in self._geocoded_features] + def __len__(self): + return len(self._geocoded_features) + + def to_map_regions(self) -> List[MapRegion]: + regions: List[MapRegion] = [] + for answer, query in zip_answers(self._answers, self._queries): + for feature in answer.features: + regions.append(MapRegion.place(feature.id, select_request_string(query.request, feature.name), self._level_kind)) + return regions + + def as_list(self) -> List['Geocodes']: + if len(self._queries) == 0: + return [Geocodes(self._level_kind, [answer], [RegionQuery(request=None)], self._highlights) for answer in + self._answers] + + assert len(self._queries) == len(self._answers) + return [Geocodes(self._level_kind, [answer], [query], self._highlights) for query, answer in + zip(self._queries, self._answers)] def unique_ids(self) -> List[str]: seen = set() @@ -159,7 +261,7 @@ def boundaries(self, resolution: Optional[Union[int, str, Resolution]] = None): BoundariesGeoDataFrame() ) - def limits(self): + def limits(self) -> 'GeoDataFrame': """ Return bboxes (Polygon geometry) for given regions in form of GeoDataFrame. 
For regions intersecting anti-meridian bbox will be divided into two and stored as two rows. @@ -196,28 +298,23 @@ def centroids(self): CentroidsGeoDataFrame() ) - # implements abstract in CanToDataFrame def to_data_frame(self) -> DataFrame: - keyMappers: Dict = { - DF_REQUEST: lambda feature: select_not_empty_name(feature), - DF_ID: lambda feature: feature.id, - DF_FOUND_NAME: lambda feature: feature.name, - DF_HIGHLIGHTS: lambda feature: feature.highlights - } + places = PlacesDataFrameBuilder(self._level_kind) - keyList: List[str] = [DF_REQUEST, DF_ID, DF_FOUND_NAME] + data = {} + data[DF_COLUMN_ID] = [feature.id for feature in self._geocoded_features] - if self._highlights: - keyList.append(DF_HIGHLIGHTS) + # for us-48 queries doesnt' count + for query, answer in zip_answers(self._queries, self._answers): + for feature in answer.features: + places.append_row(query, feature) - data: Dict = {} - for key in keyList: - data[key] = [keyMappers[key](feature) for feature in self._geocoded_features] + data = {**data, **places.build_dict()} - return DataFrame(data, columns=keyList) + if self._highlights: + data[DF_COLUMN_HIGHLIGHTS] = [feature.highlights for feature in self._geocoded_features] - def __len__(self): - return len(self._geocoded_features) + return DataFrame(data) def _execute(self, request_builder: RequestBuilder, df_converter): response = GeocodingService().do_request(request_builder.build()) @@ -225,9 +322,14 @@ def _execute(self, request_builder: RequestBuilder, df_converter): if not isinstance(response, SuccessResponse): _raise_exception(response) - self._join_payload(response.features) + features = [] + + for a in response.answers: + features.extend(a.features) - return df_converter.to_data_frame(self._geocoded_features) + self._join_payload(features) + + return df_converter.to_data_frame(self._answers, self._queries, self._level_kind) def _request_builder(self, payload_kind: PayloadKind) -> RequestBuilder: assert_type(payload_kind, PayloadKind) @@ 
-257,8 +359,22 @@ def _get_features(self, feature_id: str) -> List[GeocodedFeature]: return [feature for feature in self._geocoded_features if feature.id == feature_id] + @classmethod + def find_name_columns(cls, geocodes_df) -> List[str]: + names = [] + if DF_COLUMN_CITY in geocodes_df: + names.append(DF_COLUMN_CITY) + if DF_COLUMN_COUNTY in geocodes_df: + names.append(DF_COLUMN_COUNTY) + if DF_COLUMN_STATE in geocodes_df: + names.append(DF_COLUMN_STATE) + if DF_COLUMN_COUNTRY in geocodes_df: + names.append(DF_COLUMN_COUNTRY) + + return names + + request_types = Optional[Union[str, List[str], Series]] -scope_types = Optional[Union[str, List[str], Regions, List[Regions]]] def _raise_exception(response: Response): @@ -351,39 +467,14 @@ def _parse_resolution(resolution: str) -> Resolution: raise ValueError('Invalid resolution type: ' + type(resolution).__name__) -def _to_scope(location: scope_types) -> Optional[Union[List[MapRegion], MapRegion]]: - if location is None: - return None - - def _make_region(obj: Union[str, Regions]) -> Optional[MapRegion]: - if isinstance(obj, Regions): - return MapRegion.with_ids(obj.unique_ids()) - - if isinstance(obj, str): - return MapRegion.with_name(obj) - - raise ValueError('Invalid region: ' + obj) - - if isinstance(location, list): - return [_make_region(obj) for obj in location] - - return _make_region(location) - - -def _ensure_is_list(obj: request_types) -> Optional[List[str]]: +def _ensure_is_list(obj) -> Optional[List[str]]: if obj is None: return None - if isinstance(obj, list): - return obj - - if isinstance(obj, str): - return [obj] - - if isinstance(obj, Series): - return obj.tolist() + if isinstance(obj, Iterable) and not isinstance(obj, str): + return [v for v in obj] - raise ValueError("Wrong type") + return [obj] def _coerce_resolution(res: int) -> int: diff --git a/python-package/lets_plot/geo_data/gis/geocoding_service.py b/python-package/lets_plot/geo_data/gis/geocoding_service.py index 95a4f0a464d..3b7cb99a06a 
100644 --- a/python-package/lets_plot/geo_data/gis/geocoding_service.py +++ b/python-package/lets_plot/geo_data/gis/geocoding_service.py @@ -1,77 +1,19 @@ import json import urllib.parse import urllib.request +import gzip from urllib.error import HTTPError from .json_request import RequestFormatter from .json_response import ResponseParser -from .request import Request, GeocodingRequest -from .response import Response, SuccessResponse, ErrorResponse, AmbiguousResponse, ResponseBuilder, Status +from .request import Request +from .response import Response from ..._global_settings import has_global_value, get_global_str from ...settings_utils import GEOCODING_PROVIDER_URL class GeocodingService: - def do_request(self, request: Request, chunk_size=None, progress_callback=None) -> Response: - # level autodetection can work only with whole request - if chunk_size is not None and isinstance(request, GeocodingRequest) and request.level is not None: - return self._execute_chunked(request, chunk_size, progress_callback) - else: - return self._execute(request) - - def _execute_chunked(self, request: GeocodingRequest, chunk_size, progress_callback) -> Response: - success_chunks = [] - ambiguous_chunks = [] - - def chunked(items): - for i in range(0, len(items), chunk_size): - yield items[i:i + chunk_size] - - total_count = len(request.region_queries) - i = 0 - if progress_callback: - progress_callback(i, total_count) - - for q in chunked(request.region_queries): - chunked_request = GeocodingRequest( - requested_payload=request.requested_payload, - resolution=request.resolution, - region_queries=q, - level=request.level, - namesake_example_limit=request.namesake_example_limit, - allow_ambiguous=request.allow_ambiguous - ) - - response = self._execute(chunked_request) - if progress_callback: - i = i + len(q) - progress_callback(i, total_count) - - if isinstance(response, ErrorResponse): - return response - elif isinstance(response, SuccessResponse): - 
success_chunks.append(response) - elif isinstance(response, AmbiguousResponse): - ambiguous_chunks.append(response) - else: - raise ValueError('Unknown response type: ' + type(response).__name__) - - # combine ambiguous features from all chunks - if ambiguous_chunks: - ambiguous_features = [] - for response in ambiguous_chunks: - ambiguous_features.extend(response.features) - - return AmbiguousResponse(ambiguous_chunks[0].message, request.level, ambiguous_features) - - # no errors or ambiguous responses - combine success features from all chunks - success_features = [] - for response in success_chunks: - success_features.extend(response.features) - - return SuccessResponse(success_chunks[0].message, request.level, success_features) - - def _execute(self, request: Request) -> Response: + def do_request(self, request: Request) -> Response: if not has_global_value(GEOCODING_PROVIDER_URL): raise ValueError('Geocoding server url is not defined') @@ -81,21 +23,20 @@ def _execute(self, request: Request) -> Response: request = urllib.request.Request( url=get_global_str(GEOCODING_PROVIDER_URL) + '/map_data/geocoding', - headers={'Content-Type': 'application/json'}, + headers={'Content-Type': 'application/json', 'Accept-Encoding': 'gzip'}, method='POST', data=bytearray(request_str, 'utf-8') ) response = urllib.request.urlopen(request) - response_str = response.read().decode('utf-8') + if response.info().get('Content-Encoding') == 'gzip': + content = response.read() + response_str = gzip.decompress(content).decode('utf-8') + else: + response_str = response.read().decode('utf-8') + response_json = json.loads(response_str) return ResponseParser().parse(response_json) except HTTPError as e: raise ValueError( 'Geocoding server connection failure: {} {} ({})'.format(e.code, e.msg, e.filename)) from None - - except Exception as e: - return ResponseBuilder() \ - .set_status(Status.error) \ - .set_message('Geocoding service exception: {}'.format(str(e))) \ - .build() diff --git 
a/python-package/lets_plot/geo_data/gis/json_request.py b/python-package/lets_plot/geo_data/gis/json_request.py index e6bf12c0cf9..4e64c0ef0ad 100644 --- a/python-package/lets_plot/geo_data/gis/json_request.py +++ b/python-package/lets_plot/geo_data/gis/json_request.py @@ -3,7 +3,7 @@ from .fluent_dict import FluentDict, FluentList from .geometry import GeoPoint -from .request import RegionQuery, MapRegion, PayloadKind, RegionQueryBuilder, IgnoringStrategyKind, MapRegionBuilder +from .request import RegionQuery, MapRegion, MapRegionKind, PayloadKind, RegionQueryBuilder, IgnoringStrategyKind, MapRegionBuilder from .request import Request, GeocodingRequest, ExplicitRequest, RequestBuilder, RequestKind, ReverseGeocodingRequest from .response import LevelKind, GeoRect @@ -21,7 +21,11 @@ class Field(enum.Enum): geo_object_list = 'ids' region_queries = 'region_queries' region_query_names = 'region_query_names' + region_query_countries = 'region_query_countries' + region_query_states = 'region_query_states' + region_query_counties = 'region_query_counties' region_query_parent = 'region_query_parent' + scope = 'scope' level = 'level' map_region_kind = 'kind' map_region_values = 'values' @@ -63,6 +67,7 @@ def _format_geocoding_request(request: 'GeocodingRequest') -> FluentDict: return RequestFormatter \ ._common(RequestKind.geocoding, request) \ .put(Field.region_queries, RequestFormatter._format_region_queries(request.region_queries)) \ + .put(Field.scope, RequestFormatter._format_scope(request.scope)) \ .put(Field.level, request.level) \ .put(Field.namesake_example_limit, request.namesake_example_limit) \ .put(Field.allow_ambiguous, request.allow_ambiguous) @@ -98,6 +103,9 @@ def _format_region_queries(region_queires: List[RegionQuery]) -> List[Dict]: result.append( FluentDict() .put(Field.region_query_names, [] if query.request is None else [query.request]) + .put(Field.region_query_countries, RequestFormatter._format_map_region(query.country)) + 
.put(Field.region_query_states, RequestFormatter._format_map_region(query.state)) + .put(Field.region_query_counties, RequestFormatter._format_map_region(query.county)) .put(Field.ambiguity_resolver, None if query.ambiguity_resolver is None else FluentDict() .put(Field.ambiguity_ignoring_strategy, query.ambiguity_resolver.ignoring_strategy) .put(Field.ambiguity_box, RequestFormatter._format_box(query.ambiguity_resolver.box)) @@ -107,11 +115,23 @@ def _format_region_queries(region_queires: List[RegionQuery]) -> List[Dict]: ) return result + @staticmethod + def _format_scope(scope: List[MapRegion]) -> List[Dict]: + return [RequestFormatter._format_map_region(s) for s in scope] + @staticmethod def _format_map_region(parent: Optional[MapRegion]) -> Optional[Dict]: if parent is None: return None + # special case - place is just a geocoded object with id and extra information, used by client + # server doesn't need this extra information + if parent.kind.value == 'place': + return FluentDict() \ + .put(Field.map_region_kind, MapRegionKind.id.value) \ + .put(Field.map_region_values, parent.values) \ + .to_dict() + return FluentDict() \ .put(Field.map_region_kind, parent.kind.value) \ .put(Field.map_region_values, parent.values) \ diff --git a/python-package/lets_plot/geo_data/gis/json_response.py b/python-package/lets_plot/geo_data/gis/json_response.py index c83ccc3d3be..bab42bf54bc 100644 --- a/python-package/lets_plot/geo_data/gis/json_response.py +++ b/python-package/lets_plot/geo_data/gis/json_response.py @@ -7,13 +7,14 @@ from .geometry import Ring from .response import Multipolygon, GeoPoint, GeoRect, Boundary, Polygon from .response import Response, ResponseBuilder, SuccessResponse, AmbiguousResponse -from .response import Status, LevelKind, GeocodedFeature, AmbiguousFeature, Namesake, NamesakeParent, FeatureBuilder +from .response import Status, LevelKind, Answer, GeocodedFeature, AmbiguousFeature, Namesake, NamesakeParent, FeatureBuilder class 
ResponseField(Enum): status = 'status' message = 'message' data = 'data' + answers = 'answers' features = 'features' geocoded_data = 'good_features' incorrect_data = 'bad_features' @@ -66,7 +67,7 @@ def parse(response_json: Dict) -> Response: .visit_enum_existing(ResponseField.level, LevelKind, response.set_level) if response.status == Status.success: - data_dict.visit(ResponseField.features, partial(ResponseParser._parse_geocoded_features, response=response)) + data_dict.visit(ResponseField.answers, partial(ResponseParser._parse_answers, response=response)) elif response.status == Status.ambiguous: data_dict.visit(ResponseField.features, partial(ResponseParser._parse_ambiguous_features, response=response)) else: @@ -75,23 +76,27 @@ def parse(response_json: Dict) -> Response: return response.build() @staticmethod - def _parse_geocoded_features(features_json: List[Dict], response: ResponseBuilder): - geocoded_features: List[GeocodedFeature] = [] - for feature_json in features_json: - feature = FeatureBuilder() - FluentDict(feature_json) \ - .visit_str(ResponseField.query, feature.set_query) \ - .visit_str(ResponseField.geo_object_id, feature.set_id) \ - .visit_str(ResponseField.name, feature.set_name) \ - .visit_str_list_optional(ResponseField.highlights, feature.set_highlights) \ - .visit_str_existing(ResponseField.boundary, lambda json: feature.set_boundary(GeoJson().parse_geometry(json))) \ - .visit_object_optional(ResponseField.centroid, lambda json: feature.set_centroid(ResponseParser._parse_point(json))) \ - .visit_object_optional(ResponseField.limit, lambda json: feature.set_limit(ResponseParser._parse_rect(json))) \ - .visit_object_optional(ResponseField.position, lambda json: feature.set_position(ResponseParser._parse_rect(json))) - - geocoded_features.append(feature.build_geocoded()) - - response.set_geocoded_features(geocoded_features) + def _parse_answers(answers_json: List[Dict], response: ResponseBuilder): + answers: List[Answer] = [] + for answer_json 
in answers_json: + features_json = answer_json.get(ResponseField.features.value, []) + geocoded_features: List[GeocodedFeature] = [] + for feature_json in features_json: + feature = FeatureBuilder() + + FluentDict(feature_json) \ + .visit_str(ResponseField.geo_object_id, feature.set_id) \ + .visit_str(ResponseField.name, feature.set_name) \ + .visit_str_list_optional(ResponseField.highlights, feature.set_highlights) \ + .visit_str_existing(ResponseField.boundary, lambda json: feature.set_boundary(GeoJson().parse_geometry(json))) \ + .visit_object_optional(ResponseField.centroid, lambda json: feature.set_centroid(ResponseParser._parse_point(json))) \ + .visit_object_optional(ResponseField.limit, lambda json: feature.set_limit(ResponseParser._parse_rect(json))) \ + .visit_object_optional(ResponseField.position, lambda json: feature.set_position(ResponseParser._parse_rect(json))) + + geocoded_features.append(feature.build_geocoded()) + answers.append(Answer(geocoded_features)) + + response.set_answers(answers) @staticmethod def _parse_ambiguous_features(features_json: List[Dict], response: ResponseBuilder): @@ -146,7 +151,7 @@ def format(response: Response) -> Dict: .put(ResponseField.message, response.message) \ .put(ResponseField.data, FluentDict() .put(ResponseField.level, response.level.value) - .put(ResponseField.features, list(map(ResponseFormatter._format_geocoded_feature, response.features)))) \ + .put(ResponseField.answers, list(map(ResponseFormatter._format_answer, response.answers)))) \ .to_dict() elif isinstance(response, AmbiguousResponse): return FluentDict() \ @@ -158,17 +163,25 @@ def format(response: Response) -> Dict: .to_dict() @staticmethod - def _format_geocoded_feature(feature: GeocodedFeature) -> Dict: + def _format_answer(answer: Answer) -> Dict: + features = [] + for feature in answer.features: + features.append( + FluentDict() \ + .put(ResponseField.geo_object_id, feature.id) \ + .put(ResponseField.name, feature.name) \ + 
.put(ResponseField.boundary, ResponseFormatter._format_boundary(feature.boundary)) \ + .put(ResponseField.centroid, ResponseFormatter._format_centroid(feature.centroid)) \ + .put(ResponseField.limit, ResponseFormatter._format_rect(feature.limit)) \ + .put(ResponseField.position, ResponseFormatter._format_rect(feature.position)) \ + .to_dict() + ) + return FluentDict() \ - .put(ResponseField.query, feature.query) \ - .put(ResponseField.geo_object_id, feature.id) \ - .put(ResponseField.name, feature.name) \ - .put(ResponseField.boundary, ResponseFormatter._format_boundary(feature.boundary)) \ - .put(ResponseField.centroid, ResponseFormatter._format_centroid(feature.centroid)) \ - .put(ResponseField.limit, ResponseFormatter._format_rect(feature.limit)) \ - .put(ResponseField.position, ResponseFormatter._format_rect(feature.position)) \ + .put(ResponseField.features, features) \ .to_dict() + @staticmethod def _format_centroid(point: Optional[GeoPoint]) -> Optional[Dict]: if point is None: diff --git a/python-package/lets_plot/geo_data/gis/request.py b/python-package/lets_plot/geo_data/gis/request.py index dd09fd035e7..39f3fdaf52d 100644 --- a/python-package/lets_plot/geo_data/gis/request.py +++ b/python-package/lets_plot/geo_data/gis/request.py @@ -1,13 +1,13 @@ import enum from numbers import Number -from typing import Optional, List, Tuple +from typing import Optional, List, Tuple, Union from .geometry import GeoRect, GeoPoint from ..type_assertion import assert_type, assert_list_type, assert_optional_type -MISSING_WITHIN_OR_REQUEST_EXCEPTION_TEXT = 'Missing required argument: within or request.' +MISSING_SCOPE_OR_REQUEST_EXCEPTION_TEXT = 'Missing required argument: scope or request.' MISSING_LEVEL_OR_REQUEST_EXCEPTION_TEXT = 'Missing required argument: level or request.' -MISSING_LEVEL_AND_WITHIN_OR_REQUEST_EXCEPTION_TEXT = 'Missing required argument. You must enter level and within or request.' 
+MISSING_LEVEL_AND_SCOPE_OR_REQUEST_EXCEPTION_TEXT = 'Missing required argument. You must enter level and scope or request.' GeoId = str @@ -45,11 +45,40 @@ class LevelKind(enum.Enum): class MapRegionKind(enum.Enum): id = True name = False + place = 'place' class MapRegion: + ''' + Represents three different entities: + scope - ids of already geocoded objects. The only kind of MapRegion allowed to store multiply objects + place - already geocoded single place. In addition to id it holds administrative level and requeted name. + Used mostly as parent object for geocoding other objects. + with_name - single name, not yet geocoded. + ''' @staticmethod - def with_ids(parent_ids: List[str]): + def name_or_none(place: Optional['MapRegion']): + if place is None: + return None + + if place.kind == MapRegionKind.place: + return place.request() + + if place.kind == MapRegionKind.name: + return place.name() + + raise ValueError('MapRegion with kind \'{}\' doesn\'t have a name'.format(place.kind)) + + + @staticmethod + def place(id: str, request: Optional[str], level_kind: LevelKind): + assert_type(id, str) + assert_optional_type(request, str) + assert_type(level_kind, LevelKind) + return MapRegion(MapRegionKind.place, [id], request, level_kind) + + @staticmethod + def scope(parent_ids: List[str]): assert_list_type(parent_ids, str) return MapRegion(MapRegionKind.id, parent_ids) @@ -58,23 +87,52 @@ def with_name(name: str): assert_type(name, str) return MapRegion(MapRegionKind.name, [name]) - def __init__(self, kind: MapRegionKind, values: List[str]): + def __init__(self, kind: MapRegionKind, values: List[str], request: Optional[str] = None, level_kind: Optional[LevelKind] = None): assert_type(kind, MapRegionKind) assert_list_type(values, str) + assert_optional_type(request, str) + assert_optional_type(level_kind, LevelKind) self.kind: MapRegionKind = kind self.values: Tuple[str] = tuple(values, ) + self._request:Optional[str] = request + self._level_kind: Optional[LevelKind] = 
level_kind self._hash = hash((self.values, self.kind)) + def request(self) -> Optional[str]: + assert self.kind == MapRegionKind.place, 'Invalid MapRegion kind. Expected \'place\', but was ' + str(self.kind) + assert_optional_type(self._request, str) + return self._request + + def name(self) -> str: + assert self.kind == MapRegionKind.name, 'Invalid MapRegion kind. Expected \'name\', but was ' + str(self.kind) + assert_type(self.values[0], str) + return self.values[0] + + def level_kind(self) -> Optional[LevelKind]: + assert self.kind == MapRegionKind.place, 'Invalid MapRegion kind: only place contains level_kind' + return self._level_kind + def __eq__(self, other: 'MapRegion'): return isinstance(other, MapRegion) \ and self.kind == other.kind \ - and self.values == other.values + and self.values == other.values \ + and self._request == other._request \ + and self._level_kind == other._level_kind def __ne__(self, o: object) -> bool: return not self == o def __str__(self): + if self.kind == MapRegionKind.place: + return '{} {} {}'.format(str(self.values), self._request, self._level_kind) + + if self.kind == MapRegionKind.name: + return self.values[0] + + if self.kind == MapRegionKind.id: + return ",".join(self.values) + return str(self.values) def __hash__(self): @@ -110,20 +168,36 @@ def __ne__(self, o): class RegionQuery: - def __init__(self, request: Optional[str], scope: Optional[MapRegion], ambiguity_resolver: AmbiguityResolver): + def __init__(self, + request: Optional[str], + scope: Optional[MapRegion] = None, + ambiguity_resolver: AmbiguityResolver = AmbiguityResolver.empty(), + country: Optional[MapRegion] = None, + state: Optional[MapRegion] = None, + county: Optional[MapRegion] = None + ): assert_optional_type(request, str) assert_optional_type(scope, MapRegion) assert_type(ambiguity_resolver, AmbiguityResolver) + assert_optional_type(county, MapRegion) + assert_optional_type(state, MapRegion) + assert_optional_type(country, MapRegion) self.request: 
Optional[str] = request self.scope: Optional[MapRegion] = scope self.ambiguity_resolver: AmbiguityResolver = ambiguity_resolver + self.country: Optional[MapRegion] = country + self.state: Optional[MapRegion] = state + self.county: Optional[MapRegion] = county def __eq__(self, o: object) -> bool: return isinstance(o, RegionQuery) \ and self.request == o.request \ and self.scope == o.scope \ - and self.ambiguity_resolver == o.ambiguity_resolver + and self.ambiguity_resolver == o.ambiguity_resolver \ + and self.country == o.country \ + and self.state == o.state \ + and self.county == o.county def __ne__(self, o: object) -> bool: return not self == o @@ -158,22 +232,23 @@ def _check_required_parameters(region_queries: List[RegionQuery], level: Optional[LevelKind]) -> None: if len(region_queries) == 0 and not level: - raise ValueError(MISSING_LEVEL_AND_WITHIN_OR_REQUEST_EXCEPTION_TEXT) + raise ValueError(MISSING_LEVEL_AND_SCOPE_OR_REQUEST_EXCEPTION_TEXT) for query in region_queries: if not query.request and not level and not query.scope: - raise ValueError(MISSING_LEVEL_AND_WITHIN_OR_REQUEST_EXCEPTION_TEXT) + raise ValueError(MISSING_LEVEL_AND_SCOPE_OR_REQUEST_EXCEPTION_TEXT) if not query.request and not level and query.scope: raise ValueError(MISSING_LEVEL_OR_REQUEST_EXCEPTION_TEXT) - if not query.request and level is not LevelKind.country and not query.scope: - raise ValueError(MISSING_WITHIN_OR_REQUEST_EXCEPTION_TEXT) + if not query.request and not level and not query.scope: + raise ValueError(MISSING_SCOPE_OR_REQUEST_EXCEPTION_TEXT) def __init__(self, requested_payload: List[PayloadKind], resolution: Optional[int], region_queries: List[RegionQuery], + scope: List[MapRegion], level: Optional[LevelKind], namesake_example_limit: int, allow_ambiguous: bool @@ -192,6 +267,7 @@ def __init__(self, assert namesake_example_limit is not None self.region_queries: List[RegionQuery] = region_queries + self.scope: List[MapRegion] = scope self.level: Optional[LevelKind] = level 
self.namesake_example_limit: int = namesake_example_limit self.allow_ambiguous: bool = allow_ambiguous @@ -262,17 +338,18 @@ def __ne__(self, o: object) -> bool: class RequestBuilder: def __init__(self): - self.request_kind: RequestKind = None + self.request_kind: Optional[RequestKind] = None self.requested_payload: List[PayloadKind] = [] self.resolution: Optional[int] = None self.ids: List[str] = [] self.region_queries: List[RegionQuery] = [] + self.scope: List[MapRegion] = [] self.level: Optional[LevelKind] = None self.namesake_limit: int = 10 self.allow_ambiguous: bool = False # reverse - self.reverse_coordinates: List[GeoPoint] = None + self.reverse_coordinates: Optional[List[GeoPoint]] = None self.reverse_scope: Optional[MapRegion] = None def set_reverse_coordinates(self, coordinates: List[GeoPoint]) -> 'RequestBuilder': @@ -310,6 +387,11 @@ def set_queries(self, v: List[RegionQuery]) -> 'RequestBuilder': self.region_queries = v return self + def set_scope(self, v: List[MapRegion]) -> 'RequestBuilder': + assert_list_type(v, MapRegion) + self.scope = v + return self + def set_level(self, v: LevelKind) -> 'RequestBuilder': assert_optional_type(v, LevelKind) self.level = v @@ -325,13 +407,13 @@ def set_allow_ambiguous(self, v: bool) -> 'RequestBuilder': self.allow_ambiguous = v return self - def build(self) -> 'Request': + def build(self) -> Union[ExplicitRequest, GeocodingRequest, ReverseGeocodingRequest]: if self.request_kind == RequestKind.explicit: return ExplicitRequest(self.requested_payload, self.ids, self.resolution) elif self.request_kind == RequestKind.geocoding: - return GeocodingRequest(self.requested_payload, self.resolution, self.region_queries, self.level, - self.namesake_limit, self.allow_ambiguous) + return GeocodingRequest(self.requested_payload, self.resolution, self.region_queries, self.scope, + self.level, self.namesake_limit, self.allow_ambiguous) elif self.request_kind == RequestKind.reverse: assert self.reverse_coordinates is not None @@ 
-365,7 +447,7 @@ def build(self) -> Optional[MapRegion]: class RegionQueryBuilder: def __init__(self): - self.request: Optional[str] = [] + self.request: Optional[str] = None self.scope: Optional[MapRegion] = None self.ignoring_strategy: Optional[IgnoringStrategyKind] = None self.closest_coord: Optional[GeoPoint] = None diff --git a/python-package/lets_plot/geo_data/gis/response.py b/python-package/lets_plot/geo_data/gis/response.py index da04e40b50c..887122b12ad 100644 --- a/python-package/lets_plot/geo_data/gis/response.py +++ b/python-package/lets_plot/geo_data/gis/response.py @@ -36,13 +36,13 @@ def __init__(self, geometry: Union[Multipolygon, Polygon, GeoPoint]): class GeocodedFeature: - def __init__(self, query: str, id: str, name: str, - highlights: Optional[List[str]], - boundary: Optional[Boundary], - centroid: Optional[GeoPoint], - limit: Optional[GeoRect], - position: Optional[GeoRect]): - assert_type(query, str) + def __init__(self, + id: str, name: str, + highlights: Optional[List[str]]=None, + boundary: Optional[Boundary]=None, + centroid: Optional[GeoPoint]=None, + limit: Optional[GeoRect]=None, + position: Optional[GeoRect]=None): assert_type(id, str) assert_type(name, str) assert_optional_list_type(highlights, str) @@ -51,7 +51,6 @@ def __init__(self, query: str, id: str, name: str, assert_optional_type(limit, GeoRect) assert_optional_type(position, GeoRect) - self.query: str = query self.id: str = id self.name: str = name self.highlights: Optional[List[str]] = highlights @@ -77,16 +76,27 @@ def __init__(self, message: str): assert_type(message, str) self.message: str = message +class Answer: + def __init__(self, features: List[GeocodedFeature]): + assert_list_type(features, GeocodedFeature) + self.features: List[GeocodedFeature] = features + class SuccessResponse(Response): - def __init__(self, message: str, level: LevelKind, features: List[GeocodedFeature]): + def __init__(self, message: str, level: LevelKind, answers: List[Answer]): 
super().__init__(message) assert_type(message, str) assert_optional_type(level, LevelKind) - assert_list_type(features, GeocodedFeature) + assert_list_type(answers, Answer) self.level: LevelKind = level + self.answers: List[Answer] = answers + + features = [] + for answer in answers: + features.extend(answer.features) + self.features: List[GeocodedFeature] = features @@ -109,7 +119,7 @@ def __init__(self, message: str): class FeatureBuilder: def __init__(self): - self.query: str = None + self.query: Optional[str] = None self.id: Optional[str] = None self.name: Optional[str] = None self.highlights: Optional[List[str]] = None @@ -120,8 +130,8 @@ def __init__(self): self.total_namesake_count: Optional[int] = None self.namesake_examples: List[Namesake] = [] - def set_query(self, v: str) -> 'FeatureBuilder': - assert_type(v, str) + def set_query(self, v: Optional[str]) -> 'FeatureBuilder': + assert_optional_type(v, str) self.query = v return self @@ -179,7 +189,7 @@ def build_ambiguous(self) -> AmbiguousFeature: return AmbiguousFeature(self.query, self.total_namesake_count, self.namesake_examples) def build_geocoded(self) -> GeocodedFeature: - return GeocodedFeature(self.query, self.id, self.name, self.highlights, self.boundary, self.centroid, self.limit, self.position) + return GeocodedFeature(self.id, self.name, self.highlights, self.boundary, self.centroid, self.limit, self.position) class ResponseBuilder: @@ -187,7 +197,7 @@ def __init__(self): self.status: Status = None self.level: LevelKind = None self.message: str = None - self.geocoded_features: List[GeocodedFeature] = None + self.answers: List[Answer] = None self.ambiguous_features: List[AmbiguousFeature] = None self.data: Dict = None @@ -211,16 +221,25 @@ def set_ambiguous_features(self, v: List[AmbiguousFeature]) -> 'ResponseBuilder' self.ambiguous_features = v return self - def set_geocoded_features(self, v: List[GeocodedFeature]) -> 'ResponseBuilder': + def set_answers(self, v: List[Answer]) -> 
'ResponseBuilder': + assert_list_type(v, Answer) + self.answers = v + return self + + def set_geocoded_features(self, v: List[GeocodedFeature]): + ''' + Exactly matching non-exploding features, i.e. one feature per answer + ''' assert_list_type(v, GeocodedFeature) - self.geocoded_features = v + self.answers = [Answer([f]) for f in v] return self + def build(self) -> Response: if self.status == Status.error: return ErrorResponse(self.message) elif self.status == Status.success: - return SuccessResponse(self.message, self.level, self.geocoded_features) + return SuccessResponse(self.message, self.level, self.answers) elif self.status == Status.ambiguous: return AmbiguousResponse(self.message, self.level, self.ambiguous_features) else: diff --git a/python-package/lets_plot/geo_data/livemap_helper.py b/python-package/lets_plot/geo_data/livemap_helper.py index 761791b75e4..28ed0bf117e 100644 --- a/python-package/lets_plot/geo_data/livemap_helper.py +++ b/python-package/lets_plot/geo_data/livemap_helper.py @@ -3,7 +3,7 @@ from pandas import DataFrame -from .regions import Regions +from .geocodes import Geocodes LOCATION_COORDINATE_COLUMNS = {'lon', 'lat'} LOCATION_RECTANGLE_COLUMNS = {'lonmin', 'latmin', 'lonmax', 'latmax'} @@ -19,11 +19,11 @@ class RegionKind(Enum): data_frame = 'data_frame' -def _prepare_parent(parent: Union[str, Regions]) -> Optional[dict]: +def _prepare_parent(parent: Union[str, Geocodes]) -> Optional[dict]: if not parent: return None - if isinstance(parent, Regions): + if isinstance(parent, Geocodes): kind = RegionKind.region_ids value = parent.unique_ids() @@ -37,12 +37,12 @@ def _prepare_parent(parent: Union[str, Regions]) -> Optional[dict]: return {'type': kind.value, 'data': value} -def _prepare_location(location: Union[str, Regions, List[float], DataFrame]) -> Optional[dict]: +def _prepare_location(location: Union[str, Geocodes, List[float], DataFrame]) -> Optional[dict]: if location is None: return None value = location - if 
isinstance(location, Regions): + if isinstance(location, Geocodes): kind = RegionKind.region_ids value = location.unique_ids() diff --git a/python-package/lets_plot/geo_data/regions_builder.py b/python-package/lets_plot/geo_data/regions_builder.py deleted file mode 100644 index b8bad97f334..00000000000 --- a/python-package/lets_plot/geo_data/regions_builder.py +++ /dev/null @@ -1,246 +0,0 @@ -from typing import Optional, List, Union, Tuple - -from .gis.geocoding_service import GeocodingService -from .gis.geometry import GeoPoint -from .gis.request import MapRegion, RegionQuery, RequestBuilder, RequestKind, PayloadKind, AmbiguityResolver, \ - IgnoringStrategyKind -from .gis.response import LevelKind, Response, SuccessResponse, GeoRect -from .regions import _to_level_kind, request_types, scope_types, Regions, _raise_exception, \ - _ensure_is_list, _to_scope - -NAMESAKE_MAX_COUNT = 10 - -ShapelyPointType = 'shapely.geometry.Point' -ShapelyPolygonType = 'shapely.geometry.Polygon' - - -def _to_near_coord(near: Optional[Union[Regions, ShapelyPointType]]) -> Optional[GeoPoint]: - if near is None: - return None - - if isinstance(near, Regions): - near_id = near.as_list()[0].unique_ids() - assert len(near_id) == 1 - - request = RequestBuilder() \ - .set_request_kind(RequestKind.explicit) \ - .set_requested_payload([PayloadKind.centroids]) \ - .set_ids(near_id) \ - .build() - - response: Response = GeocodingService().do_request(request) - if isinstance(response, SuccessResponse): - assert len(response.features) == 1 - centroid = response.features[0].centroid - return GeoPoint(lon=centroid.lon, lat=centroid.lat) - else: - raise ValueError("Unexpected geocoding response for id " + str(near_id[0])) - - if ShapelyWrapper.is_point(near): - return GeoPoint(lon=near.x, lat=near.y) - - raise ValueError('Not supported type') - - -def _split(box: Optional[Union[str, List[str], Regions, List[Regions], ShapelyPolygonType]]) -> Tuple[scope_types, Optional[GeoRect]]: - if not 
ShapelyWrapper.is_polygon(box): - return box, None - - return None, GeoRect(min_lon=box.bounds[0], min_lat=box.bounds[1], max_lon=box.bounds[2], max_lat=box.bounds[3]) - - -def _create_queries(request: request_types, scope: scope_types, ambiguity_resovler: AmbiguityResolver) -> List[RegionQuery]: - requests: Optional[List[str]] = _ensure_is_list(request) - scopes: Optional[Union[List[MapRegion], MapRegion]] = _to_scope(scope) - - positional_matching = isinstance(scopes, list) - - if positional_matching: - if len(requests) != len(scopes): - raise ValueError('Length of request and scope is not equal') - - return [ - RegionQuery(r, s, ambiguity_resovler) for r, s in zip(requests, scopes) - ] - else: - # us-48 request - no requests, only scopes - if requests is None and scopes is not None: - return [RegionQuery(None, scopes, ambiguity_resovler)] - - # countries request - no requests and scopes - if requests is None and scopes is None: - return [] - - return [RegionQuery(r, scopes, ambiguity_resovler) for r in requests] - - -class ShapelyWrapper: - @staticmethod - def is_point(p) -> bool: - if not ShapelyWrapper.is_shapely_available(): - return False - - from shapely.geometry import Point - return isinstance(p, Point) - - @staticmethod - def is_polygon(p): - if not ShapelyWrapper.is_shapely_available(): - return False - - from shapely.geometry import Polygon - return isinstance(p, Polygon) - - @staticmethod - def is_shapely_available(): - try: - import shapely - return True - except: - return False - - -class RegionsBuilder: - def __init__(self, - level: Optional[Union[str, LevelKind]] = None, - request: request_types = None, - scope: scope_types = None, - highlights: bool = False, - progress_callback = None, - chunk_size = None, - allow_ambiguous = False - ): - - self._level: Optional[LevelKind] = _to_level_kind(level) - self._overridings: List[RegionQuery] = [] - self._default_ambiguity_resolver: AmbiguityResolver = AmbiguityResolver.empty() # TODO rename to geohint - 
self._queries: List[RegionQuery] = _create_queries(request, scope, self._default_ambiguity_resolver) - self._highlights: bool = highlights - self._on_progress = progress_callback - self._chunk_size = chunk_size - self._allow_ambiguous = allow_ambiguous - - def drop_not_found(self) -> 'RegionsBuilder': - self._default_ambiguity_resolver = AmbiguityResolver(IgnoringStrategyKind.skip_missing) - return self - - def drop_not_matched(self) -> 'RegionsBuilder': - self._default_ambiguity_resolver = AmbiguityResolver(IgnoringStrategyKind.skip_all) - return self - - def allow_ambiguous(self) -> 'RegionsBuilder': - self._default_ambiguity_resolver = AmbiguityResolver(IgnoringStrategyKind.take_namesakes) - self._allow_ambiguous = True - return self - - def chunk_request(self, on_progress=None, chunk_size=40): - self._chunk_size = chunk_size - self._on_progress = on_progress - return self - - def where(self, - request: request_types = None, - within: Optional[Union[str, List[str], Regions, List[Regions], ShapelyPolygonType]] = None, - near: Optional[Union[Regions, ShapelyPointType]] = None) -> 'RegionsBuilder': - """ - If request is not exist - append it to a list with specified scope. - If request is already exist in the list - specify scope exactly for that request. - - where(request, scope) - - Parameters - ---------- - request : [array | string | None] - Data can be filtered by full names at any level (only exact matching). - For 'state' level: - -'US-48' returns continental part of United States (48 states) in a compact form. - within : [array | string | Regions | shapely.Polygon | None] - Data can be filtered by scope name or polygon. - 'US-48' includes continental part of United States (48 states). - near: [Regions | None] - Resolve ambiguity by taking object closest to a 'near' object. - - - Returns - ------- - RegionsBuilder object - - Note - ----- - If request is mixed with existing and new items scope will be specified for all of them. 
- It is allowed to specify scope by chaihing calls of the where method as many times as needed - while trying to resolve an ambiguity. Latest call defines the value, used by geocoding. - - Examples - --------- - >>> from lets_plot.geo_data import * - >>> r = regions(level='country', request=['Germany', 'USA']) - """ - - scope, box = _split(within) - - ambiguity_resolver = AmbiguityResolver( - None, - _to_near_coord(near), - box - ) - - new_overridings = _create_queries(request, scope, ambiguity_resolver) - for overriding in self._overridings: - if overriding.request in set([overriding.request for overriding in new_overridings]): - self._overridings.remove(overriding) - - self._overridings.extend(new_overridings) - return self - - def build(self) -> Regions: - request = RequestBuilder() \ - .set_request_kind(RequestKind.geocoding) \ - .set_requested_payload([PayloadKind.highlights] if self._highlights else []) \ - .set_queries(self._get_queries()) \ - .set_level(self._level) \ - .set_namesake_limit(NAMESAKE_MAX_COUNT) \ - .set_allow_ambiguous(self._allow_ambiguous) \ - .build() - - # Too many queries - can fail with timeout. Chunk queries. 
- if len(self._get_queries()) > 100 and self._chunk_size is None: - self.chunk_request(self._on_progress, 40) - - response: Response = GeocodingService().do_request(request, self._chunk_size, self._on_progress) - - if not isinstance(response, SuccessResponse): - _raise_exception(response) - - return Regions(response.level, response.features, self._highlights) - - def _get_queries(self) -> List[RegionQuery]: - for overriding in self._overridings: - overriding_did_not_match = True # if overriding.name is not found in self._names just add it as new query - for query in self._queries: - if query.request == overriding.request: - query.scope = overriding.scope - query.ambiguity_resolver = overriding.ambiguity_resolver - overriding_did_not_match = False - - if overriding_did_not_match: - self._queries.append(overriding) - - if len(self._queries) == 0: - return [RegionQuery(None, None, self._default_ambiguity_resolver)] - - return [ - RegionQuery( - q.request, - q.scope, - q.ambiguity_resolver if q.ambiguity_resolver != AmbiguityResolver.empty() else self._default_ambiguity_resolver - ) - for q in self._queries - ] - - def __eq__(self, o): - return isinstance(o, RegionsBuilder) \ - and self._overridings == o._overridings - - def __ne__(self, o): - return not self == o diff --git a/python-package/lets_plot/geo_data/to_geo_data_frame.py b/python-package/lets_plot/geo_data/to_geo_data_frame.py index cfa8d5fd2a0..6c0920d0bba 100644 --- a/python-package/lets_plot/geo_data/to_geo_data_frame.py +++ b/python-package/lets_plot/geo_data/to_geo_data_frame.py @@ -5,8 +5,9 @@ from pandas import DataFrame from shapely.geometry import box -from lets_plot.geo_data import DataFrameProvider, select_not_empty_name, DF_REQUEST, DF_FOUND_NAME, abstractmethod -from lets_plot.geo_data.gis.response import GeocodedFeature, GeoRect, Boundary, Multipolygon, Polygon, GeoPoint +from lets_plot.geo_data import PlacesDataFrameBuilder, zip_answers, abstractmethod +from lets_plot.geo_data.gis.request 
import RegionQuery, LevelKind +from lets_plot.geo_data.gis.response import Answer, GeocodedFeature, GeoRect, Boundary, Multipolygon, Polygon, GeoPoint ShapelyPoint = shapely.geometry.Point ShapelyLinearRing = shapely.geometry.LinearRing @@ -22,7 +23,7 @@ def _create_geo_data_frame(data, geometry) -> DataFrame: ) -class RectGeoDataFrame(DataFrameProvider): +class RectGeoDataFrame: @staticmethod def intersected_by_antimeridian(lonmin: float, lonmax: float): @@ -39,27 +40,24 @@ def __init__(self): self._lonmax: List[float] = [] self._latmax: List[float] = [] - def to_data_frame(self, features: List[GeocodedFeature]) -> DataFrame: - data = self._calc_common_data(features) + def to_data_frame(self, answers: List[Answer], queries: List[RegionQuery], level_kind: LevelKind) -> DataFrame: + assert len(answers) == len(queries) + places = PlacesDataFrameBuilder(level_kind) + + for query, answer in zip_answers(queries, answers): + for feature in answer.features: + rects: List[GeoRect] = self._read_rect(feature) + for rect in rects: + places.append_row(query, feature) + self._lonmin.append(rect.min_lon) + self._latmin.append(rect.min_lat) + self._lonmax.append(rect.max_lon) + self._latmax.append(rect.max_lat) + geometry = [RectGeoDataFrame.limit2geometry(lmt[0], lmt[1], lmt[2], lmt[3]) for lmt in - zip(self._lonmin, self._latmin, self._lonmax, self._latmax)] - return _create_geo_data_frame(data, geometry=geometry) - - def _calc_common_data(self, features: List[GeocodedFeature]) -> dict: - for feature in features: - rects: GeoRect = self._read_rect(feature) - for rect in rects: - self._lonmin.append(rect.min_lon) - self._latmin.append(rect.min_lat) - self._lonmax.append(rect.max_lon) - self._latmax.append(rect.max_lat) - self._request.append(select_not_empty_name(feature)) - self._found_name.append(feature.name) - - return { - DF_REQUEST: self._request, - DF_FOUND_NAME: self._found_name - } + zip(self._lonmin, self._latmin, self._lonmax, self._latmax)] + return 
_create_geo_data_frame(places.build_dict(), geometry=geometry) + def _read_rect(self, feature: GeocodedFeature) -> List[GeoRect]: rect: GeoRect = self._select_rect(feature) @@ -76,43 +74,39 @@ def _select_rect(self, feature: GeocodedFeature) -> GeoRect: pass -class CentroidsGeoDataFrame(DataFrameProvider): +class CentroidsGeoDataFrame: def __init__(self): super().__init__() self._lons: List[float] = [] self._lats: List[float] = [] - def to_data_frame(self, features: List[GeocodedFeature]) -> DataFrame: - for feature in features: - self._lons.append(feature.centroid.lon) - self._lats.append(feature.centroid.lat) - self._request.append(select_not_empty_name(feature)) - self._found_name.append(feature.name) - - data = { - DF_REQUEST: self._request, - DF_FOUND_NAME: self._found_name, - } + def to_data_frame(self, answers: List[Answer], queries: List[RegionQuery], level_kind: LevelKind) -> DataFrame: + places = PlacesDataFrameBuilder(level_kind) + + for query, answer in zip_answers(queries, answers): + for feature in answer.features: + places.append_row(query, feature) + self._lons.append(feature.centroid.lon) + self._lats.append(feature.centroid.lat) + geometry = [ShapelyPoint(pnt[0], pnt[1]) for pnt in zip(self._lons, self._lats)] - return _create_geo_data_frame(data, geometry) + return _create_geo_data_frame(places.build_dict(), geometry) -class BoundariesGeoDataFrame(DataFrameProvider): +class BoundariesGeoDataFrame: def __init__(self): super().__init__() - def to_data_frame(self, features: List[GeocodedFeature]) -> DataFrame: + def to_data_frame(self, answers: List[Answer], queries: List[RegionQuery], level_kind: LevelKind) -> DataFrame: + places = PlacesDataFrameBuilder(level_kind) + geometry = [] - for feature in features: - self._request.append(select_not_empty_name(feature)) - self._found_name.append(feature.name) - geometry.append(self._geo_parse_geometry(feature.boundary)) - - df = { - DF_REQUEST: self._request, - DF_FOUND_NAME: self._found_name - } - return 
_create_geo_data_frame(df, geometry=geometry) + for query, answer in zip_answers(queries, answers): + for feature in answer.features: + places.append_row(query, feature) + geometry.append(self._geo_parse_geometry(feature.boundary)) + + return _create_geo_data_frame(places.build_dict(), geometry=geometry) def _geo_parse_geometry(self, boundary: Boundary): diff --git a/python-package/lets_plot/plot/geom.py b/python-package/lets_plot/plot/geom.py index f7caf56d693..e05045e9363 100644 --- a/python-package/lets_plot/plot/geom.py +++ b/python-package/lets_plot/plot/geom.py @@ -3,8 +3,8 @@ # Use of this source code is governed by the MIT license that can be found in the LICENSE file. # from .core import FeatureSpec, LayerSpec -from .util import as_annotated_data, as_annotated_map_data, is_geo_data_frame, is_geo_data_regions, map_join_regions, \ - geo_data_frame_to_wgs84, as_pair +from .util import as_annotated_data, as_annotated_map_data, is_geo_data_frame, is_geocoder, auto_join_geocoder, \ + geo_data_frame_to_wgs84, as_map_join # # Geoms, short for geometric objects, describe the type of plot ggplot will produce. @@ -49,13 +49,30 @@ def geom_point(mapping=None, *, data=None, stat=None, position=None, show_legend Value 'none' will disable sampling for this layer. tooltips : result of the call to the layer_tooltips() function. Specifies appearance, style and content. - map : GeoDataFrame (supported shapes Point and MultiPoint) or Regions (implicitly invoke centroids()) + map : GeoDataFrame (supported shapes Point and MultiPoint) or Geocoder (implicitly invoke centroids()) Data containing coordinates of points. map_join : str, pair - Pair of names used to join map coordinates with data. - str is allowed only when used with Regions object - map key 'request' will be automatically added. - first value in pair - column in data - second value in pair - column in map + Keys used to join map coordinates with data. 
+ first value in pair - column/columns in data + second value in pair - column/columns in map + + When map is a GeoDataFrame: + map_join='state': + 'state' is a key for both data and map. + map_join=[['city', 'state']]: + ['city', 'state'] is a key for both data and map. + map_join=[['City_Name', 'State_Name'], ['city', 'state']]: + data key - ['City_Name', 'State_Name'], map key - ['city', 'state'] + + If map is a Geocoder then second value can be omitted - it will be generated automatically with columns that were used for geocoding. + map_join='State_Name': + data key - ['State_Name'], map key - ['state'] + map_join=['City_Name', 'State_Name']: + data key - ['City_Name', 'State_Name'], map key - ['city', 'state'] + map_join=[['City_Name', 'State_Name'], ['city', 'state']]: + data key - ['City_Name', 'State_Name'], map key - ['city', 'state']. In case of extra parents + in a map parameter that were needed for ambituity resolving but not present in data. + other_args : Other arguments passed on to the layer. These are often aesthetics settings used to set an aesthetic to a fixed value, like color = "red", fill = "blue", size = 3 or shape = 21. They may also be parameters to the @@ -100,9 +117,9 @@ def geom_point(mapping=None, *, data=None, stat=None, position=None, show_legend >>> p """ - if is_geo_data_regions(map): - map = map.centroids() - map_join = map_join_regions(map_join) + if is_geocoder(map): + map_join = auto_join_geocoder(map_join, map) + map = map.get_centroids() return _geom('point', mapping=mapping, @@ -148,7 +165,7 @@ def geom_path(mapping=None, *, data=None, stat=None, position=None, show_legend= Data containing coordinates of lines. map_join : str, pair Pair of names used to join map coordinates with data. - str is allowed only when used with Regions object - map key 'request' will be automatically added. + str is allowed only when used with Geocoder object - map key 'request' will be automatically added. 
first value in pair - column in data second value in pair - column in map other_args : @@ -208,6 +225,8 @@ def geom_path(mapping=None, *, data=None, stat=None, position=None, show_legend= >>> p += geom_path(stat='smooth', color='red', linetype='longdash') >>> p """ + if is_geocoder(map): + raise ValueError("Geocoding doesn't provide geometries supported by geom_path") return _geom('path', mapping=mapping, data=data, @@ -1436,11 +1455,11 @@ def geom_polygon(mapping=None, *, data=None, stat=None, position=None, show_lege Value 'none' will disable sampling for this layer. tooltips : result of the call to the layer_tooltips() function. Specifies appearance, style and content. - map : GeoDataFrame (supported shapes Polygon and MultiPolygon) or Regions (implicitly invoke boundaries()) + map : GeoDataFrame (supported shapes Polygon and MultiPolygon) or Geocoder (implicitly invoke boundaries()) Data contains coordinates of polygon vertices on map. map_join : str, pair Pair of names used to join map coordinates with data. - str is allowed only when used with Regions object - map key 'request' will be automatically added. + str is allowed only when used with Geocoder object - map key 'request' will be automatically added. first value in pair - column in data second value in pair - column in map other_args : @@ -1486,9 +1505,9 @@ def geom_polygon(mapping=None, *, data=None, stat=None, position=None, show_lege >>> ggplot(dat, aes('x', 'y')) + geom_polygon(aes(fill='id'), alpha=0.5) """ - if is_geo_data_regions(map): - map = map.boundaries() - map_join = map_join_regions(map_join) + if is_geocoder(map): + map_join = auto_join_geocoder(map_join, map) + map = map.get_boundaries() return _geom('polygon', mapping=mapping, @@ -1530,11 +1549,11 @@ def geom_map(mapping=None, *, data=None, stat=None, position=None, show_legend=N Value 'none' will disable sampling for this layer. tooltips : result of the call to the layer_tooltips() function. Specifies appearance, style and content. 
- map : GeoDataFrame (supported shapes Polygon and MultiPolygon) or Regions (implicitly invoke boundaries()) + map : GeoDataFrame (supported shapes Polygon and MultiPolygon) or Geocoder (implicitly invoke boundaries()) Data containing region boundaries (coordinates of polygon vertices on map). map_join : str, pair Pair of names used to join map coordinates with data. - str is allowed only when used with Regions object - map key 'request' will be automatically added. + str is allowed only when used with Geocoder object - map key 'request' will be automatically added. first value in pair - column in data second value in pair - column in map other_args : @@ -1573,7 +1592,7 @@ def geom_map(mapping=None, *, data=None, stat=None, position=None, show_legend=N >>> from lets_plot import * >>> import lets_plot.geo_data as gd >>> LetsPlot.setup_html() - >>> boundaries = gd.regions_state(request=['Texas', 'Iowa', 'Arizona'], within='US-48').boundaries() + >>> boundaries = gd.geocode_states(['Texas', 'Iowa', 'Arizona']).scope('US-48').get_boundaries() >>> regions = np.unique(boundaries['found name']) >>> num_of_regions = len(regions) >>> df = pd.DataFrame(regions, columns=['state']) @@ -1581,9 +1600,9 @@ def geom_map(mapping=None, *, data=None, stat=None, position=None, show_legend=N >>> ggplot(df) + ggtitle('Randomly colored states') + geom_map(aes(fill='value'), map=boundaries, map_join=('state', 'found name'), color='white') """ - if is_geo_data_regions(map): - map = map.boundaries() - map_join = map_join_regions(map_join) + if is_geocoder(map): + map_join = auto_join_geocoder(map_join, map) + map = map.get_boundaries() return _geom('map', mapping=mapping, @@ -2649,11 +2668,11 @@ def geom_rect(mapping=None, *, data=None, stat=None, position=None, show_legend= Value 'none' will disable sampling for this layer. tooltips : result of the call to the layer_tooltips() function. Specifies appearance, style and content. 
- map : GeoDataFrame (shapes MultiPoint, Line, MultiLine, Polygon and MultiPolygon) or Regions (implicitly invoke limits()) + map : GeoDataFrame (shapes MultiPoint, Line, MultiLine, Polygon and MultiPolygon) or Geocoder (implicitly invoke limits()) Bounding boxes of geometries will be drawn. map_join : str, pair Pair of names used to join map coordinates with data. - str is allowed only when used with Regions object - map key 'request' will be automatically added. + str is allowed only when used with Geocoder object - map key 'request' will be automatically added. first value in pair - column in data second value in pair - column in map other_args : @@ -2694,9 +2713,9 @@ def geom_rect(mapping=None, *, data=None, stat=None, position=None, show_legend= """ - if is_geo_data_regions(map): - map = map.limits() - map_join = map_join_regions(map_join) + if is_geocoder(map): + map_join = auto_join_geocoder(map_join, map) + map = map.get_limits() return _geom('rect', mapping=mapping, @@ -2815,11 +2834,11 @@ def geom_text(mapping=None, *, data=None, stat=None, position=None, show_legend= Value 'none' will disable sampling for this layer. tooltips : result of the call to the layer_tooltips() function. Specifies appearance, style and content. - map : GeoDataFrame (supported shapes Point and MultiPoint) or Regions (implicitly invoke centroids()) + map : GeoDataFrame (supported shapes Point and MultiPoint) or Geocoder (implicitly invoke centroids()) Data containing coordinates of points. map_join : str, pair Pair of names used to join map coordinates with data. - str is allowed only when used with Regions object - map key 'request' will be automatically added. + str is allowed only when used with Geocoder object - map key 'request' will be automatically added. 
first value in pair - column in data second value in pair - column in map label_format : str @@ -2830,7 +2849,7 @@ def geom_text(mapping=None, *, data=None, stat=None, position=None, show_legend= 'TTL: {.2f}$' -> 'TTL: 12.45$' na_text : str Text to show for missing values. - Default: 'n/a' + Default: 'n/a' other_args : Other arguments passed on to layer. These are often aesthetics settings, used to set an aesthetic to a fixed value, like color = "red", fill = "blue", size = 3 or shape = 21. They may also be parameters to the @@ -2871,9 +2890,9 @@ def geom_text(mapping=None, *, data=None, stat=None, position=None, show_legend= >>> ggplot() + geom_text(aes(x=[1], y=[1], label=['Text'], angle=[30], family=['mono']), size = 10) """ - if is_geo_data_regions(map): - map = map.centroids() - map_join = map_join_regions(map_join) + if is_geocoder(map): + map_join = auto_join_geocoder(map_join, map) + map = map.get_centroids() return _geom('text', mapping=mapping, @@ -2904,6 +2923,9 @@ def _geom(name, *, data, mapping, data_meta = as_annotated_data(data, mapping) + if is_geocoder(data): + data = data.get_geocodes() + if is_geo_data_frame(data): data = geo_data_frame_to_wgs84(data) @@ -2912,13 +2934,9 @@ def _geom(name, *, map_data_meta = as_annotated_map_data(kwargs.get('map', None)) - map_join = kwargs.get('map_join', None) - if map_join is not None: - pair = as_pair(map_join) - if pair is not None: - kwargs['map_join'] = pair - else: - raise ValueError("Unexpected 'map_join' format. 
Should be str, [str] or [str, str]") + kwargs['map_join'] = as_map_join( + kwargs.get('map_join', None) + ) return LayerSpec(geom=name, stat=stat, diff --git a/python-package/lets_plot/plot/geom_extras.py b/python-package/lets_plot/plot/geom_extras.py index e4fd5b9af65..207c6469b32 100644 --- a/python-package/lets_plot/plot/geom_extras.py +++ b/python-package/lets_plot/plot/geom_extras.py @@ -4,7 +4,7 @@ # from .core import FeatureSpec -__all__ = ['arrow', 'lon_lat'] +__all__ = ['arrow'] # @@ -36,7 +36,3 @@ def arrow(angle=None, length=None, ends=None, type=None): >>> ggplot() + geom_segment(aes(x=[3], y=[6], xend=[4], yend=[10]), arrow=arrow(type='closed')) """ return FeatureSpec('arrow', 'arrow', **locals()) - - -def lon_lat(lon, lat): - return FeatureSpec('deferred_procedure', 'lon_lat', lon=lon, lat=lat) diff --git a/python-package/lets_plot/plot/geom_livemap_.py b/python-package/lets_plot/plot/geom_livemap_.py index 8bcb23f1eab..09286dc9ea5 100644 --- a/python-package/lets_plot/plot/geom_livemap_.py +++ b/python-package/lets_plot/plot/geom_livemap_.py @@ -6,11 +6,11 @@ from typing import Union, Optional, List from .geom import _geom -from .util import is_geo_data_regions, map_join_regions -from .._global_settings import MAPTILES_KIND, MAPTILES_URL, MAPTILES_THEME, MAPTILES_ATTRIBUTION, \ +from .util import is_geocoder, auto_join_geocoder +from lets_plot._global_settings import MAPTILES_KIND, MAPTILES_URL, MAPTILES_THEME, MAPTILES_ATTRIBUTION, \ GEOCODING_PROVIDER_URL, \ TILES_RASTER_ZXY, TILES_VECTOR_LETS_PLOT, MAPTILES_MIN_ZOOM, MAPTILES_MAX_ZOOM -from .._global_settings import has_global_value, get_global_val +from lets_plot._global_settings import has_global_value, get_global_val try: import pandas @@ -50,11 +50,11 @@ def geom_livemap(mapping=None, *, data=None, show_legend=None, sampling=None, to Value 'none' will disable sampling for this layer. tooltips : result of the call to the layer_tooltips() function. Specifies appearance, style and content. 
- map : GeoDataFrame (supported shapes Point and MultiPoint) or Regions (implicitly invoke centroids()) + map : GeoDataFrame (supported shapes Point and MultiPoint) or Geocoder (implicitly invoke centroids()) Data containing coordinates of points. map_join : str, pair, optional Pair of names used to join map coordinates with data. - str is allowed only when used with Regions object - map key 'request' will be automatically added. + str is allowed only when used with Geocoder object - map key 'request' will be automatically added. first value in pair - column in data second value in pair - column in map symbol : string, optional @@ -109,9 +109,6 @@ def geom_livemap(mapping=None, *, data=None, show_legend=None, sampling=None, to >>> p = ggplot() + geom_livemap() >>> p += ggtitle('Live Map') """ - # if within is not None: - # within = _prepare_parent(within) - if location is not None: location = _prepare_location(location) @@ -123,9 +120,9 @@ def geom_livemap(mapping=None, *, data=None, show_legend=None, sampling=None, to if _display_mode in other_args.keys(): other_args.pop(_display_mode) - if is_geo_data_regions(map): - map = map.centroids() - map_join = map_join_regions(map_join) + if is_geocoder(map): + map_join = auto_join_geocoder(map_join, map) + map = map.get_centroids() return _geom('livemap', mapping=mapping, @@ -240,7 +237,7 @@ def _prepare_location(location: Union[str, List[float]]) -> Optional[dict]: return None value = location - # if isinstance(location, Regions): + # if isinstance(location, Geocoder): # kind = RegionKind.region_ids # value = location.unique_ids() diff --git a/python-package/lets_plot/plot/plot.py b/python-package/lets_plot/plot/plot.py index c5c59d8502f..85993788a18 100644 --- a/python-package/lets_plot/plot/plot.py +++ b/python-package/lets_plot/plot/plot.py @@ -6,8 +6,8 @@ from lets_plot.plot.core import FeatureSpec from lets_plot.plot.core import PlotSpec -from lets_plot.plot.util import as_annotated_data -from .._global_settings 
import has_global_value, get_global_val, MAX_WIDTH, MAX_HEIGHT +from lets_plot.plot.util import as_annotated_data, is_geocoder +from lets_plot._global_settings import has_global_value, get_global_val, MAX_WIDTH, MAX_HEIGHT __all__ = ['ggplot', 'ggsize', 'GGBunch'] @@ -74,6 +74,9 @@ def ggplot(data=None, mapping=None): if isinstance(data, FeatureSpec): raise ValueError("Object {!r} is not acceptable as 'data' argument in ggplot()".format(data.kind)) + if is_geocoder(data): + data = data.get_geocodes() + data, mapping, data_meta = as_annotated_data(data, mapping) return PlotSpec(data, mapping, scales=[], layers=[], **data_meta) diff --git a/python-package/lets_plot/plot/util.py b/python-package/lets_plot/plot/util.py index 849ac99371c..06acc6794e7 100644 --- a/python-package/lets_plot/plot/util.py +++ b/python-package/lets_plot/plot/util.py @@ -3,8 +3,10 @@ # Use of this source code is governed by the MIT license that can be found in the LICENSE file. # from collections import Iterable -from typing import Any, Tuple +from typing import Any, Tuple, Sequence +from lets_plot.geo_data import Geocodes +from lets_plot.geo_data.geocoder import Geocoder from lets_plot.mapping import MappingMeta from lets_plot.plot.core import aes @@ -71,7 +73,7 @@ def as_annotated_map_data(raw_map: Any) -> dict: if raw_map is None: return {} - if is_geo_data_regions(raw_map): + if is_geocoder(raw_map): return {'map_data_meta': {'georeference': {}}} if is_geo_data_frame(raw_map): @@ -80,19 +82,37 @@ def as_annotated_map_data(raw_map: Any) -> dict: raise ValueError('Unsupported map parameter type: ' + str(type(raw_map)) + '. 
Should be a GeoDataFrame.') -def is_geo_data_regions(data: Any) -> bool: - # do not import Regions directly to suppress OSM attribution from geo_data package - return data is not None and type(data).__name__ == 'Regions' +def is_geocoder(data: Any) -> bool: + # do not import Geocoder directly to suppress OSM attribution from geo_data package + if data is None: + return False + + return any(base.__name__ == 'Geocoder' for base in type(data).mro()) -def map_join_regions(map_join: Any): +def auto_join_geocoder(map_join: Any, geocoder: Geocoder): + if map_join is None: + return None + if isinstance(map_join, str): - return [map_join, 'request'] + data_names = [map_join] + map_names = Geocodes.find_name_columns(geocoder.get_geocodes()) + elif isinstance(map_join, Sequence): + if len(map_join) == 2 and all(isinstance(v, Sequence) and not isinstance(v, str) for v in map_join): + data_names = map_join[0] + map_names = map_join[1] + else: + data_names = map_join + map_names = Geocodes.find_name_columns(geocoder.get_geocodes()) + if any(not isinstance(v, str) for v in data_names): + raise ValueError("'map_join' with `Geocoder` must be a str, list[str] or pair of list[str]") + else: + raise ValueError("'map_join' with `Geocoder` must be a str, list[str] or pair of list[str], but was{}".format(repr(type(map_join)))) - if isinstance(map_join, Iterable) and len(map_join) == 1: - return [map_join[0], 'request'] + if len(data_names) != len(map_names): + raise ValueError("`map_join` expected to have ({}) items, but was({})".format(len(map_names), len(data_names))) - return map_join + return [data_names, map_names] def is_geo_data_frame(data: Any) -> bool: @@ -110,6 +130,34 @@ def get_geo_data_frame_meta(geo_data_frame) -> dict: } } +def as_map_join(map_join): + if map_join is None: + return None + + if isinstance(map_join, str): + data_join_on, map_join_on = map_join, map_join + elif isinstance(map_join, Sequence): + if len(map_join) == 0: + data_join_on, map_join_on = None, None + 
if len(map_join) == 1: + data_join_on, map_join_on = map_join[0], map_join[0] + elif len(map_join) == 2: + data_join_on, map_join_on = map_join[0], map_join[1] + + if data_join_on is None and map_join_on is None: + return None + + if data_join_on is None or map_join_on is None: + raise ValueError('map_join should not contain None') + + if isinstance(data_join_on, str): + data_join_on = [data_join_on] + + if isinstance(map_join_on, str): + map_join_on = [map_join_on] + + return [data_join_on, map_join_on] + def geo_data_frame_to_wgs84(data): if data.crs is not None: @@ -125,15 +173,3 @@ def is_ndarray(data) -> bool: except ImportError: return False -def as_pair(data): - if isinstance(data, str): - return [data, None] - elif isinstance(data, Iterable): - if len(data) == 0: - return [None, None] - if len(data) == 1: - return [data[0], None] - elif len(data) == 2: - return [data[0], data[1]] - else: - return None diff --git a/python-package/test/geo_data/geo_data.py b/python-package/test/geo_data/geo_data.py index 76f21de4ce5..39b44dca331 100644 --- a/python-package/test/geo_data/geo_data.py +++ b/python-package/test/geo_data/geo_data.py @@ -2,35 +2,31 @@ # Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
import json -from typing import List, Union +from typing import List, Union, Callable, Any -from lets_plot.geo_data import DF_REQUEST, DF_FOUND_NAME +import shapely +from geopandas import GeoDataFrame +from shapely.geometry import Point + +from lets_plot.geo_data import DF_COLUMN_ID, DF_COLUMN_FOUND_NAME, DF_COLUMN_COUNTY, DF_COLUMN_STATE, DF_COLUMN_COUNTRY +from lets_plot.geo_data.geocodes import Geocodes from lets_plot.geo_data.gis.geometry import Ring, Polygon, Multipolygon from lets_plot.geo_data.gis.json_response import ResponseField, GeometryKind -from lets_plot.geo_data.gis.response import GeocodedFeature, FeatureBuilder, LevelKind, Status, GeoRect, GeoPoint, \ +from lets_plot.geo_data.gis.request import RegionQuery +from lets_plot.geo_data.gis.response import Answer, GeocodedFeature, FeatureBuilder, LevelKind, Status, GeoRect, \ + GeoPoint, \ Response, SuccessResponse, AmbiguousResponse, ErrorResponse, ResponseBuilder -from lets_plot.geo_data.regions import Regions - -from pandas import DataFrame GEOJSON_TYPE = ResponseField.boundary_type.value GEOJSON_COORDINATES = ResponseField.boundary_coordinates.value -GEO_OBJECT_ID: str = '777' LEVEL: LevelKind = LevelKind.county -TEXAS: str = 'Texas' -REQUEST: str = 'request' ID: str = 'iddd' NAME: str = 'rrr' -OTHER_NAME: str = 'otherrr name' FOUND_NAME: str = 'a' MESSAGE = 'msg' ERROR_MESSAGE = 'error msg' -SUCCESS = Status.success.value -AMBIGUOUS = Status.ambiguous.value -ERROR = Status.error.value - GJPoint = List[float] GJRing = List[GJPoint] GJPolygon = List[GJRing] @@ -44,18 +40,159 @@ GEO_RECT_MAX_LON: float = 9 GEO_RECT_MAX_LAT: float = 7 - -def make_geocode_region(request: str, name: str, geo_object_id: str, highlights: List[str], level_kind: LevelKind = LevelKind.city) -> Regions: - return Regions(level_kind, [make_region(request, name, geo_object_id, highlights)]) +COLUMN_NAME_CITY = 'city' + +def run_intergration_tests() -> bool: + import os + if 'RUN_GEOCODING_INTEGRATION_TEST' in os.environ.keys(): + 
return os.environ.get('RUN_GEOCODING_INTEGRATION_TEST').lower() == 'true' + return False + + +NO_COLUMN = '' +IGNORED = '__value_ignored__' + + +def assert_error(message: str, action: Callable[[], Any]): + assert isinstance(message, str) + try: + action() + assert False, 'No error, but expected: {}'.format(message) + except Exception as e: + assert message == str(e), "'{}' != '{}'".format(message, str(e)) + + +def assert_request_and_found_name_are_equal(df, r=None): + if r is None: + r = range(len(df)) + + assert df[get_request_column_name(df)].tolist()[r.start:r.stop] == df[DF_COLUMN_FOUND_NAME].tolist()[r.start:r.stop] + + +def get_request_column_name(df) -> str: + if COLUMN_NAME_CITY in df.columns: + return COLUMN_NAME_CITY + elif DF_COLUMN_COUNTY in df.columns: + return DF_COLUMN_COUNTY + elif DF_COLUMN_STATE in df.columns: + return DF_COLUMN_STATE + elif DF_COLUMN_COUNTRY in df.columns: + return DF_COLUMN_COUNTRY + else: + raise ValueError('Magic state - no expected columns') + + +def assert_row( + df, + index: int = 0, + names: Union[str, List] = IGNORED, + found_name: Union[str, List] = IGNORED, + id: Union[str, List] = IGNORED, + city: Union[str, List] = IGNORED, + county: Union[str, List] = IGNORED, + state: Union[str, List] = IGNORED, + country: Union[str, List] = IGNORED, + lon=None, + lat=None, + lon_min=None, + lon_max=None, + lat_min=None, + lat_max=None, + boundary=None +): + def assert_str(column, expected): + if expected == IGNORED: + return + + if expected == NO_COLUMN: + assert column not in df.columns.tolist() + return + + if isinstance(expected, str): + assert expected == df[column][index], '{} != {}'.format(expected, df[column][index]) + return + + if isinstance(expected, list): + actual = df[column][index:index + len(expected)].tolist() + assert actual == expected, '{} != {}'.format(expected, actual) + return + + raise ValueError('Not support type of expected: {}'.format(str(type(expected)))) + + assert_str(get_request_column_name(df), names) 
+ assert_str(DF_COLUMN_ID, id) + assert_str(DF_COLUMN_FOUND_NAME, found_name) + assert_str(COLUMN_NAME_CITY, city) + assert_str(DF_COLUMN_COUNTY, county) + assert_str(DF_COLUMN_STATE, state) + assert_str(DF_COLUMN_COUNTRY, country) + if lon is not None: + assert Point(df.geometry[index]).x == lon, 'lon {} != {}'.format(lon, Point(df.geometry[index]).x) + + if lat is not None: + assert Point(df.geometry[index]).y == lat, 'lat {} != {}'.format(lat, Point(df.geometry[index]).y) + + if any([v is not None for v in [lon_min, lon_max, lat_min, lat_max]]): + if isinstance(df, GeoDataFrame): + bounds = df.geometry[index].bounds + + if lon_min is not None: + assert lon_min == bounds[0] + + if lat_min is not None: + assert lat_min == bounds[1] + + if lon_max is not None: + assert lon_max == bounds[2] + + if lat_max is not None: + assert lat_max == bounds[3] + else: + assert GEO_RECT_MIN_LON == df.lonmin[index] + assert GEO_RECT_MIN_LAT == df.latmin[index] + assert GEO_RECT_MAX_LON == df.lonmax[index] + assert GEO_RECT_MAX_LAT == df.latmax[index] + + if boundary is not None: + def assert_geo_multipolygon(geo_multipolygon, multipolygon: GJMultipolygon): + for i, geo_polygon in enumerate(geo_multipolygon.geoms): + assert_geo_polygon(geo_polygon, multipolygon[i]) + + def assert_geo_polygon(geo_polygon, polygon: GJPolygon): + assert_geo_ring(geo_polygon.exterior.coords, polygon[0]) + + for i, interior in enumerate(geo_polygon.interiors): + assert_geo_ring(interior.coords, polygon[1 + i]) + + def assert_geo_ring(geo_ring, ring: GJRing): + for i, point in enumerate(ring): + assert point[0] == geo_ring[i][0] # lon + assert point[1] == geo_ring[i][1] # lat + + geometry = df.geometry[index] + if isinstance(geometry, shapely.geometry.Polygon): + assert_geo_polygon(geometry, boundary) + + if isinstance(geometry, shapely.geometry.MultiPolygon): + assert_geo_multipolygon(geometry, boundary) + + +def make_geocode_region(request: str, name: str, geo_object_id: str, highlights: List[str], 
level_kind: LevelKind = LevelKind.city) -> Geocodes: + return Geocodes( + level_kind=level_kind, + queries=[RegionQuery(request=request)], + answers=[make_answer(name, geo_object_id, highlights)] + ) -def make_region(request: str, name: str, geo_object_id: str, highlights: List[str]) -> GeocodedFeature: - return FeatureBuilder() \ - .set_query(request) \ - .set_name(name) \ - .set_id(geo_object_id) \ - .set_highlights(highlights) \ - .build_geocoded() +def make_answer(name: str, geo_object_id: str, highlights: List[str]) -> Answer: + return Answer([FeatureBuilder() \ + .set_name(name) \ + .set_id(geo_object_id) \ + .set_highlights(highlights) \ + .build_geocoded() + ] + ) def make_centroid_point() -> GeoPoint: @@ -191,36 +328,11 @@ def get_map_data_meta(plotSpec, layerIdx: int) -> dict: return plotSpec.as_dict()['layers'][layerIdx]['map_data_meta'] +def feature_to_answer(feature: GeocodedFeature) -> Answer: + return Answer([feature]) -def assert_names(df: DataFrame, index: int, name=NAME, found_name=FOUND_NAME): - assert name == df[DF_REQUEST][index] - assert found_name == df[DF_FOUND_NAME][index] - - -def assert_limit(limit: DataFrame, index: int, name=NAME, found_name=FOUND_NAME): - assert_names(limit, index, name, found_name) - - min_lon = limit['lonmin'][index] - min_lat = limit['latmin'][index] - max_lon = limit['lonmax'][index] - max_lat = limit['latmax'][index] - assert GEO_RECT_MIN_LON == min_lon - assert GEO_RECT_MIN_LAT == min_lat - assert GEO_RECT_MAX_LON == max_lon - assert GEO_RECT_MAX_LAT == max_lat - - -def assert_centroid(centroid: DataFrame, index: int, name=NAME, found_name=FOUND_NAME, lon=CENTROID_LON, lat=CENTROID_LAT): - assert_names(centroid, index, name, found_name) - assert_point(centroid, index, lon, lat) - - -def assert_boundary(boundary: DataFrame, index: int, points: List[GJPoint], name=NAME, found_name=FOUND_NAME): - assert_names(boundary, index, name, found_name) - for i, point in enumerate(points): - assert_point(boundary, index + i, 
lon(point), lat(point)) - +def features_to_answers(features: List[GeocodedFeature]) -> List[Answer]: + return [Answer([feature]) for feature in features] -def assert_point(df: DataFrame, index: int, lon: float, lat: float): - assert lon == df[DF_LON][index] - assert lat == df[DF_LAT][index] +def features_to_queries(features: List[GeocodedFeature]) -> List[RegionQuery]: + return [RegionQuery(feature.name) for feature in features] \ No newline at end of file diff --git a/python-package/test/geo_data/gis/assertions.py b/python-package/test/geo_data/gis/assertions.py index 3dbbe0accad..762918dfae6 100644 --- a/python-package/test/geo_data/gis/assertions.py +++ b/python-package/test/geo_data/gis/assertions.py @@ -6,7 +6,6 @@ def assert_geocoded(expected: GeocodedFeature, actual: GeocodedFeature): - assert expected.query == actual.query assert expected.id == actual.id assert expected.name == actual.name assert_point(expected.centroid, actual.centroid) diff --git a/python-package/test/geo_data/gis/test_response_parser.py b/python-package/test/geo_data/gis/test_response_parser.py index e290d7e3a05..ccee25b8947 100644 --- a/python-package/test/geo_data/gis/test_response_parser.py +++ b/python-package/test/geo_data/gis/test_response_parser.py @@ -52,9 +52,9 @@ def test_valid_success_response(): assert isinstance(response, SuccessResponse) assert 'OK' == response.message - assert 2 == len(response.features) - assert_geocoded(foo, response.features[0]) - assert_geocoded(foofoo, response.features[1]) + assert 2 == len(response.answers) + assert_geocoded(foo, response.answers[0].features[0]) + assert_geocoded(foofoo, response.answers[1].features[0]) def test_ambiguous_response(): diff --git a/python-package/test/geo_data/request_assertion.py b/python-package/test/geo_data/request_assertion.py new file mode 100644 index 00000000000..ae026536079 --- /dev/null +++ b/python-package/test/geo_data/request_assertion.py @@ -0,0 +1,199 @@ +# Copyright (c) 2020. JetBrains s.r.o. 
+# Use of this source code is governed by the MIT license that can be found in the LICENSE file. +from typing import TypeVar, Generic, Optional, List, Union + +from lets_plot.geo_data.gis.geometry import GeoRect, GeoPoint + +from lets_plot.geo_data.geocodes import _ensure_is_list +from lets_plot.geo_data.gis.request import Request, GeocodingRequest, RegionQuery, MapRegion, AmbiguityResolver, \ + PayloadKind, MapRegionKind, IgnoringStrategyKind, LevelKind + +T = TypeVar('T') + + +class ValueMatcher(Generic[T]): + def check(self, value): + raise ValueError('abstract') + + +class any(ValueMatcher[T]): + def check(self, value): + return + + +class eq(ValueMatcher[T]): + def __init__(self, v): + self.expected = v + + def check(self, value): + assert type(self.expected) == type(value), "{} != {}".format(type(self.expected), type(value)) + assert self.expected == value, '{} != {}'.format(self.expected, value) + + +class eq_map_region_with_name(eq[MapRegion]): + def __init__(self, name: str): + self.expected = MapRegion.with_name(name) + + +class eq_map_region_with_id(ValueMatcher[MapRegion]): + """ + Checks only id + """ + + def __init__(self, ids: Union[str, List[str]]): + ids = _ensure_is_list(ids) + self.expected = MapRegion.scope(ids) + + def check(self, value): + assert value.kind == MapRegionKind.id or value.kind == MapRegionKind.place + assert self.expected.values == value.values + + +class empty(ValueMatcher[T]): + def check(self, value): + assert value is None, '{} is not None'.format(value) + + +class item_exists(ValueMatcher[T]): + def __init__(self, value): + self._expected = value + + def check(self, value): + exists = False + for v in value: + if v == self._expected: + exists = True + break + + assert exists, 'Item {} not found in list'.format(self._expected) + + +class ScopeMatcher: + ''' + Scope can't be mixed with names and ids. + Scope with name should have length exactly 1. + Scope with ids should have length exactly 1. 
+ + ''' + + def __init__(self): + self._names: Optional[List[str]] = None + self._ids: Optional[List[str]] = None + + def with_names(self, names: List[str]) -> 'ScopeMatcher': + self._names = names + return self + + def with_ids(self, ids: List[str]) -> 'ScopeMatcher': + self._ids = ids + return self + + def empty(self) -> 'ScopeMatcher': + self._names = None + self._ids = None + return self + + def check(self, scope: List[MapRegion]): + if self._names is None and self._ids is None: + assert len(scope) == 0 + elif self._names is not None: + assert len(self._names) == len(scope) + for expected_name, region in zip(self._names, scope): + assert expected_name == MapRegion.name_or_none(region) + elif self._ids is not None: + for expected_id, region in zip(self._ids, scope): + assert len(region.values) == 1 + assert expected_id == region.values[0] + else: + raise ValueError('Invalid matcher state') + + +class AmbiguityResolverMatcher(ValueMatcher[AmbiguityResolver]): + def __init__(self): + self._ignoring_strategy: ValueMatcher[IgnoringStrategyKind] = any() + self._box: ValueMatcher[GeoRect] = any() + self._near: ValueMatcher[GeoPoint] = any() + + def check(self, value): + self._ignoring_strategy.check(value.ignoring_strategy) + self._box.check(value.box) + self._near.check(value.closest_coord) + + +class QueryMatcher: + def __init__(self, + name: ValueMatcher[Optional[str]] = any(), + scope: ValueMatcher[Optional[MapRegion]] = any(), + ambiguity_resolver: ValueMatcher[AmbiguityResolver] = any(), + country: ValueMatcher[Optional[MapRegion]] = any(), + state: ValueMatcher[Optional[MapRegion]] = any(), + county: ValueMatcher[Optional[MapRegion]] = any() + ): + self._name: ValueMatcher[Optional[str]] = name + self._scope: ValueMatcher[Optional[MapRegion]] = scope + self._ambiguity_resolver: ValueMatcher[AmbiguityResolver] = ambiguity_resolver + self._country: ValueMatcher[Optional[MapRegion]] = country + self._state: ValueMatcher[Optional[MapRegion]] = state + self._county: 
ValueMatcher[Optional[MapRegion]] = county + + def with_name(self, name: Optional[str]) -> 'QueryMatcher': + self._name = eq(name) + return self + + def scope(self, scope: ValueMatcher[Optional[MapRegion]]) -> 'QueryMatcher': + self._scope = scope + return self + + def ambiguity_resolver(self, ambiguity_resolver: ValueMatcher[AmbiguityResolver]) -> 'QueryMatcher': + self._ambiguity_resolver = ambiguity_resolver + return self + + def country(self, country: ValueMatcher[Optional[MapRegion]]) -> 'QueryMatcher': + self._country = country + return self + + def state(self, state: ValueMatcher[Optional[MapRegion]]) -> 'QueryMatcher': + self._state = state + return self + + def county(self, county: ValueMatcher[Optional[MapRegion]]) -> 'QueryMatcher': + self._county = county + return self + + def check(self, q: RegionQuery): + self._name.check(q.request) + self._scope.check(q.scope) + self._ambiguity_resolver.check(q.ambiguity_resolver) + self._country.check(q.country) + self._state.check(q.state) + self._county.check(q.county) + + +class GeocodingRequestAssertion: + def __init__(self, request: Request): + self._i = 0 + assert isinstance(request, GeocodingRequest) + self._request: GeocodingRequest = request + + def allows_ambiguous(self) -> 'GeocodingRequestAssertion': + assert self._request.allow_ambiguous + return self + + def has_query(self, query_matcher: QueryMatcher, i: Optional[int] = None) -> 'GeocodingRequestAssertion': + if i is None: + i = self._i + self._i += 1 + + query_matcher.check(self._request.region_queries[i]) + return self + + def has_scope(self, scope_matcher: ScopeMatcher) -> 'GeocodingRequestAssertion': + scope_matcher.check(self._request.scope) + return self + + def has_level(self, level_matcher: ValueMatcher[LevelKind]) -> 'GeocodingRequestAssertion': + level_matcher.check(self._request.level) + return self + + def fetches(self, payload: PayloadKind): + item_exists(payload).check(self._request.requested_payload) diff --git 
a/python-package/test/geo_data/test_check_required_parameters.py b/python-package/test/geo_data/test_check_required_parameters.py index d95dd06b32a..cb01f35f912 100644 --- a/python-package/test/geo_data/test_check_required_parameters.py +++ b/python-package/test/geo_data/test_check_required_parameters.py @@ -5,8 +5,8 @@ import pytest -from lets_plot.geo_data.gis.request import MapRegion, RegionQuery, MISSING_LEVEL_AND_WITHIN_OR_REQUEST_EXCEPTION_TEXT, \ - MISSING_LEVEL_OR_REQUEST_EXCEPTION_TEXT, MISSING_WITHIN_OR_REQUEST_EXCEPTION_TEXT, GeocodingRequest, \ +from lets_plot.geo_data.gis.request import MapRegion, RegionQuery, MISSING_LEVEL_AND_SCOPE_OR_REQUEST_EXCEPTION_TEXT, \ + MISSING_LEVEL_OR_REQUEST_EXCEPTION_TEXT, GeocodingRequest, \ AmbiguityResolver from lets_plot.geo_data.gis.response import LevelKind @@ -19,10 +19,9 @@ @pytest.mark.parametrize('region_queries,level,message', [ - ([], None, MISSING_LEVEL_AND_WITHIN_OR_REQUEST_EXCEPTION_TEXT), - ([RegionQuery(None, None, REACTION_KIND_ALERT)], None, MISSING_LEVEL_AND_WITHIN_OR_REQUEST_EXCEPTION_TEXT), + ([], None, MISSING_LEVEL_AND_SCOPE_OR_REQUEST_EXCEPTION_TEXT), + ([RegionQuery(None, None, REACTION_KIND_ALERT)], None, MISSING_LEVEL_AND_SCOPE_OR_REQUEST_EXCEPTION_TEXT), ([RegionQuery(None, PARENT, REACTION_KIND_ALERT)], None, MISSING_LEVEL_OR_REQUEST_EXCEPTION_TEXT), - ([RegionQuery(None, None, REACTION_KIND_ALERT)], LevelKind.state, MISSING_WITHIN_OR_REQUEST_EXCEPTION_TEXT), ]) def test_args_that_fail(region_queries: List[RegionQuery], level: Optional[LevelKind], diff --git a/python-package/test/geo_data/test_core.py b/python-package/test/geo_data/test_core.py index e09162c90ab..5645e5a231b 100644 --- a/python-package/test/geo_data/test_core.py +++ b/python-package/test/geo_data/test_core.py @@ -1,23 +1,22 @@ # Copyright (c) 2020. JetBrains s.r.o. # Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
-from typing import Union from unittest import mock import pytest from pandas import DataFrame -from lets_plot._type_utils import _standardize_value -from lets_plot.geo_data import regions, regions_builder +from lets_plot.geo_data import geocode +from lets_plot.geo_data.geocoder import _to_scope +from lets_plot.geo_data.geocodes import _coerce_resolution, _ensure_is_list, Geocodes, DF_COLUMN_ID, DF_COLUMN_FOUND_NAME from lets_plot.geo_data.gis.geocoding_service import GeocodingService from lets_plot.geo_data.gis.request import MapRegion, RegionQuery, GeocodingRequest, PayloadKind, ExplicitRequest, \ AmbiguityResolver -from lets_plot.geo_data.gis.response import LevelKind, FeatureBuilder, GeoPoint +from lets_plot.geo_data.gis.response import LevelKind, FeatureBuilder, GeoPoint, Answer from lets_plot.geo_data.livemap_helper import _prepare_location, RegionKind, _prepare_parent, \ LOCATION_LIST_ERROR_MESSAGE, LOCATION_DATAFRAME_ERROR_MESSAGE -from lets_plot.geo_data.regions import _to_scope, _coerce_resolution, _ensure_is_list, Regions, DF_REQUEST, DF_ID, \ - DF_FOUND_NAME -from .geo_data import make_geocode_region, make_region, make_success_response +from .geo_data import make_geocode_region, make_success_response, features_to_answers, features_to_queries, \ + COLUMN_NAME_CITY DATAFRAME_COLUMN_NAME = 'name' DATAFRAME_NAME_LIST = ['usa', 'russia'] @@ -38,12 +37,17 @@ PARENT_WITH_NAME = MapRegion.with_name(REGION_NAME) -REGION_QUERY_LA = RegionQuery('LA', PARENT_WITH_NAME, AmbiguityResolver()) -REGION_QUERY_NY = RegionQuery('NY', PARENT_WITH_NAME, AmbiguityResolver()) +REGION_QUERY_LA = RegionQuery('LA', None, AmbiguityResolver()) +REGION_QUERY_NY = RegionQuery('NY', None, AmbiguityResolver()) NAMESAKES_EXAMPLE_LIMIT = 10 +def feature_id(answer: Answer) -> str: + assert len(answer.features) == 1 + return answer.features[0].id + + def make_expected_map_region(region_kind: RegionKind, values): return { 'type': region_kind.value, @@ -54,7 +58,7 @@ def 
make_expected_map_region(region_kind: RegionKind, values): @mock.patch.object(GeocodingService, 'do_request') def test_regions(mock_geocoding): try: - regions(level=LEVEL, request=FILTER_LIST, within=REGION_NAME).to_data_frame() + geocode(level=LEVEL, names=FILTER_LIST).scope(REGION_NAME).get_geocodes() except Exception: pass # response doesn't contain proper feature with ids - ignore @@ -63,18 +67,17 @@ def test_regions(mock_geocoding): resolution=None, region_queries=[REGION_QUERY_LA, REGION_QUERY_NY], level=LEVEL_KIND, + scope=[MapRegion.with_name(REGION_NAME)], namesake_example_limit=NAMESAKES_EXAMPLE_LIMIT, allow_ambiguous=False - ), - None, # chunk_size - None # progress_callback + ) ) @mock.patch.object(GeocodingService, 'do_request') def test_regions_with_highlights(mock_geocoding): try: - regions_builder(level=LEVEL, request=FILTER_LIST, within=REGION_NAME, highlights=True).build() + geocode(level=LEVEL, names=FILTER_LIST).scope(REGION_NAME).highlights(True).get_geocodes() except Exception: pass # response doesn't contain proper feature with ids - ignore @@ -82,17 +85,18 @@ def test_regions_with_highlights(mock_geocoding): GeocodingRequest(requested_payload=[PayloadKind.highlights], resolution=None, region_queries=[REGION_QUERY_LA, REGION_QUERY_NY], + scope=MapRegion.with_name(REGION_NAME), level=LEVEL_KIND, namesake_example_limit=NAMESAKES_EXAMPLE_LIMIT, allow_ambiguous=False - ), - None, # chunk_size - None # progress_callback + ) ) +FOO_FEATURE = FeatureBuilder().set_name('fooname').set_id('fooid').build_geocoded() +BAR_FEATURE = FeatureBuilder().set_name('barname').set_id('barid').build_geocoded() -FOO = FeatureBuilder().set_query('foo').set_name('fooname').set_id('fooid').build_geocoded() -BAR = FeatureBuilder().set_query('foo').set_name('barname').set_id('barid').build_geocoded() +FOO = Answer([FeatureBuilder().set_name('fooname').set_id('fooid').build_geocoded()]) +BAR = Answer([FeatureBuilder().set_name('barname').set_id('barid').build_geocoded()]) 
@pytest.mark.parametrize('location,expected', [ # none @@ -106,13 +110,12 @@ def test_regions_with_highlights(mock_geocoding): ), # single region - (Regions( - LEVEL_KIND, - [ - FOO, - BAR - ]), - MapRegion.with_ids([FOO.id, BAR.id]) + (Geocodes( + level_kind=LEVEL_KIND, + queries=features_to_queries([FOO_FEATURE, BAR_FEATURE]), + answers=features_to_answers([FOO_FEATURE, BAR_FEATURE]) + ), + MapRegion.scope([feature_id(FOO), feature_id(BAR)]) ), # list of strings @@ -127,24 +130,36 @@ def test_regions_with_highlights(mock_geocoding): # list of regions ([ - Regions(LEVEL_KIND, [FOO]), - Regions(LEVEL_KIND, [BAR]) + Geocodes( + level_kind=LEVEL_KIND, + queries=features_to_queries([FOO_FEATURE]), + answers=features_to_answers([FOO_FEATURE]) + ), + Geocodes( + level_kind=LEVEL_KIND, + queries=features_to_queries([BAR_FEATURE]), + answers=features_to_answers([BAR_FEATURE]), + ) ], [ - MapRegion.with_ids([FOO.id]), - MapRegion.with_ids([BAR.id]) + MapRegion.scope([feature_id(FOO)]), + MapRegion.scope([feature_id(BAR)]) ] ), # mix of strings and regions ([ - 'foo', - Regions(LEVEL_KIND, [BAR]), + FOO_FEATURE.name, + Geocodes( + level_kind=LEVEL_KIND, + queries=features_to_queries([BAR_FEATURE]), + answers=features_to_answers([BAR_FEATURE]) + ) ], [ - MapRegion.with_name(FOO.query), - MapRegion.with_ids([BAR.id]) + MapRegion.with_name(FOO_FEATURE.name), + MapRegion.scope([feature_id(BAR)]) ] ) ]) @@ -154,18 +169,16 @@ def test_to_parent_with_name(location, expected): def test_to_parent_with_id(): - assert MapRegion.with_ids(REGION_LIST) == _to_scope(make_geocode_region(REQUEST, REGION_NAME, REGION_ID, REGION_HIGHLIGHTS)) + assert MapRegion.scope(REGION_LIST) == _to_scope(make_geocode_region(REQUEST, REGION_NAME, REGION_ID, REGION_HIGHLIGHTS)) @mock.patch.object(GeocodingService, 'do_request') def test_request_remove_duplicated_ids(mock_request): try: - Regions( - LEVEL_KIND, - [ - make_region(REQUEST, REGION_NAME, REGION_ID, REGION_HIGHLIGHTS), - make_region(REQUEST, 
REGION_NAME, REGION_ID, REGION_HIGHLIGHTS) - ] + Geocodes( + level_kind=LEVEL_KIND, + queries=features_to_queries([FOO_FEATURE, FOO_FEATURE]), + answers=features_to_answers([FOO_FEATURE, FOO_FEATURE]) ).centroids() except ValueError: pass # response doesn't contain proper feature with ids - ignore @@ -173,7 +186,7 @@ def test_request_remove_duplicated_ids(mock_request): mock_request.assert_called_with( ExplicitRequest( requested_payload=[PayloadKind.centroids], - ids=[REGION_ID] + ids=[FOO_FEATURE.id] ) ) @@ -226,26 +239,20 @@ def test_geocode_limit(mock_request): @mock.patch.object(GeocodingService, 'do_request') def test_reorder_for_centroids_should_happen(mock_request): - mock_request.return_value = make_success_response() \ - .set_geocoded_features( - [ - FeatureBuilder().set_id('2').set_query('New York').set_name('New York').set_centroid(GeoPoint(0, 0)).build_geocoded(), - FeatureBuilder().set_id('3').set_query('Las Vegas').set_name('Las Vegas').set_centroid(GeoPoint(0, 0)).build_geocoded(), - FeatureBuilder().set_id('1').set_query('Los Angeles').set_name('Los Angeles').set_centroid(GeoPoint(0, 0)).build_geocoded() - ] - ).build() - - df = Regions( - LevelKind.city, - [ - make_region('Los Angeles', 'Los Angeles', '1', []), - make_region('New York', 'New York', '2', []), - make_region('Las Vegas', 'Las Vegas', '3', []), - make_region('Los Angeles', 'Los Angeles', '1', []), - ] + new_york = FeatureBuilder().set_id('2').set_query('New York').set_name('New York').set_centroid(GeoPoint(0, 0)).build_geocoded() + las_vegas = FeatureBuilder().set_id('3').set_query('Las Vegas').set_name('Las Vegas').set_centroid(GeoPoint(0, 0)).build_geocoded() + + los_angeles = FeatureBuilder().set_id('1').set_query('Los Angeles').set_name('Los Angeles').set_centroid( + GeoPoint(0, 0)).build_geocoded() + mock_request.return_value = make_success_response().set_geocoded_features([new_york, las_vegas, los_angeles]).build() + + df = Geocodes( + level_kind=LevelKind.city, + 
queries=features_to_queries([los_angeles, new_york, las_vegas, los_angeles]), + answers=features_to_answers([los_angeles, new_york, las_vegas, los_angeles]) ).centroids() - assert ['Los Angeles', 'New York', 'Las Vegas', 'Los Angeles'] == df[DF_FOUND_NAME].tolist() + assert ['Los Angeles', 'New York', 'Las Vegas', 'Los Angeles'] == df[DF_COLUMN_FOUND_NAME].tolist() @pytest.mark.parametrize('arg,expected_resolution', [ @@ -306,13 +313,5 @@ def test_ensure_is_list(arg, expected_result): def test_regions_to_data_frame_should_skip_highlights(): regions = make_geocode_region(REQUEST, REGION_NAME, REGION_ID, REGION_HIGHLIGHTS) regions_df = regions.to_data_frame() - assert [DF_REQUEST, DF_ID, DF_FOUND_NAME] == list(regions_df.columns.values) - - -def test_regions_to_dict(): - regions = make_geocode_region(REQUEST, REGION_NAME, REGION_ID, []) - regions_dict = _standardize_value(regions) - assert REQUEST == regions_dict[DF_REQUEST][0] - assert REGION_ID == regions_dict[DF_ID][0] - assert REGION_NAME == regions_dict[DF_FOUND_NAME][0] + assert [DF_COLUMN_ID, COLUMN_NAME_CITY, DF_COLUMN_FOUND_NAME] == list(regions_df.columns.values) diff --git a/python-package/test/geo_data/test_geocoder.py b/python-package/test/geo_data/test_geocoder.py new file mode 100644 index 00000000000..9ebbb92df2c --- /dev/null +++ b/python-package/test/geo_data/test_geocoder.py @@ -0,0 +1,442 @@ +# Copyright (c) 2020. JetBrains s.r.o. +# Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
+ +from typing import Optional, Union, List, Callable +from unittest import mock + +import shapely + +from lets_plot.geo_data import GeocodingService, SuccessResponse, Answer, GeocodedFeature +from lets_plot.geo_data.gis.geometry import GeoRect, GeoPoint +from lets_plot.geo_data.gis.request import MapRegion, AmbiguityResolver, GeocodingRequest, LevelKind, RegionQuery, \ + IgnoringStrategyKind +from lets_plot.geo_data.geocoder import geocode_countries, geocode, NamesGeocoder, geocode_cities, geocode_states +from lets_plot.geo_data.geocodes import Geocodes +from .geo_data import make_answer, assert_row +from .request_assertion import GeocodingRequestAssertion, QueryMatcher, ScopeMatcher, ValueMatcher, eq, empty, \ + eq_map_region_with_name, eq_map_region_with_id + + +def test_simple(): + request = geocode(names='foo')\ + ._build_request() + + assert_that(request) \ + .has_query(no_parents(request=eq('foo'))) + + +def test_no_parents_where_should_override_scope(): + # without parents should update scope for matching name + + request = geocode(names='foo') \ + .where('foo', scope='bar') \ + ._build_request() + + assert_that(request) \ + .has_query(no_parents(request=eq('foo'), scope=eq_map_region_with_name('bar'))) + + +def test_when_twice_override_same_name_with_where_should_use_last_scope(): + # without parents should update scope for matching name + + request = geocode(names='foo') \ + .where('foo', scope='bar') \ + .where('foo', scope='baz') \ + ._build_request() + + assert_that(request) \ + .has_query(no_parents(request=eq('foo'), scope=eq_map_region_with_name('baz'))) + + +def test_when_regions_in_parent_should_take_region_id(): + builder = geocode(names='foo') \ + .states(make_simple_region('bar')) + + assert_that(builder) \ + .has_query(QueryMatcher() + .with_name('foo')\ + .state(eq_map_region_with_id('bar_id')) + ) + + +def test_parents_can_contain_nulls(): + builder = geocode(names=['foo', 'bar'])\ + .states([None, 'baz']) + + assert_that(builder) \ + 
.has_query(QueryMatcher() + .with_name('foo') \ + .state(empty()) + ) \ + .has_query(QueryMatcher() + .with_name('bar') \ + .state(eq_map_region_with_name('baz')) + ) + + +def test_where_with_given_parents_and_duplicated_names(): + # should update scope only for matching name and parents - query with index 1 + + request = geocode(names=['foo', 'foo']) \ + .states(['bar', 'baz']) \ + .where(name='foo', state='baz', scope='spam') \ + ._build_request() + + assert_that(request) \ + .has_query(QueryMatcher() + .with_name('foo') + .state(eq_map_region_with_name('bar')) + .scope(empty()) + ) \ + .has_query(QueryMatcher() + .with_name('foo') + .state(eq_map_region_with_name('baz')) + .scope(eq_map_region_with_name('spam')) + ) + + +def test_where_with_given_country_should_be_used(): + # should update scope only for matching name and parents - query with index 1 + + request = geocode(names=['foo', 'foo']) \ + .countries(['bar', 'baz']) \ + .where(name='foo', country='baz', scope='spam') \ + ._build_request() + + assert_that(request) \ + .has_query(QueryMatcher() + .with_name('foo') + .country(eq_map_region_with_name('bar')) + .scope(empty()) + ) \ + .has_query(QueryMatcher() + .with_name('foo') + .country(eq_map_region_with_name('baz')) + .scope(eq_map_region_with_name('spam')) + ) + + +def test_where_scope_is_box(): + request = geocode(names=['foo']) \ + .states(['bar']) \ + .where(name='foo', state='bar', scope=shapely.geometry.box(1, 2, 3, 4)) \ + ._build_request() + + assert_that(request) \ + .has_query(QueryMatcher() + .with_name('foo') + .state(eq_map_region_with_name('bar')) + .ambiguity_resolver(eq(AmbiguityResolver(box=GeoRect(1, 2, 3, 4)))) + ) + + +def test_where_closets_to_point(): + request = geocode(names=['foo']) \ + .states(['bar']) \ + .where(name='foo', state='bar', closest_to=shapely.geometry.Point(1, 2)) \ + ._build_request() + + assert_that(request) \ + .has_query(QueryMatcher() + .with_name('foo') + .state(eq_map_region_with_name('bar')) + 
.ambiguity_resolver(eq(AmbiguityResolver(closest_coord=GeoPoint(1, 2)))) + ) + + +@mock.patch.object(GeocodingService, 'do_request', lambda self, reqest: SuccessResponse( + message='', + level=LevelKind.city, + answers=[ + Answer( + features=[ + GeocodedFeature( + id='foo_id', + name='foo', + centroid=GeoPoint(1, 2) + ) + ]) + ] +)) +def test_where_closest_to_region(): + request = geocode(names=['foo']) \ + .states(['bar']) \ + .where(name='foo', state='bar', closest_to=make_simple_region('foo', 'foo_id')) \ + ._build_request() + + + assert_that(request) \ + .has_query(QueryMatcher() + .with_name('foo') + .state(eq_map_region_with_name('bar')) + .ambiguity_resolver(eq(AmbiguityResolver(closest_coord=GeoPoint(1, 2)))) + ) + + +@mock.patch.object(GeocodingService, 'do_request', lambda self, reqest: SuccessResponse( + message='', + level=LevelKind.city, + answers=[] +)) +def test_select_all_query_with_empty_result_should_return_empty_dataframe(): + geocoder = geocode_cities().scope('foo') + + geocodes = geocoder.get_geocodes() + assert 0 == len(geocodes) + + centroids = geocoder.get_centroids() + assert 0 == len(centroids) + + +@mock.patch.object(GeocodingService, 'do_request', lambda self, reqest: SuccessResponse( + message='', + level=LevelKind.state, + answers=[ + Answer( + features=[ + GeocodedFeature(id='foo_id', name='foo'), + GeocodedFeature(id='bar_id', name='bar'), + GeocodedFeature(id='baz_id', name='baz'), + ] + ) + ] +)) +def test_for_us48_request_should_contain_feature_name(): + states = geocode_states('us-48') + + assert_row( + states.get_geocodes(), + names=['foo', 'bar', 'baz'], + found_name=['foo', 'bar', 'baz'] + ) + + +@mock.patch.object(GeocodingService, 'do_request', lambda self, reqest: SuccessResponse( + message='', + level=LevelKind.city, + answers=[ + Answer( + features=[ + GeocodedFeature(id='foo1_id', name='Foo'), + GeocodedFeature(id='foo2_id', name='Foo'), + GeocodedFeature(id='foo3_id', name='Fooo'), + ] + ) + ] +)) +def 
test_allow_ambiguous_result_should_keep_request(): + cities = geocode_cities('foo') + + assert_row( + cities.get_geocodes(), + names=['foo', 'foo', 'foo'], + found_name=['Foo', 'Foo', 'Fooo'] + ) + + +def test_allow_ambiguous(): + request = geocode(names='foo')\ + .allow_ambiguous()\ + ._build_request() + + assert_that(request) \ + .has_query(QueryMatcher() + .with_name('foo') + .ambiguity_resolver(eq(AmbiguityResolver(ignoring_strategy=IgnoringStrategyKind.take_namesakes))) + ) + + +def test_allow_ambiguous_and_closest_to(): + request = geocode(names=['foo', 'bar'])\ + .where('foo', closest_to=shapely.geometry.Point(1, 2))\ + .allow_ambiguous()\ + ._build_request() + + assert_that(request) \ + .allows_ambiguous()\ + .has_query(QueryMatcher() + .with_name('foo') + .ambiguity_resolver(eq(AmbiguityResolver(closest_coord=GeoPoint(1, 2)))) + ) \ + .has_query(QueryMatcher() + .with_name('bar') + .ambiguity_resolver(eq(AmbiguityResolver(ignoring_strategy=IgnoringStrategyKind.take_namesakes))) + ) + + +def test_global_scope(): + # scope should be applied to whole request, not to queries + + builder: NamesGeocoder = geocode(names='foo') + + # single str scope + assert_that(builder.scope('bar')) \ + .has_scope(ScopeMatcher().with_names(['bar'])) \ + .has_query(QueryMatcher().with_name('foo').scope(empty())) + + # single regions scope + assert_that(builder.scope(make_simple_region('bar', 'bar_id'))) \ + .has_scope(ScopeMatcher().with_ids(['bar_id'])) \ + .has_query(QueryMatcher().with_name('foo').scope(empty())) + + +def test_request_without_name(): + assert_that(geocode(level='county').states('New York')) \ + .has_level(eq(LevelKind.county)) \ + .has_query(QueryMatcher() + .with_name(None) + .state(eq_map_region_with_name('New York')) + ) + + +def test_request_us_48_in_scope(): + assert_that(geocode(level='state').scope('us-48'))\ + .has_scope(ScopeMatcher().with_names(['us-48']))\ + .has_query(QueryMatcher() + .with_name(None) + .scope(empty()) + ) + + +def 
test_request_us_48_in_name(): + assert_that(geocode(level='state', names='us-48'))\ + .has_scope(ScopeMatcher().empty())\ + .has_query(QueryMatcher() + .with_name('us-48') + .scope(empty()) + ) + + +def test_request_countries(): + assert_that(geocode_countries()) \ + .has_level(eq(LevelKind.country))\ + .has_query(QueryMatcher().with_name(None)) + + +def test_request_countries_with_empty_names_list(): + assert_that(geocode_countries([])) \ + .has_level(eq(LevelKind.country))\ + .has_query(QueryMatcher().with_name(None)) + + +def test_request_scope_and_parent_county(): + assert_that(geocode_cities('foo_city').counties('foo_county').scope('foo_country'))\ + .has_level(eq(LevelKind.city))\ + .has_scope(ScopeMatcher().with_names(['foo_country'])) \ + .has_query(QueryMatcher() + .with_name('foo_city') + .county(eq_map_region_with_name('foo_county')) + ) + + + + +def test_error_when_country_and_scope_set_should_show_error(): + # scope can't work with given country parent. + check_validation_error( + "Invalid request: countries and scope can't be used simultaneously", + lambda: geocode(names='foo').countries('bar').scope('baz') + ) + + +def test_error_when_names_and_parents_have_different_size(): + check_validation_error( + 'Invalid request: countries count(2) != names count(1)', + lambda: geocode(names='foo').countries(['bar', 'baz']) + ) + + check_validation_error( + 'Invalid request: states count(2) != names count(1)', + lambda: geocode(names='foo').states(['bar', 'baz']) + ) + + check_validation_error( + 'Invalid request: counties count(2) != names count(1)', + lambda: geocode(names='foo').counties(['bar', 'baz']) + ) + + +def test_error_where_scope_len_is_invalid(): + check_validation_error( + "Unsupported 'scope' type. 
Expected 'str' or 'Geocoder' but was 'list'", + lambda: geocode(names='foo').where('foo', scope=['bar', 'baz']) + ) + + +def test_error_for_where_with_unknown_name(): + check_validation_error( + "bar is not found in names", + lambda: geocode(names='foo').where('bar', scope='baz') + ) + + +def test_error_for_where_with_unknown_name_and_parents(): + check_validation_error( + "bar(country=baz) is not found in names", + lambda: geocode(names='foo').where('bar', country='baz', scope='spam') + ) + +def test_error_multi_entries_map_region_in_scope(): + check_validation_error( + "'scope' has 2 entries, but expected to have exactly 1", + lambda : geocode(names='foo').where('foo', scope=make_simple_region(['bar', 'baz'], ['bar_id', 'baz_id'])) + ) + +def test_error_multi_entries_map_region_scope_in_request(): + check_validation_error( + "'scope' has 2 entries, but expected to have exactly 1", + lambda : geocode(names='foo').scope(make_simple_region(['bar', 'baz'], ['bar_id', 'baz_id'])) + ) + +def test_error_list_scopein_request(): + check_validation_error( + "Unsupported 'scope' type. 
Expected 'str' or 'Geocoder' but was 'list'", + lambda : geocode(names='foo').scope(['bar', 'baz']) + ) + +def test_parents_always_positional(): + check_validation_error( + "Invalid request: countries count(1) != names count(2)", + lambda : geocode(names=['foo', 'bar']).countries('baz') + ) + + + +def make_simple_region(requests: Union[str, List[str]], geo_object_ids: Union[str, List[str]] = None, level_kind: LevelKind = LevelKind.county) -> Geocodes: + requests = requests if isinstance(requests, (list, tuple)) else [requests] + geo_object_ids = geo_object_ids if geo_object_ids is not None else [request + '_id' for request in requests] + geo_object_ids = geo_object_ids if isinstance(geo_object_ids, (list, tuple)) else [geo_object_ids] + + queries = [] + answers = [] + for request, id in zip(requests, geo_object_ids): + queries.append(RegionQuery(request=request)) + answers.append(make_answer(request, id, [])) + + return Geocodes(level_kind, answers, queries) + + +def no_parents(request: ValueMatcher[Optional[str]], + scope: ValueMatcher[Optional[MapRegion]] = empty(), + ambiguity_resolver: ValueMatcher[AmbiguityResolver] = eq(AmbiguityResolver.empty()) + ) -> QueryMatcher: + return QueryMatcher(name=request, scope=scope, ambiguity_resolver=ambiguity_resolver, + country=empty(), state=empty(), county=empty()) + + +def assert_that(request: Union[NamesGeocoder, GeocodingRequest]) -> GeocodingRequestAssertion: + if isinstance(request, NamesGeocoder): + return GeocodingRequestAssertion(request._build_request()) + elif isinstance(request, GeocodingRequest): + return GeocodingRequestAssertion(request) + else: + raise ValueError('Expected types are [RegionsBuilder2, GeocodingRequest], but was {}', str(type(request))) + + +def check_validation_error(message: str, get_builder: Callable[[], NamesGeocoder]): + assert isinstance(message, str) + try: + get_builder()._build_request() + assert False, 'Validation error expected' + except Exception as e: + assert message == str(e) 
diff --git a/python-package/test/geo_data/test_geocoder_in_geom.py b/python-package/test/geo_data/test_geocoder_in_geom.py new file mode 100644 index 00000000000..4e0c4ebcd6a --- /dev/null +++ b/python-package/test/geo_data/test_geocoder_in_geom.py @@ -0,0 +1,214 @@ +# Copyright (c) 2020. JetBrains s.r.o. +# Use of this source code is governed by the MIT license that can be found in the LICENSE file. +import pandas +import pytest +from geopandas import GeoDataFrame +from pandas import DataFrame +from shapely.geometry import Point, Polygon, LinearRing, MultiPolygon + +from lets_plot._kbridge import _standardize_plot_spec +from lets_plot.geo_data import DF_COLUMN_CITY, DF_COLUMN_STATE +from lets_plot.geo_data.geocoder import Geocoder, LevelKind +from lets_plot.plot import ggplot, geom_polygon, geom_point, geom_map, geom_rect, geom_text, geom_path, geom_livemap +from .geo_data import get_map_data_meta, assert_error + + +def geo_data_frame(geometry, columns=[]): + data = { key: None for key in columns } + data['coord'] = geometry + return GeoDataFrame( + data=data, + geometry='coord' + ) + +def get_map(plot_spec) -> dict: + return _standardize_plot_spec(plot_spec.as_dict())['layers'][0]['map'] + +def get_map_join(plot_spec) -> dict: + return _standardize_plot_spec(plot_spec.as_dict())['layers'][0]['map_join'] + + +def get_data(plot_spec) -> dict: + return _standardize_plot_spec(plot_spec.as_dict())['layers'][0]['data'] + + +def assert_map_data_meta(plot_spec): + expected_map_data_meta = {'geodataframe': {'geometry': 'coord'}} + assert expected_map_data_meta == get_map_data_meta(plot_spec, 0) + + +def test_geom_path_raises_an_error(): + assert_error( + "Geocoding doesn't provide geometries supported by geom_path", + lambda: ggplot() + geom_path(map=mock_geocoder()) + ) + + +def test_geom_point_fetches_centroids(): + geocoder = mock_geocoder() + plot_spec = ggplot() + geom_point(map=geocoder) + + assert_map_data_meta(plot_spec) + assert geocoder.get_test_point_dict() == 
get_map(plot_spec) + + +def test_geom_polygon_fetches_boundaries(): + geocoder = mock_geocoder() + plot_spec = ggplot() + geom_polygon(map=geocoder) + + assert_map_data_meta(plot_spec) + assert geocoder.get_test_polygon_dict() == get_map(plot_spec) + + +def test_geom_map_fetches_boundaries(): + geocoder = mock_geocoder() + plot_spec = ggplot() + geom_map(map=geocoder) + + assert_map_data_meta(plot_spec) + assert geocoder.get_test_polygon_dict() == get_map(plot_spec) + + +def test_geom_rect_fetches_limits(): + geocoder = mock_geocoder() + plot_spec = ggplot() + geom_rect(map=geocoder) + + assert_map_data_meta(plot_spec) + assert geocoder.get_test_polygon_dict() == get_map(plot_spec) + + +def test_geom_text_fetches_centroids(): + geocoder = mock_geocoder() + plot_spec = ggplot() + geom_text(map=geocoder) + + assert_map_data_meta(plot_spec) + assert geocoder.get_test_point_dict() == get_map(plot_spec) + + +def test_geom_livemap_fetches_centroids(): + geocoder = mock_geocoder() + plot_spec = ggplot() + geom_livemap(map=geocoder) + + assert_map_data_meta(plot_spec) + assert geocoder.get_test_point_dict() == get_map(plot_spec) + + +def test_data_should_call_to_dataframe(): + geocoder = mock_geocoder() + plot_spec = ggplot() + geom_map(data=geocoder) + + geocoder.assert_get_geocodes_invocation() + assert geocoder.get_test_geocodes() == get_layer_spec(plot_spec, 'data') + +def get_layer_spec(plot_spec, spec_name): + return _standardize_plot_spec(plot_spec.as_dict())['layers'][0][spec_name] + +@pytest.mark.parametrize('map_join,map_columns,expected', [ + ( + 'City_Name', + [DF_COLUMN_CITY], + [['City_Name'], [DF_COLUMN_CITY]] + ), + ( # automatically use all names from map as multi-key + ['City_Name', 'State_Name'], + [DF_COLUMN_CITY, DF_COLUMN_STATE], + [['City_Name', 'State_Name'], [DF_COLUMN_CITY, DF_COLUMN_STATE]] + ), + ( # not all names were used for join + [['City_Name', 'State_Name'], [DF_COLUMN_CITY, DF_COLUMN_STATE]], + [DF_COLUMN_CITY, 'county', DF_COLUMN_STATE], 
+ [['City_Name', 'State_Name'], [DF_COLUMN_CITY, DF_COLUMN_STATE]] + ), + ( + None, + [DF_COLUMN_CITY, DF_COLUMN_STATE], + None + ), + ( + ['City_Name', 'State_Name'], + [DF_COLUMN_CITY], + "`map_join` expected to have (1) items, but was(2)" + ), + ( + 'City_Name', + [DF_COLUMN_CITY, DF_COLUMN_STATE], + "`map_join` expected to have (2) items, but was(1)" + ), + ( + 'City_Name', + [DF_COLUMN_CITY, DF_COLUMN_STATE], + "`map_join` expected to have (2) items, but was(1)" + ), +]) +def test_map_join_regions(map_join, map_columns, expected): + class MockGeocoder(Geocoder): + def get_centroids(self) -> 'GeoDataFrame': + return geo_data_frame([Point(-5, 17)], map_columns) + + def get_geocodes(self) -> pandas.DataFrame: + return pandas.DataFrame(columns=map_columns) + + geocoder = MockGeocoder() + + if not isinstance(expected, str): + plot_spec = ggplot() + geom_point(map_join=map_join, map=geocoder) + assert get_layer_spec(plot_spec, 'map_join') == expected + else: + assert_error( + expected, + lambda :ggplot() + geom_point(map_join=map_join, map=geocoder) + ) + + +def mock_geocoder() -> 'MockGeocoder': + point_gdf = geo_data_frame([Point(-5, 17)]) + point_dict = {'coord': ['{"type": "Point", "coordinates": [-5.0, 17.0]}']} + + polygon_gdf = geo_data_frame(MultiPolygon([ + Polygon(LinearRing([(11, 12), (13, 14), (15, 13), (7, 4)])), + Polygon(LinearRing([(10, 2), (13, 10), (12, 3)])) + ]) + ) + + polygon_dict = { + 'coord': [ + '{"type": "Polygon", "coordinates": [[[11.0, 12.0], [13.0, 14.0], [15.0, 13.0], [7.0, 4.0], [11.0, 12.0]]]}', '{"type": "Polygon", "coordinates": [[[10.0, 2.0], [13.0, 10.0], [12.0, 3.0], [10.0, 2.0]]]}' + ] + } + + class MockGeocoder(Geocoder): + def __init__(self): + self._get_geocodes_invoked = False + self._limits_fetched = False + self._centroids_fetched = False + self._boundaries_fetched = False + + def get_test_point_dict(self): + return point_dict + + def get_test_polygon_dict(self): + return polygon_dict + + def get_test_geocodes(self) -> 
dict: + return {'request': ['foo'], 'found name': ['FOO']} + + def get_geocodes(self): + self._get_geocodes_invoked = True + return DataFrame(self.get_test_geocodes()) + + def get_limits(self) -> GeoDataFrame: + self._limits_fetched = True + return polygon_gdf + + def get_centroids(self) -> GeoDataFrame: + self._centroids_fetched = True + return point_gdf + + def get_boundaries(self, resolution=None) -> GeoDataFrame: + self._boundaries_fetched = True + return polygon_gdf + + def assert_get_geocodes_invocation(self): + assert self._get_geocodes_invoked, 'to_data_frame() invocation expected, but not happened' + + + return MockGeocoder() diff --git a/python-package/test/geo_data/test_georect.py b/python-package/test/geo_data/test_georect.py index 8d602e95b41..459b20746a7 100644 --- a/python-package/test/geo_data/test_georect.py +++ b/python-package/test/geo_data/test_georect.py @@ -2,13 +2,11 @@ # Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
import pytest -from pandas import DataFrame -from lets_plot.geo_data.gis.response import GeoRect, FeatureBuilder -from lets_plot.geo_data import DF_FOUND_NAME, DF_ID, DF_REQUEST +from lets_plot.geo_data.gis.request import RegionQuery, LevelKind +from lets_plot.geo_data.gis.response import Answer, GeoRect, FeatureBuilder from lets_plot.geo_data.to_geo_data_frame import LimitsGeoDataFrame - -from .geo_data import NAME, FOUND_NAME, ID +from .geo_data import NAME, FOUND_NAME, ID, assert_row DEFAULT_LAT_MIN = 20 DEFAULT_LAT_MAX = 30 @@ -46,14 +44,18 @@ def make_rect(lon_min: float, lon_max: float) -> GeoRect: def data_frame(r: GeoRect): return LimitsGeoDataFrame().to_data_frame( - [ - FeatureBuilder() \ - .set_id(ID) \ - .set_query(NAME) \ - .set_name(FOUND_NAME) \ - .set_limit(r) \ - .build_geocoded() - ] + answers=[ + Answer([ + FeatureBuilder() \ + .set_id(ID) \ + .set_name(FOUND_NAME) \ + .set_limit(r) \ + .build_geocoded() + ] + ) + ], + queries=[RegionQuery(request=NAME)], + level_kind=LevelKind.city ) @@ -64,18 +66,3 @@ def assert_whole_rect(r: GeoRect, lon_min: float, lon_max: float): def assert_split_rect(r: GeoRect, lon_min: float, lon_max: float): assert_row(data_frame(r), 0, lon_min=lon_min, lon_max=180.) 
assert_row(data_frame(r), 1, lon_min=-180., lon_max=lon_max) - - -def assert_row(df: 'DataFrame', row: int, lon_min: float, lon_max: float): - assert NAME == df[DF_REQUEST][row] - assert FOUND_NAME == df[DF_FOUND_NAME][row] - # assert lon_min == df[DF_LONMIN][row] - # assert lon_max == df[DF_LONMAX][row] - # assert DEFAULT_LAT_MIN == df[DF_LATMIN][row] - # assert DEFAULT_LAT_MAX == df[DF_LATMAX][row] - - bounds = df.geometry[row].bounds - assert lon_min == bounds[0] - assert DEFAULT_LAT_MIN == bounds[1] - assert lon_max == bounds[2] - assert DEFAULT_LAT_MAX == bounds[3] \ No newline at end of file diff --git a/python-package/test/geo_data/test_integration_new_api.py b/python-package/test/geo_data/test_integration_new_api.py new file mode 100644 index 00000000000..7caac1309c0 --- /dev/null +++ b/python-package/test/geo_data/test_integration_new_api.py @@ -0,0 +1,317 @@ +# Copyright (c) 2020. JetBrains s.r.o. +# Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
+ +import pytest +from shapely.geometry import Point, box + +import lets_plot.geo_data as geodata +from lets_plot.geo_data import DF_COLUMN_FOUND_NAME, DF_COLUMN_ID, DF_COLUMN_COUNTRY, DF_COLUMN_STATE, DF_COLUMN_COUNTY +from .geo_data import assert_row, assert_error, NO_COLUMN, COLUMN_NAME_CITY +from .test_integration_with_geocoding_serever import TURN_OFF_INTERACTION_TEST + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_all_columns_order(): + boston = geodata.geocode_cities('boston').counties('suffolk').states('massachusetts').countries('usa') + assert boston.get_geocodes().columns.tolist() == [DF_COLUMN_ID, COLUMN_NAME_CITY, DF_COLUMN_FOUND_NAME, DF_COLUMN_COUNTY, + DF_COLUMN_STATE, DF_COLUMN_COUNTRY] + + gdf_columns = [COLUMN_NAME_CITY, DF_COLUMN_FOUND_NAME, DF_COLUMN_COUNTY, DF_COLUMN_STATE, DF_COLUMN_COUNTRY, 'geometry'] + assert boston.get_limits().columns.tolist() == gdf_columns + assert boston.get_centroids().columns.tolist() == gdf_columns + assert boston.get_boundaries().columns.tolist() == gdf_columns + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_do_not_add_unsued_parents_columns(): + moscow = geodata.geocode_cities('moscow').countries('russia') + + assert moscow.get_geocodes().columns.tolist() == [DF_COLUMN_ID, COLUMN_NAME_CITY, DF_COLUMN_FOUND_NAME, DF_COLUMN_COUNTRY] + + gdf_columns = [COLUMN_NAME_CITY, DF_COLUMN_FOUND_NAME, DF_COLUMN_COUNTRY, 'geometry'] + assert moscow.get_limits().columns.tolist() == gdf_columns + assert moscow.get_centroids().columns.tolist() == gdf_columns + assert moscow.get_boundaries().columns.tolist() == gdf_columns + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_parents_in_regions_object_and_geo_data_frame(): + boston = geodata.geocode_cities('boston').counties('suffolk').states('massachusetts').countries('usa') + + assert_row(boston.get_geocodes(), names='boston', county='suffolk', 
state='massachusetts', country='usa') + assert_row(boston.get_limits(), names='boston', county='suffolk', state='massachusetts', country='usa') + assert_row(boston.get_centroids(), names='boston', county='suffolk', state='massachusetts', country='usa') + assert_row(boston.get_boundaries(), names='boston', county='suffolk', state='massachusetts', country='usa') + + # antimeridian + ru = geodata.geocode(level='country', names='russia') + assert_row(ru.get_geocodes(), country='russia', city=NO_COLUMN, county=NO_COLUMN, state=NO_COLUMN) + assert_row(ru.get_limits(), country=['russia', 'russia'], city=NO_COLUMN, county=NO_COLUMN, state=NO_COLUMN) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_regions_parents_in_regions_object_and_geo_data_frame(): + # parent request from regions object should be propagated to resulting GeoDataFrame + massachusetts = geodata.geocode_states('massachusetts') + boston = geodata.geocode_cities('boston').states(massachusetts) + + assert_row(boston.get_geocodes(), names='boston', state='massachusetts', county=NO_COLUMN, country=NO_COLUMN) + assert_row(boston.get_centroids(), names='boston', state='massachusetts', county=NO_COLUMN, country=NO_COLUMN) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_list_of_regions_parents_in_regions_object_and_geo_data_frame(): + # parent request from regions object should be propagated to resulting GeoDataFrame + states = geodata.geocode_states(['massachusetts', 'texas']) + cities = geodata.geocode_cities(['boston', 'austin']).states(states) + + assert_row(cities.get_geocodes(), + names=['boston', 'austin'], + state=['massachusetts', 'texas'], + county=NO_COLUMN, + country=NO_COLUMN + ) + + assert_row(cities.get_geocodes(), + names=['boston', 'austin'], + state=['massachusetts', 'texas'], + county=NO_COLUMN, + country=NO_COLUMN + ) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def 
test_parents_lists(): + states = geodata.geocode_states(['texas', 'nevada']).countries(['usa', 'usa']) + + assert_row(states.get_geocodes(), + names=['texas', 'nevada'], + found_name=['Texas', 'Nevada'], + country=['usa', 'usa'] + ) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_with_drop_not_found(): + states = geodata.geocode_states(['texas', 'trololo', 'nevada']) \ + .countries(['usa', 'usa', 'usa']) \ + .drop_not_found() + + assert_row(states.get_geocodes(), names=['texas', 'nevada'], found_name=['Texas', 'Nevada'], country=['usa', 'usa']) + assert_row(states.get_centroids(), names=['texas', 'nevada'], found_name=['Texas', 'Nevada'], country=['usa', 'usa']) + assert_row(states.get_boundaries(), names=['texas', 'nevada'], found_name=['Texas', 'Nevada'], country=['usa', 'usa']) + assert_row(states.get_limits(), names=['texas', 'nevada'], found_name=['Texas', 'Nevada'], country=['usa', 'usa']) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_drop_not_found_with_namesakes(): + states = geodata.geocode_counties(['jefferson', 'trololo', 'jefferson']) \ + .states(['alabama', 'asd', 'arkansas']) \ + .countries(['usa', 'usa', 'usa']) \ + .drop_not_found() + + assert_row(states.get_geocodes(), + names=['jefferson', 'jefferson'], + found_name=['Jefferson County', 'Jefferson County'], + state=['alabama', 'arkansas'], + country=['usa', 'usa'] + ) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_simple_scope(): + florida_with_country = geodata.geocode( + 'state', + names=['florida', 'florida'], + countries=['Uruguay', 'usa'] + ).get_geocodes() + + assert florida_with_country[DF_COLUMN_ID][0] != florida_with_country[DF_COLUMN_ID][1] + + florida_with_scope = geodata.geocode( + 'state', + names=['florida'], + scope='Uruguay' + ).get_geocodes() + + assert florida_with_country[DF_COLUMN_ID][0] == florida_with_scope[DF_COLUMN_ID][0] + + 
+@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_where(): + worcester = geodata.geocode_cities('worcester').where('worcester', scope='massachusetts') + + assert_row(worcester.get_geocodes(), names='worcester', found_name='Worcester', id='3688419') + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_where_closest_to_point(): + worcester = geodata.geocode_cities('worcester').where('worcester', closest_to=Point(-71.00, 42.00)) + + assert_row(worcester.get_centroids(), lon=-71.8154652712922, lat=42.2678737342358) + assert_row(worcester.get_geocodes(), names='worcester', found_name='Worcester', id='3688419') + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_where_closest_to_regions(): + boston = geodata.geocode_cities('boston') + worcester = geodata.geocode_cities('worcester').where('worcester', closest_to=boston) + + assert_row(worcester.get_geocodes(), names='worcester', found_name='Worcester', id='3688419') + assert_row(worcester.get_centroids(), lon=-71.8154652712922, lat=42.2678737342358) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_where_scope(): + worcester = geodata.geocode_cities('worcester').where('worcester', scope=box(-71.00, 42.00, -72.00, 43.00)) + + assert_row(worcester.get_geocodes(), names='worcester', found_name='Worcester', id='3688419') + assert_row(worcester.get_centroids(), lon=-71.8154652712922, lat=42.2678737342358) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_where_west_warwick(): + warwick = geodata.geocode_cities('west warwick').states('rhode island') + + assert_row(warwick.get_geocodes(), names='west warwick', state='rhode island', found_name='West Warwick', id='382429') + assert_row(warwick.get_centroids(), lon=-71.5257788638961, lat=41.6969098895788) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need 
proper server ip') +def test_query_scope_with_different_level_should_work(): + geodata.geocode_cities(['moscow', 'worcester'])\ + .where('moscow', scope='russia')\ + .where('worcester', scope='massachusetts')\ + .get_geocodes() + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_error_with_scopeand_level_detection(): + assert_error( + "Region is not found: blablabla", + lambda: geodata.geocode(names='florida', scope='blablabla').get_geocodes() + ) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_city_with_ambiguous_county_and_scope(): + assert_error( + "Region is not found: worcester county", + lambda: geodata.geocode_cities('worcester').counties('worcester county').scope('usa').get_geocodes() + ) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_level_detection(): + geodata.geocode(names='boston', countries='usa').get_geocodes() + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_where_scope_with_existing_country(): + washington_county=geodata.geocode_counties('Washington county').states('iowa').countries('usa') + washington = geodata.geocode_cities('washington').countries('United States of America')\ + .where('washington', country='United States of America', scope=washington_county) + + assert_row(washington.get_geocodes(), names='washington', country='United States of America', found_name='Washington') + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_where_scope_with_existing_country_in_df(): + df = { + 'city': ['moscow', 'tashkent', 'washington'], + 'country': ['russia', 'uzbekistan', 'usa'] + } + + washington_county=geodata.geocode_counties('Washington county').states('iowa').countries('usa') + cities = geodata.geocode_cities(df['city']).countries(df['country'])\ + .where('washington', country='usa', scope=washington_county) + + 
assert_row(cities.get_geocodes(), index=2, names='washington', country='usa', found_name='Washington') + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_scope_with_level_detection_should_work(): + florida_uruguay = geodata.geocode(names='florida', scope='uruguay').get_geocodes()[DF_COLUMN_ID][0] + florida_usa = geodata.geocode(names='florida', scope='usa').get_geocodes()[DF_COLUMN_ID][0] + assert florida_usa != florida_uruguay, 'florida_usa({}) != florida_uruguay({})'.format(florida_usa, florida_uruguay) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_fetch_all_countries(): + countries = geodata.geocode_countries() + df = countries.get_geocodes() + assert len(df) == 217 + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_fetch_all_counties_by_state(): + geodata.geocode_counties().states('New York').get_geocodes() + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_duplications_in_filter_should_preserve_order(): + states = geodata.geocode_states(['Texas', 'TX', 'Arizona', 'Texas']).get_geocodes() + assert_row( + states, + names=['Texas', 'TX', 'Arizona', 'Texas'], + found_name=['Texas', 'Texas', 'Arizona', 'Texas'] + ) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_select_all_query_with_empty_result_should_return_empty_dataframe(): + geocoder = geodata.geocode_counties().scope('Norway') + + geocodes = geocoder.get_geocodes() + assert 0 == len(geocodes) + + centroids = geocoder.get_centroids() + assert 0 == len(centroids) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_none_parents_at_diff_levels(): + warwick = geodata.geocode_cities('warwick').states('georgia').get_geocodes() + worcester = geodata.geocode_cities('worcester').countries('uk').get_geocodes() + + cities = geodata.geocode_cities(['warwick', 
'worcester'])\ + .states(['Georgia', None])\ + .countries([None, 'United Kingdom'])\ + .get_geocodes() + + assert_row( + cities, + names=['warwick', 'worcester'], + id=[warwick.id[0], worcester.id[0]] + ) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_where_with_parent(): + washington_county=geodata.geocode_counties('Washington county').states('Vermont').countries('usa') + geodata.geocode_cities(['worcester', 'worcester']) \ + .countries(['usa', 'Great Britain']) \ + .where('worcester', country='usa', scope=washington_county) \ + .get_geocodes() + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_counties(): + counties = [] + states = [] + + for state in geodata.geocode_states("us-48").get_geocodes()['found name']: + for county in geodata.geocode_counties().states(state).scope('usa').get_geocodes()['found name']: + states.append(state) + counties.append(county) + + geocoded_counties = geodata.geocode_counties(counties).states(states).scope('usa').get_boundaries('country') + + assert_row(geocoded_counties, names=counties) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_request_in_ambiguous_df(): + warwick = geodata.geocode_cities('warwick').allow_ambiguous().get_geocodes() + + assert_row(warwick, names='warwick', found_name='Warwick') \ No newline at end of file diff --git a/python-package/test/geo_data/test_integration_with_geocoding_serever.py b/python-package/test/geo_data/test_integration_with_geocoding_serever.py index b26ae952a14..418bc797ac0 100644 --- a/python-package/test/geo_data/test_integration_with_geocoding_serever.py +++ b/python-package/test/geo_data/test_integration_with_geocoding_serever.py @@ -1,16 +1,14 @@ # Copyright (c) 2020. JetBrains s.r.o. # Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
-import os -from typing import List - import pytest import shapely -from pandas import DataFrame from shapely.geometry import Point import lets_plot.geo_data as geodata -from lets_plot.geo_data import DF_FOUND_NAME, DF_ID, DF_REQUEST +from lets_plot.geo_data import DF_COLUMN_FOUND_NAME +from .geo_data import run_intergration_tests, assert_row, assert_error, get_request_column_name, \ + assert_request_and_found_name_are_equal ShapelyPoint = shapely.geometry.Point @@ -18,86 +16,8 @@ NYC_ID = '351811' -def run_intergration_tests() -> bool: - if 'RUN_GEOCODING_INTEGRATION_TEST' in os.environ.keys(): - return os.environ.get('RUN_GEOCODING_INTEGRATION_TEST').lower() == 'true' - return False - - -def use_local_server(): - old = os.environ.copy() - os.environ.update({'GEOSERVER_URL': 'http://localhost:3012', **old}) - - -def assert_found_names(df: DataFrame, names: List[str]): - assert names == df[DF_FOUND_NAME].tolist() - - -def assert_row(df: DataFrame, request: str = None, found_name: str = None, index=0, id=None, lon=None, lat=None): - if request is not None: - assert df[DF_REQUEST][index] == request - - if found_name is not None: - assert df[DF_FOUND_NAME][index] == found_name - - if id is not None: - assert df[DF_ID][index] == id - - if lon is not None: - actual_lon = ShapelyPoint(df.geometry[index]).x - assert actual_lon == lon - - if lat is not None: - actual_lat = ShapelyPoint(df.geometry[index]).y - assert actual_lat == lat - - TURN_OFF_INTERACTION_TEST = not run_intergration_tests() -DO_NOT_DROP = False -NO_ERROR = None -NOT_FOUND = None - - -@pytest.mark.parametrize('address,drop_not_found,found,error', [ - pytest.param(['NYC, NY', 'Dallas, TX'], DO_NOT_DROP, ['New York City', 'Dallas'], NO_ERROR), - pytest.param(['NYC, NY', 'foobar, barbaz'], DO_NOT_DROP, NOT_FOUND, 'No objects were found for barbaz.\n'), -]) -@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') -def test_missing_address(address, drop_not_found, found, error): - # 
use_local_server() - builder = geodata.regions_builder(level='city', request=address, within='usa') - if drop_not_found: - builder.drop_not_found() - - if error is not None: - try: - builder.build() - except ValueError as e: - assert str(e).startswith(error) - else: - r = builder.build() - assert_found_names(r.to_data_frame(), found) - - -NO_LEVEL = None -NO_REGION = None - - -@pytest.mark.parametrize('address,level,region,expected_name', [ - pytest.param('moscow, Latah County, Idaho, USA', NO_LEVEL, NO_REGION, 'Moscow'), - # TODO: CHECK - pytest.param('richmond, virginia, usa', NO_LEVEL, NO_REGION, 'Richmond City'), - # TODO: CHECK - pytest.param('richmond, virginia, usa', 'county', NO_REGION, 'Richmond County'), - pytest.param('NYC, usa', NO_LEVEL, NO_REGION, 'New York City'), - pytest.param('NYC, NY', NO_LEVEL, 'usa', 'New York City'), - pytest.param('dallas, TX', NO_LEVEL, NO_REGION, 'Dallas'), - pytest.param('moscow, russia', NO_LEVEL, NO_REGION, 'Москва'), -]) -@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') -def test_address_request(address, level, region, expected_name): - r = geodata.regions(request=address, level=level, within=region) - assert_row(r.to_data_frame(), found_name=expected_name) - MOSCOW_LON = 37.620393 MOSCOW_LAT = 55.753960 @@ -110,42 +30,42 @@ def test_address_request(address, level, region, expected_name): ]) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_reverse_moscow(level, expected_name): - r = geodata.regions_xy(lon=MOSCOW_LON, lat=MOSCOW_LAT, level=level) - assert_row(r.to_data_frame(), found_name=expected_name) + r = geodata.reverse_geocode(lon=MOSCOW_LON, lat=MOSCOW_LAT, level=level) + assert_row(r.get_geocodes(), found_name=expected_name) @pytest.mark.parametrize('geometry_getter', [ - pytest.param(lambda regions_obj: regions_obj.centroids(), id='centroids()'), - pytest.param(lambda regions_obj: regions_obj.limits(), id='limits()'), - pytest.param(lambda 
regions_obj: regions_obj.boundaries(5), id='boundaries(5)'), - pytest.param(lambda regions_obj: regions_obj.boundaries(), id='boundaries()') + pytest.param(lambda regions_obj: regions_obj.get_centroids(), id='centroids()'), + pytest.param(lambda regions_obj: regions_obj.get_limits(), id='limits()'), + pytest.param(lambda regions_obj: regions_obj.get_boundaries(5), id='boundaries(5)'), + pytest.param(lambda regions_obj: regions_obj.get_boundaries(), id='boundaries()') ]) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_name_columns(geometry_getter): request = 'boston' found_name = 'Boston' - boston = geodata.regions_city(request) + boston = geodata.geocode_cities(request) - assert_row(boston.to_data_frame(), request=request, found_name=found_name) - assert_row(geometry_getter(boston), request=request, found_name=found_name) + assert_row(boston.get_geocodes(), names=request, found_name=found_name) + assert_row(geometry_getter(boston), names=request, found_name=found_name) @pytest.mark.parametrize('geometry_getter', [ - pytest.param(lambda regions_obj: regions_obj.centroids(), id='centroids()'), - pytest.param(lambda regions_obj: regions_obj.limits(), id='limits()'), - pytest.param(lambda regions_obj: regions_obj.boundaries(5), id='boundaries(5)'), - pytest.param(lambda regions_obj: regions_obj.boundaries(), id='boundaries()') + pytest.param(lambda regions_obj: regions_obj.get_centroids(), id='centroids()'), + pytest.param(lambda regions_obj: regions_obj.get_limits(), id='limits()'), + pytest.param(lambda regions_obj: regions_obj.get_boundaries(5), id='boundaries(5)'), + pytest.param(lambda regions_obj: regions_obj.get_boundaries(), id='boundaries()') ]) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_empty_request_name_columns(geometry_getter): - request = 'Missouri' - found_name = 'Missouri' + request = 'Vermont' + found_name = 'Vermont' - states = geodata.regions_state('us-48') + states 
= geodata.geocode_states('us-48') - assert_row(states.to_data_frame(), request=request, found_name=found_name) - assert_row(geometry_getter(states), request=request, found_name=found_name) + assert_row(states.get_geocodes(), names=request, found_name=found_name) + assert_row(geometry_getter(states), names=request, found_name=found_name) BOSTON_LON = -71.057083 @@ -161,22 +81,22 @@ def test_empty_request_name_columns(geometry_getter): ]) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_reverse_geocoding_of_list_(lons, lats): - r = geodata.regions_xy(lons, lats, 'city') - assert_row(r.to_data_frame(), index=0, request='[-71.057083, 42.361145]', found_name='Boston') - assert_row(r.to_data_frame(), index=1, request='[-73.935242, 40.730610]', found_name='New York City') + r = geodata.reverse_geocode(lons, lats, 'city') + assert_row(r.get_geocodes(), index=0, names='[-71.057083, 42.361145]', found_name='Boston') + assert_row(r.get_geocodes(), index=1, names='[-73.935242, 40.73061]', found_name='New York') @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_reverse_geocoding_of_nyc(): - r = geodata.regions_xy(NYC_LON, NYC_LAT, 'city') + r = geodata.reverse_geocode(NYC_LON, NYC_LAT, 'city') - assert_row(r.to_data_frame(), found_name='New York City') + assert_row(r.get_geocodes(), found_name='New York') @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_reverse_geocoding_of_nothing(): try: - geodata.regions_xy(-30.0, -30.0, 'city') + geodata.reverse_geocode(-30.0, -30.0, 'city').get_geocodes() except ValueError as e: assert str(e).startswith('No objects were found for [-30.000000, -30.000000].\n') return @@ -191,91 +111,88 @@ def test_reverse_geocoding_of_nothing(): @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_only_one_sevastopol(): - sevastopol = geodata.regions_xy(SEVASTOPOL_LON, SEVASTOPOL_LAT, 'city') + sevastopol = 
geodata.reverse_geocode(SEVASTOPOL_LON, SEVASTOPOL_LAT, 'city') - assert_row(sevastopol.to_data_frame(), id=SEVASTOPOL_ID) + assert_row(sevastopol.get_geocodes(), id=SEVASTOPOL_ID) -WARWICK_LON = -71.4332743004962 -WARWICK_LAT = 41.7155512422323 +WARWICK_LON = -71.4332938210472 +WARWICK_LAT = 41.715542525053 WARWICK_ID = '785807' @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') -def test_ambiguity_near_boston_by_name(): - r = geodata.regions_builder( +def test_ambiguity_closest_to_boston_by_name(): + r = geodata.geocode( level='city', - request='Warwick' + names='Warwick' ) \ - .where('Warwick', near=geodata.regions_city('boston')) \ - .build() + .where('Warwick', closest_to=geodata.geocode_cities('boston')) - assert_row(r.to_data_frame(), id=WARWICK_ID, found_name='Warwick') - assert_row(r.centroids(), lon=WARWICK_LON, lat=WARWICK_LAT) + assert_row(r.get_geocodes(), id=WARWICK_ID, found_name='Warwick') + assert_row(r.get_centroids(), lon=WARWICK_LON, lat=WARWICK_LAT) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') -def test_ambiguity_near_boston_by_coord(): - r = geodata.regions_builder( +def test_ambiguity_closest_to_boston_by_coord(): + r = geodata.geocode( level='city', - request='Warwick' + names='Warwick' ) \ - .where('Warwick', near=ShapelyPoint(BOSTON_LON, BOSTON_LAT)) \ - .build() + .where('Warwick', closest_to=ShapelyPoint(BOSTON_LON, BOSTON_LAT)) - assert_row(r.to_data_frame(), id=WARWICK_ID, found_name='Warwick') - assert_row(r.centroids(), lon=WARWICK_LON, lat=WARWICK_LAT) + assert_row(r.get_geocodes(), id=WARWICK_ID, found_name='Warwick') + assert_row(r.get_centroids(), lon=WARWICK_LON, lat=WARWICK_LAT) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') -def test_ambiguity_near_boston_by_box(): - boston = geodata.regions_city('boston').centroids().iloc[[0]] +def test_ambiguity_scope_boston_by_box(): + boston = 
geodata.geocode_cities('boston').get_centroids().iloc[[0]] buffer = 0.6 boston_centroid = ShapelyPoint(boston.geometry.x, boston.geometry.y) - r = geodata.regions_builder( + r = geodata.geocode( level='city', - request='Warwick' + names='Warwick' ) \ .where('Warwick', - within=shapely.geometry.box( + scope=shapely.geometry.box( boston_centroid.x - buffer, boston_centroid.y - buffer, boston_centroid.x + buffer, boston_centroid.y + buffer - )) \ - .build() + )) - assert_row(r.to_data_frame(), id=WARWICK_ID, found_name='Warwick') - assert_row(r.centroids(), lon=WARWICK_LON, lat=WARWICK_LAT) + assert_row(r.get_geocodes(), id=WARWICK_ID, found_name='Warwick') + assert_row(r.get_centroids(), lon=WARWICK_LON, lat=WARWICK_LAT) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_ambiguity_allow_ambiguous(): - r = geodata.regions_builder(level='city', request=['gotham', 'new york city', 'manchester']) \ + r = geodata.geocode_cities(['gotham', 'new york', 'manchester']) \ .allow_ambiguous() \ - .build() + .get_geocodes() - actual = r.to_data_frame()[DF_FOUND_NAME].tolist() - assert 28 == len(actual) # 1 New York City + 27 Manchester + actual = r[DF_COLUMN_FOUND_NAME].tolist() + assert 29 == len(actual) # 1 New York + 27 Manchester @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_ambiguity_drop_not_matched(): - r = geodata.regions_builder(level='city', request=['gotham', 'new york city', 'manchester']) \ + r = geodata.geocode_cities(['gotham', 'new york', 'manchester']) \ .drop_not_matched() \ - .build() + .get_geocodes() - actual = r.to_data_frame()[DF_FOUND_NAME].tolist() - assert ['New York City'] == actual + actual = r[DF_COLUMN_FOUND_NAME].tolist() + assert actual == ['New York'] @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_ambiguity_drop_not_found(): try: - r = geodata.regions_builder(level='city', request=['gotham', 'new york city', 'manchester']) \ + r 
= geodata.geocode_cities(['gotham', 'new york', 'manchester']) \ .drop_not_found() \ - .build() + .get_geocodes() except ValueError as ex: str(ex).startswith('Multiple objects (27) were found for manchester') return @@ -285,120 +202,83 @@ def test_ambiguity_drop_not_found(): @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_single_request_level_detection(): - r = geodata.regions_builder(request=['new york city', 'boston']) \ - .build() + r = geodata.geocode(names=['new york', 'boston']).scope('usa').get_geocodes() - assert [NYC_ID, BOSTON_ID] == r.to_data_frame().id.tolist() + assert r.id.tolist() == [NYC_ID, BOSTON_ID] @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_where_request_level_detection(): """ - where('new york', region=geodata.regions_state('new york')) gives county as first detected level - where('boston', region=geodata.regions_country('usa')) gives city as first detected level + where('new york', region=geodata.geocode_states('new york')) gives county as first detected level + where('boston', region=geodata.geocode_countries('usa')) gives city as first detected level But 'new york' also matches a city name so common level should be a city """ - r = geodata.regions_builder(request=['new york', 'boston']) \ - .where('new york', within=geodata.regions_state('new york')) \ - .where('boston', within=geodata.regions_country('usa')) \ - .build() - - assert [NYC_ID, BOSTON_ID] == r.to_data_frame().id.tolist() - - -@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') -def test_havana_new_york(): - try: - r = geodata.regions_builder(request=['havana', 'new york city']) \ - .where(request='havana', within=geodata.regions_country('cuba')) \ - .where(request='new york city', within=geodata.regions_state('new york')) \ - .build() - except ValueError as ex: - assert 'No objects were found for new york city.\n' == str(ex) - return + r = geodata.geocode(names=['new 
york', 'boston']) \ + .where('new york', scope=geodata.geocode_states('new york')) \ + .where('boston', scope=geodata.geocode_countries('usa')) \ + .get_geocodes() - assert False, 'Should throw exception' + assert [NYC_ID, BOSTON_ID] == r.id.tolist() @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_positional_regions(): - df = geodata.regions_city( - request=['york', 'york'], - within=[ - geodata.regions_state(['New York']), - geodata.regions_state(['Illinois']), - ] - ).to_data_frame() - - assert ['New York City', 'Little York'] == df['found name'].tolist() + df = geodata.geocode_cities(['york', 'york']).states(['New York', 'Illinois']).get_geocodes() - -@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') -def test_region_us48(): - df = geodata.regions_state(within='us-48').to_data_frame() - assert 49 == len(df['request'].tolist()) - for state in df.request: - assert len(state) > 0 - - -@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') -def test_filter_us48(): - df = geodata.regions_state(request='us-48').to_data_frame() - assert 49 == len(df['request'].tolist()) - for state in df.request: - assert len(state) > 0 + assert ['New York', 'Little York'] == df['found name'].tolist() @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_duplications(): - r1 = geodata.regions(request=['Virginia', 'West Virginia'], within='USA') - r1.centroids() + r1 = geodata.geocode(names=['Virginia', 'West Virginia'], scope='USA') + r1.get_centroids() @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_limits_request(): - print(geodata.regions(request='texas').limits()) + print(geodata.geocode(names='texas').get_limits()) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_centroids_request(): - print(geodata.regions(request='texas').centroids()) + 
print(geodata.geocode(names='texas').get_centroids()) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_polygon_boundaries_request(): - print(geodata.regions(request='colorado').boundaries(14)) + print(geodata.geocode(names='colorado').get_boundaries(14)) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_multipolygon_boundaries_request(): - assert geodata.regions(request='USA').boundaries(1) is not None + assert geodata.geocode(names='USA').get_boundaries(1) is not None @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_regions(): - map_regions = geodata.regions(level='country', request=['Russia', 'USA']) - map_regions.boundaries() - assert map_regions is not None + countries_geocoder = geodata.geocode(level='country', names=['Russia', 'USA']) + countries_geocoder.get_boundaries() + assert countries_geocoder is not None @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_mapregion(): - usa: geodata.Regions = geodata.regions_country(request='USA') - print(usa.centroids()) + usa = geodata.geocode_countries(names='USA') + print(usa.get_centroids()) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_geocoderegion_as_region(): - usa = geodata.regions_country(request=['usa']) + usa = geodata.geocode_countries(names=['usa']) states_list = ['NY', 'TX', 'NV'] - geodata.regions_state(request=states_list, within=usa) + geodata.geocode_states(names=states_list).scope(usa).get_geocodes() @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_error_response(): with pytest.raises(ValueError) as exception: - geodata.regions_country(request='blablabla').centroids() + geodata.geocode_countries(names='blablabla').get_centroids() assert 'No objects were found for blablabla.\n' == exception.value.args[0] @@ -406,111 +286,125 @@ def test_error_response(): 
@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_rows_order(): city_names = ['Boston', 'Phoenix', 'Tucson', 'Salt Lake City', 'Los Angeles', 'San Francisco'] - city_regions = geodata.regions_city(city_names, within='US') + city_regions = geodata.geocode_cities(city_names).scope('US') # create path preserving the order - df = city_regions.centroids() + df = city_regions.get_centroids() - df = df.set_index('request') + df = df.set_index(get_request_column_name(df)) df = df.reindex(city_names) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_new_server(): - c = geodata.regions_country(request='USA') - print(c.centroids()) + c = geodata.geocode_countries(names='USA') + print(c.get_centroids()) print(c) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_case(): - usa = geodata.regions_country(request=['usa']) - states_48 = geodata.regions_state(['us-48']) + usa = geodata.geocode_countries(names=['usa']) + states_48 = geodata.geocode_states(['us-48']) states_list = ['NY', 'TX', 'louisiana'] - states = geodata.regions_state(request=states_list, within=usa) + states = geodata.geocode_states(names=states_list).scope(usa) cities_list = ['New york', 'boston', 'la'] - t_cities = geodata.regions_city(request=cities_list, within=usa) + t_cities = geodata.geocode_cities(names=cities_list).scope(usa) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_ambiguous_not_found_with_level(): with pytest.raises(ValueError) as exception: - r = geodata.regions(request=['zimbabwe', 'moscow'], level='country') + r = geodata.geocode(names=['zimbabwe', 'moscow'], level='country').get_geocodes() assert 'No objects were found for moscow.\n' == exception.value.args[0] @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_order(): - bound = geodata.regions(request=['Russia', 'USA', 'France', 'Japan']) - df = 
bound.to_data_frame() - assert ['Russia', 'USA', 'France', 'Japan'] == df['request'].tolist() + bound = geodata.geocode(names=['Russia', 'USA', 'France', 'Japan']) + assert_row(bound.get_geocodes(), names=['Russia', 'USA', 'France', 'Japan']) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_resolution(): - r = geodata.regions(request=['monaco', ], level='country') + r = geodata.geocode(names=['monaco', ], level='country') sizes = [] for res in range(1, 16): - b = r.boundaries(res) - sizes.append(len(b['request'])) + b = r.get_boundaries(res) + sizes.append(len(b)) assert 15 == len(sizes) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') -def test_duplications_in_filter_should_preserve_order(): - df = geodata.regions(request=['Texas', 'TX', 'Arizona', 'Texas'], level='state').to_data_frame() - assert ['Texas', 'TX', 'Arizona', 'Texas'] == df['request'].tolist() +def test_should_copy_found_name_to_request_for_us48(): + df = geodata.geocode_states('us-48').get_geocodes() + + assert len(df) == 49 + assert_request_and_found_name_are_equal(df) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_us48_in_scope(): + df = geodata.geocode_states().scope('us-48').get_geocodes() + + assert 49 == len(df) + assert_request_and_found_name_are_equal(df) + + +@pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') +def test_us48_in_name_without_level(): + df = geodata.geocode(names='us-48').get_geocodes() + + assert 49 == len(df) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_duplication_with_us48(): - df = geodata.regions_state(request=['tx', 'us-48', 'tx']).to_data_frame() + df = geodata.geocode_states(names=['tx', 'us-48', 'tx']).get_geocodes() - assert 51 == len(df['request']) - assert_row(df, 'tx', 'Texas', 0) - assert_row(df, 'Missouri', 'Missouri', 1) - assert_row(df, 'tx', 'Texas', 50) + assert 51 == 
len(df) + assert_row(df, names='tx', found_name='Texas', index=0) + assert_row(df, names='Vermont', found_name='Vermont', index=1) + assert_row(df, names='tx', found_name='Texas', index=50) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') -def test_empty_request_to_data_frame(): - r = geodata.regions_city(within='orange county') - df = r.to_data_frame() - assert set(['Chapel Hill', 'Town of Carrboro', 'Carrboro', 'Hillsborough', 'Town of Carrboro', 'City of Durham']) == \ - set(df['request'].tolist()) +def test_empty_request_get_geocodes(): + orange_county = geodata.geocode_counties('orange county').scope('north carolina') + r = geodata.geocode_cities().scope(orange_county) + df = r.get_geocodes() + assert_request_and_found_name_are_equal(df) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_empty_request_centroid(): - r = geodata.regions_city(within='orange county') - df = r.centroids() - assert set(['Chapel Hill', 'Town of Carrboro', 'Carrboro', 'Hillsborough', 'Town of Carrboro', 'City of Durham']) == \ - set(df['request'].tolist()) + orange_county = geodata.geocode_counties('orange county').scope('north carolina') + r = geodata.geocode_cities().scope(orange_county) + df = r.get_centroids() + assert_request_and_found_name_are_equal(df) + @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_highlights(): - r = geodata.regions_builder(level='city', request='NY', highlights=True).build() - df = r.to_data_frame() - assert ['Peel'] == df['found name'].tolist() - assert [['Purt ny h-Inshey']] == df['highlights'].tolist() + r = geodata.geocode(level='city', names='NYC').highlights(True) + df = r.get_geocodes() + assert_row(df, found_name='New York') + assert df['highlights'].tolist() == [['NYC']] @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') def test_countries(): - assert 221 == len(geodata.regions_country().centroids().request) + df = 
geodata.geocode_countries().get_centroids() + assert 217 == len(df) @pytest.mark.skipif(TURN_OFF_INTERACTION_TEST, reason='Need proper server ip') -def test_incorrect_group_processing(): - c = geodata.regions_country().centroids() - c = list(c.request[141:142]) + list(c.request[143:144]) + list(c.request[136:137]) + list(c.request[114:134]) - print(c) - c = geodata.regions_country(c).centroids() - r = geodata.regions_country(c['request']) - boundaries: DataFrame = r.boundaries(resolution=10) - - assert 'group' not in boundaries.keys() +def test_not_found_scope(): + assert_error( + "Region is not found: blablabla", + lambda: geodata.geocode(names=['texas'], scope='blablabla').get_geocodes() + ) \ No newline at end of file diff --git a/python-package/test/geo_data/test_map_geodataframe.py b/python-package/test/geo_data/test_map_geodataframe.py index 761d5b1fd75..0837a9f7cde 100644 --- a/python-package/test/geo_data/test_map_geodataframe.py +++ b/python-package/test/geo_data/test_map_geodataframe.py @@ -3,7 +3,7 @@ import json -from geo_data.geo_data import get_data_meta, get_map_data_meta +from .geo_data import get_data_meta, get_map_data_meta from geopandas import GeoDataFrame from shapely.geometry import MultiPolygon, Polygon, LinearRing, Point, mapping diff --git a/python-package/test/geo_data/test_map_regions.py b/python-package/test/geo_data/test_map_regions.py index bb8a484d206..ed406fcaba6 100644 --- a/python-package/test/geo_data/test_map_regions.py +++ b/python-package/test/geo_data/test_map_regions.py @@ -2,14 +2,17 @@ # Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
from unittest import mock + import pytest +from lets_plot.geo_data.geocoder import Geocoder +from lets_plot.geo_data.geocodes import _coerce_resolution, _parse_resolution, Geocodes, Resolution from lets_plot.geo_data.gis.geocoding_service import GeocodingService -from lets_plot.geo_data.gis.request import ExplicitRequest, PayloadKind, LevelKind, RequestBuilder, RequestKind -from lets_plot.geo_data.gis.response import FeatureBuilder, GeoPoint -from lets_plot.geo_data.regions import _coerce_resolution, _parse_resolution, Regions, Resolution, DF_ID, DF_FOUND_NAME, DF_REQUEST -from lets_plot.plot import ggplot, geom_polygon -from .geo_data import make_region, make_success_response, get_map_data_meta +from lets_plot.geo_data.gis.request import ExplicitRequest, PayloadKind, LevelKind, RequestBuilder, RequestKind, \ + RegionQuery +from lets_plot.geo_data.gis.response import Answer, FeatureBuilder, GeoPoint +from .geo_data import make_success_response, features_to_queries, features_to_answers, assert_row, \ + assert_request_and_found_name_are_equal USA_REQUEST = 'united states' USA_NAME = 'USA' @@ -28,34 +31,28 @@ RESOLUTION = 12 -def assert_region_df(region_object, df, index=0): - assert region_object.query == df[DF_REQUEST][index] - assert region_object.id == df[DF_ID][index] - assert region_object.name == df[DF_FOUND_NAME][index] - - class TestMapRegions: def setup(self): self.foo_id = 'foo_id' self.foo_query = 'foo' self.foo_name = 'Foo' - self.foo = FeatureBuilder().set_query(self.foo_query).set_id(self.foo_id).set_name(self.foo_name) + self.foo: FeatureBuilder = FeatureBuilder().set_query(self.foo_query).set_id(self.foo_id).set_name(self.foo_name) self.bar_id = 'bar_id' self.bar_query = 'bar' self.bar_name = 'Bar' - self.bar = FeatureBuilder().set_query(self.bar_query).set_id(self.bar_id).set_name(self.bar_name) + self.bar: FeatureBuilder = FeatureBuilder().set_query(self.bar_query).set_id(self.bar_id).set_name(self.bar_name) self.baz_id = 'baz_id' self.baz_query = 
'baz' self.baz_name = 'Baz' - self.baz = FeatureBuilder().set_query(self.baz_query).set_id(self.baz_id).set_name(self.baz_name) + self.baz: FeatureBuilder = FeatureBuilder().set_query(self.baz_query).set_id(self.baz_id).set_name(self.baz_name) @mock.patch.object(GeocodingService, 'do_request') def test_boundaries(self, mock_request): try: - self.make_regions().boundaries(resolution=RESOLUTION) + self.make_geocoder().get_boundaries(resolution=RESOLUTION) except ValueError: pass # response doesn't contain proper feature with ids - ignore @@ -81,7 +78,7 @@ def test_parse_resolution(self, str, expected): @mock.patch.object(GeocodingService, 'do_request') def test_limits(self, mock_request): try: - self.make_regions().limits() + self.make_geocoder().get_limits() except ValueError: pass # response doesn't contain proper feature with ids - ignore @@ -95,7 +92,7 @@ def test_limits(self, mock_request): @mock.patch.object(GeocodingService, 'do_request') def test_centroids(self, mock_request): try: - self.make_regions().centroids() + self.make_geocoder().get_centroids() except ValueError: pass # response doesn't contain proper feature with ids - ignore @@ -107,38 +104,41 @@ def test_centroids(self, mock_request): ) def test_to_dataframe(self): - df = Regions( - LevelKind.city, - [ - self.foo.set_query('').set_id('123').build_geocoded(), - self.bar.set_query('').set_id('456').build_geocoded(), - ] + df = Geocodes( + level_kind=LevelKind.city, + queries=[RegionQuery(request='FOO'), RegionQuery(request='BAR')], + answers=features_to_answers([self.foo.build_geocoded(), self.bar.build_geocoded()]) ).to_data_frame() - assert [self.foo.name, self.bar.name] == df[DF_REQUEST].tolist() + assert_row(df, names=['FOO', 'BAR']) def test_as_list(self): - regions = Regions( - LevelKind.city, - [ - self.foo.build_geocoded(), - self.bar.build_geocoded() - ] + regions = Geocodes( + level_kind=LevelKind.city, + queries=features_to_queries([self.foo.build_geocoded(), self.bar.build_geocoded()]), 
+ answers=features_to_answers([self.foo.build_geocoded(), self.bar.build_geocoded()]) ).as_list() assert 2 == len(regions) - assert_region_df(self.foo, regions[0].to_data_frame()) - assert_region_df(self.bar, regions[1].to_data_frame()) + assert_row(regions[0].to_data_frame(), names=self.foo.name, id=self.foo.id, found_name=self.foo.name) + assert_row(regions[1].to_data_frame(), names=self.bar.name, id=self.bar.id, found_name=self.bar.name) @mock.patch.object(GeocodingService, 'do_request') - def test_df_request_when_query_is_empty_should_be_taken_from_found_name_column(self, mock_request): + def test_exploding_answers_to_data_frame_take_request_from_feature_name(self, mock_request): foo_id = '123' foo_name = 'foo' - geocoding_result = Regions( - LevelKind.city, - [ - FeatureBuilder().set_id(foo_id).set_query('').set_name(foo_name).build_geocoded() + + bar_id = '456' + bar_name = 'bar' + geocoding_result = Geocodes( + level_kind=LevelKind.city, + queries=[RegionQuery(request=None)], + answers=[ + Answer([ + FeatureBuilder().set_id(foo_id).set_name(foo_name).build_geocoded(), + FeatureBuilder().set_id(bar_id).set_name(bar_name).build_geocoded() + ]) ] ) @@ -146,7 +146,9 @@ def test_df_request_when_query_is_empty_should_be_taken_from_found_name_column(s .set_geocoded_features( [ FeatureBuilder().set_id(foo_id).set_query(foo_id).set_name(foo_name).set_centroid( - GeoPoint(0, 1)).build_geocoded() + GeoPoint(0, 1)).build_geocoded(), + FeatureBuilder().set_id(bar_id).set_query(bar_id).set_name(bar_name).set_centroid( + GeoPoint(2, 3)).build_geocoded(), ] ).build() @@ -155,22 +157,27 @@ def test_df_request_when_query_is_empty_should_be_taken_from_found_name_column(s mock_request.assert_called_with( RequestBuilder() \ .set_request_kind(RequestKind.explicit) - .set_ids([foo_id]) \ + .set_ids([foo_id, bar_id]) \ .set_requested_payload([PayloadKind.centroids]) \ .build() ) - assert foo_name == df[DF_REQUEST][0] + assert_request_and_found_name_are_equal(df) 
@mock.patch.object(GeocodingService, 'do_request') - def test_df_rows_order(self, mock_request): - - geocoding_result = Regions( - LevelKind.city, - [ - self.foo.set_query('').build_geocoded(), - self.bar.set_query('').build_geocoded(), - self.baz.set_query('').build_geocoded(), + def test_direct_answers_take_request_from_query(self, mock_request): + + geocoding_result = Geocodes( + level_kind=LevelKind.city, + queries=[ + RegionQuery(request='fooo'), + RegionQuery(request='barr'), + RegionQuery(request='bazz'), + ], + answers=[ + Answer([self.foo.set_query('').build_geocoded()]), + Answer([self.bar.set_query('').build_geocoded()]), + Answer([self.baz.set_query('').build_geocoded()]), ] ) @@ -193,7 +200,8 @@ def test_df_rows_order(self, mock_request): .build() ) - assert [self.foo.name, self.bar.name, self.baz.name] == df[DF_REQUEST].tolist() + assert_row(df, names=['fooo', 'barr', 'bazz']) + @mock.patch.object(GeocodingService, 'do_request') def test_df_rows_duplication_should_be_processed_correctly(self, mock_request): @@ -203,20 +211,21 @@ def test_df_rows_duplication_should_be_processed_correctly(self, mock_request): bar_id = '234' bar_name = 'bar' - geocoding_result = Regions( - LevelKind.city, - [ - self.foo.set_query('').build_geocoded(), - self.bar.set_query('').build_geocoded(), - self.foo.set_query('').build_geocoded() + geocoding_result = Geocodes( + level_kind=LevelKind.city, + queries=[RegionQuery('foo'), RegionQuery('bar'), RegionQuery('foo')], + answers=[ + Answer([self.foo.build_geocoded()]), + Answer([self.bar.build_geocoded()]), + Answer([self.foo.build_geocoded()]) ] ) mock_request.return_value = make_success_response() \ - .set_geocoded_features( + .set_answers( [ - self.foo.set_query(foo_id).set_centroid(GeoPoint(0, 1)).build_geocoded(), - self.bar.set_query(bar_id).set_centroid(GeoPoint(0, 1)).build_geocoded() + Answer([self.foo.set_query(foo_id).set_centroid(GeoPoint(0, 1)).build_geocoded()]), + 
Answer([self.bar.set_query(bar_id).set_centroid(GeoPoint(0, 1)).build_geocoded()]) ] ).build() @@ -230,51 +239,30 @@ def test_df_rows_duplication_should_be_processed_correctly(self, mock_request): .build() ) - assert [self.foo.name, self.bar.name, self.foo.name] == df[DF_REQUEST].tolist() + assert_row(df, names=['foo', 'bar', 'foo']) - # python invokes geocoding functions when Regions objects detected in map - # changed from previous version, where client invoked these functions - @mock.patch.object(GeocodingService, 'do_request') - def test_plot_should_have_geometries_when_regions_in_map_parameter(self, mock_request): - mock_request.return_value = make_success_response() \ - .set_geocoded_features( - [ - FeatureBuilder() \ - .set_query(USA_REQUEST) \ - .set_id(USA_ID) \ - .set_name(USA_NAME) \ - .set_boundary(GeoPoint(0, 1)) - .build_geocoded(), - FeatureBuilder() \ - .set_query(RUSSIA_REQUEST) \ - .set_id(RUSSIA_ID) \ - .set_name(RUSSIA_NAME) \ - .set_boundary(GeoPoint(0, 1)) - .build_geocoded() + def make_geocoder(self) -> Geocoder: + usa = FeatureBuilder() \ + .set_name(USA_NAME) \ + .set_id(USA_ID) \ + .set_highlights(USA_HIGHLIGHTS) \ + .build_geocoded() - ] - ).build() + russia = FeatureBuilder() \ + .set_name(RUSSIA_NAME) \ + .set_id(RUSSIA_ID) \ + .set_highlights(RUSSIA_HIGHLIGHTS) \ + .build_geocoded() - plotSpec = ggplot() + geom_polygon(map=self.make_regions()) - - # previous behaviour - # expected_map_data_meta = { - # 'georeference': {} - # } - - expected_map_data_meta = { - 'geodataframe': {'geometry': 'geometry'} - } + geocodes = Geocodes( + level_kind=LevelKind.country, + queries=features_to_queries([usa, russia]), + answers=features_to_answers([usa, russia]) + ) - assert expected_map_data_meta == get_map_data_meta(plotSpec, 0) + class StubGeocoder(Geocoder): + def _geocode(self) -> Geocodes: + return geocodes - def make_regions(self) -> Regions: - regions = Regions( - LevelKind.country, - [ - make_region(USA_REQUEST, USA_NAME, USA_ID, 
USA_HIGHLIGHTS), - make_region(RUSSIA_REQUEST, RUSSIA_NAME, RUSSIA_ID, RUSSIA_HIGHLIGHTS) - ] - ) - return regions + return StubGeocoder() diff --git a/python-package/test/geo_data/test_regions_builder.py b/python-package/test/geo_data/test_regions_builder.py deleted file mode 100644 index 50a09a4a685..00000000000 --- a/python-package/test/geo_data/test_regions_builder.py +++ /dev/null @@ -1,485 +0,0 @@ -# Copyright (c) 2020. JetBrains s.r.o. -# Use of this source code is governed by the MIT license that can be found in the LICENSE file. - -from collections import namedtuple -from typing import Optional, List, Union -from unittest import mock - -import shapely -from shapely.geometry import Point - -from lets_plot.geo_data import regions_builder, GeocodingService -from lets_plot.geo_data.gis.request import RegionQuery, MapRegion, MapRegionKind, IgnoringStrategyKind, \ - AmbiguityResolver -from lets_plot.geo_data.gis.response import FeatureBuilder, LevelKind, GeoPoint, GeoRect -from lets_plot.geo_data.regions import Regions -from lets_plot.geo_data.regions_builder import RegionsBuilder -from .geo_data import make_success_response - -Query = namedtuple('Query', 'name, region_id, region, feature') -ShapelyPoint = Point - - -def make_query(name: str, region_id: str) -> Query: - region_feataure = FeatureBuilder().set_query(name).set_name(name).set_id(region_id).build_geocoded() - return Query(name, region_id, MapRegion.with_ids([region_id]), region_feataure) - - -FOO = make_query('foo', 'foo_region') -BAR = make_query('bar', 'bar_region') -BAZ = make_query('baz', 'baz_region') -FOO_NAMESAKE = make_query(FOO.name, 'foo_namesake_region') - - -def feature(q: Query) -> FeatureBuilder: - return FeatureBuilder().set_id(q.region_id).set_query(q.name).set_name(q.name) - - -def test_ctor(): - actual = \ - regions_builder(request=names(FOO), within=single_region(FOO)) \ - ._get_queries() - expected = [query(FOO.name, FOO.region)] - assert expected == actual - - -def 
test_single_chaining_with_addition(): - actual = \ - regions_builder(request=names(FOO)) \ - .where(names(BAR), single_region(BAR)) \ - ._get_queries() - - expected = [ - query(FOO.name), - query(BAR.name, BAR.region) - ] - - assert expected == actual - - -def test_list_chaining_with_addition(): - actual = \ - regions_builder(request=names(FOO, BAR)) \ - .where(names(BAZ), single_region(BAZ)) \ - ._get_queries() - - expected = [ - query(FOO.name), - query(BAR.name), - query(BAZ.name, BAZ.region) - ] - - assert expected == actual - - -def test_single_chaining_with_overriding(): - actual = \ - regions_builder(request=names(FOO, BAR)) \ - .where(names(BAR), single_region(BAR)) \ - ._get_queries() - - expected = [ - query(FOO.name), - query(BAR.name, BAR.region) - ] - - assert expected == actual - - -def test_list_chaining_with_overriding(): - queries = \ - regions_builder(request=names(FOO, BAR, BAZ)) \ - .where(names(BAR), single_region(BAR)) \ - .where(names(BAZ), single_region(BAZ)) \ - ._get_queries() - - expected = [ - query(FOO.name), - query(BAR.name, BAR.region), - query(BAZ.name, BAZ.region) - ] - - assert expected == queries - - -def test_override_twice(): - actual = \ - regions_builder(request=names(FOO)) \ - .where(names(FOO), single_region(FOO)) \ - .where(names(FOO), 'foofoo_region') \ - ._get_queries() - - expected = [ - query(FOO.name, 'foofoo_region') - ] - - assert expected == actual - - -def test_with_regions(): - actual = \ - regions_builder( - request=names(FOO), - within=Regions(LevelKind.city, [FOO.feature])) \ - ._get_queries() - - expected = [ - query(FOO.name, FOO.region) - ] - - assert expected == actual - - -def test_countries_alike(): - assert [query()] == RegionsBuilder(level=LevelKind.country)._get_queries() - - -def test_us48_alike(): - actual = RegionsBuilder(level=LevelKind.state, scope='us-48')._get_queries() - expected = [query(request=None, scope=MapRegion.with_name('us-48'))] - assert expected == actual - - -def 
test_list_with_duplications(): - assert [query('foo'), query('foo')] == regions_builder(request=names(FOO, FOO))._get_queries() - - -def test_list_duplication_with_overriding(): - actual = \ - regions_builder(request=names(FOO, FOO)) \ - .where(names(FOO)) \ - ._get_queries() - - expected = [ - query(FOO.name), query(FOO.name) - ] - - assert expected == actual - - -def test_list_duplication_with_overriding_duplication(): - actual = \ - regions_builder(request=names(FOO, FOO)) \ - .where(names(FOO, FOO)) \ - ._get_queries() - - expected = [ - query(FOO.name), query(FOO.name) - ] - - assert expected == actual - - -def test_simple_positional(): - actual = \ - regions_builder( - request=names(FOO, BAR), - within=regions_list(FOO, BAR) - )._get_queries() - - expected = [ - query(FOO.name, FOO.region), - query(BAR.name, BAR.region) - ] - - assert expected == actual - - -def test_positional_ctor_with_duplicated_queries_and_different_regions(): - actual = \ - regions_builder( - request=names(FOO, FOO_NAMESAKE), - within=regions_list(FOO, FOO_NAMESAKE), - )._get_queries() - - expected = [ - query(FOO.name, FOO.region), - query(FOO_NAMESAKE.name, FOO_NAMESAKE.region) - ] - - assert expected == actual - - -def test_positional_with_duplicated_queries_and_regions(): - actual = regions_builder( - request=names(FOO, FOO), - within=regions_list(FOO, FOO) - )._get_queries() - - expected = [ - query(FOO.name, FOO.region), - query(FOO.name, FOO.region) - ] - - assert expected == actual - - -def test_positional_where_full_replace(): - actual = \ - regions_builder( - request=names(FOO, BAR) - ) \ - .where(names(FOO, BAR), regions_list(FOO, BAR)) \ - ._get_queries() - - expected = [ - query(FOO.name, FOO.region), - query(BAR.name, BAR.region) - ] - - assert expected == actual - - -def test_positional_where_partial_replace(): - actual = \ - regions_builder( - request=names(FOO, BAR, BAZ) - ) \ - .where(names(FOO, BAZ), regions_list(FOO, BAZ)) \ - ._get_queries() - - expected = [ - 
query(FOO.name, FOO.region), - query(BAR.name), - query(BAZ.name, BAZ.region) - ] - - assert expected == actual - - -def test_positional_multi_where_replace(): - actual = \ - regions_builder( - request=names(FOO, BAR, BAZ) - ) \ - .where(names(BAR), regions_list(BAR)) \ - .where(names(BAZ, FOO), regions_list(BAZ, FOO)) \ - ._get_queries() - - expected = [ - query(FOO.name, FOO.region), - query(BAR.name, BAR.region), - query(BAZ.name, BAZ.region) - ] - - assert expected == actual - - -def test_order_with_positional_where(): - actual = \ - regions_builder( - request=names(FOO, BAR, BAZ) - ) \ - .where(names(BAR), regions_list(BAR)) \ - .where(names(BAZ, FOO), regions_list(BAZ, FOO)) \ - ._get_queries() - - expected = [ - query(FOO.name, FOO.region), - query(BAR.name, BAR.region), - query(BAZ.name, BAZ.region) - ] - - assert expected == actual - - -def test_order_with_where(): - actual = \ - regions_builder( - request=names(FOO, BAR, BAZ) - ) \ - .where(names(BAR), regions_list(BAR)) \ - .where(names(BAZ, FOO), single_region(BAZ, FOO)) \ - ._get_queries() - - expected = [ - query(FOO.name, map_region([BAZ, FOO])), - query(BAR.name, BAR.region), - query(BAZ.name, map_region([BAZ, FOO])) - ] - - assert expected == actual - - -def test_only_default_ignoring_strategy(): - actual = \ - regions_builder(request=names(FOO, BAR)) \ - .allow_ambiguous() \ - ._get_queries() - - expected = [ - query(FOO.name, ignoring_strategy=IgnoringStrategyKind.take_namesakes), - query(BAR.name, ignoring_strategy=IgnoringStrategyKind.take_namesakes) - ] - - assert expected == actual - - -def test_empty_where_with_default_ignoring_strategy(): - actual = \ - regions_builder(request=names(FOO)) \ - .allow_ambiguous() \ - .where(names(BAR)) \ - ._get_queries() - - expected = [ - query(FOO.name, ignoring_strategy=IgnoringStrategyKind.take_namesakes), - query(BAR.name, ignoring_strategy=IgnoringStrategyKind.take_namesakes), - ] - - assert expected == actual - - -@mock.patch.object(GeocodingService, 
'do_request') -def test_near_with_default_ignoring_strategy(mock_request): - mock_request.return_value = make_success_response() \ - .set_geocoded_features( - [ - feature(BAZ).set_centroid(GeoPoint(1., 2.)).build_geocoded() - ] - ).build() - - actual = \ - regions_builder(request=names(FOO, BAR)) \ - .allow_ambiguous() \ - .where(names(BAR), near=single_region(BAZ)) \ - ._get_queries() - - expected = [ - query(FOO.name, ignoring_strategy=IgnoringStrategyKind.take_namesakes), - query(BAR.name, near=GeoPoint(1., 2.), ignoring_strategy=None) - ] - - assert expected == actual - - -@mock.patch.object(GeocodingService, 'do_request') -def test_near_to_region(mock_request): - mock_request.return_value = make_success_response() \ - .set_geocoded_features( - [ - feature(BAZ).set_centroid(GeoPoint(1, 2)).build_geocoded() - ] - ).build() - - actual = \ - regions_builder(request=names(FOO, BAR)) \ - .where(names(BAR), near=single_region(BAZ)) \ - ._get_queries() - - expected = [ - query(FOO.name), - query(BAR.name, near=GeoPoint(1, 2)) - ] - - assert expected == actual - - -def test_near_shapely_point(): - actual = \ - regions_builder(request=names(FOO, BAR)) \ - .where(names(BAR), near=ShapelyPoint(1., 2.)) \ - ._get_queries() - - expected = [ - query(FOO.name), - query(BAR.name, near=GeoPoint(1., 2.)) - ] - - assert expected == actual - - -def test_within_shapely_box(): - actual = \ - regions_builder(request=names(FOO, BAR)) \ - .where(names(BAR), within=shapely.geometry.box(0, 1, 2, 3)) \ - ._get_queries() - - expected = [ - query(FOO.name), - query(BAR.name, box=GeoRect(min_lon=0, min_lat=1, max_lon=2, max_lat=3)) - ] - - assert expected == actual - - -def test_empty(): - actual = \ - regions_builder()._get_queries() - - expected = [ - query() - ] - - assert expected == actual - - -def test_positional_empty(): - actual = \ - regions_builder(request=[])._get_queries() - - expected = [ - query() - ] - - assert expected == actual - - -def test_positional_wrong_size(): - try: - 
regions_builder(request=names(FOO, BAR))._get_queries() - except ValueError as e: - assert 'Length of filter and region is not equal' == str(e) - - -def test_controversy_positional_where_with_duplicated_queries_and_different_regions(): - # We have to add duplicated objects with properly set regions. It's imposible to fix it later - - actual = \ - regions_builder( - request=names(FOO, FOO_NAMESAKE) - ).where( - request=names(FOO, FOO_NAMESAKE), - within=regions_list(FOO, FOO_NAMESAKE) - )._get_queries() - - expected = [ - query(FOO.name, FOO_NAMESAKE.region), - query(FOO_NAMESAKE.name, FOO_NAMESAKE.region) - ] - - assert expected == actual - - -def query( - request: Optional[str] = None, - scope: Optional[Union[str, MapRegion]] = None, - ignoring_strategy: Optional[IgnoringStrategyKind] = None, - near: Optional[Union[str, GeoPoint]] = None, - box: Optional[GeoRect] = None) -> RegionQuery: - if isinstance(scope, MapRegion): - pass - elif isinstance(scope, str): - scope = MapRegion.with_name(scope) - else: - scope = None - - return RegionQuery(request, scope, AmbiguityResolver(ignoring_strategy, near, box)) - - -def map_region(queries: List[Query]): - return MapRegion(MapRegionKind.id, [query.region_id for query in queries]) - - -def names(*queries: Query) -> List[str]: - return [query.name for query in queries] - - -def single_region(*queries: Query) -> Regions: - return Regions(LevelKind.city, [query.feature for query in queries]) - - -def regions_list(*queries: Query) -> List[Regions]: - return [ - Regions(LevelKind.city, [query.feature]) for query in queries - ] diff --git a/python-package/test/geo_data/test_response_errors.py b/python-package/test/geo_data/test_response_errors.py index 023678cd1b9..6fb7f1fe41e 100644 --- a/python-package/test/geo_data/test_response_errors.py +++ b/python-package/test/geo_data/test_response_errors.py @@ -7,7 +7,7 @@ from lets_plot.geo_data.gis.response import Namesake, LevelKind, FeatureBuilder, NamesakeParent, AmbiguousFeature, \ 
AmbiguousResponse, ErrorResponse -from lets_plot.geo_data.regions import _create_multiple_error_message, _format_error_message +from lets_plot.geo_data.geocodes import _create_multiple_error_message, _format_error_message from .geo_data import ERROR_MESSAGE, make_ambiguous_response, make_error_response diff --git a/python-package/test/geo_data/test_to_geo_data_frame.py b/python-package/test/geo_data/test_to_geo_data_frame.py index 08a0c9a21df..2c33f57bee3 100644 --- a/python-package/test/geo_data/test_to_geo_data_frame.py +++ b/python-package/test/geo_data/test_to_geo_data_frame.py @@ -1,17 +1,20 @@ # Copyright (c) 2020. JetBrains s.r.o. # Use of this source code is governed by the MIT license that can be found in the LICENSE file. -from geo_data.geo_data import assert_names -from geopandas import GeoDataFrame +from geo_data.geo_data import features_to_answers, assert_row, FOUND_NAME, GEO_RECT_MIN_LON, GEO_RECT_MAX_LON, \ + GEO_RECT_MIN_LAT, GEO_RECT_MAX_LAT +from pandas import DataFrame +from lets_plot.geo_data.gis.request import RegionQuery, LevelKind from lets_plot.geo_data.gis.response import SuccessResponse, FeatureBuilder from lets_plot.geo_data.to_geo_data_frame import LimitsGeoDataFrame, CentroidsGeoDataFrame, BoundariesGeoDataFrame -from .geo_data import GJMultipolygon, GJPolygon, GJRing, lon, lat, NAME, \ - FOUND_NAME, CENTROID_LON, CENTROID_LAT, GEO_RECT_MIN_LON, GEO_RECT_MIN_LAT, GEO_RECT_MAX_LON, GEO_RECT_MAX_LAT, \ - assert_success_response, \ - make_success_response, make_limit_rect, make_centroid_point, polygon, ring, point, make_polygon_boundary, \ - multipolygon, \ - make_multipolygon_boundary, make_single_point_boundary, ID +from .geo_data import CENTROID_LON, CENTROID_LAT, GEO_RECT_MIN_LON, GEO_RECT_MIN_LAT, GEO_RECT_MAX_LON, GEO_RECT_MAX_LAT +from .geo_data import GJMultipolygon, GJPolygon +from .geo_data import ID, NAME, FOUND_NAME +from .geo_data import assert_success_response, assert_row, make_success_response +from .geo_data import 
feature_to_answer, features_to_answers, features_to_queries +from .geo_data import make_limit_rect, make_centroid_point, polygon, ring, point, make_polygon_boundary, multipolygon, \ + make_multipolygon_boundary, make_single_point_boundary NAMED_FEATURE_BUILDER = FeatureBuilder() \ .set_query(NAME) \ @@ -20,33 +23,54 @@ def test_requestless_boundaries(): - gdf = BoundariesGeoDataFrame().to_data_frame([ - FeatureBuilder() - .set_id(ID) - .set_name(FOUND_NAME) - .set_boundary(make_single_point_boundary()) # dummy geometry to not fail on None property - ]) - assert_names(gdf, 0, FOUND_NAME, FOUND_NAME) + gdf = BoundariesGeoDataFrame().to_data_frame( + answers=[ + feature_to_answer( + FeatureBuilder() + .set_id(ID) + .set_name(FOUND_NAME) + .set_boundary(make_single_point_boundary()) # dummy geometry to not fail on None property + .build_geocoded() + ) + ], + queries=[RegionQuery(request=FOUND_NAME)], + level_kind=LevelKind.city + ) + assert_row(gdf, names=FOUND_NAME, found_name=FOUND_NAME) def test_requestless_centroids(): - gdf = CentroidsGeoDataFrame().to_data_frame([ - FeatureBuilder() - .set_id(ID) - .set_name(FOUND_NAME) - .set_centroid(make_centroid_point()) - ]) - assert_names(gdf, 0, FOUND_NAME, FOUND_NAME) + gdf = CentroidsGeoDataFrame().to_data_frame( + answers=[ + feature_to_answer( + FeatureBuilder() + .set_id(ID) + .set_name(FOUND_NAME) + .set_centroid(make_centroid_point()) + .build_geocoded() + ) + ], + queries=[RegionQuery(request=FOUND_NAME)], + level_kind=LevelKind.city + ) + assert_row(gdf, names=FOUND_NAME, found_name=FOUND_NAME) def test_requestless_limits(): - gdf = LimitsGeoDataFrame().to_data_frame([ - FeatureBuilder() - .set_id(ID) - .set_name(FOUND_NAME) - .set_limit(make_limit_rect()) - ]) - assert_names(gdf, 0, FOUND_NAME, FOUND_NAME) + gdf = LimitsGeoDataFrame().to_data_frame( + answers=[ + feature_to_answer( + FeatureBuilder() + .set_id(ID) + .set_name(FOUND_NAME) + .set_limit(make_limit_rect()) + .build_geocoded() + ) + ], + 
queries=[RegionQuery(request=FOUND_NAME)], + level_kind=LevelKind.city + ) + assert_row(gdf, names=FOUND_NAME, found_name=FOUND_NAME) def test_geo_limit_response(): @@ -60,8 +84,20 @@ def test_geo_limit_response(): ).build() assert_success_response(response) - data_frame: GeoDataFrame = LimitsGeoDataFrame().to_data_frame(response.features) - assert_geo_limit(data_frame, 0, name=NAME) + data_frame: DataFrame = LimitsGeoDataFrame().to_data_frame( + answers=features_to_answers(response.features), + queries=features_to_queries(response.features), + level_kind=LevelKind.city + ) + assert_row( + df=data_frame, + names=FOUND_NAME, + found_name=FOUND_NAME, + lon_min=GEO_RECT_MIN_LON, + lon_max=GEO_RECT_MAX_LON, + lat_min=GEO_RECT_MIN_LAT, + lat_max=GEO_RECT_MAX_LAT + ) def test_geo_centroid_response(): @@ -75,8 +111,12 @@ def test_geo_centroid_response(): ).build() assert_success_response(response) - data_frame: GeoDataFrame = CentroidsGeoDataFrame().to_data_frame(response.features) - assert_geo_centroid(data_frame, 0, name=NAME) + data_frame: DataFrame = CentroidsGeoDataFrame().to_data_frame( + answers=features_to_answers(response.features), + queries=features_to_queries(response.features), + level_kind=LevelKind.city + ) + assert_geo_centroid(data_frame, 0) def test_geo_boundaries_point_response(): @@ -94,7 +134,11 @@ def test_geo_boundaries_point_response(): ).build() assert_success_response(response) - boundary: GeoDataFrame = BoundariesGeoDataFrame().to_data_frame(response.features) + boundary: DataFrame = BoundariesGeoDataFrame().to_data_frame( + queries=features_to_queries(response.features), + answers=features_to_answers(response.features), + level_kind=LevelKind.city + ) assert_geo_centroid(boundary, index=0, lon=-5, lat=13) assert_geo_centroid(boundary, index=1, lon=7, lat=-3) @@ -131,7 +175,11 @@ def test_geo_boundaries_polygon_response(): ).build() assert_success_response(response) - boundary: GeoDataFrame = 
BoundariesGeoDataFrame().to_data_frame(response.features) + boundary: DataFrame = BoundariesGeoDataFrame().to_data_frame( + queries=features_to_queries(response.features), + answers=features_to_answers(response.features), + level_kind=LevelKind.city + ) assert_geo_boundary(boundary, index=0, polygon=[[point(1, 2), point(3, 4), point(5, 6)]]) assert_geo_boundary(boundary, index=1, polygon=[[point(11, 11), point(11, 14), point(14, 14), point(14, 11), point(11, 11)], [point(12, 12), point(12, 13), point(13, 13), point(13, 12), point(12, 12)]]) @@ -171,54 +219,36 @@ def test_geo_boundaries_multipolygon_response(): ).build() assert_success_response(response) - boundary: GeoDataFrame = BoundariesGeoDataFrame().to_data_frame(response.features) + boundary: DataFrame = BoundariesGeoDataFrame().to_data_frame( + queries=features_to_queries(response.features), + answers=features_to_answers(response.features), + level_kind=LevelKind.city + ) assert_geo_multiboundary(boundary, index=0, multipolygon=[[r0], [r1, r2]]) assert_geo_multiboundary(boundary, index=1, multipolygon=[[r3]]) -def assert_geo_limit(limit: GeoDataFrame, index: int, name=NAME, found_name=FOUND_NAME): - assert isinstance(limit, GeoDataFrame) - assert_names(limit, index, name, found_name) +def assert_geo_limit(limit: DataFrame, index: int, name=FOUND_NAME, found_name=FOUND_NAME): + assert_row( + df=limit, + index=index, + names=name, + found_name=found_name, + lon_min=GEO_RECT_MIN_LON, + lon_max=GEO_RECT_MAX_LON, + lat_min=GEO_RECT_MIN_LAT, + lat_max=GEO_RECT_MAX_LAT + ) - bounds = limit.geometry[index].bounds - assert GEO_RECT_MIN_LON == bounds[0] - assert GEO_RECT_MIN_LAT == bounds[1] - assert GEO_RECT_MAX_LON == bounds[2] - assert GEO_RECT_MAX_LAT == bounds[3] +def assert_geo_centroid(centroid: DataFrame, index: int, name=FOUND_NAME, found_name=FOUND_NAME, lon=CENTROID_LON, lat=CENTROID_LAT): + assert_row(df=centroid, index=index, names=name, found_name=found_name, lon=lon, lat=lat) -def 
assert_geo_centroid(centroid: GeoDataFrame, index: int, name=NAME, found_name=FOUND_NAME, lon=CENTROID_LON, lat=CENTROID_LAT): - assert isinstance(centroid, GeoDataFrame) - assert_names(centroid, index, name, found_name) - assert lon == centroid.geometry[index].x - assert lat == centroid.geometry[index].y +def assert_geo_boundary(boundary: DataFrame, index: int, polygon: GJPolygon, name=FOUND_NAME, found_name=FOUND_NAME): + assert_row(df=boundary, index=index, names=name, found_name=found_name, boundary=polygon) -def assert_geo_boundary(boundary: GeoDataFrame, index: int, polygon: GJPolygon, name=NAME, found_name=FOUND_NAME): - assert isinstance(boundary, GeoDataFrame) - assert_names(boundary, index, name, found_name) - assert_geo_polygon(boundary.geometry[index], polygon) +def assert_geo_multiboundary(boundary: DataFrame, index: int, multipolygon: GJMultipolygon, name=FOUND_NAME, found_name=FOUND_NAME): + assert_row(df=boundary, index=index, names=name, found_name=found_name, boundary=multipolygon) -def assert_geo_multiboundary(boundary: GeoDataFrame, index: int, multipolygon: GJMultipolygon, name=NAME, found_name=FOUND_NAME): - assert isinstance(boundary, GeoDataFrame) - assert_names(boundary, index, name, found_name) - assert_geo_multipolygon(boundary.geometry[index], multipolygon) - - -def assert_geo_multipolygon(geo_multipolygon, multipolygon: GJMultipolygon): - for i, geo_polygon in enumerate(geo_multipolygon.geoms): - assert_geo_polygon(geo_polygon, multipolygon[i]) - - -def assert_geo_polygon(geo_polygon, polygon: GJPolygon): - assert_geo_ring(geo_polygon.exterior.coords, polygon[0]) - - for i, interior in enumerate(geo_polygon.interiors): - assert_geo_ring(interior.coords, polygon[1 + i]) - - -def assert_geo_ring(geo_ring, ring: GJRing): - for i, point in enumerate(ring): - assert lon(point) == geo_ring[i][0] - assert lat(point) == geo_ring[i][1] diff --git a/python-package/test/geo_data/test_us48.py b/python-package/test/geo_data/test_us48.py new file 
mode 100644 index 00000000000..aff57612756 --- /dev/null +++ b/python-package/test/geo_data/test_us48.py @@ -0,0 +1,85 @@ +# Copyright (c) 2020. JetBrains s.r.o. +# Use of this source code is governed by the MIT license that can be found in the LICENSE file. +import lets_plot.geo_data as geodata +from .geo_data import assert_error, assert_row, assert_request_and_found_name_are_equal + + +def test_us48_in_names_with_level(): + us48 = geodata.geocode_states('us-48').get_geocodes() + assert 49 == len(us48) + assert_request_and_found_name_are_equal(us48) + + +def test_us48_in_names_without_level(): + us48 = geodata.geocode(names='us-48').get_geocodes() + assert 49 == len(us48) + assert_request_and_found_name_are_equal(us48) + + +def test_us48_with_extra_names(): + us48 = geodata.geocode(names=['texas', 'us-48', 'nevada']).get_geocodes() + assert 51 == len(us48) + assert_request_and_found_name_are_equal(us48, range(1, 49)) + assert_row(us48, index=0, names='texas', found_name='Texas') + assert_row(us48, index=50, names='nevada', found_name='Nevada') + + +def test_us48_with_extra_and_missing_names(): + us48 = geodata.geocode(names=['texas', 'blahblahblah', 'us-48', 'nevada'])\ + .drop_not_found()\ + .get_geocodes() + + # still 51 - drop missing completley + assert 51 == len(us48) + assert_request_and_found_name_are_equal(us48, range(1, 49)) + assert_row(us48, index=0, names='texas', found_name='Texas') + assert_row(us48, index=50, names='nevada', found_name='Nevada') + + +def test_scope_us_48_with_level(): + # Oslo is a city in Marshall County, Minnesota, United States + # Also Oslo is a capital of Norway + name = 'oslo' + oslo_in_norway = geodata.geocode_cities(name).where(name, scope='norway').get_geocodes() + oslo_in_usa = geodata.geocode_cities(name).where(name, scope='us-48').get_geocodes() + + assert oslo_in_norway.id[0] != oslo_in_usa.id[0] + + +def test_where_scope_us_48_without_level(): + name = 'oslo' + oslo_in_norway = geodata.geocode(names=name).where(name, 
scope='norway').get_geocodes() + oslo_in_usa = geodata.geocode(names=name).where(name, scope='us-48').get_geocodes() + + assert oslo_in_norway.id[0] != oslo_in_usa.id[0] + + +def test_where_scope_us_48_with_level(): + name = 'oslo' + oslo_in_norway = geodata.geocode_cities(names=name).scope('norway').get_geocodes() + oslo_in_usa = geodata.geocode_cities(names=name).scope('us-48').get_geocodes() + + assert oslo_in_norway.id[0] != oslo_in_usa.id[0] + + +def test_scope_us_48_without_level(): + name = 'oslo' + oslo_in_norway = geodata.geocode(names=name).scope('norway').get_geocodes() + oslo_in_usa = geodata.geocode(names=name).scope('us-48').get_geocodes() + + assert oslo_in_norway.id[0] != oslo_in_usa.id[0] + + +def test_parent_states_us48(): + boston = geodata.geocode_cities('boston').states('us-48').get_geocodes() + + assert_row(boston, names='boston', found_name='Boston') + + +def test_error_us48_in_request_not_available(): + assert_error( + "us-48 can't be used in requests with parents.", + lambda: geodata.geocode_states('us-48').countries('usa').get_geocodes() + ) + + diff --git a/python-package/test/plot/test_util.py b/python-package/test/plot/test_util.py index b304b2376eb..029d6d7dd18 100644 --- a/python-package/test/plot/test_util.py +++ b/python-package/test/plot/test_util.py @@ -25,14 +25,3 @@ def test_as_boolean(val, default, expected): assert util.as_boolean(val, default=default) == expected - -@pytest.mark.parametrize('map_join,expected', [ - ('state', ['state', 'request']), # only data key set - add 'request' - (['state'], ['state', 'request']), # only data key set - add 'request' - (None, None), # without map_join should change nothing - (['state', 'found name'], ['state', 'found name']), # both keys set - do not change - ([None, None], [None, None]), # not sure what will happen later, but map_join_regions should change nothing - ([], []), # not sure what will happen later, but map_join_regions should change nothing -]) -def 
test_map_join_regions(map_join, expected): - assert util.map_join_regions(map_join) == expected