From 9acb86b269bd61f65c7c014e148b3761017f2e6c Mon Sep 17 00:00:00 2001 From: Rashid Yangazov <129742127+RYangazov@users.noreply.github.com> Date: Fri, 29 Mar 2024 13:00:27 +0100 Subject: [PATCH 1/4] Refactoring of choosing orientation. Move logic into init method of LayerConfig. --- .../auto_orientation_discretes.ipynb | 3413 +++++++++++++++++ docs/f-24b/auto_rotate.ipynb | 891 +++++ future_changes.md | 5 + .../core/spec/config/DataConfigUtil.kt | 22 + .../letsPlot/core/spec/config/LayerConfig.kt | 45 + 5 files changed, 4376 insertions(+) create mode 100644 docs/dev/notebooks/auto_orientation_discretes.ipynb create mode 100644 docs/f-24b/auto_rotate.ipynb diff --git a/docs/dev/notebooks/auto_orientation_discretes.ipynb b/docs/dev/notebooks/auto_orientation_discretes.ipynb new file mode 100644 index 00000000000..5981f0ce050 --- /dev/null +++ b/docs/dev/notebooks/auto_orientation_discretes.ipynb @@ -0,0 +1,3413 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "88b6cfbb", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import random\n", + "from lets_plot import *\n", + "from lets_plot.mapping import as_discrete" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a7fd8774", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def dump_plot(plot, display=None):\n", + " import json\n", + "\n", + " try:\n", + " import clipboard\n", + " except:\n", + " clipboard = None\n", + " \n", + " from lets_plot._type_utils import standardize_dict\n", + " \n", + " plot_dict = standardize_dict(plot.as_dict())\n", + " plot_json = json.dumps(plot_dict, indent=2)\n", + " \n", + " if clipboard:\n", + " clipboard.copy('')\n", + " clipboard.copy(str(plot_json))\n", + " else:\n", + " if display is None:\n", + " display = True\n", + " \n", + " if display:\n", + " 
print(plot_json)\n", + "\n", + " return plot\n", + "\n", + "LetsPlot.setup_html()" + ] + }, + { + "cell_type": "markdown", + "id": "f51ee2cf", + "metadata": {}, + "source": [ + "## Regression Testing\n", + "Regression testing of geometries that may be affected by current changes." + ] + }, + { + "cell_type": "markdown", + "id": "eeb35e19", + "metadata": {}, + "source": [ + "### Scatter Plots" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2297d2f8", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "data = dict(\n", + " cond=np.repeat(['A','B'], 10),\n", + " xvar=[i + random.normalvariate(0, 3) for i in range(0,20)],\n", + " yvar=[i + random.normalvariate(0, 3) for i in range(0,20)]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "bb8a7928", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(data, aes(x='xvar', y='yvar')) + geom_point(shape=1),\n", + " ggplot(data, aes(x='yvar', y='xvar')) + geom_point(shape=1),\n", + " ggplot(data, aes(x='xvar', y='yvar')) + geom_point(shape=1) + geom_smooth(),\n", + " ggplot(data, aes(x='yvar', y='xvar')) + geom_point(shape=1) + geom_smooth()\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "1d892d2d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(data, aes(x='xvar', y='yvar', color='cond')) + geom_point(shape=1) + geom_smooth(se=False),\n", + " ggplot(data, aes(x='yvar', y='xvar', color='cond')) + geom_point(shape=1) + geom_smooth(se=False)\n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "7531ddf4", + "metadata": {}, + "source": [ + "### Marginal Plots" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "3247f51a", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "cov0=[[1, -.8], [-.8, 1]] \n", + "cov1=[[ 10, .1], [.1, .1]]\n", + "\n", + "x0, y0 = np.random.multivariate_normal(mean=[-2,0], cov=cov0, size=200).T\n", + "x1, y1 = np.random.multivariate_normal(mean=[0,1], cov=cov1, size=200).T\n", + "\n", + "data = dict(\n", + " x = np.concatenate((x0,x1)),\n", + " y = np.concatenate((y0,y1)),\n", + " c = [\"A\"]*200 + [\"B\"]*200\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "4cbc63a4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(data, aes(\"x\", \"y\")) + geom_density2df(aes(fill=\"..level..\")) + coord_cartesian() \\\n", + " + ggmarginal(\"tr\", layer=geom_area(stat=\"density\")),\n", + " ggplot(data, aes(\"y\", \"x\")) + geom_density2df(aes(fill=\"..level..\")) + coord_cartesian() \\\n", + " + ggmarginal(\"tr\", layer=geom_area(stat=\"density\"))\n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "15bd6c1d", + "metadata": {}, + "source": [ + "### `coord_flip()`" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c52de749", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.seed(42)\n", + "n = 10\n", + "x = np.arange(n)\n", + "y = 1 + np.random.randint(5, size=10)\n", + "gggrid([\n", + " ggplot() + geom_bar(aes(x='x', y='y'), data={'x': x, 'y': y}, stat='identity'),\n", + " ggplot() + geom_bar(aes(x='x', y='y'), data={'x': x, 'y': y}, stat='identity') + coord_flip()\n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "55bb8824", + "metadata": {}, + "source": [ + "### `geom_bar()`" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "959a5ec0", + "metadata": {}, + "outputs": [], + "source": [ + "data = {\n", + " 'code': ['a','b','c','d','e'],\n", + " 'value': [2, 5, 3, 8, -1],\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "1a4077a0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(data, aes('code', 'value')) + geom_bar(stat='identity'),\n", + " ggplot(data, aes('value', 'code')) + geom_bar(orientation='y', stat='identity'),\n", + " ggplot(data, aes('value', 'code')) + geom_bar(stat='identity'),\n", + " ggplot(data) + geom_bar(aes('value', 'code'), stat='identity'),\n", + " ggplot(data, aes('value', as_discrete('code'))) + geom_bar(stat='identity')\n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "e737e168", + "metadata": {}, + "source": [ + "### `geom_lollipop()`" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "27967418", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(data, aes('code', 'value')) + geom_lollipop(stat='identity'),\n", + " ggplot(data, aes('value', 'code')) + geom_lollipop(orientation='y', stat='identity'),\n", + " ggplot(data, aes('value', 'code')) + geom_lollipop(stat='identity'),\n", + " ggplot(data) + geom_lollipop(aes('value', 'code'), stat='identity'),\n", + " ggplot(data, aes('value', as_discrete('code'))) + geom_lollipop(stat='identity') \n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "e3b7b6b7", + "metadata": {}, + "source": [ + "### `geom_boxplot()`" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8e934911", + "metadata": {}, + "outputs": [], + "source": [ + "n = 100\n", + "np.random.seed(42)\n", + "data = {\n", + " 'code': np.random.choice(list('abcde'), size=100),\n", + " 'value': np.random.normal(size=100),\n", + " 'value_str': [str(i) for i in np.random.normal(size=100)],\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "df453671", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(data, aes('code', 'value')) + geom_boxplot(),\n", + " ggplot(data, aes('value', 'code')) + geom_boxplot(orientation='y'),\n", + " ggplot(data, aes('value', 'code')) + geom_boxplot(),\n", + " ggplot(data) + geom_boxplot(aes('value', 'code')),\n", + " ggplot(data, aes('value', as_discrete('code'))) + geom_boxplot() \n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "0dacca7a", + "metadata": {}, + "source": [ + "### `geom_violin()`" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "7db1d033", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(data, aes('code', 'value')) + geom_violin(),\n", + " ggplot(data, aes('value', 'code')) + geom_violin(orientation='y'),\n", + " ggplot(data, aes('value', 'code')) + geom_violin(),\n", + " ggplot(data) + geom_violin(aes('value', 'code')),\n", + " ggplot(data, aes('value', as_discrete('code'))) + geom_violin() \n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "e1d07a13", + "metadata": {}, + "source": [ + "### `geom_ydotplot()`" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "5a05e47f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p = gggrid([\n", + " ggplot(data, aes('code', 'value')) + geom_ydotplot(),\n", + " ggplot(data, aes('value', 'code')) + geom_ydotplot(orientation='y'),\n", + " ggplot(data, aes('value', 'code')) + geom_ydotplot(),\n", + " ggplot(data) + geom_ydotplot(aes('value', 'code')),\n", + " ggplot(data, aes('value', as_discrete('code'))) + geom_ydotplot() \n", + "])\n", + "dump_plot(p)" + ] + }, + { + "cell_type": "markdown", + "id": "632d6de8", + "metadata": {}, + "source": [ + "## Some Special Cases" + ] + }, + { + "cell_type": "markdown", + "id": "92f95198", + "metadata": {}, + "source": [ + "### `geom_smooth()`\n", + "`geom_smooth()` needs `orientation=\"y\"` when we want to flip the axes." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "bb724b19", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "n = 100\n", + "x = np.linspace(-2, 2, n)\n", + "y = x ** 2 + np.random.normal(size=n)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "d5bd40cb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_point() + geom_smooth(deg=2, se=False),\n", + " ggplot({'x': x, 'y': y}, aes(x='y', y='x')) + geom_point() + geom_smooth(deg=2, se=False),\n", + " ggplot({'x': x, 'y': y}, aes(x='y', y='x')) + geom_point() + geom_smooth(deg=2, se=False, orientation=\"y\"),\n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "aa2e0c86", + "metadata": {}, + "source": [ + "### Few Datasets" + ] + }, + { + "cell_type": "markdown", + "id": "bbc0359b", + "metadata": {}, + "source": [ + "If we have some dataset in the `ggplot()` context, then we can use data from it in `geom_boxplot()`. And, in this case, we can use variable names from the `ggplot()` context even if we set another context in `geom_boxplot()`." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "baf88e20", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "data = {\n", + " 'code': np.random.choice(list('abcde'), size=100),\n", + " 'value': np.random.normal(size=100),\n", + " 'value_str': [str(i) for i in np.random.normal(size=100)],\n", + "}\n", + "data_num = {\n", + " 'number': np.random.choice(list('qwxyz'), size=100),\n", + " 'volume': np.random.normal(size=100),\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "956932a3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(data, aes('value', 'code')) + geom_boxplot(aes('number', 'volume'), data = data_num),\n", + " ggplot(data, aes('value', 'code')) + geom_boxplot(aes('value', 'code'), data = data_num)\n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "b9a2bded", + "metadata": {}, + "source": [ + "### `stat_summary(geom='crossbar')`" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "f0976fe8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(data, aes('code', 'value')) + stat_summary(geom='crossbar'),\n", + " ggplot(data, aes('value', 'code')) + stat_summary(geom='crossbar') \n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "f949d6de", + "metadata": {}, + "source": [ + "### Discrete Both Axes" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "0aba6fbb", + "metadata": {}, + "outputs": [], + "source": [ + "x = ['a', 'a', 'b', 'b']\n", + "y = ['x','x', 'x', 'y']" + ] + }, + { + "cell_type": "markdown", + "id": "fa99080f", + "metadata": {}, + "source": [ + "For `stat_sum()` and `geom_point()`, rotation does not work for both discrete axes." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "14e36676", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " ], + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + stat_sum(),\n", + " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_point(orientation=\"y\")\n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "5a761ca2", + "metadata": {}, + "source": [ + "The lollipop can be drawn when both axes are discrete, but this is not a typical use case for a lollipop." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "f5ccf129", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = ['a', 'a', 'a', 'b', 'b']\n", + "y = ['x','x', 'x', 'y', 'y']\n", + "gggrid([\n", + " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_lollipop(stat='count'),\n", + " ggplot({'x': x, 'y': y}, aes(x='x')) + geom_lollipop(stat='count'),\n", + " ggplot({'x': x, 'y': y}, aes(y='y')) + geom_lollipop(stat='count'), \n", + " ggplot({'x': x, 'y': y}, aes(y='y')) + geom_lollipop(stat='count', orientation='x'), \n", + " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_lollipop(stat='count', orientation='y') \n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "c5b003f3", + "metadata": {}, + "source": [ + "### `stat='boxplot'`" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "70805e17", + "metadata": {}, + "outputs": [], + "source": [ + "n = 50\n", + "np.random.seed(42)\n", + "x = np.random.uniform(size=n)\n", + "c = np.random.choice(['a', 'b', 'c'], size=n)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "e27e1ed0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p1 = ggplot({'x': x, 'c': c}) + \\\n", + " geom_crossbar(aes(x='c', y='x', color='c'), \\\n", + " stat='boxplot') + \\\n", + " geom_point(aes(x='c', y='x', color='c'), \\\n", + " size=4, shape=21, fill='white',\n", + " position=position_jitterdodge(seed=42))\n", + "p2 = ggplot({'x': x, 'c': c}) + \\\n", + " geom_crossbar(aes(x='x', y='c', color='c'), \\\n", + " stat='boxplot') + \\\n", + " geom_point(aes(x='x', y='c', color='c'), \\\n", + " size=4, shape=21, fill='white',\n", + " position=position_jitterdodge(seed=42))\n", + "gggrid([p1, p2])" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "0986ea7d", + "metadata": {}, + "outputs": [], + "source": [ + "n = 100\n", + "np.random.seed(42)\n", + "x = np.random.choice(['a', 'b', 'c'], size=n)\n", + "y = np.random.normal(size=n)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "e687f5c9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_pointrange(stat='boxplot'),\n", + " ggplot({'x': x, 'y': y}, aes(x='y', y='x')) + geom_pointrange(stat='boxplot')\n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "6604561b", + "metadata": {}, + "source": [ + "### `stat='boxplot_outlier'`" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "c7b7a899", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_pointrange(stat='boxplot_outlier'),\n", + " ggplot({'x': x, 'y': y}, aes(x='y', y='x')) + geom_pointrange(stat='boxplot_outlier')\n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "57f10f16", + "metadata": {}, + "source": [ + "### `stat_summary()`" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "6bdc4826", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p1 = ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\\n", + " stat_summary(position=position_nudge(x=-.1), color=\"red\") + \\\n", + " stat_summary(fun='mq', fun_min='lq', fun_max='uq', quantiles=[.1, .5, .9], \\\n", + " position=position_nudge(x=.1), color=\"blue\")\n", + "p2 = p=ggplot({'x': x, 'y': y}, aes(x='y', y='x')) + \\\n", + " stat_summary(position=position_nudge(x=-.1), color=\"red\") + \\\n", + " stat_summary(fun='mq', fun_min='lq', fun_max='uq', quantiles=[.1, .5, .9], \\\n", + " position=position_nudge(x=.1), color=\"blue\")\n", + "gggrid([p1, p2])" + ] + }, + { + "cell_type": "markdown", + "id": "a44e0244", + "metadata": {}, + "source": [ + "### `stat='ydensity'`" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "3248a6b1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([ \n", + " ggplot({'x': x, 'y': y}, aes('x', 'y')) + geom_crossbar(aes(ymin='..y..', ymax='..y..', width='..violinwidth..'), stat='ydensity'),\n", + " ggplot({'x': x, 'y': y}, aes('y', 'x')) + geom_crossbar(aes(ymin='..y..', ymax='..y..', width='..violinwidth..'), stat='ydensity', orientation='y'),\n", + " ggplot({'x': x, 'y': y}, aes('y', 'x')) + geom_crossbar(aes(ymin='..y..', ymax='..y..', width='..violinwidth..'), stat='ydensity')\n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "5296f46a", + "metadata": {}, + "source": [ + "### `stat='ydotplot'`" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "9d820d5e", + "metadata": {}, + "outputs": [], + "source": [ + "n = 100\n", + "np.random.seed(42)\n", + "x = np.random.choice(['a'], size=n)\n", + "y = np.random.normal(size=n)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "08d6ea01", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([ \n", + " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_crossbar(aes(ymin='..y..', ymax='..y..', width='..count..'), stat='ydotplot'),\n", + " ggplot({'x': x, 'y': y}, aes(x='y', y='x')) + geom_crossbar(aes(ymin='..y..', ymax='..y..', width='..count..'), stat='ydotplot', orientation='y'),\n", + " ggplot({'x': x, 'y': y}, aes(x='y', y='x')) + geom_crossbar(aes(ymin='..y..', ymax='..y..', width='..count..'), stat='ydotplot')\n", + "])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/f-24b/auto_rotate.ipynb b/docs/f-24b/auto_rotate.ipynb new file mode 100644 index 00000000000..c517f0a8b89 --- /dev/null +++ b/docs/f-24b/auto_rotate.ipynb @@ -0,0 +1,891 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e00d3551", + "metadata": {}, + "source": [ + "# Automatically Choose `orientation=\"y\"` When Aes Y Is Discrete" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ff95f57a", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "from lets_plot import *\n", + "from lets_plot.mapping import as_discrete" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "eae93dff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "LetsPlot.setup_html()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": 
"26cdc00e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countrycontinentyearlifeExppopgdpPercap
0AfghanistanAsia195228.8018425333779.445314
1AfghanistanAsia195730.3329240934820.853030
2AfghanistanAsia196231.99710267083853.100710
\n", + "
" + ], + "text/plain": [ + " country continent year lifeExp pop gdpPercap\n", + "0 Afghanistan Asia 1952 28.801 8425333 779.445314\n", + "1 Afghanistan Asia 1957 30.332 9240934 820.853030\n", + "2 Afghanistan Asia 1962 31.997 10267083 853.100710" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/gapminder.csv')\n", + "albania_df = df.loc[df['country'] == 'Albania']\n", + "df.head(3)" + ] + }, + { + "cell_type": "markdown", + "id": "624bf284", + "metadata": {}, + "source": [ + "#### 1. How Does Automatic Setting `orientation=\"y\"` Work" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f1acbed4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(df, aes('continent', 'pop', fill='continent')) + geom_bar() + ggtitle('Default'),\n", + " ggplot(df, aes('pop', 'continent', fill='continent')) + geom_bar() + ggtitle('Automatic setting orientation=\"y\"')\n", + "], ncol=2)" + ] + }, + { + "cell_type": "markdown", + "id": "91589cbd", + "metadata": {}, + "source": [ + "#### 2. Marking by `as_discrete()`" + ] + }, + { + "cell_type": "markdown", + "id": "6fa6b5f2", + "metadata": {}, + "source": [ + "If you're using continuous data for the Y-axis, the plot may be incorrect. Try marking the Y-axis data with the `as_discrete()` function, this will rotate your geom." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c28918fd", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(albania_df) + geom_bar(aes('lifeExp', 'year'), stat='sum', size=0),\n", + " ggplot(albania_df) + geom_bar(aes('lifeExp', as_discrete('year')), stat='sum', size=0)\n", + "], ncol=2)" + ] + }, + { + "cell_type": "markdown", + "id": "47a2e97a", + "metadata": {}, + "source": [ + "#### 3. Other Kinds of Geoms Support Automatic Choosing of Orientation" + ] + }, + { + "cell_type": "markdown", + "id": "bbd7db96", + "metadata": {}, + "source": [ + "Such manipulations with automatic rotation can be done for geoms `geom_boxplot()`, `geom_violin()`, `geom_bar()` and `geom_lollipop()`." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "593c52c6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(df, aes('continent', 'lifeExp', fill='continent')) + geom_boxplot(),\n", + " ggplot(df, aes('lifeExp', 'continent', fill='continent')) + geom_boxplot(),\n", + " ggplot(df, aes('continent', 'lifeExp', fill='continent')) + geom_violin(),\n", + " ggplot(df, aes('lifeExp', 'continent', fill='continent')) + geom_violin(),\n", + " ggplot(albania_df, aes(as_discrete('year'), 'lifeExp')) + geom_lollipop(stat='sum', size=2),\n", + " ggplot(albania_df, aes('lifeExp', as_discrete('year'))) + geom_lollipop(stat='sum', size=2),\n", + "], ncol=2)" + ] + }, + { + "cell_type": "markdown", + "id": "8867feb1", + "metadata": {}, + "source": [ + "#### 4. `stat='boxplot'`, `stat='boxplot_outlier'`, `stat_summary()`" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "13f2b318", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(df, aes('continent', 'lifeExp')) + geom_pointrange(stat='boxplot'),\n", + " ggplot(df, aes('lifeExp', 'continent')) + geom_pointrange(stat='boxplot'),\n", + " ggplot(df, aes('continent', 'lifeExp')) + geom_pointrange(stat='boxplot_outlier'),\n", + " ggplot(df, aes('lifeExp', 'continent')) + geom_pointrange(stat='boxplot_outlier'),\n", + " ggplot(df, aes('continent', 'lifeExp')) + stat_summary(),\n", + " ggplot(df, aes('lifeExp', 'continent')) + stat_summary()\n", + "], ncol=2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/future_changes.md b/future_changes.md index eea3b4e381e..a18a0076a5f 100644 --- a/future_changes.md +++ b/future_changes.md @@ -17,6 +17,11 @@ See: [example notebook](https://nbviewer.org/github/JetBrains/lets-plot/blob/master/docs/f-24b/param_size_unit.ipynb). +- Automatically choose `orientation="y"` when aes y is discrete [[#558](https://github.com/JetBrains/lets-plot/issues/558)]. + + See: [example notebook](https://nbviewer.org/github/JetBrains/lets-plot/blob/master/docs/f-24b/auto_rotate.ipynb). 
+ + ### Changed ### Fixed diff --git a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/DataConfigUtil.kt b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/DataConfigUtil.kt index e6c88debfa3..6f636701ab3 100644 --- a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/DataConfigUtil.kt +++ b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/DataConfigUtil.kt @@ -135,6 +135,28 @@ internal object DataConfigUtil { ) } + fun isAesDiscrete( + aes: Aes<*>, + sharedData: DataFrame, + layerData: DataFrame, + sharedMappings: Map, + layerMappings: Map, + combinedDiscreteMappings: Map + ): Boolean { + // Checking if the y-axis mark as_discrete + if (combinedDiscreteMappings.containsKey(aes.name)) return true + + // Checking if the y-axis is discrete + val aesName = layerMappings[aes.name] ?: sharedMappings[aes.name] ?: return false + // The logic of choosing the data frame is the same as in the layerMappingsAndCombinedData function + if (DataFrameUtil.hasVariable(layerData, aesName) + && DataFrameUtil.findVariableOrFail(layerData, aesName).let(layerData::isDiscrete) + ) return true + + return (DataFrameUtil.hasVariable(sharedData, aesName) + && DataFrameUtil.findVariableOrFail(sharedData, aesName).let(sharedData::isDiscrete)) + } + fun combinedDataWithDataMeta( rawCombinedData: DataFrame, varBindings: List, diff --git a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt index 0d54204cd33..b63112ae7de 100644 --- a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt +++ b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt @@ -157,6 +157,26 @@ class LayerConfig( ownDiscreteAes = DataMetaUtil.getAsDiscreteAesSet(getMap(DATA_META)) ) + if (!clientSide + && isOrientationApplicable() + && 
!DataConfigUtil.isAesDiscrete( + Aes.X, + plotData, + ownData, + plotMappings, + layerMappings, + combinedDiscreteMappings + ) + && DataConfigUtil.isAesDiscrete( + Aes.Y, + plotData, + ownData, + plotMappings, + layerMappings, + combinedDiscreteMappings + ) + ) setOrientationY() + val consumedAesSet: Set> = renderedAes.toSet().let { when (clientSide) { true -> it @@ -279,6 +299,31 @@ class LayerConfig( combinedDataValid = false } + private fun isOrientationApplicable(): Boolean { + val isSuitableGeomKind = geomProto.geomKind in listOf( + GeomKind.BAR, + GeomKind.BOX_PLOT, + GeomKind.VIOLIN, + GeomKind.LOLLIPOP, + GeomKind.Y_DOT_PLOT + ) + val isSuitableStatKind = statKind in listOf( + StatKind.COUNT, + StatKind.SUMMARY, + StatKind.BOXPLOT, + StatKind.BOXPLOT_OUTLIER, + StatKind.YDOTPLOT, + StatKind.YDENSITY + ) + + return isSuitableGeomKind || isSuitableStatKind + } + + private fun setOrientationY() { + check(!clientSide) + update(ORIENTATION, "y") + } + fun hasExplicitGrouping(): Boolean { return explicitGroupingVarName != null } From 1871f191312f59307975108195a28dbe17e4859f Mon Sep 17 00:00:00 2001 From: Rashid Yangazov <129742127+RYangazov@users.noreply.github.com> Date: Mon, 1 Apr 2024 16:25:39 +0200 Subject: [PATCH 2/4] Refactor isAesDiscrete. Add unit-tests. 
--- .../auto_orientation_discretes.ipynb | 170 +++++++------- docs/f-24b/auto_rotate.ipynb | 26 +-- .../core/plot/base/data/DataFrameUtil.kt | 5 + .../core/spec/config/DataConfigUtil.kt | 24 +- .../letsPlot/core/spec/config/LayerConfig.kt | 4 +- .../core/spec/config/IsAesDiscreteTest.kt | 216 ++++++++++++++++++ 6 files changed, 337 insertions(+), 108 deletions(-) create mode 100644 plot-stem/src/jvmTest/kotlin/org/jetbrains/letsPlot/core/spec/config/IsAesDiscreteTest.kt diff --git a/docs/dev/notebooks/auto_orientation_discretes.ipynb b/docs/dev/notebooks/auto_orientation_discretes.ipynb index 5981f0ce050..dd26320a9fb 100644 --- a/docs/dev/notebooks/auto_orientation_discretes.ipynb +++ b/docs/dev/notebooks/auto_orientation_discretes.ipynb @@ -27,7 +27,7 @@ " \n", " \n", @@ -108,7 +108,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -274,7 +274,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -423,7 +423,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 7, @@ -595,7 +595,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -712,7 +712,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 10, @@ -898,7 +898,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 11, @@ -1100,7 +1100,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 13, @@ -1392,7 +1392,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 14, @@ -1583,7 +1583,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 15, @@ -1797,7 +1797,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 17, @@ -1983,7 +1983,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 19, @@ -2133,7 +2133,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 20, @@ -2257,7 +2257,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 22, @@ -2358,7 +2358,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 23, @@ -2548,7 +2548,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 25, @@ -2715,7 +2715,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 27, @@ -2826,7 +2826,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 28, @@ -2937,7 +2937,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 29, @@ -3102,7 +3102,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 30, @@ -3260,7 +3260,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 32, diff --git a/docs/f-24b/auto_rotate.ipynb b/docs/f-24b/auto_rotate.ipynb index c517f0a8b89..1cf69a15caa 100644 --- a/docs/f-24b/auto_rotate.ipynb +++ b/docs/f-24b/auto_rotate.ipynb @@ -34,7 +34,7 @@ " \n", " \n", @@ -151,7 +151,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -268,7 +268,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -388,7 +388,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -656,7 +656,7 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 7, diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/data/DataFrameUtil.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/data/DataFrameUtil.kt index a697414966d..048d55507a0 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/data/DataFrameUtil.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/data/DataFrameUtil.kt @@ -55,6 +55,11 @@ object DataFrameUtil { ) } + fun findVariableOrNull(data: DataFrame, varName: String): DataFrame.Variable? { + if (!hasVariable(data, varName)) return null + return findVariableOrFail(data, varName) + } + fun isNumeric(data: DataFrame, varName: String): Boolean { return data.isNumeric(findVariableOrFail(data, varName)) } diff --git a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/DataConfigUtil.kt b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/DataConfigUtil.kt index 6f636701ab3..3e3bcf3f85f 100644 --- a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/DataConfigUtil.kt +++ b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/DataConfigUtil.kt @@ -143,18 +143,24 @@ internal object DataConfigUtil { layerMappings: Map, combinedDiscreteMappings: Map ): Boolean { - // Checking if the y-axis mark as_discrete + // Check if the aes is marked with as_discrete() if (combinedDiscreteMappings.containsKey(aes.name)) return true - // Checking if the y-axis is discrete - val aesName = layerMappings[aes.name] ?: sharedMappings[aes.name] ?: return false - // The logic of choosing the data frame is the same as in the layerMappingsAndCombinedData function - if (DataFrameUtil.hasVariable(layerData, aesName) - && DataFrameUtil.findVariableOrFail(layerData, aesName).let(layerData::isDiscrete) - ) return true + // Check if the aes is discrete. 
+ val varName = layerMappings[aes.name] ?: sharedMappings[aes.name] ?: return false + // The DataFrame selection logic is identical to that of the layerMappingsAndCombinedData() function. + val layerVar = DataFrameUtil.findVariableOrNull(layerData, varName) + val sharedVar = DataFrameUtil.findVariableOrNull(sharedData, varName) - return (DataFrameUtil.hasVariable(sharedData, aesName) - && DataFrameUtil.findVariableOrFail(sharedData, aesName).let(sharedData::isDiscrete)) + if (layerVar != null) { + return layerData.isDiscrete(layerVar) + } + + if (sharedVar != null) { + return sharedData.isDiscrete(sharedVar) + } + + return false } fun combinedDataWithDataMeta( diff --git a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt index b63112ae7de..d134a003785 100644 --- a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt +++ b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt @@ -175,7 +175,9 @@ class LayerConfig( layerMappings, combinedDiscreteMappings ) - ) setOrientationY() + ) { + setOrientationY() + } val consumedAesSet: Set> = renderedAes.toSet().let { when (clientSide) { diff --git a/plot-stem/src/jvmTest/kotlin/org/jetbrains/letsPlot/core/spec/config/IsAesDiscreteTest.kt b/plot-stem/src/jvmTest/kotlin/org/jetbrains/letsPlot/core/spec/config/IsAesDiscreteTest.kt new file mode 100644 index 00000000000..2575fe3d312 --- /dev/null +++ b/plot-stem/src/jvmTest/kotlin/org/jetbrains/letsPlot/core/spec/config/IsAesDiscreteTest.kt @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2024. JetBrains s.r.o. + * Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
+ */ + +package org.jetbrains.letsPlot.core.spec.config + +import org.jetbrains.letsPlot.core.plot.base.Aes +import org.jetbrains.letsPlot.core.plot.base.data.DataFrameUtil +import kotlin.test.* + +class IsAesDiscreteTest { + private val data = mapOf( + "code" to listOf("a", "b", "c"), + "code_num" to listOf(4.0, 5.0, 6.0), + "value" to listOf(1.0, -5.0, 6.0) + ) + + private val dataInv = mapOf( + "code" to listOf(2.0, 3.0, 7.0), + "value" to listOf("f", "g", "h") + ) + + @Test + fun `data and mapping in plot`() { + val plotData = DataFrameUtil.fromMap(data) + val layerData = DataFrameUtil.fromMap(emptyMap()) + val plotMapping = mapOf( + "x" to "code", + "y" to "value" + ) + val layerMapping = emptyMap() + val asDiscreteMapping = emptyMap() + + val isXDiscrete = DataConfigUtil.isAesDiscrete( + Aes.X, + plotData, + layerData, + plotMapping, + layerMapping, + asDiscreteMapping + ) + val isYDiscrete = DataConfigUtil.isAesDiscrete( + Aes.Y, + plotData, + layerData, + plotMapping, + layerMapping, + asDiscreteMapping + ) + + assertTrue(isXDiscrete) + assertFalse(isYDiscrete) + } + + @Test + fun `data and mapping in layer`(){ + val plotData = DataFrameUtil.fromMap(emptyMap()) + val layerData = DataFrameUtil.fromMap(data) + val plotMapping = emptyMap() + val layerMapping = mapOf( + "x" to "code", + "y" to "value" + ) + val asDiscreteMapping = emptyMap() + + val isXDiscrete = DataConfigUtil.isAesDiscrete( + Aes.X, + plotData, + layerData, + plotMapping, + layerMapping, + asDiscreteMapping + ) + val isYDiscrete = DataConfigUtil.isAesDiscrete( + Aes.Y, + plotData, + layerData, + plotMapping, + layerMapping, + asDiscreteMapping + ) + + assertTrue(isXDiscrete) + assertFalse(isYDiscrete) + } + + @Test + fun `data in plot but mapping in layer`(){ + val plotData = DataFrameUtil.fromMap(data) + val layerData = DataFrameUtil.fromMap(emptyMap()) + val plotMapping = emptyMap() + val layerMapping = mapOf( + "x" to "code", + "y" to "value" + ) + val asDiscreteMapping = emptyMap() + + 
val isXDiscrete = DataConfigUtil.isAesDiscrete( + Aes.X, + plotData, + layerData, + plotMapping, + layerMapping, + asDiscreteMapping + ) + val isYDiscrete = DataConfigUtil.isAesDiscrete( + Aes.Y, + plotData, + layerData, + plotMapping, + layerMapping, + asDiscreteMapping + ) + + assertTrue(isXDiscrete) + assertFalse(isYDiscrete) + } + + + @Test + fun `aes use as_discrete()`() { + val plotData = DataFrameUtil.fromMap(data) + val layerData = DataFrameUtil.fromMap(emptyMap()) + val plotMapping = mapOf( + "x" to "code", + "y" to "value" + ) + val layerMapping = emptyMap() + val asDiscreteMapping = mapOf( + "x" to "code", + "y" to "value" + ) + + val isXDiscrete = DataConfigUtil.isAesDiscrete( + Aes.X, + plotData, + layerData, + plotMapping, + layerMapping, + asDiscreteMapping + ) + val isYDiscrete = DataConfigUtil.isAesDiscrete( + Aes.Y, + plotData, + layerData, + plotMapping, + layerMapping, + asDiscreteMapping + ) + + assertTrue(isXDiscrete) + assertTrue(isYDiscrete) + } + + @Test + fun `data and mapping in plot with layer`() { + val plotData = DataFrameUtil.fromMap(data) + val layerData = DataFrameUtil.fromMap(emptyMap()) + val plotMapping = mapOf( + "x" to "code", + "y" to "value" + ) + val layerMapping = mapOf( + "x" to "code_num" + ) + val asDiscreteMapping = emptyMap() + + val isXDiscrete = DataConfigUtil.isAesDiscrete( + Aes.X, + plotData, + layerData, + plotMapping, + layerMapping, + asDiscreteMapping + ) + assertFalse(isXDiscrete) + } + + @Test + fun `different data in plot and layer`() { + val plotData = DataFrameUtil.fromMap(data) + val layerData = DataFrameUtil.fromMap(dataInv) + val plotMapping = mapOf( + "x" to "code", + "y" to "value" + ) + val layerMapping = mapOf( + "x" to "code", + "y" to "value" + ) + val asDiscreteMapping = emptyMap() + + val isXDiscrete = DataConfigUtil.isAesDiscrete( + Aes.X, + plotData, + layerData, + plotMapping, + layerMapping, + asDiscreteMapping + ) + val isYDiscrete = DataConfigUtil.isAesDiscrete( + Aes.Y, + plotData, + 
layerData, + plotMapping, + layerMapping, + asDiscreteMapping + ) + + assertFalse(isXDiscrete) + assertTrue(isYDiscrete) + } + + +} \ No newline at end of file From 67bb1fad67d78e79c173c27372e908256bbd3ba8 Mon Sep 17 00:00:00 2001 From: Rashid Yangazov <129742127+RYangazov@users.noreply.github.com> Date: Thu, 4 Apr 2024 12:19:01 +0200 Subject: [PATCH 3/4] Disable automatic orientation selection if it is specified by the user. --- ...rientation_discretes_with_user_input.ipynb | 888 ++++++++++++++++++ .../letsPlot/core/spec/config/LayerConfig.kt | 1 + 2 files changed, 889 insertions(+) create mode 100644 docs/dev/notebooks/auto_orientation_discretes_with_user_input.ipynb diff --git a/docs/dev/notebooks/auto_orientation_discretes_with_user_input.ipynb b/docs/dev/notebooks/auto_orientation_discretes_with_user_input.ipynb new file mode 100644 index 00000000000..076cf40fc8d --- /dev/null +++ b/docs/dev/notebooks/auto_orientation_discretes_with_user_input.ipynb @@ -0,0 +1,888 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "b0453a86", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import random\n", + "from lets_plot import *\n", + "from lets_plot.mapping import as_discrete\n", + "from IPython.display import display, Markdown" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "17b5d01e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def dump_plot(plot, display=None):\n", + " import json\n", + "\n", + " try:\n", + " import clipboard\n", + " except:\n", + " clipboard = None\n", + " \n", + " from lets_plot._type_utils import standardize_dict\n", + " \n", + " plot_dict = standardize_dict(plot.as_dict())\n", + " plot_json = json.dumps(plot_dict, indent=2)\n", + " \n", + " if clipboard:\n", + " clipboard.copy('')\n", + " 
clipboard.copy(str(plot_json))\n", + " else:\n", + " if display is None:\n", + " display = True\n", + " \n", + " if display:\n", + " print(plot_json)\n", + "\n", + " return plot\n", + "\n", + "LetsPlot.setup_html()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "99c8df00", + "metadata": {}, + "outputs": [], + "source": [ + "def run_catching(f):\n", + " def colored(s):\n", + " return Markdown('{}'.format(s))\n", + " try:\n", + " plot = f()\n", + " return plot\n", + " except Exception as e:\n", + " display(colored('{}'.format(e)))\n", + " return None" + ] + }, + { + "cell_type": "markdown", + "id": "4a4814ec", + "metadata": {}, + "source": [ + "### `Aes.X` and `Aes.Y` discrete" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3196fa44", + "metadata": {}, + "outputs": [], + "source": [ + "n = 100\n", + "np.random.seed(42)\n", + "data = {\n", + " 'code': np.random.choice(list('abcde'), size=100),\n", + " 'value': np.random.normal(size=100),\n", + " 'value_str': np.random.choice(list('fghjk'), size=100),\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "344dceff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(data, aes('code', 'value_str')) + geom_bar(),\n", + " ggplot(data, aes('code', 'value_str')) + geom_bar(orientation='y'),\n", + " ggplot(data, aes('code', 'value_str')) + geom_bar(orientation='x')\n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "003ae9d8", + "metadata": {}, + "source": [ + "### `Aes.X` - discrete, `Aes.Y` - continuous" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "9379ee1a", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "data = {\n", + " 'code': np.random.choice(list('abcde'), size=4),\n", + " 'value': np.random.normal(size=4)\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "11e216a7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(data, aes('code', 'value')) + geom_bar(),\n", + " ggplot(data, aes('code', 'value')) + geom_bar(orientation='y'),\n", + " ggplot(data, aes('code', 'value')) + geom_bar(orientation='x')\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "49c106d7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "run_catching(lambda: ggplot(data, aes(x='code', y='value')) + geom_boxplot())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "989aed11", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "run_catching(lambda: ggplot(data, aes(x='code', y='value')) + geom_boxplot(orientation=\"y\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "be202202", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "run_catching(lambda: ggplot(data, aes(x='code', y='value')) + geom_boxplot(orientation=\"x\"))" + ] + }, + { + "cell_type": "markdown", + "id": "72eb8ab7", + "metadata": {}, + "source": [ + "### `Aes.X` - continuous, `Aes.Y` - discrete" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "aad5fa4a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(data, aes('value', 'code')) + geom_bar(),\n", + " ggplot(data, aes('value', 'code')) + geom_bar(orientation='y'),\n", + " ggplot(data, aes('value', 'code')) + geom_bar(orientation='x')\n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "ded376e7", + "metadata": {}, + "source": [ + "### `Aes.X` and `Aes.Y` continuous" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "5dd2a7a3", + "metadata": {}, + "outputs": [], + "source": [ + "n = 100\n", + "np.random.seed(42)\n", + "data = {\n", + " 'value1': np.random.normal(size=100),\n", + " 'value2': 10*np.random.normal(size=100)\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "e82549a3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gggrid([\n", + " ggplot(data, aes('value1', 'value2')) + geom_lollipop(),\n", + " ggplot(data, aes('value1', 'value2')) + geom_lollipop(orientation='y'),\n", + " ggplot(data, aes('value1', 'value2')) + geom_lollipop(orientation='x')\n", + "])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt index d134a003785..15b2e7a6983 100644 --- a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt +++ b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt @@ -158,6 +158,7 @@ class LayerConfig( ) if (!clientSide + && !hasOwn(ORIENTATION) && isOrientationApplicable() && !DataConfigUtil.isAesDiscrete( Aes.X, From 2b23c4e9afc86cf63a0946ea36956526ab1fdadf Mon Sep 17 00:00:00 2001 From: Rashid Yangazov <129742127+RYangazov@users.noreply.github.com> Date: Fri, 5 Apr 2024 18:09:20 +0200 Subject: [PATCH 4/4] Move initialization of isYOrientation variable. 
--- .../letsPlot/core/spec/config/LayerConfig.kt | 66 ++++++++++--------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt index 15b2e7a6983..f9e4fc8f185 100644 --- a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt +++ b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/config/LayerConfig.kt @@ -90,17 +90,6 @@ class LayerConfig( } val isYOrientation: Boolean - get() = when (hasOwn(ORIENTATION)) { - true -> getString(ORIENTATION)?.lowercase()?.let { - when (it) { - "y" -> true - "x" -> false - else -> throw IllegalArgumentException("$ORIENTATION expected x|y but was $it") - } - } ?: false - - false -> false - } // Marginal layers val isMarginal: Boolean = getBoolean(MARGINAL, false) @@ -157,27 +146,40 @@ class LayerConfig( ownDiscreteAes = DataMetaUtil.getAsDiscreteAesSet(getMap(DATA_META)) ) - if (!clientSide - && !hasOwn(ORIENTATION) - && isOrientationApplicable() - && !DataConfigUtil.isAesDiscrete( - Aes.X, - plotData, - ownData, - plotMappings, - layerMappings, - combinedDiscreteMappings - ) - && DataConfigUtil.isAesDiscrete( - Aes.Y, - plotData, - ownData, - plotMappings, - layerMappings, - combinedDiscreteMappings - ) - ) { - setOrientationY() + isYOrientation = when (hasOwn(ORIENTATION)) { + true -> getString(ORIENTATION)?.lowercase()?.let { + when (it) { + "y" -> true + "x" -> false + else -> throw IllegalArgumentException("$ORIENTATION expected x|y but was $it") + } + } ?: false + + false -> + if (!clientSide + && isOrientationApplicable() + && !DataConfigUtil.isAesDiscrete( + Aes.X, + plotData, + ownData, + plotMappings, + layerMappings, + combinedDiscreteMappings + ) + && DataConfigUtil.isAesDiscrete( + Aes.Y, + plotData, + ownData, + plotMappings, + layerMappings, + combinedDiscreteMappings + ) + ) { + 
setOrientationY() + true + } else { + false + } } val consumedAesSet: Set> = renderedAes.toSet().let {