diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..dfe0770 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..55436fd --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__ +catboost_info +.DS_Store \ No newline at end of file diff --git a/cat_prediction.ipynb b/cat_prediction.ipynb new file mode 100755 index 0000000..9d8c3e1 --- /dev/null +++ b/cat_prediction.ipynb @@ -0,0 +1,631 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package stopwords to\n", + "[nltk_data] /Users/administrator/nltk_data...\n", + "[nltk_data] Package stopwords is already up-to-date!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: Pandarallel will run on 4 workers.\n", + "INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n" + ] + } + ], + "source": [ + "from functions.preprocessing import get_train_val_data_for_catboost\n", + "from functions.fit_eval_funcs import train_and_validate_catboost\n", + "import pandas as pd\n", + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "path = 'data/TenderHack_Москва_train_data.xls'\n", + "data = pd.read_excel(path)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idСтатусНаименование КСОКПД 2КПГЗРегионНМЦКИтоговая ценаДатаУчастникиСтавкиИНН
00ЗавершенаСТУЛЬЯ УЧЕНИЧЕСКИЕNaN01.06.01.03.01;01.06.01.03.01Москва596790.0593806.052021-06-30 11:20:05.72011d8912494ba2edd3bfeba55206a78a0f5
11Не состояласьМЕДИЦИНСКИЕ РАСХОДНЫЕ МАТЕРИАЛЫNaN01.02.10.50.33;01.02.10.43.05.01Москва4964.90.002022-10-04 09:16:04.77000237a5c57a66f02f8deb152e21f33863a
23ЗавершенаВидеокарта Palit PCI-ENaN01.13.17.08Москва462000.0311850.002021-07-01 13:23:09.177665040f1570117a744d529f4675f827a50f
34ЗавершенаТОВАРЫ ИНФОРМАЦИОННО-ТЕХНОЛОГИЧЕСКИЕ, СРЕДСТВА...NaN01.13.11.03.02;01.13.04.04.06.05;01.10.04.04.0...Москва505555.0460042.902021-03-16 10:42:20.810618c00c03dca0274fe43fc34e974434a927
45ЗавершенаПинцет острыйNaN01.02.10.06.48.04Москва2000.01980.002022-05-26 11:04:42.59722cc997efa7a6742b6119a3c253a084e80
\n", + "
" + ], + "text/plain": [ + " id Статус Наименование КС \\\n", + "0 0 Завершена СТУЛЬЯ УЧЕНИЧЕСКИЕ \n", + "1 1 Не состоялась МЕДИЦИНСКИЕ РАСХОДНЫЕ МАТЕРИАЛЫ \n", + "2 3 Завершена Видеокарта Palit PCI-E \n", + "3 4 Завершена ТОВАРЫ ИНФОРМАЦИОННО-ТЕХНОЛОГИЧЕСКИЕ, СРЕДСТВА... \n", + "4 5 Завершена Пинцет острый \n", + "\n", + " ОКПД 2 КПГЗ Регион НМЦК \\\n", + "0 NaN 01.06.01.03.01;01.06.01.03.01 Москва 596790.0 \n", + "1 NaN 01.02.10.50.33;01.02.10.43.05.01 Москва 4964.9 \n", + "2 NaN 01.13.17.08 Москва 462000.0 \n", + "3 NaN 01.13.11.03.02;01.13.04.04.06.05;01.10.04.04.0... Москва 505555.0 \n", + "4 NaN 01.02.10.06.48.04 Москва 2000.0 \n", + "\n", + " Итоговая цена Дата Участники Ставки \\\n", + "0 593806.05 2021-06-30 11:20:05.720 1 1 \n", + "1 0.00 2022-10-04 09:16:04.770 0 0 \n", + "2 311850.00 2021-07-01 13:23:09.177 6 65 \n", + "3 460042.90 2021-03-16 10:42:20.810 6 18 \n", + "4 1980.00 2022-05-26 11:04:42.597 2 2 \n", + "\n", + " ИНН \n", + "0 d8912494ba2edd3bfeba55206a78a0f5 \n", + "1 237a5c57a66f02f8deb152e21f33863a \n", + "2 040f1570117a744d529f4675f827a50f \n", + "3 c00c03dca0274fe43fc34e974434a927 \n", + "4 cc997efa7a6742b6119a3c253a084e80 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get data: (normally ended sessions and use datetime features)\n", + "1. filter out normally ended sessions\n", + "2. Add datetime features to feature dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO] X y split...\n", + "[INFO] Done...\n" + ] + } + ], + "source": [ + "features, drawdown, num_competitors = get_train_val_data_for_catboost(data, status_columns=['Завершена'], use_date_features=True)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Simple model using datetime features and other categorical objects" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Eval MAE error on 1st target on validation dataset: price drawdown in percents" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " R2 MAE\n", + "Score 0.144217 12.451576\n" + ] + } + ], + "source": [ + "# MAE loss function\n", + "drawdown_model, drawdown_score = train_and_validate_catboost(features.train, features.valid, drawdown.train, drawdown.valid, \n", + " iterations=5000, loss_function='MAE', custom_metric=\"MAE\", verbose=0, use_gpu=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " R2 MAE\n", + "Score 0.208469 13.067395\n" + ] + } + ], + "source": [ + "# RMSE loss function\n", + "drawdown_model = train_and_validate_catboost(features.train, features.valid, drawdown.train, drawdown.valid, \n", + " iterations=5000, loss_function='RMSE', custom_metric=\"RMSE\", verbose=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "drawdown_model.save_model('models/drawdown/drawdown_dt_features_model.cbm')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Valid MAE error on 2nd target: number of competitors" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " R2 MAE\n", + "Score 0.277494 1.85062\n" + ] + } + ], + "source": [ + "# MAE loss function\n", + "num_comp_model, num_comp_score = train_and_validate_catboost(features.train, features.valid, num_competitors.train, num_competitors.valid, \n", + " iterations=5000, loss_function='MAE', custom_metric=\"MAE\", verbose=0, use_gpu=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " R2 MAE\n", + "Score 0.310553 1.884676\n" + ] + } + ], + "source": [ + "# RMSE loss function\n", + "num_comp_model, num_comp_score = train_and_validate_catboost(features.train, features.valid, num_competitors.train, num_competitors.valid, \n", + " iterations=5000, loss_function='RMSE', custom_metric=\"RMSE\", verbose=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "num_comp_model.save_model('models/num_competitors/num_comp_dt_features_model.cbm')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Text processing: \n", + "pass to catboost text features like 'Наименование КС'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Valid MAE error on 1st target: price drawdown in percents" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " R2 MAE\n", + "Score 0.171268 12.350899\n" + ] + } + ], + "source": [ + "# MAE loss function\n", + "drawdown_model, drawdown_score = train_and_validate_catboost(features.train, features.valid, drawdown.train, drawdown.valid, \n", + " use_text_features=True, use_gpu=False,\n", + " iterations=5000, loss_function='MAE', custom_metric=\"MAE\", verbose=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "drawdown_model.save_model('models/drawdown/drawdown_text_processing_model.cbm')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Valid MAE error on 2nd target: number of competitors" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " R2 MAE\n", + "Score 0.302868 1.831835\n" + ] + } + ], + "source": [ + "# MAE loss function\n", + "num_comp_model, num_comp_score = train_and_validate_catboost(features.train, features.valid, num_competitors.train, num_competitors.valid, \n", + " use_text_features=True, use_gpu=False,\n", + " iterations=5000, loss_function='MAE', custom_metric=\"MAE\", verbose=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "num_comp_model.save_model('models/num_competitors/num_comp_text_processing_model.cbm')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## TEXT VECTORS:\n", + "transform code to words, then transform word columns (Наименование КС и code) to embedding vectors, using gensim" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO] Loading classifier database...\n", + "[INFO] Starting code to words process...\n", + "[INFO] Transform words to vectors...\n", + "[INFO] Unite vectors...\n", + "[INFO] X y split...\n", + "[INFO] Done...\n" + ] + } + ], + "source": [ + "features, drawdown, num_competitors = get_train_val_data_for_catboost(\n", + " data, \n", + " status_columns=['Завершена'],\n", + " vectorize_features=True,\n", + " use_date_features=True)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Valid MAE error on 1st target: price drawdown in percents" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " R2 MAE\n", + "Score 0.202713 12.240252\n" + ] + } + ], + "source": [ + "# MAE loss function\n", + "drawdown_model, drawdown_score = train_and_validate_catboost(features.train, features.valid, drawdown.train, drawdown.valid, \n", + " use_gpu=False, iterations=5000, loss_function='MAE', custom_metric=\"MAE\", verbose=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " R2 MAE\n", + "Score 0.254822 12.512833\n" + ] + } + ], + "source": [ + "# RMSE loss function\n", + "drawdown_model, drawdown_score = train_and_validate_catboost(features.train, features.valid, drawdown.train, drawdown.valid, \n", + " use_gpu=False, iterations=5000, loss_function='RMSE', custom_metric=\"RMSE\", verbose=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "drawdown_model.save_model('models/drawdown/drawdown_vector_model.cbm')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Valid MAE error on 2nd target: number of competitors" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " R2 MAE\n", + "Score 0.30421 1.832699\n" + ] + } + ], + "source": [ + "# MAE loss function\n", + "num_comp_model, num_comp_score = train_and_validate_catboost(features.train, features.valid, num_competitors.train, num_competitors.valid, \n", + " use_gpu=False, iterations=5000, loss_function='MAE', custom_metric=\"MAE\", verbose=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " R2 MAE\n", + "Score 0.343018 1.851319\n" + ] + } + ], + "source": [ + "# RMSE loss function\n", + "num_comp_model, num_comp_score = train_and_validate_catboost(features.train, features.valid, num_competitors.train, num_competitors.valid, \n", + " use_gpu=False, iterations=5000, loss_function='RMSE', custom_metric=\"RMSE\", verbose=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "num_comp_model.save_model('models/num_competitors/num_comp_vector_model.cbm')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "text", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "6baaebbc6b412d7a69107c03fd1fd043a0da00adf5530acdffe0f36b6e4b3935" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/check_data.ipynb b/check_data.ipynb new file mode 100755 index 0000000..5938088 --- /dev/null +++ b/check_data.ipynb @@ -0,0 +1,34390 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def load_data():\n", + " path = 'tender/TenderHack_Москва_train_data.xlsx'\n", + " data = pd.read_excel(path)\n", + " return data" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "data = load_data()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idСтатусНаименование КСОКПД 2КПГЗРегионНМЦКИтоговая ценаДатаУчастникиСтавкиИННdownfall_pct
3751АктивнаМясо кур, в том числе цыплят (включая цыплят-б...10.12.10.110NaNСургут296100.000.02021-10-13 11:47:20.6330003158b7dd4d1259be7d808e38e7a5e2c1.0
6481АктивнаПоставка бытовой химии17.22.11.110;17.22.11.110;20.41.31.130;20.41.3...NaNПермский8033.000.02022-08-29 14:21:52.68300e2665e6d987cdc398b24de217dc95c621.0
206252АктивнаШприц без иглы, вариант исполнения: Шприц \"Омн...NaN01.02.10.42.22.08Москва132000.000.02022-11-30 09:46:02.18000eb32ee94563b8f26fa37e97f16e706cb1.0
254308АктивнаКраска интерьерная EVEREST А24. PREMIUM силико...NaN01.11.03.05.05.01Москва30690.000.02022-11-30 12:27:02.343115dd29f3517795299c153da6fe8dde9b31.0
601721АктивнаДезинфицирующее средство Профидез-ОФАNaN01.02.10.55.01Москва106000.000.02022-11-30 09:04:04.753236cb7d3297dffe5434d9ea149f1d176441.0
..........................................
245940287497АктивнаПоставка продуктов питания (бакалея)10.89.13.112;10.89.13.112;10.41.54.000;10.20.2...NaNКемеровская область - Кузбасс63000.000.02022-08-30 06:42:59.09000977d2dd2303199383efd01c6d12724351.0
246445288087АктивнаПоставка лекарственных препаратов для медицинс...21.20.10.239;21.20.10.239;21.20.10.239;21.20.1...NaNКемеровская область - Кузбасс972.000.02021-08-06 07:23:53.46000665d9f08691c031aa6a3e2ef5b5836fa1.0
246573288228АктивнаМЕБЕЛЬ ОБЩЕГО НАЗНАЧЕНИЯNaN01.16.03.07;01.16.04.01;01.16.07.02.02;01.16.2...Москва593991.340.02022-11-30 09:36:03.0702257f7a9915808171f19e9a4d77581547d1.0
246651288321АктивнаСРЕДСТВА ДЛЯ СПАСЕНИЯ С ВЫСОТЫNaN01.04.01.13.03;01.04.01.13.03Москва80914.000.02022-11-30 10:14:01.117274b2fdc3463bfd6b9bcab02f2934423131.0
246695288372АктивнаГовядина замороженная10.11.31.110;10.11.31.110NaNСургут460000.000.02022-01-20 09:00:00.000001b7feea57b9cef7edfc2724ca9f0a1c51.0
\n", + "

1191 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " id Статус Наименование КС \\\n", + "37 51 Активна Мясо кур, в том числе цыплят (включая цыплят-б... \n", + "64 81 Активна Поставка бытовой химии \n", + "206 252 Активна Шприц без иглы, вариант исполнения: Шприц \"Омн... \n", + "254 308 Активна Краска интерьерная EVEREST А24. PREMIUM силико... \n", + "601 721 Активна Дезинфицирующее средство Профидез-ОФА \n", + "... ... ... ... \n", + "245940 287497 Активна Поставка продуктов питания (бакалея) \n", + "246445 288087 Активна Поставка лекарственных препаратов для медицинс... \n", + "246573 288228 Активна МЕБЕЛЬ ОБЩЕГО НАЗНАЧЕНИЯ \n", + "246651 288321 Активна СРЕДСТВА ДЛЯ СПАСЕНИЯ С ВЫСОТЫ \n", + "246695 288372 Активна Говядина замороженная \n", + "\n", + " ОКПД 2 \\\n", + "37 10.12.10.110 \n", + "64 17.22.11.110;17.22.11.110;20.41.31.130;20.41.3... \n", + "206 NaN \n", + "254 NaN \n", + "601 NaN \n", + "... ... \n", + "245940 10.89.13.112;10.89.13.112;10.41.54.000;10.20.2... \n", + "246445 21.20.10.239;21.20.10.239;21.20.10.239;21.20.1... \n", + "246573 NaN \n", + "246651 NaN \n", + "246695 10.11.31.110;10.11.31.110 \n", + "\n", + " КПГЗ \\\n", + "37 NaN \n", + "64 NaN \n", + "206 01.02.10.42.22.08 \n", + "254 01.11.03.05.05.01 \n", + "601 01.02.10.55.01 \n", + "... ... \n", + "245940 NaN \n", + "246445 NaN \n", + "246573 01.16.03.07;01.16.04.01;01.16.07.02.02;01.16.2... \n", + "246651 01.04.01.13.03;01.04.01.13.03 \n", + "246695 NaN \n", + "\n", + " Регион НМЦК Итоговая цена \\\n", + "37 Сургут 296100.00 0.0 \n", + "64 Пермский 8033.00 0.0 \n", + "206 Москва 132000.00 0.0 \n", + "254 Москва 30690.00 0.0 \n", + "601 Москва 106000.00 0.0 \n", + "... ... ... ... \n", + "245940 Кемеровская область - Кузбасс 63000.00 0.0 \n", + "246445 Кемеровская область - Кузбасс 972.00 0.0 \n", + "246573 Москва 593991.34 0.0 \n", + "246651 Москва 80914.00 0.0 \n", + "246695 Сургут 460000.00 0.0 \n", + "\n", + " Дата Участники Ставки \\\n", + "37 2021-10-13 11:47:20.633 0 0 \n", + "64 2022-08-29 14:21:52.683 0 0 \n", + "206 2022-11-30 09:46:02.180 0 0 \n", + "254 2022-11-30 12:27:02.343 1 1 \n", + "601 2022-11-30 09:04:04.753 2 3 \n", + "... ... ... ... \n", + "245940 2022-08-30 06:42:59.090 0 0 \n", + "246445 2021-08-06 07:23:53.460 0 0 \n", + "246573 2022-11-30 09:36:03.070 2 2 \n", + "246651 2022-11-30 10:14:01.117 2 7 \n", + "246695 2022-01-20 09:00:00.000 0 0 \n", + "\n", + " ИНН downfall_pct \n", + "37 03158b7dd4d1259be7d808e38e7a5e2c 1.0 \n", + "64 e2665e6d987cdc398b24de217dc95c62 1.0 \n", + "206 eb32ee94563b8f26fa37e97f16e706cb 1.0 \n", + "254 5dd29f3517795299c153da6fe8dde9b3 1.0 \n", + "601 6cb7d3297dffe5434d9ea149f1d17644 1.0 \n", + "... ... ... \n", + "245940 977d2dd2303199383efd01c6d1272435 1.0 \n", + "246445 665d9f08691c031aa6a3e2ef5b5836fa 1.0 \n", + "246573 57f7a9915808171f19e9a4d77581547d 1.0 \n", + "246651 4b2fdc3463bfd6b9bcab02f293442313 1.0 \n", + "246695 1b7feea57b9cef7edfc2724ca9f0a1c5 1.0 \n", + "\n", + "[1191 rows x 13 columns]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data['Статус']=='Активна']" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idСтатусНаименование КСОКПД 2КПГЗРегионНМЦКИтоговая ценаДатаУчастникиСтавкиИННdownfall_pct
00ЗавершенаСТУЛЬЯ УЧЕНИЧЕСКИЕNaN01.06.01.03.01;01.06.01.03.01Москва596790.0593806.052021-06-30 11:20:05.72011d8912494ba2edd3bfeba55206a78a0f50.005000
11Не состояласьМЕДИЦИНСКИЕ РАСХОДНЫЕ МАТЕРИАЛЫNaN01.02.10.50.33;01.02.10.43.05.01Москва4964.90.002022-10-04 09:16:04.77000237a5c57a66f02f8deb152e21f33863a1.000000
23ЗавершенаВидеокарта Palit PCI-ENaN01.13.17.08Москва462000.0311850.002021-07-01 13:23:09.177665040f1570117a744d529f4675f827a50f0.325000
34ЗавершенаТОВАРЫ ИНФОРМАЦИОННО-ТЕХНОЛОГИЧЕСКИЕ, СРЕДСТВА...NaN01.13.11.03.02;01.13.04.04.06.05;01.10.04.04.0...Москва505555.0460042.902021-03-16 10:42:20.810618c00c03dca0274fe43fc34e974434a9270.090024
45ЗавершенаПинцет острыйNaN01.02.10.06.48.04Москва2000.01980.002022-05-26 11:04:42.59722cc997efa7a6742b6119a3c253a084e800.010000
\n", + "
" + ], + "text/plain": [ + " id Статус Наименование КС \\\n", + "0 0 Завершена СТУЛЬЯ УЧЕНИЧЕСКИЕ \n", + "1 1 Не состоялась МЕДИЦИНСКИЕ РАСХОДНЫЕ МАТЕРИАЛЫ \n", + "2 3 Завершена Видеокарта Palit PCI-E \n", + "3 4 Завершена ТОВАРЫ ИНФОРМАЦИОННО-ТЕХНОЛОГИЧЕСКИЕ, СРЕДСТВА... \n", + "4 5 Завершена Пинцет острый \n", + "\n", + " ОКПД 2 КПГЗ Регион НМЦК \\\n", + "0 NaN 01.06.01.03.01;01.06.01.03.01 Москва 596790.0 \n", + "1 NaN 01.02.10.50.33;01.02.10.43.05.01 Москва 4964.9 \n", + "2 NaN 01.13.17.08 Москва 462000.0 \n", + "3 NaN 01.13.11.03.02;01.13.04.04.06.05;01.10.04.04.0... Москва 505555.0 \n", + "4 NaN 01.02.10.06.48.04 Москва 2000.0 \n", + "\n", + " Итоговая цена Дата Участники Ставки \\\n", + "0 593806.05 2021-06-30 11:20:05.720 1 1 \n", + "1 0.00 2022-10-04 09:16:04.770 0 0 \n", + "2 311850.00 2021-07-01 13:23:09.177 6 65 \n", + "3 460042.90 2021-03-16 10:42:20.810 6 18 \n", + "4 1980.00 2022-05-26 11:04:42.597 2 2 \n", + "\n", + " ИНН downfall_pct \n", + "0 d8912494ba2edd3bfeba55206a78a0f5 0.005000 \n", + "1 237a5c57a66f02f8deb152e21f33863a 1.000000 \n", + "2 040f1570117a744d529f4675f827a50f 0.325000 \n", + "3 c00c03dca0274fe43fc34e974434a927 0.090024 \n", + "4 cc997efa7a6742b6119a3c253a084e80 0.010000 " + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 246762 entries, 0 to 246761\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 246762 non-null int64 \n", + " 1 Статус 246762 non-null object \n", + " 2 Наименование КС 246762 non-null object \n", + " 3 ОКПД 2 22550 non-null object \n", + " 4 КПГЗ 224212 non-null object \n", + " 5 Регион 246762 non-null object \n", + " 6 НМЦК 246762 non-null float64\n", + " 7 Итоговая цена 246762 non-null float64\n", + " 8 Дата 246762 non-null object \n", + " 9 Участники 246762 non-null int64 \n", + " 10 Ставки 246762 non-null int64 \n", + " 11 ИНН 246762 non-null object \n", + "dtypes: float64(2), int64(3), object(7)\n", + "memory usage: 22.6+ MB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/cs/2g5m6qs920x2c75_zsz4cmwr0000gn/T/ipykernel_4783/2627137660.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " data.corr()\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idНМЦКИтоговая ценаУчастникиСтавки
id1.000000-0.001619-0.0005960.0000020.000488
НМЦК-0.0016191.0000000.9181100.0681100.027008
Итоговая цена-0.0005960.9181101.0000000.052806-0.018159
Участники0.0000020.0681100.0528061.0000000.749631
Ставки0.0004880.027008-0.0181590.7496311.000000
\n", + "
" + ], + "text/plain": [ + " id НМЦК Итоговая цена Участники Ставки\n", + "id 1.000000 -0.001619 -0.000596 0.000002 0.000488\n", + "НМЦК -0.001619 1.000000 0.918110 0.068110 0.027008\n", + "Итоговая цена -0.000596 0.918110 1.000000 0.052806 -0.018159\n", + "Участники 0.000002 0.068110 0.052806 1.000000 0.749631\n", + "Ставки 0.000488 0.027008 -0.018159 0.749631 1.000000" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.corr()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "data[\"downfall_pct\"] = 1.0 - data[\"Итоговая цена\"] / data[\"НМЦК\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "81068cd48a274756b906a002d168da65", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Summarize dataset: 0%| | 0/5 [00:00" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas_profiling\n", + "pandas_profiling.ProfileReport(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "data = data[data['Статус']=='Завершена']\n", + "data['ОКПД 2'] = data['ОКПД 2'].fillna(-1)\n", + "data['КПГЗ'] = data['КПГЗ'].fillna(-1)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "X = data.drop([\"Итоговая цена\", \"Ставки\", \"downfall_pct\", \"id\", \"ИНН\", \"Участники\"], axis=1)\n", + "y = data['downfall_pct']" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [], + "source": [ + "# Main imports\n", + "from catboost import CatBoostRegressor\n", + "from catboost import cv\n", + "from catboost import Pool\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Статус', 'Наименование КС', 'ОКПД 2', 'КПГЗ', 'Регион', 'Дата']" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cat_features = X.select_dtypes('object').columns.to_list()\n", + "cat_features" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "cat = CatBoostRegressor(learning_rate=0.1, \n", + " cat_features=cat_features, \n", + " iterations=300,\n", + " loss_function = 'MAE')" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0:\tlearn: 0.1456064\ttotal: 474ms\tremaining: 2m 21s\n", + "1:\tlearn: 0.1436095\ttotal: 534ms\tremaining: 1m 19s\n", + "2:\tlearn: 0.1418076\ttotal: 676ms\tremaining: 1m 6s\n", + "3:\tlearn: 0.1404233\ttotal: 783ms\tremaining: 57.9s\n", + "4:\tlearn: 0.1391808\ttotal: 942ms\tremaining: 55.6s\n", + "5:\tlearn: 0.1381552\ttotal: 1.07s\tremaining: 52.5s\n", + "6:\tlearn: 0.1372887\ttotal: 1.17s\tremaining: 48.9s\n", + "7:\tlearn: 0.1365311\ttotal: 1.26s\tremaining: 46.2s\n", + "8:\tlearn: 0.1358840\ttotal: 1.36s\tremaining: 43.9s\n", + "9:\tlearn: 0.1349656\ttotal: 1.43s\tremaining: 41.6s\n", + "10:\tlearn: 0.1341862\ttotal: 1.54s\tremaining: 40.4s\n", + "11:\tlearn: 0.1335165\ttotal: 1.63s\tremaining: 39.1s\n", + "12:\tlearn: 0.1328806\ttotal: 1.71s\tremaining: 37.7s\n", + "13:\tlearn: 0.1324379\ttotal: 1.78s\tremaining: 36.3s\n", + "14:\tlearn: 0.1319982\ttotal: 1.88s\tremaining: 35.7s\n", + "15:\tlearn: 0.1316460\ttotal: 1.92s\tremaining: 34.1s\n", + "16:\tlearn: 0.1313984\ttotal: 2s\tremaining: 33.3s\n", + "17:\tlearn: 0.1310908\ttotal: 2.06s\tremaining: 32.3s\n", + "18:\tlearn: 0.1308630\ttotal: 2.16s\tremaining: 31.9s\n", + "19:\tlearn: 0.1306476\ttotal: 2.23s\tremaining: 31.3s\n", + "20:\tlearn: 0.1304699\ttotal: 2.37s\tremaining: 31.4s\n", + "21:\tlearn: 0.1302634\ttotal: 2.43s\tremaining: 30.7s\n", + "22:\tlearn: 0.1301173\ttotal: 2.5s\tremaining: 30.1s\n", + "23:\tlearn: 0.1300049\ttotal: 2.6s\tremaining: 29.9s\n", + "24:\tlearn: 0.1298556\ttotal: 2.73s\tremaining: 30.1s\n", + "25:\tlearn: 0.1297512\ttotal: 2.8s\tremaining: 29.5s\n", + "26:\tlearn: 0.1296790\ttotal: 2.87s\tremaining: 29s\n", + "27:\tlearn: 0.1295954\ttotal: 2.94s\tremaining: 28.5s\n", + "28:\tlearn: 0.1295206\ttotal: 3s\tremaining: 28.1s\n", + "29:\tlearn: 0.1294591\ttotal: 3.05s\tremaining: 27.5s\n", + "30:\tlearn: 0.1293917\ttotal: 3.12s\tremaining: 27.1s\n", + "31:\tlearn: 0.1293496\ttotal: 3.17s\tremaining: 26.6s\n", + "32:\tlearn: 0.1292814\ttotal: 3.24s\tremaining: 26.2s\n", + "33:\tlearn: 0.1292402\ttotal: 3.33s\tremaining: 26.1s\n", + "34:\tlearn: 0.1291735\ttotal: 3.47s\tremaining: 26.3s\n", + "35:\tlearn: 0.1291053\ttotal: 3.63s\tremaining: 26.6s\n", + "36:\tlearn: 0.1290757\ttotal: 3.72s\tremaining: 26.4s\n", + "37:\tlearn: 0.1290503\ttotal: 3.8s\tremaining: 26.2s\n", + "38:\tlearn: 0.1290355\ttotal: 3.89s\tremaining: 26s\n", + "39:\tlearn: 0.1290206\ttotal: 3.98s\tremaining: 25.9s\n", + "40:\tlearn: 0.1289998\ttotal: 4.19s\tremaining: 26.5s\n", + "41:\tlearn: 0.1289875\ttotal: 4.43s\tremaining: 27.2s\n", + "42:\tlearn: 0.1289711\ttotal: 4.66s\tremaining: 27.9s\n", + "43:\tlearn: 0.1289642\ttotal: 4.7s\tremaining: 27.3s\n", + "44:\tlearn: 0.1288584\ttotal: 4.84s\tremaining: 27.4s\n", + "45:\tlearn: 0.1288361\ttotal: 4.94s\tremaining: 27.3s\n", + "46:\tlearn: 0.1288237\ttotal: 5.09s\tremaining: 27.4s\n", + "47:\tlearn: 0.1287875\ttotal: 5.13s\tremaining: 26.9s\n", + "48:\tlearn: 0.1287577\ttotal: 5.19s\tremaining: 26.6s\n", + "49:\tlearn: 0.1287477\ttotal: 5.28s\tremaining: 26.4s\n", + "50:\tlearn: 0.1287242\ttotal: 5.38s\tremaining: 26.3s\n", + "51:\tlearn: 0.1287059\ttotal: 5.44s\tremaining: 26s\n", + "52:\tlearn: 0.1286687\ttotal: 5.58s\tremaining: 26s\n", + "53:\tlearn: 0.1286662\ttotal: 5.66s\tremaining: 25.8s\n", + "54:\tlearn: 0.1286415\ttotal: 5.72s\tremaining: 25.5s\n", + "55:\tlearn: 0.1285763\ttotal: 5.84s\tremaining: 25.4s\n", + "56:\tlearn: 0.1285757\ttotal: 5.88s\tremaining: 25.1s\n", + "57:\tlearn: 0.1285518\ttotal: 5.94s\tremaining: 24.8s\n", + "58:\tlearn: 0.1285069\ttotal: 6.02s\tremaining: 24.6s\n", + "59:\tlearn: 0.1284708\ttotal: 6.1s\tremaining: 24.4s\n", + "60:\tlearn: 0.1284478\ttotal: 6.17s\tremaining: 24.2s\n", + "61:\tlearn: 0.1284301\ttotal: 6.25s\tremaining: 24s\n", + "62:\tlearn: 0.1284232\ttotal: 6.34s\tremaining: 23.9s\n", + "63:\tlearn: 0.1284208\ttotal: 6.38s\tremaining: 23.5s\n", + "64:\tlearn: 0.1284155\ttotal: 6.45s\tremaining: 23.3s\n", + "65:\tlearn: 0.1284114\ttotal: 6.55s\tremaining: 23.2s\n", + "66:\tlearn: 0.1283513\ttotal: 6.63s\tremaining: 23s\n", + "67:\tlearn: 0.1283287\ttotal: 6.75s\tremaining: 23s\n", + "68:\tlearn: 0.1282566\ttotal: 6.84s\tremaining: 22.9s\n", + "69:\tlearn: 0.1282386\ttotal: 7.03s\tremaining: 23.1s\n", + "70:\tlearn: 0.1282163\ttotal: 7.1s\tremaining: 22.9s\n", + "71:\tlearn: 0.1282117\ttotal: 7.2s\tremaining: 22.8s\n", + "72:\tlearn: 0.1282053\ttotal: 7.3s\tremaining: 22.7s\n", + "73:\tlearn: 0.1281533\ttotal: 7.37s\tremaining: 22.5s\n", + "74:\tlearn: 0.1281301\ttotal: 7.45s\tremaining: 22.4s\n", + "75:\tlearn: 0.1280397\ttotal: 7.51s\tremaining: 22.1s\n", + "76:\tlearn: 0.1279929\ttotal: 7.58s\tremaining: 22s\n", + "77:\tlearn: 0.1279802\ttotal: 7.67s\tremaining: 21.8s\n", + "78:\tlearn: 0.1279620\ttotal: 7.79s\tremaining: 21.8s\n", + "79:\tlearn: 0.1279581\ttotal: 7.93s\tremaining: 21.8s\n", + "80:\tlearn: 0.1279027\ttotal: 8.09s\tremaining: 21.9s\n", + "81:\tlearn: 0.1278493\ttotal: 8.19s\tremaining: 21.8s\n", + "82:\tlearn: 0.1277746\ttotal: 8.33s\tremaining: 21.8s\n", + "83:\tlearn: 0.1277291\ttotal: 8.43s\tremaining: 21.7s\n", + "84:\tlearn: 0.1277161\ttotal: 8.52s\tremaining: 21.6s\n", + "85:\tlearn: 0.1276933\ttotal: 8.6s\tremaining: 21.4s\n", + "86:\tlearn: 0.1276845\ttotal: 8.65s\tremaining: 21.2s\n", + "87:\tlearn: 0.1276305\ttotal: 8.74s\tremaining: 21s\n", + "88:\tlearn: 0.1275939\ttotal: 8.87s\tremaining: 21s\n", + "89:\tlearn: 0.1275822\ttotal: 8.95s\tremaining: 20.9s\n", + "90:\tlearn: 0.1275283\ttotal: 9.03s\tremaining: 20.7s\n", + "91:\tlearn: 0.1275197\ttotal: 9.09s\tremaining: 20.5s\n", + "92:\tlearn: 0.1275111\ttotal: 9.16s\tremaining: 20.4s\n", + "93:\tlearn: 0.1275056\ttotal: 9.22s\tremaining: 20.2s\n", + "94:\tlearn: 0.1274742\ttotal: 9.35s\tremaining: 20.2s\n", + "95:\tlearn: 0.1274253\ttotal: 9.44s\tremaining: 20.1s\n", + "96:\tlearn: 0.1274155\ttotal: 9.54s\tremaining: 20s\n", + "97:\tlearn: 0.1274125\ttotal: 9.67s\tremaining: 19.9s\n", + "98:\tlearn: 0.1274119\ttotal: 9.69s\tremaining: 19.7s\n", + "99:\tlearn: 0.1273860\ttotal: 9.77s\tremaining: 19.5s\n", + "100:\tlearn: 0.1273653\ttotal: 9.87s\tremaining: 19.4s\n", + "101:\tlearn: 0.1273570\ttotal: 9.99s\tremaining: 19.4s\n", + "102:\tlearn: 0.1273400\ttotal: 10s\tremaining: 19.2s\n", + "103:\tlearn: 0.1273334\ttotal: 10.1s\tremaining: 19s\n", + "104:\tlearn: 0.1273228\ttotal: 10.2s\tremaining: 18.9s\n", + "105:\tlearn: 0.1273135\ttotal: 10.3s\tremaining: 18.8s\n", + "106:\tlearn: 0.1273063\ttotal: 10.3s\tremaining: 18.6s\n", + "107:\tlearn: 0.1273050\ttotal: 10.3s\tremaining: 18.4s\n", + "108:\tlearn: 0.1272941\ttotal: 10.4s\tremaining: 18.2s\n", + "109:\tlearn: 0.1272676\ttotal: 10.5s\tremaining: 18.1s\n", + "110:\tlearn: 0.1272368\ttotal: 10.5s\tremaining: 17.9s\n", + "111:\tlearn: 0.1272348\ttotal: 10.6s\tremaining: 17.8s\n", + "112:\tlearn: 0.1272205\ttotal: 10.7s\tremaining: 17.7s\n", + "113:\tlearn: 0.1272080\ttotal: 10.8s\tremaining: 17.6s\n", + "114:\tlearn: 0.1271950\ttotal: 10.9s\tremaining: 17.5s\n", + "115:\tlearn: 0.1271896\ttotal: 11s\tremaining: 17.5s\n", + "116:\tlearn: 0.1271648\ttotal: 11.1s\tremaining: 17.4s\n", + "117:\tlearn: 0.1271267\ttotal: 11.2s\tremaining: 17.3s\n", + "118:\tlearn: 0.1270940\ttotal: 11.3s\tremaining: 17.1s\n", + "119:\tlearn: 0.1270923\ttotal: 11.3s\tremaining: 17s\n", + "120:\tlearn: 0.1270694\ttotal: 11.5s\tremaining: 17s\n", + "121:\tlearn: 0.1270659\ttotal: 11.5s\tremaining: 16.8s\n", + "122:\tlearn: 0.1270432\ttotal: 11.6s\tremaining: 16.7s\n", + "123:\tlearn: 0.1270344\ttotal: 11.7s\tremaining: 16.6s\n", + "124:\tlearn: 0.1269940\ttotal: 11.8s\tremaining: 16.5s\n", + "125:\tlearn: 0.1269457\ttotal: 12s\tremaining: 16.5s\n", + "126:\tlearn: 0.1269407\ttotal: 12.1s\tremaining: 16.4s\n", + "127:\tlearn: 0.1269217\ttotal: 12.2s\tremaining: 16.3s\n", + "128:\tlearn: 0.1268836\ttotal: 12.3s\tremaining: 16.3s\n", + "129:\tlearn: 0.1268599\ttotal: 12.4s\tremaining: 16.2s\n", + "130:\tlearn: 0.1268226\ttotal: 12.5s\tremaining: 16.2s\n", + "131:\tlearn: 0.1268157\ttotal: 12.6s\tremaining: 16.1s\n", + "132:\tlearn: 0.1268127\ttotal: 12.7s\tremaining: 16s\n", + "133:\tlearn: 0.1268072\ttotal: 12.8s\tremaining: 15.9s\n", + "134:\tlearn: 0.1267785\ttotal: 13s\tremaining: 15.8s\n", + "135:\tlearn: 0.1267567\ttotal: 13.1s\tremaining: 15.8s\n", + "136:\tlearn: 0.1267502\ttotal: 13.2s\tremaining: 15.7s\n", + "137:\tlearn: 0.1267428\ttotal: 13.3s\tremaining: 15.6s\n", + "138:\tlearn: 0.1267306\ttotal: 13.4s\tremaining: 15.5s\n", + "139:\tlearn: 0.1266966\ttotal: 13.5s\tremaining: 15.5s\n", + "140:\tlearn: 0.1266900\ttotal: 13.6s\tremaining: 15.4s\n", + "141:\tlearn: 0.1266808\ttotal: 13.7s\tremaining: 15.2s\n", + "142:\tlearn: 0.1266761\ttotal: 13.9s\tremaining: 15.3s\n", + "143:\tlearn: 0.1266631\ttotal: 14s\tremaining: 15.2s\n", + "144:\tlearn: 0.1266236\ttotal: 14.2s\tremaining: 15.1s\n", + "145:\tlearn: 0.1265992\ttotal: 14.4s\tremaining: 15.2s\n", + "146:\tlearn: 0.1265925\ttotal: 14.6s\tremaining: 15.2s\n", + "147:\tlearn: 0.1265870\ttotal: 14.7s\tremaining: 15.1s\n", + "148:\tlearn: 0.1265780\ttotal: 14.8s\tremaining: 15s\n", + "149:\tlearn: 0.1265544\ttotal: 14.9s\tremaining: 14.9s\n", + "150:\tlearn: 0.1265446\ttotal: 15s\tremaining: 14.8s\n", + "151:\tlearn: 0.1265269\ttotal: 15.1s\tremaining: 14.7s\n", + "152:\tlearn: 0.1264809\ttotal: 15.1s\tremaining: 14.6s\n", + "153:\tlearn: 0.1264628\ttotal: 15.2s\tremaining: 14.4s\n", + "154:\tlearn: 0.1264613\ttotal: 15.4s\tremaining: 14.4s\n", + "155:\tlearn: 0.1264357\ttotal: 15.4s\tremaining: 14.2s\n", + "156:\tlearn: 0.1264285\ttotal: 15.5s\tremaining: 14.1s\n", + "157:\tlearn: 0.1263836\ttotal: 15.6s\tremaining: 14s\n", + "158:\tlearn: 0.1263749\ttotal: 15.7s\tremaining: 13.9s\n", + "159:\tlearn: 0.1263666\ttotal: 15.8s\tremaining: 13.8s\n", + "160:\tlearn: 0.1263552\ttotal: 15.9s\tremaining: 13.7s\n", + "161:\tlearn: 0.1263139\ttotal: 16s\tremaining: 13.6s\n", + "162:\tlearn: 0.1263104\ttotal: 16.1s\tremaining: 13.5s\n", + "163:\tlearn: 0.1262666\ttotal: 16.2s\tremaining: 13.4s\n", + "164:\tlearn: 0.1262508\ttotal: 16.4s\tremaining: 13.4s\n", + "165:\tlearn: 0.1262488\ttotal: 16.5s\tremaining: 13.3s\n", + "166:\tlearn: 0.1262265\ttotal: 16.6s\tremaining: 13.2s\n", + "167:\tlearn: 0.1262201\ttotal: 16.7s\tremaining: 13.1s\n", + "168:\tlearn: 0.1262111\ttotal: 16.7s\tremaining: 13s\n", + "169:\tlearn: 0.1262059\ttotal: 16.9s\tremaining: 12.9s\n", + "170:\tlearn: 0.1261677\ttotal: 17s\tremaining: 12.8s\n", + "171:\tlearn: 0.1261592\ttotal: 17.1s\tremaining: 12.7s\n", + "172:\tlearn: 0.1261573\ttotal: 17.2s\tremaining: 12.6s\n", + "173:\tlearn: 0.1261143\ttotal: 17.3s\tremaining: 12.5s\n", + "174:\tlearn: 0.1260757\ttotal: 17.3s\tremaining: 12.4s\n", + "175:\tlearn: 0.1260616\ttotal: 17.4s\tremaining: 12.3s\n", + "176:\tlearn: 0.1260550\ttotal: 17.5s\tremaining: 12.2s\n", + "177:\tlearn: 0.1260516\ttotal: 17.6s\tremaining: 12.1s\n", + "178:\tlearn: 0.1260205\ttotal: 17.7s\tremaining: 12s\n", + "179:\tlearn: 0.1260162\ttotal: 17.8s\tremaining: 11.9s\n", + "180:\tlearn: 0.1260103\ttotal: 17.9s\tremaining: 11.8s\n", + "181:\tlearn: 0.1260063\ttotal: 18s\tremaining: 11.6s\n", + "182:\tlearn: 0.1260034\ttotal: 18s\tremaining: 11.5s\n", + "183:\tlearn: 0.1259938\ttotal: 18.2s\tremaining: 11.4s\n", + "184:\tlearn: 0.1259683\ttotal: 18.3s\tremaining: 11.4s\n", + "185:\tlearn: 0.1259621\ttotal: 18.3s\tremaining: 11.2s\n", + "186:\tlearn: 0.1259412\ttotal: 18.4s\tremaining: 11.1s\n", + "187:\tlearn: 0.1259195\ttotal: 18.6s\tremaining: 11.1s\n", + "188:\tlearn: 0.1259008\ttotal: 18.6s\tremaining: 10.9s\n", + "189:\tlearn: 0.1258991\ttotal: 18.7s\tremaining: 10.8s\n", + "190:\tlearn: 0.1258929\ttotal: 18.8s\tremaining: 10.7s\n", + "191:\tlearn: 0.1258655\ttotal: 18.9s\tremaining: 10.6s\n", + "192:\tlearn: 0.1258626\ttotal: 19s\tremaining: 10.5s\n", + "193:\tlearn: 0.1258549\ttotal: 19.1s\tremaining: 10.4s\n", + "194:\tlearn: 0.1258411\ttotal: 19.2s\tremaining: 10.3s\n", + "195:\tlearn: 0.1258322\ttotal: 19.3s\tremaining: 10.2s\n", + "196:\tlearn: 0.1258223\ttotal: 19.4s\tremaining: 10.1s\n", + "197:\tlearn: 0.1258180\ttotal: 19.5s\tremaining: 10s\n", + "198:\tlearn: 0.1258131\ttotal: 19.6s\tremaining: 9.93s\n", + "199:\tlearn: 0.1258122\ttotal: 19.7s\tremaining: 9.84s\n", + "200:\tlearn: 0.1258002\ttotal: 19.8s\tremaining: 9.75s\n", + "201:\tlearn: 0.1257961\ttotal: 19.9s\tremaining: 9.65s\n", + "202:\tlearn: 0.1257937\ttotal: 20s\tremaining: 9.55s\n", + "203:\tlearn: 0.1257763\ttotal: 20.1s\tremaining: 9.44s\n", + "204:\tlearn: 0.1257523\ttotal: 20.1s\tremaining: 9.32s\n", + "205:\tlearn: 0.1257475\ttotal: 20.2s\tremaining: 9.2s\n", + "206:\tlearn: 0.1257458\ttotal: 20.2s\tremaining: 9.09s\n", + "207:\tlearn: 0.1257455\ttotal: 20.3s\tremaining: 8.99s\n", + "208:\tlearn: 0.1257135\ttotal: 20.4s\tremaining: 8.9s\n", + "209:\tlearn: 0.1256926\ttotal: 20.6s\tremaining: 8.81s\n", + "210:\tlearn: 0.1256909\ttotal: 20.7s\tremaining: 8.71s\n", + "211:\tlearn: 0.1256800\ttotal: 20.7s\tremaining: 8.59s\n", + "212:\tlearn: 0.1256625\ttotal: 20.8s\tremaining: 8.5s\n", + "213:\tlearn: 0.1256436\ttotal: 20.9s\tremaining: 8.4s\n", + "214:\tlearn: 0.1256406\ttotal: 21s\tremaining: 8.28s\n", + "215:\tlearn: 0.1256204\ttotal: 21.1s\tremaining: 8.19s\n", + "216:\tlearn: 0.1255997\ttotal: 21.2s\tremaining: 8.1s\n", + "217:\tlearn: 0.1255812\ttotal: 21.3s\tremaining: 8s\n", + "218:\tlearn: 0.1255765\ttotal: 21.3s\tremaining: 7.89s\n", + "219:\tlearn: 0.1255713\ttotal: 21.4s\tremaining: 7.79s\n", + "220:\tlearn: 0.1255375\ttotal: 21.5s\tremaining: 7.7s\n", + "221:\tlearn: 0.1255288\ttotal: 21.7s\tremaining: 7.61s\n", + "222:\tlearn: 0.1255205\ttotal: 21.8s\tremaining: 7.52s\n", + "223:\tlearn: 0.1255185\ttotal: 21.9s\tremaining: 7.42s\n", + "224:\tlearn: 0.1255143\ttotal: 22s\tremaining: 7.32s\n", + "225:\tlearn: 0.1255004\ttotal: 22.1s\tremaining: 7.22s\n", + "226:\tlearn: 0.1254933\ttotal: 22.1s\tremaining: 7.12s\n", + "227:\tlearn: 0.1254645\ttotal: 22.2s\tremaining: 7.03s\n", + "228:\tlearn: 0.1254636\ttotal: 22.4s\tremaining: 6.94s\n", + "229:\tlearn: 0.1254605\ttotal: 22.4s\tremaining: 6.83s\n", + "230:\tlearn: 0.1254581\ttotal: 22.5s\tremaining: 6.73s\n", + "231:\tlearn: 0.1254550\ttotal: 22.7s\tremaining: 6.64s\n", + "232:\tlearn: 0.1254544\ttotal: 22.7s\tremaining: 6.54s\n", + "233:\tlearn: 0.1254488\ttotal: 22.8s\tremaining: 6.44s\n", + "234:\tlearn: 0.1254442\ttotal: 22.9s\tremaining: 6.34s\n", + "235:\tlearn: 0.1254435\ttotal: 23s\tremaining: 6.23s\n", + "236:\tlearn: 0.1254397\ttotal: 23.1s\tremaining: 6.13s\n", + "237:\tlearn: 0.1254301\ttotal: 23.1s\tremaining: 6.03s\n", + "238:\tlearn: 0.1254248\ttotal: 23.3s\tremaining: 5.93s\n", + "239:\tlearn: 0.1254087\ttotal: 23.4s\tremaining: 5.84s\n", + "240:\tlearn: 0.1253944\ttotal: 23.5s\tremaining: 5.75s\n", + "241:\tlearn: 0.1253935\ttotal: 23.6s\tremaining: 5.64s\n", + "242:\tlearn: 0.1253874\ttotal: 23.6s\tremaining: 5.54s\n", + "243:\tlearn: 0.1253855\ttotal: 23.7s\tremaining: 5.43s\n", + "244:\tlearn: 0.1253689\ttotal: 23.8s\tremaining: 5.33s\n", + "245:\tlearn: 0.1253489\ttotal: 23.9s\tremaining: 5.24s\n", + "246:\tlearn: 0.1253464\ttotal: 24s\tremaining: 5.14s\n", + "247:\tlearn: 0.1253455\ttotal: 24.1s\tremaining: 5.05s\n", + "248:\tlearn: 0.1253295\ttotal: 24.2s\tremaining: 4.95s\n", + "249:\tlearn: 0.1253232\ttotal: 24.3s\tremaining: 4.85s\n", + "250:\tlearn: 0.1253223\ttotal: 24.4s\tremaining: 4.76s\n", + "251:\tlearn: 0.1253092\ttotal: 24.5s\tremaining: 4.66s\n", + "252:\tlearn: 0.1252990\ttotal: 24.6s\tremaining: 4.57s\n", + "253:\tlearn: 0.1252964\ttotal: 24.7s\tremaining: 4.47s\n", + "254:\tlearn: 0.1252869\ttotal: 24.8s\tremaining: 4.38s\n", + "255:\tlearn: 0.1252840\ttotal: 24.9s\tremaining: 4.28s\n", + "256:\tlearn: 0.1252823\ttotal: 25s\tremaining: 4.18s\n", + "257:\tlearn: 0.1252797\ttotal: 25s\tremaining: 4.07s\n", + "258:\tlearn: 0.1252786\ttotal: 25.1s\tremaining: 3.97s\n", + "259:\tlearn: 0.1252623\ttotal: 25.2s\tremaining: 3.87s\n", + "260:\tlearn: 0.1252602\ttotal: 25.2s\tremaining: 3.77s\n", + "261:\tlearn: 0.1252592\ttotal: 25.4s\tremaining: 3.68s\n", + "262:\tlearn: 0.1252569\ttotal: 25.5s\tremaining: 3.58s\n", + "263:\tlearn: 0.1252563\ttotal: 25.6s\tremaining: 3.48s\n", + "264:\tlearn: 0.1252472\ttotal: 25.7s\tremaining: 3.39s\n", + "265:\tlearn: 0.1252319\ttotal: 25.7s\tremaining: 3.29s\n", + "266:\tlearn: 0.1252265\ttotal: 25.8s\tremaining: 3.19s\n", + "267:\tlearn: 0.1252258\ttotal: 25.9s\tremaining: 3.09s\n", + "268:\tlearn: 0.1252190\ttotal: 26s\tremaining: 2.99s\n", + "269:\tlearn: 0.1252179\ttotal: 26.1s\tremaining: 2.9s\n", + "270:\tlearn: 0.1252079\ttotal: 26.2s\tremaining: 2.8s\n", + "271:\tlearn: 0.1252020\ttotal: 26.3s\tremaining: 2.71s\n", + "272:\tlearn: 0.1251899\ttotal: 26.4s\tremaining: 2.61s\n", + "273:\tlearn: 0.1251886\ttotal: 26.4s\tremaining: 2.51s\n", + "274:\tlearn: 0.1251832\ttotal: 26.5s\tremaining: 2.41s\n", + "275:\tlearn: 0.1251789\ttotal: 26.6s\tremaining: 2.31s\n", + "276:\tlearn: 0.1251602\ttotal: 26.7s\tremaining: 2.22s\n", + "277:\tlearn: 0.1251411\ttotal: 26.9s\tremaining: 2.13s\n", + "278:\tlearn: 0.1251241\ttotal: 27s\tremaining: 2.03s\n", + "279:\tlearn: 0.1251239\ttotal: 27.1s\tremaining: 1.93s\n", + "280:\tlearn: 0.1251204\ttotal: 27.2s\tremaining: 1.84s\n", + "281:\tlearn: 0.1251188\ttotal: 27.3s\tremaining: 1.74s\n", + "282:\tlearn: 0.1251014\ttotal: 27.4s\tremaining: 1.65s\n", + "283:\tlearn: 0.1250969\ttotal: 27.5s\tremaining: 1.55s\n", + "284:\tlearn: 0.1250955\ttotal: 27.6s\tremaining: 1.45s\n", + "285:\tlearn: 0.1250897\ttotal: 27.7s\tremaining: 1.35s\n", + "286:\tlearn: 0.1250887\ttotal: 27.7s\tremaining: 1.26s\n", + "287:\tlearn: 0.1250849\ttotal: 27.8s\tremaining: 1.16s\n", + "288:\tlearn: 0.1250718\ttotal: 27.9s\tremaining: 1.06s\n", + "289:\tlearn: 0.1250690\ttotal: 28s\tremaining: 965ms\n", + "290:\tlearn: 0.1250642\ttotal: 28.1s\tremaining: 868ms\n", + "291:\tlearn: 0.1250586\ttotal: 28.2s\tremaining: 772ms\n", + "292:\tlearn: 0.1250583\ttotal: 28.3s\tremaining: 676ms\n", + "293:\tlearn: 0.1250577\ttotal: 28.4s\tremaining: 579ms\n", + "294:\tlearn: 0.1250528\ttotal: 28.5s\tremaining: 483ms\n", + "295:\tlearn: 0.1250333\ttotal: 28.6s\tremaining: 386ms\n", + "296:\tlearn: 0.1250242\ttotal: 28.7s\tremaining: 290ms\n", + "297:\tlearn: 0.1250227\ttotal: 28.8s\tremaining: 193ms\n", + "298:\tlearn: 0.1250187\ttotal: 28.9s\tremaining: 96.6ms\n", + "299:\tlearn: 0.1250175\ttotal: 29s\tremaining: 0us\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cat.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABLkAAAI7CAYAAAAAk0kkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAABfr0lEQVR4nO3deXyM9/7//+ckZCMJQWRBxK622mtXUkErtatqba2WWtqqpVoteuponRb1PapOF8vpcVQdS3XRogS11FJStRSNnRQpISSWvH9/9Jf5GEkQmWR6TR73221ut8z7fV0zr7mua2bM0/t6XzZjjBEAAAAAAABgYR6uLgAAAAAAAADIKUIuAAAAAAAAWB4hFwAAAAAAACyPkAsAAAAAAACWR8gFAAAAAAAAyyPkAgAAAAAAgOURcgEAAAAAAMDyCLkAAAAAAABgeYRcAAAAAAAAsDxCLgD4iyhbtqxsNtttb9OmTXN1mW6hZcuWstlsWrt2ratLQT6zdu1a2Ww2tWzZ0tWl2PXt21c2m01z5sxxdSm3da91nj9/XoMHD1ZERIS8vLz+cts/P1m+fLmaNWumgIAA+/fazZ/Ds2fPVr169VSoUCF7/+HDh3X48GHZbDaVLVs2xzVY5XgHANybAq4uAADgqEmTJqpQoUKmfffdd18eVyMdPnxYkZGRioiI0OHDh/P8+ZE7ypYtqyNHjig+Pt4pPxyR+2w2myTJGOPiSqzlmWee0eeff66yZcuqc+fO8vHxUZUqVVxdVr6zc+dOdenSRWlpaWrVqpVCQ0Nls9kUEhIiSfrqq6/Uv39/+fj4KCoqSsWKFZMkFS5cWJcuXXJl6bmib9++mjt3rmbPnq2+ffu6uhwAcBuEXADwF/P000/zD95cNm/ePF2+fFllypRxdSmAy02aNEkvv/yyQkNDXV2K0127dk1LliyRj4+Pdu3apYCAAFeXlG8tXbpU165d0yuvvKKJEydm6P/8888lSdOnT9eAAQMc+gIDA7V3714VLFgwx3W48/EOACDkAgDkQ4RbwP8JDQ112x/8p06d0vXr1xUeHk7A5WJHjx6VJFWsWDHb/QULFnTa6Dt3Pt4BAMzJBQCWtn37dvXq1UtlypSRt7e3goKCFB0dra+//jrT5ffs2aNx48apSZMmCg8Pl5eXl4oVK6aoqCgtXLgww/J9+/ZVZGSkJOnIkSMZ5gi7ebnbzXEyZ84c2Wy2DCPUbm5PTEzUCy+8oPLly8vb2zvDnDmrV69W586dFRoaKi8vLwUHB6tTp07atGnT3W+w/19Wc3Ld/Dr279+vHj16KDg4WIUKFVL9+vW1bNky+7JbtmxRTEyMSpQoIV9fXzVq1EirV6/O9Plu3l4ffvih6tatq0KFCqlIkSJq3769Nm/enGWtiYmJeuWVV1StWjX5+fnJ399fdevW1eTJk3XlypUMy98859Ply5f1+uuvq2rVqvLz81PZsmXt2/zIkSOSpMjISId9evM2Wbx4sZ5++mlVr15dRYsWlY+PjyIjI9W/f3/t378/03pv3obx8fF68sknFRISIm9vb5UvX15jx45Vampqlq93+/bt6tOnjyIjI+Xj46OgoCDVqlVLI0eOtNd8s5MnT2r48OH21+jv76/69evrn//8p65fv55h+dTUVP3jH/9Q3bp15e/vLy8vL4WEhKh+/foaNWqUEhMTs6wtM9ndPze7fPmyXnnlFVWoUEE+Pj4KCwvTU089pRMnTjgsN378eIf3263vw/TTiK9du6ZPP/1UvXr1UpUqVRQQECBfX19VrlxZw4YN08mTJzOtI6v3b/rzjh8/XmfOnNHgwYNVunRpeXl5qXTp0ho6dKjOnz+f5ev79ddf9eyzz6p8+fLy8fFRYGCgmjdvrk8//fS22/OFF15QRESEvL29VaZMGQ0ZMiTb+0X6cztFRERIyvj5lX6c3/zad+/erR49eig0NFSenp4aP368Q133+j5MTU3VhAkTVKlSJfn4+KhMmTIaPXq0UlJSJEkXLlzQiBEjVK5cOfn4+Khs2bIaP358psfv3cjue+jHH39U9+7dFRYWZv9s7dChg1auXHnb57nbz+T042j27NmSpH79+tn3Q8uWLe37YM2aNZKkBx980N6f/p1xpzm5Ll++rGnTpqlp06YqWrSovL29FRERoQ4dOmj+/PkOy97p+yq736vp82kePnxYa9asUZs2bVS0aFH5+vqqTp06mjdvnsPy6a9l7ty5GbZH+vst3YEDB9S/f39FRkbK29tbhQsXVkREhB5++GH79gQA3MIAAP4SIiIijCQze/bsu1p+2rRpxsPDw0gy999/v+natatp2rSp8fLyMpLMhAkTMqzz1FNPGUmmSpUqJjo62vTo0cM0atTI/jgvvviiw/Iffvih6dKli5FkChUqZPr06eNwS9enT5/b1j579mwjyWGdm9sffvhhExkZaYoWLWpiYmJMt27dTK9evezLvfTSS0aS8fDwMA0aNDDdunUzDRs2NDabzXh6eppPPvnkrrZZuhYtWhhJZs2aNQ7t6a9j6NChplChQqZy5crmscceM40aNTKSjM1mM59//rlZsmSJKViwoKldu7bp0aOHqVWrlpFkChQoYNavX5/h+STZt6/NZjNNmzY1PXv2NNWrV7evt3jx4gzrHTp0yH5clChRwnTp0sXExMQYf39/I8nUqVPHJCYmOqyzZs0aI8k0bNjQ1K9f3xQqVMi0a9fO9OjRw0RFRZn169ebPn36mEKFChlJpkuXLg77dO/evfbH8vT0NH5+fqZevXqmc+fOJiYmxpQrV85+PPzwww8Zak7fhs8//7wJCAgwERERpnv37iYqKsr4+voaSaZjx46Z7pfJkyfbj8VKlSqZ7t27mw4dOpiqVatmenzFxsaaokWLGkmmbNmyJiYmxkRHR9vb2rRpY65evWpf/saNG6Z169ZGkgkICDDt2rUzPXv2NFFRUfbt/NNPP2VaW2Zysn8aNWpkHnjgAePn52fat29vunXrZkJDQ40kExISYn799Vf7OkuWLLFv1/T30c23M2fOGGOMOXbsmJFkAgMDzQMPPGC6detm2rdvb8LCwuw1HjhwIMt9duv2HTdunJFk+vfvb0qVKmVKlixpOnfubNq3b28CAwONJFO/fn2HbZxu4cKFxsfHx/5506lTJ9OqVSv7cdevX78M65w+fdpUrFjRSDJFixY1nTt3Nh07djRFihQx5cuXNzExMdn6jOzTp0+Wn1/px3n6ax8wYIDx9vY2ZcuWtR9377zzjlP2c4sWLUxAQICJiYkxjzzyiH3bPfLII+bcuXOmcuXK9sdt06aNfbsNHDjwrl7nzbL7HvrXv/5lX7527dqmZ8+epnHjxvZjbfz48Zk+T3Y+k9OP3/LlyxtJpkmTJvb9MGnSJPPhhx+aPn36mJIlSxpJJjo62t7/4YcfGmOMiY+PN5JMREREhlqOHj1q7rvvPiPJ+Pn5mYceesg89thjplmzZiYwMDDDOrf7vrqX79X0Y+O1114zNpvN1K1b1zz22GPmgQcesG/HqVOn2pc/c+ZMltujT58+ZsmSJcYYY37++WcTEBBgJJnKlSubzp07m27duplGjRqZwoULm1q1amW6bwAgvyPkAoC/iOyEXCtWrDA2m80UL17cxMbGOvTFxcWZUqVKGUlm7dq1Dn1r1641hw4dyvB4+/bts6+zZcsWh77b/bhIl9OQS5Jp3bq1uXDhQoZ1//WvfxlJpkKFCmbXrl0OfbGxscbf3994eXk5hAJ3cqeQS5J58803TVpamr1v+vTpRpIpVaqUKVq0qJk3b57Dui+88IKRZKKiojI8X/pj+vr6mtWrVzv0TZ482R5MJCQkOPQ1bNjQSDIxMTHm0qVL9vbff//d1KlTx0gyjz/+uMM66T+uJZmaNWuaU6dOZboN0o+3+Pj4LLfTggULHJ7XGGPS0tLMjBkzjCRTrVo1h21kjOM2fPXVV83169ftfT///LM95Ni4caPDesuWLTOSjI+Pj/nss88y1PLLL7+YPXv22O+fOnXKFCtWzNhsNvP++++bGzdu2PvOnj1rWrVqleFHaWxsrP3HfFJSUobn2Lp1qzl79myW2+NWOd0/FSpUMEeOHLH3XblyxR7KPPDAAxmeL329rCQlJZlly5aZ1NRUh/arV6+aMWPGGEmmffv2Gda7U8glyfTt29ekpKTY+44ePWrCw8ONJDN//nyH9eLi4oy3t7fx8fEx//vf/xz6Dh8+bGrUqGEkmblz5zr0de3a1UgyzZo1M+fPn7e3nzt3zr6tsxNyGXPnz6+bj9eXX37Z4ThKl9P93KBBA4fj6vDhw/YgtkaNGqZDhw4mOTnZ3r9161ZToEAB4+Hh4XB83El230NxcXGmQIECxmazZfg8+/rrr+3BznfffefQd6+fyXf6nsjqc9mYrPfjjRs3TL169eyh9u+//+7Qf+XKFfPVV1/dVR33+r2a/llasGBBs3z5coe+9O+4wMBAc/ny5Wxtj379+tm/i251+fLlDDUCAP5EyAUAfxHp/1DO6taiRQv7suk/uhYtWpTpYy1cuNA+SuduzZo1y0gyI0eOdGjPi5CrYMGCmYZvN27csI9C2bZtW6aPnR4SvfTSS7d9fTe7U8jVoEGDDOHNtWvXTFBQkJFkunXrluExz549ayQZLy+vDCNb0vfhCy+8kGk96T/SJk6caG9bv369fWTC6dOnM6yzbds2+0iKY8eO2dtv/nG9bt26LLfB3YRct5M+uu2XX35xaE/fhnXr1s2wDY0xZuDAgUaSeeONNxza77//fiPJvPvuu3f1/KNHjzaSzJAhQzLtP378uClYsKApUaKEvY7098WwYcPu6jluxxn7Z+nSpRnWS0hIMH5+fkZShpFydwq57iQsLMx4eHhkCPjuFHKVKlXKIYRJ99Zbbxnpz5FeN+vRo4eRZB8Jdasff/zRfoykO3r0qPHw8DA2my3DMWWMMT/99FOuhlyVKlVyCGTT5XQ/22w28/PPP2dYb9iwYUaSKVy4cIZw2xhjOnTokGkQeDvZfQ+lj+zt3Llzpv1DhgwxksxDDz1kb8vJZ3JuhFxLly41kkxoaKi5ePFi1i/2Luq41+/V9M/S4cOHZ7pelSpVMv08vtP2aN++vZFkduzYcVevCwDwJyaeB4C/mCZNmqhChQoZ2tMn3T179qx+/PFH+fr6qkOHDpk+Rvp8Vhs3bszQd+nSJX3zzTf66aefdPbsWV29elXSnxM0S8pyrqXcVLt2bZUrVy5D+08//aSTJ0+qfPnyqlu3bqbr3u613qt27do5zIEkSQUKFFBkZKQSExPVvn37DOsUK1ZMQUFBSkxM1Llz5xQSEpJhmT59+mT6fL1799a2bdu0du1avfLKK5JknzOobdu2KlmyZIZ16tatq1q1amnXrl2KjY1Vr169HPqDg4PVrFmzu3q9t3Pw4EGtWLFCBw8e1MWLF3Xjxg1JUkJCgqQ/j5f77rsvw3qPPPJIhm0oSVWrVpUkh3mnTp8+rZ07d8rDw0NPPfXUXdX11VdfSZJ69OiRaX94eLgqVqyoPXv26MCBA6pUqZLq1KkjT09PffLJJ6pUqZJ9PqF7kdP9U6RIEcXExGRYLzg4WG3bttXixYu1du1aNW7cONu17dq1S6tXr1Z8fLySk5OVlpYmSbp+/brS0tJ08OBB1a5d+64fr3Xr1vLz88vQntm+TEtL0zfffCMp631Tr149FS5cWD/99JNSUlLk4+OjdevWKS0tTXXr1s30eLr//vtVs2ZNxcXF3XXd2dGxY0d5enpmaM/pfi5TpoyqV6+eYb30ydXr1q2r4ODgLPuzmkftVvfyHrp5XrLMPPXUU/rnP/+p9evX68aNG/L09HTZZ3JWVqxYIUl6/PHHVbhw4Xt+nJx+r0rKcr2qVatq3759Gebau5MGDRro66+/1qBBgzRhwgS1aNFCPj4+2XoMAMiPCLkA4C/m6aefzvJHhyTFx8fLGKMrV67I29v7to915swZh/vLly9Xv379dO7cuSzXSUpKyla9zpDVZMK//fabJOnQoUOZBiY3u/W15kRWV19M/xGVVb+/v78SExPtE0rfKn0S/6zajx8/bm9L/0GU1TqSVL58ee3atSvTH09ZbdO7dePGDQ0ZMkSzZs2SMSbL5bI6XrLaRulXuLt5G6VfVS00NFSBgYF3VV/6sXE3Qd6ZM2dUqVIllS9fXlOnTtXIkSM1ZMgQDRkyRBEREWrUqJEeeeQRdevWTV5eXnf1/M7YP1kd05kdD3cjOTlZTz75pJYsWXLb5bL7Hs/Ovjx37pz98UuXLn3Hxz537pzCw8Ptr/V22zMyMjLXQq6s3i853c85+SyRlOVnya3u5T10p9dWvnx5ew3nzp1TcHCwyz6Ts5I+kX5Or7yYk+/VdNl5n9yNkSNHasOGDVq1apXatm2rggULqlatWmrevLkee+wx1a9fP1uPBwD5BSEXAFhM+qiMwoULq0uXLne93okTJ9SjRw9duXJFo0aNUq9evVS2bFkVLlxYHh4e+u677xQdHX3bQCOnNWfF19f3tuuFhIQoOjr6to9RvHjxeysuEx4et7/48J3675Uzt31W2/Ruvffee/rggw8UEhKiKVOmqHHjxipZsqR9JMHjjz+u//73v1nWnFvbKF36sdG1a1cVKlTotssWK1bM/vfQoUPVvXt3ffHFF9qwYYM2bNigBQsWaMGCBRo3bpzWr19/z6O7nC27x8OYMWO0ZMkSValSRW+99Zbq16+v4sWL24O7xo0ba9OmTdl+3Ozsy5vf61mNXLzZnQKFvJLT90tWXPVZkltc9Zmc2+71e/Vmzt6Xfn5+WrlypbZu3aoVK1Zo48aN2rhxo7Zt26YpU6boueee04wZM5z6nADgDgi5AMBi0kdH2Gw2ffLJJ3f9D+vly5frypUr6tSpk95+++0M/QcOHLjnmtJ/RF+8eDHT/swuW3830l9rsWLFsrzcu5XEx8fr/vvvz9B++PBhSVKpUqXsbeHh4ZL+b8RSZtL70pd1poULF0qSZs2alelpdTk5Xm6VPgLi1KlTunDhwl2NRCldurQOHDig0aNHq169etl6vpIlS2rAgAEaMGCAJGnfvn3q37+/Nm3apJdffllz586942PkdP+k7/PMZHY83I30ffbZZ5+pZs2aGfqduc+yUrx4cfn6+urKlSt655137jroSN9Gd7Nd8pKr34d3617eQ+Hh4Tp06JB+++23TE+pTH9dPj4+CgoKkvTX+0xOf9379u3L0ePc6/dqXqhfv7591Nb169e1dOlS9e7dW++//766du2qBx980MUVAsBfy1/nExwAcFfCwsJUs2ZNXbx40T4fyd1ITEyUJEVERGToM8Zo/vz5ma6XHmBdv349y8dO/3G3d+/eTB87fY6e7EofibJnzx798ssv9/QYfyX//ve/b9uePufLzX+vWLHCPv/VzX766Sf7HDzNmzfPdi132q+3O15++eUX7dy5M9vPmZWQkBDVqlVLaWlp+uSTT+5qnXbt2kn6v2AnJ6pUqaLRo0dL0l2/rpzun/Pnz2v58uUZ2s+cOWN/X998PEhSwYIFJd3bPvv222919uzZrF+Qk3h6euqhhx6SlL1907x5c9lsNu3YsSPTwGLXrl25dqri7eT2+9BZ7uU9lP7asgqr0h+nWbNmKlDgz/8X/6t9Jrdt21aS9N///lfJycn3/Dj3+r2aE3fz3XqrAgUKqGvXrvZRdM78HAYAd0HIBQAW9Oabb0qS+vXrl+kPZWOMtmzZou+++87elj5J9KJFi+yTzEt/zr30+uuvZzmZbokSJeTl5aXTp0/bf0TfKioqStKfYc2ePXvs7deuXdPo0aO1devWbL7CPxUsWFDjxo2TMUadOnXShg0bMixz48YNff/999q8efM9PUdemjlzpn2y53RTp07Vjz/+KH9/f4cJo5s2baqGDRvqypUrevbZZ3X58mV739mzZ/Xss89Kkh577LG7mvvoVumjhLL6oZp+vMyYMcPhFLRTp06pd+/e2fphdjfGjRsnSXr11Vf1v//9L0P/nj17HELUkSNHqkiRIpoyZYreffdd+wUUbhYfH69PP/3Ufv/777/X119/rWvXrjksZ4zRl19+KSnzgCgzztg/L730ksO8W6mpqRo8eLCSk5PVoEEDNWnSxGH5u91n/+///T+H9v3792vgwIF39bqcYdy4cfLy8tLIkSM1d+7cTE9X3r17txYvXmy/X6ZMGXXq1ElpaWkaNGiQw7xhf/zxh5577rlcOZX6TnL7fehM2X0PPf/88ypQoICWLl3q8D6RpO+++06zZs2SJI0YMcLe/lf7TI6JiVHt2rV18uRJdevWLcN8kykpKXf9nyz38r2aE3d6P7///vuZXgjm9OnT2rZtm6S7/7wCgPyE0xUBwII6dOig9957Ty+99JJiYmJUoUIFVa5cWYGBgTpz5ox27dql33//XaNHj1abNm3s69StW1fbt29XpUqV1KJFCxUqVEhbtmzRyZMnNXr06ExPYyxYsKBiYmK0aNEi3X///WratKn9SmsfffSRpD+vCPnoo49q2bJlqlevnpo2bSpfX1/t2LFDSUlJev755/Xee+/d02sdMmSIjh49qn/84x9q1qyZqlWrpgoVKsjX19d+RbHz589r5syZeuCBB+5xi+aNZ599Vq1atVKzZs0UHh6u3bt36+eff7Zf8e/WKzLOnz9frVq10rJlyxQZGanmzZvr2rVrWrNmjZKSklSnTh3985//vKdaunTpojVr1uiJJ55QmzZtVLRoUUl/hkeVK1fWK6+8ohUrVujDDz/UmjVrVKdOHSUlJSk2NlblypVTp06d7jjBeXZ06tRJEydO1NixY9W1a1dVqVJFtWrV0pUrV3Tw4EHt2bNHs2fPtgc5pUqV0rJly9SlSxeNGDFCkydPVvXq1RUaGqoLFy5o7969OnTokBo2bKgnnnhCkhQXF6cXX3xRAQEBqlOnjsLCwnTlyhXt2LFDR44cUWBgoN544427rjkn+6dRo0ZKS0tT5cqV1apVK/n5+WnDhg06efKkgoODNW/evAzrdOnSRe+8846ioqLUqlUr++Tkb7/9tooVK6Zx48apa9eueu2117Rw4UJVq1ZNv//+u9avX69mzZopLCwsT654V6dOHX366afq27ev+vbtq7Fjx+q+++5TiRIllJiYqJ9//lnHjx9Xjx491LlzZ/t6M2bM0K5du7R27VpFRkaqZcuWMsZozZo1KlasmGJiYvTFF1/kev23ys33oTNl9z1Uo0YNzZgxQ4MGDdKTTz6pqVOnqkqVKjpy5Ig2btwoY4zGjx9v/w5J91f6TPbw8NCSJUsUHR2tb775RmXKlFHTpk1VrFgxnThxQrt27VKRIkXu6lTXe/lezYmOHTtqwoQJmj59unbv3q3SpUvLw8NDMTExiomJ0b/+9S8NHjxYkZGRql69ugICAnTmzBmtX79eV65cUatWrTI9lRwA8j0DAPhLiIiIMJLM7Nmz73qdn3/+2TzzzDOmYsWKxsfHx/j5+Zly5cqZ6OhoM336dHPixAmH5S9evGheeeUVU7lyZePj42OCg4NNx44dzbZt28yaNWuMJNOiRYsMz3Pu3Dnz7LPPmjJlypiCBQsaSebWr5CUlBQzduxYU65cOVOwYEETHBxsevbsaQ4ePGhmz55tJJk+ffo4rJNVe2Z++OEH06tXLxMREWG8vb2Nv7+/qVSpkunYsaP56KOPTGJi4l1vtxYtWhhJZs2aNQ7tffr0ue0+yGq9dOn7MD4+3qH95u01c+ZMc//99xtfX18TEBBg2rZta3744Ycsaz137pwZM2aMqVq1qn0f165d27z11lvm8uXLGZa/3X682Y0bN8ykSZNMtWrVjI+Pj73Gm19bXFyciYmJMaGhocbHx8dUrFjRjBo1yiQlJWW5re60De+0zzdt2mR69uxpwsPDTcGCBU1QUJCpVauWGTVqlDly5EiG5RMSEsxrr71m6tSpY/z9/Y2Xl5cpVaqUady4sRk3bpyJi4uzL3vw4EEzfvx407p1a1OmTBnj4+NjihYtamrWrGlefvllc+zYsdtus8zkZP9cunTJjBw50kRGRhovLy9TsmRJ07dvX3P06NFMn+vKlStm1KhRpkKFCsbLy8u+z24+3tatW2dat25tihcvbvz8/Ez16tXNxIkTTWpqaraP+3HjxhlJZty4cZnWc6djLT4+3rz44oumevXqplChQsbHx8dERESYli1bmrfeesscPHgwwzpnz541Q4cONaVKlbLvy4EDB5ozZ87c8djKqgZJJiIiItP+u31MZ78P7/Q+uNO2v53svoc2b95sunbtakJCQkyBAgVMsWLFzMMPP2y+++672z5Pdj+Tc/L5eqf9ePHiRfP222+b+vXrG39/f+Pt7W0iIiJMTEyMWbBgQbbqyO73alaf+3fzfEuWLDFNmjQx/v7+xmazOezzL7/80gwaNMjUrl3blChRwv5+aNmypZk7d665evVqps8HAPmdzRgXjP0GACAfsdlskpx79UQAAAAAjpiTCwAAAAAAAJZHyAUAAAAAAADLI+QCAAAAAACA5XF1RQAAchlzcQEAAAC5j5FcAAAAAAAAsDxCLgAAAAAAAFjeX+50xbS0NJ08eVL+/v72S64DAAAAAAAgfzLG6OLFiwoLC5OHR9bjtf5yIdfJkydVunRpV5cBAAAAAACAv5Bjx46pVKlSWfb/5UIuf39/SX8WHhAQ4OJqAAAAAAAA4EpJSUkqXbq0PTPKyl8u5Eo/RTEgIICQCwAAAAAAAJJ0x2mtmHgeAAAAAAAAlpetkGvmzJmqWbOmfZRVo0aN9M0339j7W7ZsKZvN5nAbOHCg04sGAAAAAAAAbpat0xVLlSqlt956SxUrVpQxRnPnztWjjz6qn376SdWqVZMkDRgwQG+88YZ9HT8/P+dWDAAAAAAAANwiWyFXhw4dHO5PnDhRM2fO1ObNm+0hl5+fn0JCQpxXIQAAAAAAAHAH9zwn140bN7RgwQIlJyerUaNG9vb//Oc/Kl68uKpXr64xY8bo8uXLTikUAAAAAAAAyEq2r674888/q1GjRkpJSVHhwoW1ZMkS3XfffZKkxx9/XBEREQoLC1NcXJxGjx6t/fv3a/HixVk+XmpqqlJTU+33k5KS7uFlAAAAAAAAID+zGWNMdla4evWqjh49qgsXLmjRokX66KOPFBsbaw+6bvb999+rdevWOnjwoMqXL5/p440fP14TJkzI0H7hwgUFBARkpzQAAAAAAAC4maSkJAUGBt4xK8p2yHWrqKgolS9fXrNmzcrQl5ycrMKFC2vFihWKjo7OdP3MRnKVLl2akAsAAAAAAAB3HXJl+3TFW6WlpTmEVDfbuXOnJCk0NDTL9b29veXt7Z3TMgAAAAAAAJCPZSvkGjNmjNq1a6cyZcro4sWLmj9/vtauXatvv/1Whw4d0vz589W+fXsVK1ZMcXFxevHFF9W8eXPVrFkzt+oHAAAAAAAAshdy/f777+rdu7dOnTqlwMBA1axZU99++60eeughHTt2TKtWrdK0adOUnJys0qVLq0uXLho7dmxu1Q4AAAAAAABIcsKcXM52t+dZAgAAAAAAwP3dbVbkkYc1AQAAAAAAALmCkAsAAAAAAACWR8gFAAAAAAAAy8vWxPO4d77j3nF1CchjVyaMcHUJAAAAAADkG4zkAgAAAAAAgOURcgEAAAAAAMDyCLkAAAAAAABgeYRcAAAAAAAAsDxCLgAAAAAAAFgeIRcAAAAAAAAsj5ALAAAAAAAAlkfIBQAAAAAAAMsj5AIAAAAAAIDlEXIBAAAAAADA8gi5AAAAAAAAYHmEXAAAAAAAALA8Qi4AAAAAAABYHiEXAAAAAAAALK+AqwsA4Hy+495xdQnIQ1cmjHB1CQAAAADgcozkAgAAAAAAgOURcgEAAAAAAMDyCLkAAAAAAABgeYRcAAAAAAAAsDxCLgAAAAAAAFgeIRcAAAAAAAAsj5ALAAAAAAAAlkfIBQAAAAAAAMsj5AIAAAAAAIDlEXIBAAAAAADA8gi5AAAAAAAAYHmEXAAAAAAAALA8Qi4AAAAAAABYHiEXAAAAAAAALI+QCwAAAAAAAJZHyAUAAAAAAADLI+QCAAAAAACA5RFyAQAAAAAAwPIIuQAAAAAAAGB5hFwAAAAAAACwPEIuAAAAAAAAWB4hFwAAAAAAACyPkAsAAAAAAACWR8gFAAAAAAAAy8tWyDVz5kzVrFlTAQEBCggIUKNGjfTNN9/Y+1NSUjR48GAVK1ZMhQsXVpcuXZSQkOD0ogEAAAAAAICbZSvkKlWqlN566y1t375d27ZtU6tWrfToo4/ql19+kSS9+OKLWr58uT7//HPFxsbq5MmT6ty5c64UDgAAAAAAAKQrkJ2FO3To4HB/4sSJmjlzpjZv3qxSpUrp448/1vz589WqVStJ0uzZs1W1alVt3rxZDzzwgPOqBgD8JfiOe8fVJSAPXZkwwtUlAAAAAFm65zm5bty4oQULFig5OVmNGjXS9u3bde3aNUVFRdmXqVKlisqUKaNNmzZl+TipqalKSkpyuAEAAAAAAADZke2Q6+eff1bhwoXl7e2tgQMHasmSJbrvvvt0+vRpeXl5qUiRIg7LlyxZUqdPn87y8SZNmqTAwED7rXTp0tl+EQAAAAAAAMjfsh1yVa5cWTt37tSWLVs0aNAg9enTR3v27LnnAsaMGaMLFy7Yb8eOHbvnxwIAAAAAAED+lK05uSTJy8tLFSpUkCTVrVtXW7du1XvvvacePXro6tWrOn/+vMNoroSEBIWEhGT5eN7e3vL29s5+5QAAAAAAAMD/757n5EqXlpam1NRU1a1bVwULFtTq1avtffv379fRo0fVqFGjnD4NAAAAAAAAkKVsjeQaM2aM2rVrpzJlyujixYuaP3++1q5dq2+//VaBgYF66qmnNHz4cAUFBSkgIEBDhw5Vo0aNuLIiAAAAAAAAclW2Qq7ff/9dvXv31qlTpxQYGKiaNWvq22+/1UMPPSRJmjp1qjw8PNSlSxelpqYqOjpa77//fq4UDgAAAAAAAKTLVsj18ccf37bfx8dHM2bM0IwZM3JUFAAAAAAAAJAdOZ6TCwAAAAAAAHA1Qi4AAAAAAABYHiEXAAAAAAAALI+QCwAAAAAAAJZHyAUAAAAAAADLI+QCAAAAAACA5RFyAQAAAAAAwPIIuQAAAAAAAGB5hFwAAAAAAACwPEIuAAAAAAAAWB4hFwAAAAAAACyPkAsAAAAAAACWR8gFAAAAAAAAyyPkAgAAAAAAgOURcgEAAAAAAMDyCLkAAAAAAABgeYRcAAAAAAAAsDxCLgAAAAAAAFgeIRcAAAAAAAAsj5ALAAAAAAAAlkfIBQAAAAAAAMsj5AIAAAAAAIDlEXIBAAAAAADA8gi5AAAAAAAAYHmEXAAAAAAAALA8Qi4AAAAAAABYHiEXAAAAAAAALI+QCwAAAAAAAJZHyAUAAAAAAADLI+QCAAAAAACA5RFyAQAAAAAAwPIIuQAAAAAAAGB5hFwAAAAAAACwPEIuAAAAAAAAWB4hFwAAAAAAACyPkAsAAAAAAACWR8gFAAAAAAAAyyPkAgAAAAAAgOURcgEAAAAAAMDyCLkAAAAAAABgeYRcAAAAAAAAsDxCLgAAAAAAAFgeIRcAAAAAAAAsL1sh16RJk1S/fn35+/srODhYHTt21P79+x2WadmypWw2m8Nt4MCBTi0aAAAAAAAAuFm2Qq7Y2FgNHjxYmzdv1sqVK3Xt2jW1adNGycnJDssNGDBAp06dst8mT57s1KIBAAAAAACAmxXIzsIrVqxwuD9nzhwFBwdr+/btat68ub3dz89PISEhzqkQAAAAAAAAuIMczcl14cIFSVJQUJBD+3/+8x8VL15c1atX15gxY3T58uUsHyM1NVVJSUkONwAAAAAAACA7sjWS62ZpaWl64YUX1KRJE1WvXt3e/vjjjysiIkJhYWGKi4vT6NGjtX//fi1evDjTx5k0aZImTJhwr2UAAAAAAAAA9x5yDR48WLt379aGDRsc2p955hn73zVq1FBoaKhat26tQ4cOqXz58hkeZ8yYMRo+fLj9flJSkkqXLn2vZQEAAAAAACAfuqeQa8iQIfryyy+1bt06lSpV6rbLNmzYUJJ08ODBTEMub29veXt730sZAAAAAAAAgKRshlzGGA0dOlRLlizR2rVrFRkZecd1du7cKUkKDQ29pwIBAAAAAACAO8lWyDV48GDNnz9fy5Ytk7+/v06fPi1JCgwMlK+vrw4dOqT58+erffv2KlasmOLi4vTiiy+qefPmqlmzZq68AAAAAAAAACBbIdfMmTMlSS1btnRonz17tvr27SsvLy+tWrVK06ZNU3JyskqXLq0uXbpo7NixTisYAAAAAAAAuFW2T1e8ndKlSys2NjZHBQEAAAAAAADZ5eHqAgAAAAAAAICcIuQCAAAAAACA5RFyAQAAAAAAwPIIuQAAAAAAAGB5hFwAAAAAAACwPEIuAAAAAAAAWB4hFwAAAAAAACyPkAsAAAAAAACWR8gFAAAAAAAAyyPkAgAAAAAAgOURcgEAAAAAAMDyCLkAAAAAAABgeYRcAAAAAAAAsDxCLgAAAAAAAFgeIRcAAAAAAAAsj5ALAAAAAAAAlkfIBQAAAAAAAMsj5AIAAAAAAIDlEXIBAAAAAADA8gi5AAAAAAAAYHmEXAAAAAAAALA8Qi4AAAAAAABYHiEXAAAAAAAALI+QCwAAAAAAAJZHyAUAAAAAAADLI+QCAAAAAACA5RFyAQAAAAAAwPIIuQAAAAAAAGB5hFwAAAAAAACwPEIuAAAAAAAAWB4hFwAAAAAAACyPkAsAAAAAAACWR8gFAAAAAAAAyyPkAgAAAAAAgOURcgEAAAAAAMDyCLkAAAAAAABgeYRcAAAAAAAAsDxCLgAAAAAAAFgeIRcAAAAAAAAsj5ALAAAAAAAAlkfIBQAAAAAAAMsj5AIAAAAAAIDlEXIBAAAAAADA8rIVck2aNEn169eXv7+/goOD1bFjR+3fv99hmZSUFA0ePFjFihVT4cKF1aVLFyUkJDi1aAAAAAAAAOBm2Qq5YmNjNXjwYG3evFkrV67UtWvX1KZNGyUnJ9uXefHFF7V8+XJ9/vnnio2N1cmTJ9W5c2enFw4AAAAAAACkK5CdhVesWOFwf86cOQoODtb27dvVvHlzXbhwQR9//LHmz5+vVq1aSZJmz56tqlWravPmzXrggQecVzkAAAAAAADw/8vRnFwXLlyQJAUFBUmStm/frmvXrikqKsq+TJUqVVSmTBlt2rQp08dITU1VUlKSww0AAAAAAADIjnsOudLS0vTCCy+oSZMmql69uiTp9OnT8vLyUpEiRRyWLVmypE6fPp3p40yaNEmBgYH2W+nSpe+1JAAAAAAAAORT9xxyDR48WLt379aCBQtyVMCYMWN04cIF++3YsWM5ejwAAAAAAADkP9makyvdkCFD9OWXX2rdunUqVaqUvT0kJERXr17V+fPnHUZzJSQkKCQkJNPH8vb2lre3972UAQAAAAAAAEjK5kguY4yGDBmiJUuW6Pvvv1dkZKRDf926dVWwYEGtXr3a3rZ//34dPXpUjRo1ck7FAAAAAAAAwC2yNZJr8ODBmj9/vpYtWyZ/f3/7PFuBgYHy9fVVYGCgnnrqKQ0fPlxBQUEKCAjQ0KFD1ahRI66sCAAAAAAAgFyTrZBr5syZkqSWLVs6tM+ePVt9+/aVJE2dOlUeHh7q0qWLUlNTFR0drffff98pxQIAAAAAAACZyVbIZYy54zI+Pj6aMWOGZsyYcc9FAQAAAAAAANlxz1dXBAAAAAAAAP4qCLkAAAAAAABgeYRcAAAAAAAAsDxCLgAAAAAAAFgeIRcAAAAAAAAsj5ALAAAAAAAAlkfIBQAAAAAAAMsj5AIAAAAAAIDlEXIBAAAAAADA8gi5AAAAAAAAYHmEXAAAAAAAALA8Qi4AAAAAAABYHiEXAAAAAAAALI+QCwAAAAAAAJZHyAUAAAAAAADLI+QCAAAAAACA5RFyAQAAAAAAwPIIuQAAAAAAAGB5hFwAAAAAAACwPEIuAAAAAAAAWB4hFwAAAAAAACyPkAsAAAAAAACWR8gFAAAAAAAAyyPkAgAAAAAAgOURcgEAAAAAAMDyCLkAAAAAAABgeYRcAAAAAAAAsDxCLgAAAAAAAFgeIRcAAAAAAAAsj5ALAAAAAAAAlkfIBQAAAAAAAMsj5AIAAAAAAIDlEXIBAAAAAADA8gi5AAAAAAAAYHmEXAAAAAAAALA8Qi4AAAAAAABYHiEXAAAAAAAALI+QCwAAAAAAAJZHyAUAAAAAAADLI+QCAAAAAACA5RFyAQAAAAAAwPIIuQAAAAAAAGB52Q651q1bpw4dOigsLEw2m01Lly516O/bt69sNpvDrW3bts6qFwAAAAAAAMgg2yFXcnKyatWqpRkzZmS5TNu2bXXq1Cn77b///W+OigQAAAAAAABup0B2V2jXrp3atWt322W8vb0VEhJyz0UBAAAAAAAA2ZErc3KtXbtWwcHBqly5sgYNGqRz587lxtMAAAAAAAAAku5hJNedtG3bVp07d1ZkZKQOHTqkV155Re3atdOmTZvk6emZYfnU1FSlpqba7yclJTm7JAAAAAAAALg5p4dcjz32mP3vGjVqqGbNmipfvrzWrl2r1q1bZ1h+0qRJmjBhgrPLAAAAAAAAQD6SK6cr3qxcuXIqXry4Dh48mGn/mDFjdOHCBfvt2LFjuV0SAAAAAAAA3IzTR3Ld6vjx4zp37pxCQ0Mz7ff29pa3t3dulwEAAAAAAAA3lu2Q69KlSw6jsuLj47Vz504FBQUpKChIEyZMUJcuXRQSEqJDhw5p1KhRqlChgqKjo51aOAAAAAAAAJAu2yHXtm3b9OCDD9rvDx8+XJLUp08fzZw5U3FxcZo7d67Onz+vsLAwtWnTRn/7298YrQUAAAAAAIBck+2Qq2XLljLGZNn/7bff5qggAAAAAAAAILtyfeJ5AAAAAAAAILcRcgEAAAAAAMDyCLkAAAAAAABgeYRcAAAAAAAAsDxCLgAAAAAAAFgeIRcAAAAAAAAsj5ALAAAAAAAAlkfIBQAAAAAAAMsj5AIAAAAAAIDlEXIBAAAAAADA8gi5AAAAAAAAYHmEXAAAAAAAALA8Qi4AAAAAAABYHiEXAAAAAAAALI+QCwAAAAAAAJZHyAUAAAAAAADLI+QCAAAAAACA5RFyAQAAAAAAwPIIuQAAAAAAAGB5hFwAAAAAAACwPEIuAAAAAAAAWB4hFwAAAAAAACyPkAsAAAAAAACWR8gFAAAAAAAAyyPkAgAAAAAAgOURcgEAAAAAAMDyCLkAAAAAAABgeYRcAAAAAAAAsDxCLgAAAAAAAFgeIRcAAAAAAAAsj5ALAAAAAAAAlkfIBQAAAAAAAMsj5AIAAAAAAIDlEXIBAAAAAADA8gi5AAAAAAAAYHmEXAAAAAAAALA8Qi4AAAAAAABYHiEXAAAAAAAALI+QCwAAAAAAAJZHyAUAAAAAAADLI+QCAAAAAACA5RFyAQAAAAAAwPIIuQAAAAAAAGB52Q651q1bpw4dOigsLEw2m01Lly516DfG6PXXX1doaKh8fX0VFRWlAwcOOKteAAAAAAAAIINsh1zJycmqVauWZsyYkWn/5MmTNX36dH3wwQfasmWLChUqpOjoaKWkpOS4WAAAAAAAACAzBbK7Qrt27dSuXbtM+4wxmjZtmsaOHatHH31UkjRv3jyVLFlSS5cu1WOPPZazagEAAAAAAIBMOHVOrvj4eJ0+fVpRUVH2tsDAQDVs2FCbNm1y5lMBAAAAAAAAdtkeyXU7p0+fliSVLFnSob1kyZL2vlulpqYqNTXVfj8pKcmZJQEAAAAAACAfcPnVFSdNmqTAwED7rXTp0q4uCQAAAAAAABbj1JArJCREkpSQkODQnpCQYO+71ZgxY3ThwgX77dixY84sCQAAAAAAAPmAU0OuyMhIhYSEaPXq1fa2pKQkbdmyRY0aNcp0HW9vbwUEBDjcAAAAAAAAgOzI9pxcly5d0sGDB+334+PjtXPnTgUFBalMmTJ64YUX9Oabb6pixYqKjIzUa6+9prCwMHXs2NGZdQMAAAAAAAB22Q65tm3bpgcffNB+f/jw4ZKkPn36aM6cORo1apSSk5P1zDPP6Pz582ratKlWrFghHx8f51UNAAAAAAAA3CTbIVfLli1ljMmy32az6Y033tAbb7yRo8IAAAAAAACAu+XyqysCAAAAAAAAOUXIBQAAAAAAAMsj5AIAAAAAAIDlEXIBAAAAAADA8gi5AAAAAAAAYHmEXAAAAAAAALA8Qi4AAAAAAABYHiEXAAAAAAAALI+QCwAAAAAAAJZHyAUAAAAAAADLI+QCAAAAAACA5RFyAQAAAAAAwPIIuQAAAAAAAGB5hFwAAAAAAACwPEIuAAAAAAAAWB4hFwAAAAAAACyPkAsAAAAAAACWR8gFAAAAAAAAyyPkAgAAAAAAgOURcgEAAAAAAMDyCLkAAAAAAABgeYRcAAAAAAAAsDxCLgAAAAAAAFgeIRcAAAAAAAAsj5ALAAAAAAAAlkfIBQAAAAAAAMsj5AIAAAAAAIDlEXIBAAAAAADA8gi5AAAAAAAAYHmEXAAAAAAAALA8Qi4AAAAAAABYHiEXAAAAAAAALI+QCwAAAAAAAJZHyAUAAAAAAADLI+QCAAAAAACA5RFyAQAAAAAAwPIIuQAAAAAAAGB5hFwAAAAAAACwPEIuAAAAAAAAWB4hFwAAAAAAACyPkAsAAAAAAACWR8gFAAAAAAAAyyPkAgAAAAAAgOU5PeQaP368bDabw61KlSrOfhoAAAAAAADArkBuPGi1atW0atWq/3uSArnyNAAAAAAAAICkXAq5ChQooJCQkNx4aAAAAAAAACCDXJmT68CBAwoLC1O5cuXUq1cvHT16NMtlU1NTlZSU5HADAAAAAAAAssPpIVfDhg01Z84crVixQjNnzlR8fLyaNWumixcvZrr8pEmTFBgYaL+VLl3a2SUBAAAAAADAzTk95GrXrp26deummjVrKjo6Wl9//bXOnz+vhQsXZrr8mDFjdOHCBfvt2LFjzi4JAAAAAAAAbi7XZ4QvUqSIKlWqpIMHD2ba7+3tLW9v79wuAwAAAAAAAG4sV+bkutmlS5d06NAhhYaG5vZTAQAAAAAAIJ9yesg1YsQIxcbG6vDhw9q4caM6deokT09P9ezZ09lPBQAAAAAAAEjKhdMVjx8/rp49e+rcuXMqUaKEmjZtqs2bN6tEiRLOfioAAAAAAABAUi6EXAsWLHD2QwIAAAAAAAC3letzcgEAAAAAAAC5jZALAAAAAAAAlkfIBQAAAAAAAMsj5AIAAAAAAIDlEXIBAAAAAADA8gi5AAAAAAAAYHmEXAAAAAAAALA8Qi4AAAAAAABYHiEXAAAAAAAALI+QCwAAAAAAAJZHyAUAAAAAAADLI+QCAAAAAACA5RFyAQAAAAAAwPIIuQAAAAAAAGB5hFwAAAAAAACwPEIuAAAAAAAAWB4hFwAAAAAAACyPkAsAAAAAAACWR8gFAAAAAAAAyyPkAgAAAAAAgOURcgEAAAAAAMDyCLkAAAAAAABgeYRcAAAAAAAAsDxCLgAAAAAAAFgeIRcAAAAAAAAsj5ALAAAAAAAAlkfIBQAAAAAAAMsj5AIAAAAAAIDlEXIBAAAAAADA8gq4ugAAAIA78R33jqtLQB67MmGEq0sAAAAWw0guAAAAAAAAWB4hFwAAAAAAACyPkAsAAAAAAACWR8gFAAAAAAAAyyPkAgAAAAAAgOURcgEAAAAAAMDyCri6AAAAAOCvxHfcO64uAXnoyoQRri4BAOAkjOQCAAAAAACA5RFyAQAAAAAAwPIIuQAAAAAAAGB5hFwAAAAAAACwPEIuAAAAAAAAWB4hFwAAAAAAACwv10KuGTNmqGzZsvLx8VHDhg31448/5tZTAQAAAAAAIJ8rkBsP+tlnn2n48OH64IMP1LBhQ02bNk3R0dHav3+/goODc+MpAQAAAMBSfMe94+oSkMeuTBjh6hIAt5YrI7mmTJmiAQMGqF+/frrvvvv0wQcfyM/PT5988kluPB0AAAAAAADyOaeP5Lp69aq2b9+uMWPG2Ns8PDwUFRWlTZs2ZVg+NTVVqamp9vsXLlyQJCUlJTm7NJcyqSmuLgF5zJXHMMdb/sKxhrzCsYa8xPGGvMKxhrzkbr9zgbyS/t4xxtx2OZu50xLZdPLkSYWHh2vjxo1q1KiRvX3UqFGKjY3Vli1bHJYfP368JkyY4MwSAAAAAAAA4GaOHTumUqVKZdmfK3NyZceYMWM0fPhw+/20tDQlJiaqWLFistlsLqwMOZWUlKTSpUvr2LFjCggIcHU5cHMcb8grHGvISxxvyCsca8grHGvISxxv7sMYo4sXLyosLOy2yzk95CpevLg8PT2VkJDg0J6QkKCQkJAMy3t7e8vb29uhrUiRIs4uCy4UEBDABwryDMcb8grHGvISxxvyCsca8grHGvISx5t7CAwMvOMyTp943svLS3Xr1tXq1avtbWlpaVq9erXD6YsAAAAAAACAs+TK6YrDhw9Xnz59VK9ePTVo0EDTpk1TcnKy+vXrlxtPBwAAAAAAgHwuV0KuHj166MyZM3r99dd1+vRp3X///VqxYoVKliyZG0+Hvyhvb2+NGzcuw+moQG7geENe4VhDXuJ4Q17hWENe4VhDXuJ4y3+cfnVFAAAAAAAAIK85fU4uAAAAAAAAIK8RcgEAAAAAAMDyCLkAAAAAAABgeYRcAAAAAAAAsDxCLuTYlStX9MUXX+jixYsZ+pKSkvTFF18oNTXVBZUBAAAAAID8gpALOfavf/1L7733nvz9/TP0BQQEaPr06froo49cUBkAAAAyc+rUKX366af6+uuvdfXqVYe+5ORkvfHGGy6qDAByZuvWrdqyZUuG9i1btmjbtm0uqAh5yWaMMa4uAtbWoEEDvfbaa+rQoUOm/V9++aXeeOMN/fjjj3lcGdzZyZMnFRYWdttlFi9erM6dO+dRRXBn165d05EjRxQZGSlPT09dvnxZS5cuVVpamjp06KDAwEBXlwgAd23r1q1q06aN0tLSdO3aNYWHh2vp0qWqVq2aJCkhIUFhYWG6ceOGiyuFu9m2bZsWLlyoo0ePZghXFy9e7KKq4G4aNGigUaNGqWvXrg7tixcv1ttvv51pAAb3QciFHCtatKh27dqlMmXKZNp/9OhR1apVS3/88UceVwZ3VqlSJa1duzbToOvChQsaPHiwFi1apJSUFBdUB3dy7NgxNWvWTMeOHVPlypX11VdfqW3btjpx4oSMMQoKCtKPP/6o0NBQV5cKN3GncJ4fgsiphx56SKVLl9ZHH32k5ORkjR49WgsXLtTKlStVu3ZtQi7kigULFqh3796Kjo7Wd999pzZt2ujXX39VQkKCOnXqpNmzZ7u6RLiJwoULKy4uTuXKlXNoj4+PV82aNTOdZgfug9MVkWPXr1/XmTNnsuw/c+aMrl+/nocVIT8ICwtTixYtdOLECYf2b7/9VtWqVdPu3bv5Xxo4xcSJE3X//fdr586datOmjaKjo1WlShX98ccf+uOPP1S3bl29/fbbri4TbiQwMNB+++qrr+Th4eHQBuTU9u3b9fLLL8vDw0P+/v56//33NWLECLVu3Vpbt251dXlwU3//+981depULV++XF5eXnrvvfe0b98+de/ePcv/LAfuhbe3txISEjK0nzp1SgUKFHBBRchLjORCjj3wwAPq1KmTRo8enWn/pEmTtGzZMm3evDmPK4M7u3Llih5++GEdPXpUa9euVVBQkIYPH66PP/5YI0eO1IQJE1SwYEFXlwk3EBkZqc8//1z16tXTxYsXFRgYqDVr1qhFixaSpI0bN6p37946ePCgiyuFO/L399euXbsy/G80kBNBQUFau3atatas6dD+zjvvaOLEifrkk0/UtWtXRnLBqQoVKqRffvlFZcuWVbFixbR27VrVqFFDe/fuVatWrXTq1ClXlwg30bNnT506dUrLli2z/+fQ+fPn1bFjRwUHB2vhwoUurhC5iRgTOda/f38NHz5c1apV0yOPPOLQt3z5ck2cOFFTpkxxUXVwV76+vvrqq6/UoUMHNW/eXB4eHipQoIA2bNighg0buro8uJFTp07ZT4v19/eXn5+fwsPD7f0RERE6efKkq8oDgGyrXr26Nm7cmCHkGjFihNLS0tSzZ08XVQZ3VrRoUftpYuHh4dq9e7dq1Kih8+fP6/Llyy6uDu7knXfeUfPmzRUREaHatWtLknbu3KmSJUvq3//+t4urQ24j5EKOPfPMM1q3bp1iYmJUpUoVVa5cWZK0b98+/frrr+revbueeeYZF1cJd+Tr66svv/xSMTExWrdunXbs2KH77rvP1WXBzfj7++vKlSv2+9WqVZOvr6/9/sWLFxUcHOyK0gDgnvTu3VuxsbEaOHBghr5Ro0bJGKMPPvjABZXBnTVv3lwrV65UjRo11K1bNz3//PP6/vvvtXLlSrVu3drV5cGNhIeHKy4uTv/5z3+0a9cu+fr6ql+/furZsydneuQDnK4Ip1m4cKHmz5+vAwcOyBijSpUq6fHHH1f37t1dXRrcXGpqqh599FHt379fa9asUdmyZV1dEtxIy5Yt9cQTT+jpp5/OtH/69On65ptv9M033+RxZXBX06dPt/89evRojRw5UsWLF7e3DRs2zBVlAUCOJCYmKiUlRWFhYUpLS9PkyZO1ceNGVaxYUWPHjlXRokVdXSLcRHJysgoVKuTqMuAihFzIEzdu3JCnp6ery4AbqV27tmw2m/1+SkqK9u3bp6CgIIfJS3fs2OGK8uBGvv/+e6WkpKh9+/aZ9r/77ruqXr26oqOj87gyuKvIyMgs+2w2m3777bc8rAYAAGspXLiwunfvrv79+6tp06auLgd5jJALOfb2229nOem8JO3Zs0e9e/fWtm3b8rAquLsJEybc1XLjxo3L5UoAAABwJ56enjp16hSn+CPXLV26VHPmzNHXX3+tsmXLqn///urdu7d9jlW4N0Iu5Ji3t7cmTJigl19+OUPfP/7xD73++uuKiYnRZ5995oLqACBn2rRpo88//9x+dR4gL6X/M+3mkasAYEUeHh46ffo0IRfyzJkzZ/Tvf/9bc+bM0d69exUdHa3+/fsrJiZGBQowPbm78nB1AbC+zz77TOPGjdPf//53e9tvv/2mpk2bavLkyZo9ezYBF/LcsWPH1L9/f/Xv31/jx493dTmwsFWrVik1NdXVZSCfmTdvnmrUqCFfX1/5+vqqZs2aXBEKgOUR2CMvlShRQsOHD1dcXJymTJmiVatWqWvXrgoLC9Prr7/OVT3dFPElcqxjx476/PPP1b17d6WlpalYsWIaNWqUWrZsqZ9//lkhISGuLhFu6OaJmTNz4sQJzZ07V6+//jrHIABLmTJlil577TUNGTJETZo0kSRt2LBBAwcO1NmzZ/Xiiy+6uEK4u7S0NB0/flzSn6eYhYeHu7giuIvb/Zvsxo0beVgJ8oOEhATNnTtXc+bM0ZEjR9S1a1c99dRTOn78uN5++21t3rxZ3333navLhJNxuiKc5ssvv1TXrl0lSR988IH69u3r2oLg1m43MbMkXb16VadPn+YfTMgxTq9AXouMjNSECRPUu3dvh/a5c+dq/Pjxio+Pd1FlyC8SEhIUFhYmY4xCQkJ08uRJV5cEN+Dh4aH//e9/CgoKyrS/RYsWeVwR3NXixYs1e/Zsffvtt7rvvvv09NNP64knnlCRIkXsyxw6dEhVq1bV1atXXVcocgUjueA0jzzyiBYvXqwuXbpw5Sfkujv9yNu5c6fq1q2bR9XAndlsNk6vQJ46deqUGjdunKG9cePGOnXqlAsqgrvJKmRIl/5/4GlpaXlRDvIJm82mJk2a8J9GyHX9+vXTY489ph9++EH169fPdJmwsDC9+uqreVwZ8gIhF3Lsiy++cLjft29fTZw4UZcuXVLLli3t7TExMXlcGfIzQgk4izFGrVu3vuMEpTt27MijiuDuKlSooIULF+qVV15xaP/ss89UsWJFF1UFd3L+/HlNmzYtywtqnD9/XsOHD8/jquDuOIEIeeXUqVPy8/O77TK+vr5chd1NcboicszD487XL7DZbJw2hjy1a9cu1alTh+MOOTZhwoS7Wo5/KMFZ/ve//6lHjx6Kioqyz8n1ww8/aPXq1Vq4cKE6derk4gphdXc6DTv9dEW+Q+FMsbGxatKkCVe1Q67z9PTUqVOnGDWYTxFyAXBLhFwArGz79u2aOnWq9u7dK0mqWrWqXnrpJdWuXdvFlcEdEHLB1VJSUjLMhRQQEOCiauBumE81fyNGB2BJSUlJt+2/ePFiHlUCAM5Xt25dffrpp64uA25s06ZNCgoKkre3t/z9/RUaGuowKTPgbJcvX9aoUaO0cOFCnTt3LkM/oSqcialL8i9CLgCWVKRIkdt+eRlj+HKDU9SpU+eulmNOLjjLnUJ8RjvAGW4+7TX9+7JEiRJq3LixoqOjXVUW3NjIkSO1Zs0azZw5U08++aRmzJihEydOaNasWXrrrbdcXR7cTEhISJZ9BKrujZALgCWtWbPG1SUgn9i5c6deeuklFS5c2NWlIJ8oWrRopu3p4T3/OEdO/fHHH5Kk69evKzU1VYmJiTpx4oT27Nmj1atX67nnnnNxhXBHy5cv17x589SyZUv169dPzZo1U4UKFRQREaH//Oc/6tWrl6tLhBtZtGjRHa8kC/fEnFwAANwG8zogr1WoUEG///67Xn75ZfvE8zdr0aKFC6pCfvLxxx9rwIABatmypYKCgrRo0SJXlwQ3ULhwYe3Zs0dlypRRqVKltHjxYjVo0EDx8fGqUaOGLl265OoS4SaYeD5/YyQXAADAX8jevXv1//7f/9PEiRP1008/afLkyYqMjHR1WchHevXqZb8Cnq+vr4urgbsoV66c4uPjVaZMGVWpUkULFy5UgwYNtHz5cuaDg1Mxjid/YyQXnOr8+fNatGiRDh06pJEjRyooKEg7duxQyZIlFR4e7ury4EY8PT3vajlO60FOMZILrpKYmKg33nhDH3/8sZ555hm99tpr/BAEYFlTp06Vp6enhg0bplWrVqlDhw4yxujatWuaMmWKnn/+eVeXCDcRGxurJk2a2MN65C+EXHCauLg4RUVFKTAwUIcPH9b+/ftVrlw5jR07VkePHtW8efNcXSLciIeHhyIiItSnTx/Vrl07y+UeffTRPKwK7oiQC6528OBBjR49WrGxsRo7dqxeeOEFV5cEi+vcufNdLbd48eJcrgT52eHDh7Vjxw5VqFDBHuB7enryH+NwqpSUFF29etWhjQu4uDdCLjhNVFSU6tSpo8mTJ8vf31+7du1SuXLltHHjRj3++OM6fPiwq0uEG9m2bZs+/vhjLViwQJGRkerfv7969eqV5YTNwL3y8PDQm2++eceJ54cNG5ZHFcHd1a5dO8PVYY0xOnjwoC5fvswIVeSYh4eHunfvfsdTEWfPnp1HFSE/+/333xUaGipjjEJCQnTy5ElXlwSLu3z5skaNGqWFCxfq3LlzGfr5HnVvhFxwmsDAQO3YsUPly5d3CLmOHDmiypUrKyUlxdUlwg2lpKRo0aJFmj17tjZv3qwOHTroqaee0kMPPeTq0uAmypYtmyFwuJXNZtNvv/2WRxXB3Y0fP/62x9y4cePysBq4I0aoIi/d6Qp3xhglJSURPMBpBg8erDVr1uhvf/ubnnzySc2YMUMnTpzQrFmz9NZbb3ElTzdHyAWnCQ4O1rfffqvatWs7hFwrV65U//79dezYMVeXCDcXHx+vp556SrGxsTpz5gyXDYZTJCUl3XFY+9atW1W/fv08qggAcoaQC3nJw8ND06ZNU2BgYKb958+f1/Dhwwm54DRlypTRvHnz1LJlSwUEBNhPi/33v/+t//73v/r6669dXSJyETOxwWliYmL0xhtvaOHChZL+HNlw9OhRjR49Wl26dHFxdXBnx48f15w5czRnzhxdvnxZI0eO5Fx7OE10dLRWrlyZ6emK169f14QJE/T2229nmO8BuFejRo3SuHHjVKhQIVeXAgBO8dhjj2UZqiYkJGj48OF5XBHcWWJiosqVKyfpz/m3EhMTJUlNmzbVoEGDXFka8oCHqwuA+3j33Xd16dIlBQcH68qVK2rRooUqVKggf39/TZw40dXlwc1cvXpVn332mdq0aaOKFStqx44dmjZtmo4dO6a33nqLq6nAaS5evKioqCglJSU5tO/evVv169fXJ598oqVLl7qmOLild999V8nJya4uAwAASypXrpzi4+MlSVWqVLEPwli+fDlXKc4HOF0RTrdhwwbFxcXp0qVLqlOnjqKiolxdEtxQsWLF5O/vrz59+ujJJ5/M8n8HGdGFnDpz5oxatWolPz8/rVy5Uv7+/po8ebLGjRunzp07a8aMGVzwAE7FqWTIbR4eHpo7d26Wp4+li4mJyaOK4M48PDy0ZMkSBQUFydvbW/7+/goNDbWHDQkJCQoLC+N0RTjN1KlT5enpqWHDhmnVqlXq0KGDjDG6du2apkyZoueff97VJSIXEXIBsCQPj/8biJrZBM3GGNlsNv7BBKc4c+aMoqKiVLBgQXl7e+vAgQN6//331bVrV1eXBjdEyIXcdvN3aFb4DoWz3Hq8pf+7rUSJEmrcuLGio6P13HPPcbwh1xw5ckTbt29XhQoVVLNmTVeXg1xGyAWnmT59+m37hw0blkeVID+IjY29q+VatGiRy5Ugvzh79qyioqK0b98+7dy5U1WqVHF1SXBTHh4eevPNNzOdB07i+xSAtVy4cEHSn/NYpqamKjExUSdOnNCePXu0evVqffPNN5JEyAWnmTdvnnr06CFvb29XlwIXIOSC03h4eMjPz0/BwcG69bCy2Wz67bffXFQZ3NGt8yNlhdMV4UyJiYn2U7BXr17NaYrIFWXLls10hKrE9ymc4/r163ecu/LIkSOKiIjIo4qQn3388ccaMGCAWrZsqaCgIC1atMjVJcHiPD09derUKUZE51PMzAynefXVV/Xee+8pKipKf/vb31SyZElXlwQ3VqRIkSx/BN6M/xVETnXu3NnhfkBAgNatW6cGDRqoRo0a9vbFixfndWlwU4cPH3Z1CXBzXbp00f/+978sg66PPvpIL730kn0EDpCbevXqZT8WfX19XVwN3AHjePI3Qi44zd/+9jcNHDhQr776qipXrqyRI0dqxIgRDBNFrlizZo39b2OM2rdvr48++kjh4eEurAru6NaJmQMDAxUZGemiapCfXL16VfHx8SpfvjxXjIVTbd++XV26dNGiRYtUsGBBe/vp06f11FNPaf369XrrrbdcWCHyEx8fH/Xp08fVZcDNLFy4MMszOnr37p3H1SAvcboicsWOHTs0YsQIHThwQBMnTuSDBLnO399fu3btUrly5VxdCgDkyOXLlzV06FDNnTtXkvTrr7+qXLlyGjp0qMLDw/Xyyy+7uEJY3YEDB9SqVSvdf//9Wrx4sQoWLKjPPvtMzz33nKpVq6bZs2erfPnyri4TAO6Jh4eHSpUqJU9Pzwx9nPbv/gi54DRxcXEZ2pYtW6Z//OMfqlixorZv3+6CqpBfEHIBcBfPP/+8fvjhB02bNk1t27ZVXFycypUrp2XLlmn8+PH66aefXF0i3MDBgwfVqlUrVa9eXQEBAVq2bJn+9re/6aWXXrqr6QAA4K+KqxTnb4x9h9Pcf//9stls9nOgb/57586dLqwMAADrWLp0qT777DM98MADDmFDtWrVdOjQIRdWBndSoUIFrV27Vi1bttTvv/+uTZs2qXbt2q4uCwCAHCHkgtPEx8e7ugTkc/zPMwB3cObMmUz/9zk5OZnPOThVuXLltG7dOj344IN6+eWX9cUXXzCXKgDLi4iIyPRUReQPhFxwGi4zjbx06xXvUlJSNHDgQBUqVMihnSveAbCaevXq6auvvtLQoUMl/V+A/9FHH6lRo0auLA1uYvjw4Q73a9euraVLl6pVq1Zq2LChvX3KlCl5XRoA3LPvv/9eQ4YM0a5duzJMOn/hwgU1btxYH3zwgZo1a+aiCpEXCLngNF988cVt+2NiYvKoEuQHt17x7oknnnBRJQDgXH//+9/Vrl077dmzR9evX9d7772nPXv2aOPGjYqNjXV1eXADmc3r1qJFC4c+Rg0CsJpp06ZpwIABmV5VMTAwUM8++6ymTJlCyOXmmHgeTuPh4WH/++b5uNLv37hxwxVlAQBgOb/99psmTZqkXbt26dKlS6pTp45Gjx6tGjVquLo0AAD+kiIiIrRixQpVrVo10/59+/apTZs2Onr0aB5XhrzESC44TVpamv1vrnQHAED2pKWl6R//+Ie++OILXb16Va1atVJsbKx8fX1dXRrc3NmzZyVJxYsXd3ElAHDvEhISVLBgwSz7CxQooDNnzuRhRXAFjzsvAgAAgNw2ceJEvfLKKypcuLDCw8M1ffp0DR482NVlwU2dP39egwcPVvHixVWyZEmVLFlSxYsX15AhQ3T+/HlXlwcA2RYeHq7du3dn2R8XF6fQ0NA8rAiuwOmKyBWM5AIAIHsqVqyoESNG6Nlnn5UkrVq1Sg8//LCuXLniMCUAkFOJiYlq1KiRTpw4oV69etlP7dmzZ4/mz5+v0qVLa+PGjSpatKiLKwWAuzd06FCtXbtWW7dulY+Pj0PflStX1KBBAz344IOaPn26iypEXiDkgtMkJSXZ/y5VqpQ2bNigsmXL2tsymwAQAAD8ydvbWwcPHlTp0qXtbT4+Pjp48KBKlSrlwsrgbl544QWtXr1aq1atUsmSJR36Tp8+rTZt2qh169aaOnWqiyoEgOxLSEhQnTp15OnpqSFDhqhy5cqS/pyLa8aMGbpx44Z27NiR4XMP7oWQC07j4eFhvxKPMSbD30w8DwBA1jw9PXX69GmVKFHC3ubv76+4uDhFRka6sDK4m7Jly2rWrFmKjo7OtH/FihUaOHCgDh8+nLeFAUAOHTlyRIMGDdK3335rvxCazWZTdHS0ZsyYwfdpPkDIBae502XN0y9NDQAAMvLw8FC7du3k7e1tb1u+fLlatWqlQoUK2dsWL17sivLgRry9vXXo0KEsRwgeP35cFSpUUEpKSh5XBgDO8ccff+jgwYMyxqhixYqcfp2PcHVFOA0hFgAA965Pnz4Z2p544gkXVAJ3V7x4cR0+fDjLkCs+Pl5BQUF5XBUAOE/RokVVv359V5cBF2AkF5zu8uXLOnr0qK5everQXrNmTRdVBAAAgHT9+/fXoUOHtHLlSnl5eTn0paamKjo6WuXKldMnn3ziogoBALg3hFxwmjNnzqhfv3765ptvMu1nTi4AAADXO378uOrVqydvb28NHjxYVapUkTFGe/fu1fvvv6/U1FRt27bN4SIIAABYAacrwmleeOEFnT9/Xlu2bFHLli21ZMkSJSQk6M0339S7777r6vIAAACgP6+CvWnTJj333HMaM2aMw+TMDz30kP75z38ScAEALImRXHCa0NBQLVu2TA0aNFBAQIC2bdumSpUq6YsvvtDkyZO1YcMGV5cIAACAm/zxxx86cOCAJKlChQrMxQUAsDRGcsFpkpOTFRwcLOnPif7OnDmjSpUqqUaNGtqxY4eLqwMAAMCtihYtqgYNGri6DAAAnMLD1QXAfVSuXFn79++XJNWqVUuzZs3SiRMn9MEHHyg0NNTF1QEAAAAAAHfG6Ypwmk8//VTXr19X3759tX37drVt21aJiYny8vLSnDlz1KNHD1eXCAAAAAAA3BQhF3LN5cuXtW/fPpUpU0bFixd3dTkAAAAAAMCNEXIhV924cUOenp6uLgMAAAAAALg55uSC0/z222/q2bOnBg0apD/++EMxMTHy9vZW5cqVFRcX5+ryAAAAAACAG2MkF5ymTZs2+v333+Xv769Lly6pZMmSGjp0qD766CNduHBB33//vatLBAAAAAAAboqQC04TGBio9evXKyIiQkFBQdq6davq1Kmj/fv3q2HDhjp//ryrSwQAAAAAAG6K0xXhNBcvXlRoaKgCAwPl5+enIkWKSJKKFCmiixcvurY4AAAAAADg1gq4ugC4lxUrVigwMFBpaWlavXq1du/ezQguAAAAAACQ6zhdEU7j4ZH1wECbzaYbN27kYTUAAAAAACA/IeQCAAAAAACA5TEnFwAAAAAAACyPkAtOFRsbqw4dOqhChQqqUKGCYmJitH79eleXBQAAAAAA3BwhF5zm008/VVRUlPz8/DRs2DANGzZMvr6+at26tebPn+/q8gAAAAAAgBtjTi44TdWqVfXMM8/oxRdfdGifMmWKPvzwQ+3du9dFlQEAAAAAAHdHyAWn8fb21i+//KIKFSo4tB88eFDVq1dXSkqKiyoDAAAAAADujtMV4TSlS5fW6tWrM7SvWrVKpUuXdkFFAAAAAAAgvyjg6gLgPl566SUNGzZMO3fuVOPGjSVJP/zwg+bMmaP33nvPxdUBAAAAAAB3xumKcKolS5bo3Xfftc+/VbVqVY0cOVKPPvqoiysDAAAAAADujJALAAAAAAAAlsecXAAAAAAAALA85uSC0xQtWlQ2my3L/sTExDysBgAAAAAA5CeEXHCaadOmSZKMMRo0aJDeeOMNBQcHu7YoAAAAAACQLzAnF3KFv7+/du3apXLlyrm6FAAAAAAAkA8wJxcAAAAAAAAsj5ALueZ283MBAAAAAAA4E3NywWk6d+5s/zslJUUDBw5UoUKF7G2LFy92RVkAAAAAACAfIOSC0wQGBtr/fuKJJ1xYCQAAAAAAyG+YeB4AAAAAAACWx5xccKrr169r1apVmjVrli5evChJOnnypC5duuTiygAAAAAAgDtjJBec5siRI2rbtq2OHj2q1NRU/frrrypXrpyef/55paam6oMPPnB1iQAAAAAAwE0xkgtO8/zzz6tevXr6448/5Ovra2/v1KmTVq9e7cLKAAAAAACAu2PieTjN+vXrtXHjRnl5eTm0ly1bVidOnHBRVQAAAAAAID9gJBecJi0tTTdu3MjQfvz4cfn7+7ugIgAAAAAAkF8QcsFp2rRpo2nTptnv22w2Xbp0SePGjVP79u1dVxgAAAAAAHB7TDwPpzl+/Liio6NljNGBAwdUr149HThwQMWLF9e6desUHBzs6hIBAAAAAICbIuSCU12/fl0LFixQXFycLl26pDp16qhXr14OE9EDAAAAAAA4GyEXAAAAAAAALI+rK8Jp5s2bd9v+3r1751ElAAAAAAAgv2EkF5zGw8NDhQoVUsGCBXXrYWWz2ZSYmOiiygAAAAAAgLvj6opwmkcffVR+fn7q3bu3NmzYoD/++MN+I+ACAAAAAAC5iZALTrNkyRLt3LlTJUqUUIcOHdSkSRPNnTtXV65ccXVpAAAAAADAzXG6InKFMUYLFizQoEGD9Morr2jUqFGuLgkAAAAAALgxJp6H0/3www+aNWuWvv76a3Xu3FkdO3Z0dUkAAAAAAMDNMZILTjNt2jR9+OGH8vLy0oABA9SrVy8FBga6uiwAAAAAAJAPEHLBaTw8PFSqVCnVrVtXNpstQ//ixYtdUBUAAAAAAMgPOF0RTtO7d+9Mwy0AAAAAAIDcxkguAAAAAAAAWJ6HqwsAAAAAAAAAcorTFeE0derUuW3/jh078qgSAAAAAACQ3xBywWl+/vln+fn56emnn1ZAQICrywEAAAAAAPkIc3LBafbv36+RI0dq8+bNGjdunAYOHChPT09XlwUAAAAAAPIBQi443Zo1azRixAhdvnxZkydPVocOHVxdEgAAAAAAcHOEXMg18+bN06uvvqqKFSvq3XffVe3atV1dEgAAAAAAcFOEXHCa4cOHZ2hLTk7W/PnzlZKSomvXrrmgKgAAAAAAkB8w8Tyc5qeffsq0vV69enlcCQAAAAAAyG8YyQUAAAAAAADL83B1AQAAAAAAAEBOcboinGrbtm1auHChjh49qqtXrzr0LV682EVVAQAAAAAAd8dILjjNggUL1LhxY+3du1dLlizRtWvX9Msvv+j7779XYGCgq8sDAAAAAABujJALTvP3v/9dU6dO1fLly+Xl5aX33ntP+/btU/fu3VWmTBlXlwcAAAAAANwYIRec5tChQ3r44YclSV5eXkpOTpbNZtOLL76of/3rXy6uDgAAAAAAuDNCLjhN0aJFdfHiRUlSeHi4du/eLUk6f/68Ll++7MrSAAAAAACAm2PieThN8+bNtXLlStWoUUPdunXT888/r++//14rV65U69atXV0eAAAAAABwYzZjjHF1EXAPiYmJSklJUVhYmNLS0jR58mRt3LhRFStW1NixY1W0aFFXlwgAAAAAANwUIRcAAAAAAAAsj9MVkWNJSUl3tVxAQEAuVwIAAAAAAPIrRnIhxzw8PGSz2bLsN8bIZrPpxo0beVgVAAAAAADITxjJhRxbs2aN/W9jjNq3b6+PPvpI4eHhLqwKAAAAAADkJ4zkgtP5+/tr165dKleunKtLAQAAAAAA+YSHqwsAAAAAAAAAcoqQCwAAAAAAAJZHyIVccbuJ6AEAAAAAAJyNieeRY507d3a4n5KSooEDB6pQoUIO7YsXL87LsgAAAAAAQD5CyIUcCwwMdLj/xBNPuKgSAAAAAACQX3F1RQAAAAAAAFgec3IBAAAAAADA8gi5AAAAAAAAYHmEXAAAAAAAALA8Qi4AAAAAAABYHiEXAAAAAAAALI+QCwAAAAAAAJZHyAUAAAAAAADLI+QCAAAAAACA5RFyAQAAAAAAwPL+P0t2usBtEwNFAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "feature_importances = pd.DataFrame({\n", + " \"Feature\": X.columns,\n", + " \"Importance\": cat.feature_importances_\n", + "})\n", + "\n", + "sorted_importances = feature_importances.sort_values(by='Importance', ascending=False)\n", + "fig, ax = plt.subplots(figsize=(15, 5))\n", + "ax.bar(x=sorted_importances['Feature'], height=sorted_importances['Importance'], color='#087E8B')\n", + "plt.title('Feature importances obtained from coefficients', size=16)\n", + "plt.xticks(rotation='vertical')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO] R2 is: 0.1468541310631024\n", + "[INFO] MAE is: 0.12247235689525296\n" + ] + } + ], + "source": [ + "from sklearn.metrics import r2_score, mean_absolute_error\n", + "from sklearn.metrics import precision_score\n", + "\n", + "y_pred = cat.predict(X_test)\n", + "precision = r2_score(y_test, y_pred)\n", + "score = mean_absolute_error(y_test, y_pred)\n", + "\n", + "print(\"[INFO] R2 is: \", precision)\n", + "print(\"[INFO] MAE is: \", score)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [], + "source": [ + "cat.save_model('bad_model.pickle')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "recsys", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15 (default, Nov 24 2022, 09:04:07) \n[Clang 14.0.6 ]" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "ece8b2452d70fcebba9784c14e274780c494a7a60f6f43c99bb5383ad948cc6f" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/codes/kpgz.xls b/codes/kpgz.xls new file mode 100755 index 0000000..7876ea6 Binary files /dev/null and b/codes/kpgz.xls differ diff --git a/codes/okpd.xls b/codes/okpd.xls new file mode 100755 index 0000000..02ed9bf Binary files /dev/null and b/codes/okpd.xls differ diff --git a/codes/okpd.xlsx b/codes/okpd.xlsx new file mode 100755 index 0000000..0bdc6dc Binary files /dev/null and b/codes/okpd.xlsx differ diff --git "a/data/TenderHack \320\234\320\276\321\201\320\272\320\262\320\260 \320\234\320\265\321\202\320\276\320\264\320\270\321\207\320\272\320\260.pdf" "b/data/TenderHack \320\234\320\276\321\201\320\272\320\262\320\260 \320\234\320\265\321\202\320\276\320\264\320\270\321\207\320\272\320\260.pdf" new file mode 100755 index 0000000..158b19c Binary files /dev/null and "b/data/TenderHack \320\234\320\276\321\201\320\272\320\262\320\260 \320\234\320\265\321\202\320\276\320\264\320\270\321\207\320\272\320\260.pdf" differ diff --git "a/data/TenderHack_\320\234\320\276\321\201\320\272\320\262\320\260_train_data.xls" "b/data/TenderHack_\320\234\320\276\321\201\320\272\320\262\320\260_train_data.xls" new file mode 100755 index 0000000..0c78619 Binary files /dev/null and "b/data/TenderHack_\320\234\320\276\321\201\320\272\320\262\320\260_train_data.xls" differ diff --git "a/data/TenderHack_\320\234\320\276\321\201\320\272\320\262\320\260_train_data.xlsx" "b/data/TenderHack_\320\234\320\276\321\201\320\272\320\262\320\260_train_data.xlsx" new file mode 100755 index 0000000..2278510 Binary files /dev/null and "b/data/TenderHack_\320\234\320\276\321\201\320\272\320\262\320\260_train_data.xlsx" differ diff --git a/functions/fit_eval_funcs.py b/functions/fit_eval_funcs.py new file mode 100755 index 0000000..d372b50 --- /dev/null +++ b/functions/fit_eval_funcs.py @@ -0,0 +1,66 @@ +from typing import List +from catboost import cv +from catboost import Pool +from catboost import CatBoostRegressor +from sklearn.metrics import r2_score, mean_absolute_error +import pandas as pd + + +def train_and_validate_catboost_cv(features: pd.DataFrame, target: pd.Series): + """Train and return the result""" + cat_features = features.select_dtypes('object').columns.to_list() + params = { + 'loss_function': 'MAE', + 'iterations': 1000, + 'custom_loss': 'MAE', + 'random_seed': 42, + 'learning_rate': 0.1 + } + + result = cv( + params=params, + pool=Pool(features, label=target, cat_features=cat_features), + fold_count=5, + shuffle=True, + partition_random_seed=0, + plot=True, + verbose=1 + ) + return result + +def eval_model(model: CatBoostRegressor, X_val: pd.DataFrame, y_val: pd.Series) -> dict: + """Evaluate the model and return the R2 and MAE scores""" + predictions = model.predict(X_val) + r2 = r2_score(y_val, predictions) + mae = mean_absolute_error(y_val, predictions) + return {'R2': r2, 'MAE': mae} + +def train_and_validate_catboost(X_train: pd.DataFrame, X_val: pd.DataFrame, + y_train: pd.DataFrame, y_val: pd.DataFrame, + loss_function: str = 'MAE', custom_metric: str = 'MAE', + iterations: int = 300, lr: float = 0.1, + verbose: int = 1, show_score: bool = True, + use_text_features: bool = False, text_features: List[str] = ['Наименование КС'], + use_gpu: bool=True): + """Fit model on train data and return the model and the score for validation data""" + # if (not use_text_features) and text_features: + # raise AttributeError("either pass in text features or turn off use_text_features argument") + cat_features = X_train.select_dtypes('object').columns.to_list() + + model = CatBoostRegressor( + iterations=iterations, + learning_rate=lr, + loss_function=loss_function, + custom_metric=custom_metric, + task_type="GPU" if use_gpu else "CPU", + devices='0:1' + ) + model.fit( + X_train, y_train, + cat_features=cat_features, + text_features=text_features if use_text_features else None, + verbose=verbose) + scores = eval_model(model, X_val, y_val) + if show_score: + print(pd.DataFrame(scores, index=['Score'])) + return model, scores \ No newline at end of file diff --git a/functions/preprocessing.py b/functions/preprocessing.py new file mode 100755 index 0000000..4a68d8b --- /dev/null +++ b/functions/preprocessing.py @@ -0,0 +1,96 @@ +from typing import List +from dataclasses import dataclass +from sklearn.model_selection import train_test_split +from functions.word_preprocessing import words2vectors, code2words +from functions.utils import * +import pandas as pd + +from pandarallel import pandarallel +pandarallel.initialize() + + +@dataclass +class TrainValData: + """Store the train and valid data in specified dataclasses""" + train: pd.DataFrame + valid: pd.DataFrame + + +def unite_cols(data: pd.DataFrame, col1: str, col2: str) -> pd.Series: + """Unite col1 and col2 columns into 'code' and drop the rest""" + data['code'] = data[col1].combine_first(data[col2]) + data.drop([col1, col2], axis=1, inplace=True) + return data + +def date2features(data: pd.DataFrame, time_col: str = 'Дата'): + """Append datetime features to dataframe""" + data['time'] = pd.to_datetime(data[time_col]) + data['hour'] = data['time'].dt.hour.astype(object) + data['minute'] = data['time'].dt.minute.astype(object) + data['day'] = data['time'].dt.day.astype(object) + data['day_of_week'] = data['time'].dt.day_of_week.astype(object) + data['month'] = data['time'].dt.month.astype(object) + data['quarter'] = data['time'].dt.quarter.astype(object) + data['year'] = data['time'].dt.year.astype(object) + data.drop(time_col, axis=1, inplace=True) + data.drop('time', axis=1, inplace=True) + return data + +def preprocess_data(data: pd.DataFrame, extract_datetime_features: bool, vectorize_features: bool) -> pd.DataFrame: + """Unite classifier columns in one and append datetime features""" + data = unite_cols(data, 'ОКПД 2', 'КПГЗ') + if extract_datetime_features: + data = date2features(data) + if vectorize_features: + print('[INFO] Loading classifier database...') + code_base = load_classifier_database() + print('[INFO] Starting code to words process...') + code_names = code2words(data['code'], code_base) + print('[INFO] Transform words to vectors...') + code_vector = words2vectors(code_names) + ks_names_vector = words2vectors(data['Наименование КС']) + print('[INFO] Unite vectors...') + vector = code_vector + ks_names_vector + data = pd.concat([data.reset_index(drop=True), vector], axis=1) + data.drop(['code', 'Наименование КС'], axis=1, inplace=True) + return data + +def get_train_val_data_for_catboost(data: pd.DataFrame, + test_size=0.2, + use_date_features: bool = False, + vectorize_features: bool = False, + status_columns: List[str] = ['Завершена']): + """ + Return preprocessed X_train, X_val, y_train, y_val and scaler for inverse transform + + Steps: + 1. Filter out only specified status columns + 2. Calculate target drawdown + 3. Apply preprocessing to data + 4. Form feature and target data + 5. Perform train / val splitting + 6. Return feature and target data + """ + # Filter out specified status columns + data = data[data['Статус'].isin(status_columns)].reset_index(drop=True) + data['Процент падения'] = data.apply(lambda x: apply_price_drawdown(x, 'НМЦК', 'Итоговая цена'), axis=1) + + # Data preprocessing + data = preprocess_data( + data, + extract_datetime_features=use_date_features, + vectorize_features=vectorize_features) + + print('[INFO] X y split...') + # Split on features and target variables + X = data.drop(['id', 'Статус', 'Итоговая цена', 'Участники', 'Ставки', 'Процент падения'], axis=1) + y = data[['Участники', 'Процент падения']] + + # Split the data + X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=42) + features = TrainValData(train=X_train, valid=X_val) + drawdown_target = TrainValData(train=y_train['Процент падения'], valid=y_val['Процент падения']) + num_competitors_target = TrainValData(train=y_train['Участники'], valid=y_val['Участники']) + + print('[INFO] Done...') + return features, drawdown_target, num_competitors_target diff --git a/functions/utils.py b/functions/utils.py new file mode 100755 index 0000000..3aae716 --- /dev/null +++ b/functions/utils.py @@ -0,0 +1,42 @@ +import pandas as pd + + +def apply_price_drawdown(x: pd.DataFrame, initial_price: str, final_price: str): + """Return percent change between Initial and Final prices for method `apply` in pandas""" + if x[final_price] != 0: + return 100 * ((x[initial_price] - x[final_price]) / x[initial_price]) + else: + return 100 + +def load_classifier_database() -> pd.DataFrame: + """ + Load the classificator code base with their corresponded names, + concatenate them and return as the dataframe + """ + okpd_data = pd.read_excel("codes/okpd.xlsx") + kpgz_data = pd.read_excel("codes/kpgz.xls") + + kpgz_data.rename(columns={ + 'Код КПГЗ': 'Код', + 'Наименование классификации предметов государственного заказа (КПГЗ)': 'Название' + }, inplace=True) + return pd.concat([okpd_data[['Код', 'Название']], kpgz_data[['Код', 'Название']]], axis=0) + +def find_code_name_in_dict(code: str, code_base: pd.DataFrame) -> str: + """ + For each code return its name. + Crop code while the name will be received. + If there is no code in base, return empty string""" + result = "" + while len(code.split(".")) > 1: + try: + # Find name by its code + result = code_base.loc[code_base['Код'] == code]['Название'] + result = str(result.values[0]).strip() # strip text + break + except: + # if current code not in base, crop out the last 2 digits + code = ".".join(code.split(".")[:-1]) # get rid of last sub code + pass + return result + \ No newline at end of file diff --git a/functions/word_preprocessing.py b/functions/word_preprocessing.py new file mode 100755 index 0000000..625d4b9 --- /dev/null +++ b/functions/word_preprocessing.py @@ -0,0 +1,126 @@ +from typing import List +from nltk.corpus import stopwords +from pymorphy2 import MorphAnalyzer +from functions.utils import find_code_name_in_dict + +import nltk +import re +import gensim.downloader as api +import numpy as np +import pandas as pd +import warnings + +warnings.filterwarnings('ignore') +nltk.download('stopwords') + +patterns = "[A-Za-z0-9!#$%&'()*+,./:;<=>?@[\]^_`{|}~—\"\-]+" +stopwords_ru = stopwords.words("russian") +morph = MorphAnalyzer() +vectorizer = api.load("word2vec-ruscorpora-300") + + +def preprocessing(sent: str): + """Return negative and positive words. + If there is the word 'Кроме', then the next words + will be threated as negative + + Args: + sent (str): sentence + + Returns: + str: negative words + str: positive words + """ + minus = "" + sent = sent.lower() + sent = sent.replace(",", "") + sent = sent.replace(";", "") + if "кроме" in sent: + if sent[sent.find("кроме")-1] == "(": + minus = sent[sent.find("кроме")+5:sent.find(")")] + sent = sent[:sent.find("кроме")-1] + sent[sent.find(")")+2:] + else: + minus = sent[sent.find("кроме")+5:] + sent = sent[:sent.find("кроме")-1] + sent = sent.replace("(", "") + sent = sent.replace(")", "") + return minus, sent + +def lemmatize(doc) -> List[str]: + """Remove stop words, transform to normal form + and return the TAG of word + + Args: + doc (str): sentence + + Returns: + set: set of tokens + """ + doc = re.sub(patterns, ' ', doc) + tokens = [morph.normal_forms(token.strip())[0] for token in doc.split() if token not in stopwords_ru] + tokens = [tkn + "_" + morph.parse(tkn)[0].tag.POS if morph.parse(tkn)[0].tag.POS != None else tkn + "_NONE" for tkn in tokens] + return list(set(tokens)) + +def get_vector(pos_tokens, neg_tokens): + """Return the sum for positive vectors, + then subtract negative vector + + Args: + rows (List[str]): Positive tokens + minuses (List[str]): Negative tokens + + Returns: + np.ndarray: vector + """ + pos = np.sum([vectorizer[a] for a in pos_tokens if a in vectorizer.key_to_index], axis=0) + if neg_tokens: + neg = np.sum([vectorizer[a] for a in neg_tokens if a in vectorizer.key_to_index], axis=0) + return (pos - neg).astype(np.float32) + if len(pos.shape) == 0: + return np.array([0] * 300).astype(np.float32) + return pos.astype(np.float32) + +def vectorize(sentence: str) -> np.ndarray: + """ + Take sentence and perform: + 1. Preprocessing + 2. Lemmatize + 3. Get vector from + + Args: + sentence (str): sentence that we need to proceeed + + Returns: + np.ndarray: word vector + """ + negative_string, positive_string = preprocessing(sentence) + positive_tokens, negative_tokens = lemmatize(positive_string), lemmatize(negative_string) + vector = get_vector(positive_tokens, negative_tokens) + # If our vector is the number -> return zeros like vector + if isinstance(vector, np.float32): + return np.zeros(shape=(300,), dtype=np.float32) + return vector + +def code2words(col: pd.Series, code_base: pd.DataFrame): + """ + Transform the "code" column to their names + Function search for the code in classifier base and return its name as string + + args: + col: (pd.Series) column with code classifiers + code_base: (pd.DataFrame) data base, that contains all codes and their corresponding names ('Код', 'Названия') + + return: + pd.Series: code names as string + """ + splitted_codes = col.str.split(";") + splitted_codes = splitted_codes.parallel_apply(lambda row: list(set(row))) # Get rid of dublicates + splitted_codes = splitted_codes.explode() # unzip list of codes + words = splitted_codes.parallel_apply(lambda x: find_code_name_in_dict(x, code_base=code_base)) + words = words.groupby(words.index).apply(lambda x: " ".join(x)) # join all words in one sentence + return words + +def words2vectors(col: pd.Series): + """Transform text columns to vectors and return vectors as the dataframe""" + vectors = col.parallel_apply(vectorize) + return pd.DataFrame(vectors.to_list(), columns=np.arange(0, 300)) diff --git a/models/drawdown/drawdown_dt_features_model.cbm b/models/drawdown/drawdown_dt_features_model.cbm new file mode 100755 index 0000000..8c8bdb4 Binary files /dev/null and b/models/drawdown/drawdown_dt_features_model.cbm differ diff --git a/models/drawdown/drawdown_text_processing_model.cbm b/models/drawdown/drawdown_text_processing_model.cbm new file mode 100755 index 0000000..8a2f220 Binary files /dev/null and b/models/drawdown/drawdown_text_processing_model.cbm differ diff --git a/models/drawdown/drawdown_vector_model.cbm b/models/drawdown/drawdown_vector_model.cbm new file mode 100755 index 0000000..29ec0d9 Binary files /dev/null and b/models/drawdown/drawdown_vector_model.cbm differ diff --git a/models/num_competitors/num_comp_dt_features_model.cbm b/models/num_competitors/num_comp_dt_features_model.cbm new file mode 100755 index 0000000..ef11305 Binary files /dev/null and b/models/num_competitors/num_comp_dt_features_model.cbm differ diff --git a/models/num_competitors/num_comp_text_processing_model.cbm b/models/num_competitors/num_comp_text_processing_model.cbm new file mode 100755 index 0000000..bdc4e29 Binary files /dev/null and b/models/num_competitors/num_comp_text_processing_model.cbm differ diff --git a/models/num_competitors/num_comp_vector_model.cbm b/models/num_competitors/num_comp_vector_model.cbm new file mode 100755 index 0000000..56b0262 Binary files /dev/null and b/models/num_competitors/num_comp_vector_model.cbm differ