loose dependencies, update notebooks, update code to use newest featu…

…res, black + flake
damianhorna · mateusz-wozny · Nov 20, 2023 · Nov 20, 2023 · May 17, 2024 · May 17, 2024
commit 9d872b6cef8a95cef1f70d8e57e2c7a3ad5b848f
diff --git a/examples/datasets/analysis.ipynb b/examples/datasets/analysis.ipynb
diff --git a/examples/ensemble/SOUPBagging.ipynb b/examples/ensemble/SOUPBagging.ipynb
@@ -1,179 +1,165 @@
 {
- "cells": [
- {
- "cell_type": "markdown",
- "source": [
- "Unzip datasets and prepare data:"
- ],
- "metadata": {
- "collapsed": false
- }
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "outputs": [],
- "source": [
- "import os\n",
- "\n",
- "import seaborn as sns\n",
- "from imblearn.metrics import geometric_mean_score\n",
- "from sklearn.model_selection import cross_val_score, ShuffleSplit\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.neighbors import KNeighborsClassifier\n",
- "from sklearn.pipeline import make_pipeline\n",
- "from sklearn.preprocessing import StandardScaler\n",
- "\n",
- "from multi_imbalance.datasets import load_datasets\n",
- "from multi_imbalance.ensemble.soup_bagging import SOUPBagging\n",
- "from multi_imbalance.utils.data import load_arff_dataset\n",
- "from multi_imbalance.utils.min_int_maj import maj_int_min\n",
- "\n",
- "%matplotlib inline\n",
- "sns.set_style(\"darkgrid\")"
- ],
- "metadata": {
- "collapsed": false,
- "pycharm": {
- "name": "#%%\n"
- }
- }
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[[0.49 0.29 0.48 0.5 0.56 0.24 0.35]\n",
- " [0.07 0.4 0.48 0.5 0.54 0.35 0.44]\n",
- " [0.56 0.4 0.48 0.5 0.49 0.37 0.46]\n",
- " [0.59 0.49 0.48 0.5 0.52 0.45 0.36]\n",
- " [0.23 0.32 0.48 0.5 0.55 0.25 0.35]]\n",
- "[0 0 0 0 0]\n"
- ]
- }
- ],
- "source": [
- "dataset = load_datasets()[\"new_ecoli\"]\n",
- "\n",
- "X, y = dataset.data, dataset.target\n",
- "print(X[:5])\n",
- "print(y[:5])"
- ],
- "metadata": {
- "collapsed": false,
- "pycharm": {
- "name": "#%%\n"
- }
- }
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "outputs": [],
- "source": [
- "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)"
- ],
- "metadata": {
- "collapsed": false,
- "pycharm": {
- "name": "#%%\n"
- }
- }
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "outputs": [
- {
- "data": {
- "text/plain": "0.7550748879971014"
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "clf = KNeighborsClassifier()\n",
- "vote_classifier = SOUPBagging(\n",
- " clf, n_classifiers=50, maj_int_min=maj_int_min[\"new_ecoli\"]\n",
- ")\n",
- "vote_classifier.fit(X_train, y_train)\n",
- "y_pred = vote_classifier.predict(X_test)\n",
- "geometric_mean_score(y_test, y_pred, correction=0.001)"
- ],
- "metadata": {
- "collapsed": false,
- "pycharm": {
- "name": "#%%\n"
- }
- }
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/plutasnyy/anaconda3/envs/multi-imbalance/lib/python3.7/site-packages/sklearn/base.py:197: FutureWarning: From version 0.24, get_params will raise an AttributeError if a parameter cannot be retrieved as an instance attribute. Previously it would return None.\n",
- " FutureWarning)\n",
- "/home/plutasnyy/anaconda3/envs/multi-imbalance/lib/python3.7/site-packages/sklearn/base.py:197: FutureWarning: From version 0.24, get_params will raise an AttributeError if a parameter cannot be retrieved as an instance attribute. Previously it would return None.\n",
- " FutureWarning)\n",
- "/home/plutasnyy/anaconda3/envs/multi-imbalance/lib/python3.7/site-packages/sklearn/base.py:197: FutureWarning: From version 0.24, get_params will raise an AttributeError if a parameter cannot be retrieved as an instance attribute. Previously it would return None.\n",
- " FutureWarning)\n",
- "/home/plutasnyy/anaconda3/envs/multi-imbalance/lib/python3.7/site-packages/sklearn/base.py:197: FutureWarning: From version 0.24, get_params will raise an AttributeError if a parameter cannot be retrieved as an instance attribute. Previously it would return None.\n",
- " FutureWarning)\n",
- "/home/plutasnyy/anaconda3/envs/multi-imbalance/lib/python3.7/site-packages/sklearn/base.py:197: FutureWarning: From version 0.24, get_params will raise an AttributeError if a parameter cannot be retrieved as an instance attribute. Previously it would return None.\n",
- " FutureWarning)\n"
- ]
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false
+ },
+ "source": [
+ "Unzip datasets and prepare data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "import seaborn as sns\n",
+ "from imblearn.metrics import geometric_mean_score\n",
+ "from sklearn.model_selection import cross_val_score, ShuffleSplit\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.neighbors import KNeighborsClassifier\n",
+ "from sklearn.pipeline import make_pipeline\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "\n",
+ "from multi_imbalance.datasets import load_datasets\n",
+ "from multi_imbalance.ensemble.soup_bagging import SOUPBagging\n",
+ "from multi_imbalance.utils.data import load_arff_dataset\n",
+ "from multi_imbalance.utils.min_int_maj import maj_int_min\n",
+ "\n",
+ "%matplotlib inline\n",
+ "sns.set_style(\"darkgrid\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[0.49 0.29 0.48 0.5 0.56 0.24 0.35]\n",
+ " [0.07 0.4 0.48 0.5 0.54 0.35 0.44]\n",
+ " [0.56 0.4 0.48 0.5 0.49 0.37 0.46]\n",
+ " [0.59 0.49 0.48 0.5 0.52 0.45 0.36]\n",
+ " [0.23 0.32 0.48 0.5 0.55 0.25 0.35]]\n",
+ "[0 0 0 0 0]\n"
+ ]
+ }
+ ],
+ "source": [
+ "dataset = load_datasets()[\"new_ecoli\"]\n",
+ "\n",
+ "X, y = dataset.data, dataset.target\n",
+ "print(X[:5])\n",
+ "print(y[:5])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.7784345944274128"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "clf = KNeighborsClassifier()\n",
+ "vote_classifier = SOUPBagging(\n",
+ " clf, n_classifiers=50, maj_int_min=maj_int_min[\"new_ecoli\"]\n",
+ ")\n",
+ "vote_classifier.fit(X_train, y_train)\n",
+ "y_pred = vote_classifier.predict(X_test)\n",
+ "geometric_mean_score(y_test, y_pred, correction=0.001)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[0.85148515 0.8019802 0.89108911 0.84158416 0.86138614]\n"
+ ]
+ }
+ ],
+ "source": [
+ "X, y = load_arff_dataset(f\"{os.getcwd()}/../../data/arff/new_ecoli.arff\")\n",
+ "clf = make_pipeline(StandardScaler(), SOUPBagging())\n",
+ "cv = ShuffleSplit(n_splits=5, test_size=0.3, random_state=0)\n",
+ "print(cross_val_score(clf, X, y, cv=cv))"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.2"
+ }
  },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[0.85148515 0.8019802 0.89108911 0.84158416 0.86138614]\n"
- ]
- }
- ],
- "source": [
- "X, y = load_arff_dataset(f\"{os.getcwd()}/../../data/arff/new_ecoli.arff\")\n",
- "clf = make_pipeline(StandardScaler(), SOUPBagging())\n",
- "cv = ShuffleSplit(n_splits=5, test_size=0.3, random_state=0)\n",
- "print(cross_val_score(clf, X, y, cv=cv))"
- ],
- "metadata": {
- "collapsed": false,
- "pycharm": {
- "name": "#%%\n"
- }
- }
- }
- ],
- "metadata": {
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.6"
- },
- "kernelspec": {
- "name": "python3",
- "language": "python",
- "display_name": "Python 3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat": 4,
+ "nbformat_minor": 0
 }