From 1e71c48c3c60c192e7ae848f6df4bca0b985ec56 Mon Sep 17 00:00:00 2001 From: damian-horna Date: Thu, 29 Apr 2021 15:06:22 +0200 Subject: [PATCH 1/5] Fix StaticSMOTE pipeline API --- multi_imbalance/resampling/static_smote.py | 19 ++++++++++++++++--- .../resampling/tests/test_static_smote.py | 2 +- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/multi_imbalance/resampling/static_smote.py b/multi_imbalance/resampling/static_smote.py index 0983ade..a4b209f 100644 --- a/multi_imbalance/resampling/static_smote.py +++ b/multi_imbalance/resampling/static_smote.py @@ -2,9 +2,10 @@ import numpy as np from imblearn.over_sampling import SMOTE +from imblearn.base import BaseSampler -class StaticSMOTE: +class StaticSMOTE(BaseSampler): """ Static SMOTE implementation: @@ -13,9 +14,21 @@ class StaticSMOTE: procedure based on sensitivity for multi-class problems. Pattern Recognit. 44, 1821–1833 (2011) """ + def __init__(self): + super().__init__() + self._sampling_type = 'over-sampling' - # TODO add docstring - def fit_transform(self, X, y): + def _fit_resample(self, X, y): + """ + Performs resampling + + :param X: + two dimensional numpy array (number of samples x number of features) with float numbers + :param y: + one dimensional numpy array with labels for rows in X + :return: + Resampled X and y as numpy arrays + """ cnt = Counter(y) min_class = min(cnt, key=cnt.get) X_original, y_original = X.copy(), y.copy() diff --git a/multi_imbalance/resampling/tests/test_static_smote.py b/multi_imbalance/resampling/tests/test_static_smote.py index 4efba2c..687673e 100644 --- a/multi_imbalance/resampling/tests/test_static_smote.py +++ b/multi_imbalance/resampling/tests/test_static_smote.py @@ -12,7 +12,7 @@ def test_static_smote(): y = np.array([1] * 100 + [2] * 30 + [3] * 20) ssm = StaticSMOTE() - X_resampled, y_resampled = ssm.fit_transform(X, y) + X_resampled, y_resampled = ssm.fit_resample(X, y) cnt = Counter(y_resampled) assert cnt[1] == 100 assert cnt[2] == 60 From 493c4b533ff59f3853ba3fdadab9b0d6eacc1052 Mon Sep 17 00:00:00 2001 From: damian-horna Date: Thu, 29 Apr 2021 15:59:50 +0200 Subject: [PATCH 2/5] Update setup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2db29a9..6d42b98 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ ], install_requires=[ "numpy>=1.17.0", - "scikit-learn==0.21.3", + "scikit-learn>=0.21.3", "pandas>=0.25.1", "pytest>=5.1.2", "imbalanced-learn>=0.6.1", From 091fa2c0b30c9d96c13dd57119bdd2847df8107a Mon Sep 17 00:00:00 2001 From: damian-horna Date: Thu, 29 Apr 2021 16:03:21 +0200 Subject: [PATCH 3/5] Update setup --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 6d42b98..2c33553 100644 --- a/setup.py +++ b/setup.py @@ -26,10 +26,10 @@ ], install_requires=[ "numpy>=1.17.0", - "scikit-learn>=0.21.3", + "scikit-learn==0.21.3", "pandas>=0.25.1", "pytest>=5.1.2", - "imbalanced-learn>=0.6.1", + "imbalanced-learn==0.6.1", "coverage>=5.1", "pytest-cov>=2.8.1", "IPython>=7.13.0", From 3ca0ac1f15db552a3475ca2bf03247683d024963 Mon Sep 17 00:00:00 2001 From: damian-horna Date: Thu, 29 Apr 2021 16:09:22 +0200 Subject: [PATCH 4/5] Update setup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2c33553..1817b34 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ ], install_requires=[ "numpy>=1.17.0", - "scikit-learn==0.21.3", + "scikit-learn>=0.22.1", "pandas>=0.25.1", "pytest>=5.1.2", "imbalanced-learn==0.6.1", From b1cb3a14b7445f761e11b4e1ef771752514784b7 Mon Sep 17 00:00:00 2001 From: damian-horna Date: Thu, 29 Apr 2021 16:15:56 +0200 Subject: [PATCH 5/5] Fixes regarding sklearn upgrade --- multi_imbalance/datasets/_data_loader.py | 2 +- .../ensemble/tests/test_mrbbagging.py | 18 +++++++++--------- setup.py | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/multi_imbalance/datasets/_data_loader.py b/multi_imbalance/datasets/_data_loader.py index 03d6ebb..9a63c7d 100644 --- a/multi_imbalance/datasets/_data_loader.py +++ b/multi_imbalance/datasets/_data_loader.py @@ -24,7 +24,7 @@ import numpy as np -from sklearn.datasets.base import Bunch +from sklearn.datasets._base import Bunch PRE_FILENAME = 'x' POST_FILENAME = 'data.npz' diff --git a/multi_imbalance/ensemble/tests/test_mrbbagging.py b/multi_imbalance/ensemble/tests/test_mrbbagging.py index 26b54dd..af15adf 100644 --- a/multi_imbalance/ensemble/tests/test_mrbbagging.py +++ b/multi_imbalance/ensemble/tests/test_mrbbagging.py @@ -1,7 +1,7 @@ import unittest from unittest.mock import MagicMock -from sklearn.tree import tree +from sklearn.tree import DecisionTreeClassifier from multi_imbalance.ensemble.mrbbagging import MRBBagging import numpy as np @@ -36,39 +36,39 @@ class TestMRBBagging(unittest.TestCase): def test_api(self): - mrbbagging = MRBBagging(1, tree.DecisionTreeClassifier(random_state=0), random_state=0) + mrbbagging = MRBBagging(1, DecisionTreeClassifier(random_state=0), random_state=0) mrbbagging.fit(X_train, y_train) y_pred = mrbbagging.predict(X_test) assert all(y_pred == y_test) def test_api_multiple_trees(self): - mrbbagging = MRBBagging(5, tree.DecisionTreeClassifier(random_state=0), random_state=0) + mrbbagging = MRBBagging(5, DecisionTreeClassifier(random_state=0), random_state=0) mrbbagging.fit(X_train, y_train) y_pred = mrbbagging.predict(X_test) assert all(y_pred == y_test) def test_api_with_feature_selection(self): - mrbbagging = MRBBagging(1, tree.DecisionTreeClassifier(random_state=0), feature_selection=True, random_state=0) + mrbbagging = MRBBagging(1, DecisionTreeClassifier(random_state=0), feature_selection=True, random_state=0) mrbbagging.fit(X_train, y_train) y_pred = mrbbagging.predict(X_test) assert all(y_pred == y_test) def test_api_with_random_feature_selection(self): - mrbbagging = MRBBagging(1, tree.DecisionTreeClassifier(random_state=0), feature_selection=True, random_fs=True, + mrbbagging = MRBBagging(1, DecisionTreeClassifier(random_state=0), feature_selection=True, random_fs=True, random_state=0) mrbbagging.fit(X_train, y_train) y_pred = mrbbagging.predict(X_test) assert all(y_pred == y_test) def test_api_with_feature_selection_sqrt_features(self): - mrbbagging = MRBBagging(1, tree.DecisionTreeClassifier(random_state=0), feature_selection=True, + mrbbagging = MRBBagging(1, DecisionTreeClassifier(random_state=0), feature_selection=True, half_features=False, random_state=0) mrbbagging.fit(X_train, y_train) y_pred = mrbbagging.predict(X_test) assert all(y_pred == y_test) def test__group_data(self): - mrbbagging = MRBBagging(1, tree.DecisionTreeClassifier()) + mrbbagging = MRBBagging(1, DecisionTreeClassifier()) x = [[1, 1, 1], [2, 2, 2], [3, 3, 3]] y = ["A", "B", "C"] classes, grouped_data = mrbbagging._group_data(x, y) @@ -76,7 +76,7 @@ def test__group_data(self): self.assertEqual(grouped_data, {'C': [[[3, 3, 3], 'C']], 'A': [[[1, 1, 1], 'A']], 'B': [[[2, 2, 2], 'B']]}) def test__group_data_with_none(self): - mrbbagging = MRBBagging(1, tree.DecisionTreeClassifier()) + mrbbagging = MRBBagging(1, DecisionTreeClassifier()) x = [[1, 1, 1], [2, 2, 2], [3, 3, 3]] y = ["A", None, "C"] with self.assertRaises(AssertionError): @@ -95,7 +95,7 @@ def test_fit_with_invalid_classifier(self): def test_with_invalid_k(self): with self.assertRaises(AssertionError): - MRBBagging(0, tree.DecisionTreeClassifier()) + MRBBagging(0, DecisionTreeClassifier()) if __name__ == '__main__': diff --git a/setup.py b/setup.py index 1817b34..e84f8cb 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ "scikit-learn>=0.22.1", "pandas>=0.25.1", "pytest>=5.1.2", - "imbalanced-learn==0.6.1", + "imbalanced-learn>=0.6.1", "coverage>=5.1", "pytest-cov>=2.8.1", "IPython>=7.13.0",