Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Develop #73

Merged
merged 7 commits into from
Apr 29, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion multi_imbalance/datasets/_data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

import numpy as np

from sklearn.datasets.base import Bunch
from sklearn.datasets._base import Bunch

PRE_FILENAME = 'x'
POST_FILENAME = 'data.npz'
Expand Down
18 changes: 9 additions & 9 deletions multi_imbalance/ensemble/tests/test_mrbbagging.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import unittest
from unittest.mock import MagicMock

from sklearn.tree import tree
from sklearn.tree import DecisionTreeClassifier

from multi_imbalance.ensemble.mrbbagging import MRBBagging
import numpy as np
Expand Down Expand Up @@ -36,47 +36,47 @@

class TestMRBBagging(unittest.TestCase):
def test_api(self):
mrbbagging = MRBBagging(1, tree.DecisionTreeClassifier(random_state=0), random_state=0)
mrbbagging = MRBBagging(1, DecisionTreeClassifier(random_state=0), random_state=0)
mrbbagging.fit(X_train, y_train)
y_pred = mrbbagging.predict(X_test)
assert all(y_pred == y_test)

def test_api_multiple_trees(self):
mrbbagging = MRBBagging(5, tree.DecisionTreeClassifier(random_state=0), random_state=0)
mrbbagging = MRBBagging(5, DecisionTreeClassifier(random_state=0), random_state=0)
mrbbagging.fit(X_train, y_train)
y_pred = mrbbagging.predict(X_test)
assert all(y_pred == y_test)

def test_api_with_feature_selection(self):
mrbbagging = MRBBagging(1, tree.DecisionTreeClassifier(random_state=0), feature_selection=True, random_state=0)
mrbbagging = MRBBagging(1, DecisionTreeClassifier(random_state=0), feature_selection=True, random_state=0)
mrbbagging.fit(X_train, y_train)
y_pred = mrbbagging.predict(X_test)
assert all(y_pred == y_test)

def test_api_with_random_feature_selection(self):
mrbbagging = MRBBagging(1, tree.DecisionTreeClassifier(random_state=0), feature_selection=True, random_fs=True,
mrbbagging = MRBBagging(1, DecisionTreeClassifier(random_state=0), feature_selection=True, random_fs=True,
random_state=0)
mrbbagging.fit(X_train, y_train)
y_pred = mrbbagging.predict(X_test)
assert all(y_pred == y_test)

def test_api_with_feature_selection_sqrt_features(self):
mrbbagging = MRBBagging(1, tree.DecisionTreeClassifier(random_state=0), feature_selection=True,
mrbbagging = MRBBagging(1, DecisionTreeClassifier(random_state=0), feature_selection=True,
half_features=False, random_state=0)
mrbbagging.fit(X_train, y_train)
y_pred = mrbbagging.predict(X_test)
assert all(y_pred == y_test)

def test__group_data(self):
mrbbagging = MRBBagging(1, tree.DecisionTreeClassifier())
mrbbagging = MRBBagging(1, DecisionTreeClassifier())
x = [[1, 1, 1], [2, 2, 2], [3, 3, 3]]
y = ["A", "B", "C"]
classes, grouped_data = mrbbagging._group_data(x, y)
self.assertEqual(classes, {'A', 'B', 'C'})
self.assertEqual(grouped_data, {'C': [[[3, 3, 3], 'C']], 'A': [[[1, 1, 1], 'A']], 'B': [[[2, 2, 2], 'B']]})

def test__group_data_with_none(self):
mrbbagging = MRBBagging(1, tree.DecisionTreeClassifier())
mrbbagging = MRBBagging(1, DecisionTreeClassifier())
x = [[1, 1, 1], [2, 2, 2], [3, 3, 3]]
y = ["A", None, "C"]
with self.assertRaises(AssertionError):
Expand All @@ -95,7 +95,7 @@ def test_fit_with_invalid_classifier(self):

def test_with_invalid_k(self):
with self.assertRaises(AssertionError):
MRBBagging(0, tree.DecisionTreeClassifier())
MRBBagging(0, DecisionTreeClassifier())


if __name__ == '__main__':
Expand Down
19 changes: 16 additions & 3 deletions multi_imbalance/resampling/static_smote.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

import numpy as np
from imblearn.over_sampling import SMOTE
from imblearn.base import BaseSampler


class StaticSMOTE:
class StaticSMOTE(BaseSampler):
"""
Static SMOTE implementation:

Expand All @@ -13,9 +14,21 @@ class StaticSMOTE:
procedure based on sensitivity for multi-class problems. Pattern Recognit. 44, 1821–1833
(2011)
"""
def __init__(self):
super().__init__()
self._sampling_type = 'over-sampling'

# TODO add docstring
def fit_transform(self, X, y):
def _fit_resample(self, X, y):
"""
Performs resampling

:param X:
two-dimensional numpy array (number of samples x number of features) with float numbers
:param y:
one-dimensional numpy array with labels for the rows in X
:return:
Resampled X and y as numpy arrays
"""
cnt = Counter(y)
min_class = min(cnt, key=cnt.get)
X_original, y_original = X.copy(), y.copy()
Expand Down
2 changes: 1 addition & 1 deletion multi_imbalance/resampling/tests/test_static_smote.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def test_static_smote():

y = np.array([1] * 100 + [2] * 30 + [3] * 20)
ssm = StaticSMOTE()
X_resampled, y_resampled = ssm.fit_transform(X, y)
X_resampled, y_resampled = ssm.fit_resample(X, y)
cnt = Counter(y_resampled)
assert cnt[1] == 100
assert cnt[2] == 60
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
],
install_requires=[
"numpy>=1.17.0",
"scikit-learn==0.21.3",
"scikit-learn>=0.22.1",
"pandas>=0.25.1",
"pytest>=5.1.2",
"imbalanced-learn>=0.6.1",
Expand Down