Modify CCR so that its clean-and-generate step can be reused in MC-CCR

damianhorna · dddddddddtd · Jun 21, 2023 · Jan 6, 2023 · Jan 6, 2023 · Jan 6, 2023
commit a2d9e0fda9664e779a96c6ca81942c7309a131e1
diff --git a/multi_imbalance/resampling/ccr.py b/multi_imbalance/resampling/ccr.py
@@ -33,14 +33,33 @@ def _fit_resample(self, X: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, np.nd
  :param X:
  two-dimensional numpy array (number of samples x number of features) with float numbers
  :param y:
- one-dimensional numpy array with labels for rows in X, assumes minority class = 1 and majority class = 0
+ one-dimensional numpy array with labels for rows in X
  :return:
  resampled X, resampled y
  """
- oversampled_X, oversampled_y = np.copy(X), np.copy(y)
+ minority_class = min(list(Counter(y).items()), key=lambda x: x[1])[0]
 
- majority_examples = X[y == 0]
- minority_examples = X[y == 1]
+ minority_examples = X[y == minority_class]
+ majority_examples = X[y != minority_class]
+
+ clean_majority, synthetic_minority = self.clean_and_generate(minority_examples, majority_examples)
+
+ return np.vstack([minority_examples, clean_majority, synthetic_minority]), np.hstack([
+ np.full((minority_examples.shape[0],), minority_class),
+ y[y != minority_class],
+ np.full((synthetic_minority.shape[0],), minority_class)
+ ])
+
+ def clean_and_generate(self, minority_examples: np.ndarray, majority_examples: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+ """
+ :param minority_examples:
+ two-dimensional numpy array (number of samples x number of features) with float numbers of minority class
+ :param majority_examples:
+ two-dimensional numpy array (number of samples x number of features) with float numbers of majority class
+ :return:
+ clean majority X, synthetic minority X
+ """
+ clean_majority_examples = np.copy(majority_examples)
 
  majority_count = len(majority_examples)
 
@@ -81,7 +100,7 @@ def _fit_resample(self, X: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, np.nd
  translation = majority_examples[j] - x
  t[j] += (r[i] - d) / d * translation
 
- oversampled_X[y == 0] += t
+ clean_majority_examples += t
 
  number_of_synthetic_examples = majority_examples.shape[0] - minority_examples.shape[0]
  inverse_radius_sum = (r ** -1).sum()
@@ -94,8 +113,8 @@ def _fit_resample(self, X: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, np.nd
  multiplier = random_translation / abs(random_translation).sum()
  new_point = x + multiplier * r[i] * np.random.rand(1)
  generated.append(new_point)
-
- return np.concatenate([oversampled_X, generated]), np.concatenate([oversampled_y, [1 for x in generated]])
+ generated = np.vstack(generated)
+ return clean_majority_examples, generated
 
  def distances(self, minority_example, majority_examples):
  return (abs(minority_example - majority_examples)).sum(1)
diff --git a/tests/resampling/test_ccr.py b/tests/resampling/test_ccr.py
@@ -35,7 +35,7 @@
 
 @pytest.fixture()
 def ccr_mock():
- def _get_parametrized_ccr(X, y):
+ def _get_parametrized_ccr():
  clf = CCR(energy=0.5)
  return clf