Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MC-CCR implementation #102

Merged
merged 22 commits into from
Jun 21, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add radius and translations tests, refactor code
  • Loading branch information
dddddddddtd committed Jun 5, 2023
commit c8a301291984253f8eec892ca5d21dec80644495
6 changes: 3 additions & 3 deletions multi_imbalance/resampling/ccr.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ def _calculate_radius_and_translations(self, minority_examples, majority_example
current_example = 0
number_of_points_in_radius = 1

while current_example != majority_count and energy > 0:
example_distance_index = sorted_distances_index[current_example]
distance = distances[example_distance_index]
while current_example < majority_count and energy > 0:
majority_distance_index = sorted_distances_index[current_example]
distance = distances[majority_distance_index]
if distance <= r[i]:
number_of_points_in_radius += 1
dr = energy / number_of_points_in_radius
Expand Down
54 changes: 46 additions & 8 deletions tests/resampling/test_ccr.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@
])

multiclass_X = np.vstack(
[
np.random.normal(0, 1, (100, 2)),
np.random.normal(3, 5, (30, 2)),
np.random.normal(-2, 2, (20, 2)),
np.random.normal(-4, 1, (10, 2)),
np.random.normal(10, 1, (5, 2)),
]
)
[
np.random.normal(0, 1, (100, 2)),
np.random.normal(3, 5, (30, 2)),
np.random.normal(-2, 2, (20, 2)),
np.random.normal(-4, 1, (10, 2)),
np.random.normal(10, 1, (5, 2)),
]
)

multiclass_y = np.array([1] * 100 + [2] * 30 + [3] * 20 + [4] * 10 + [5] * 5)

Expand All @@ -51,6 +51,44 @@ def test_compare_cleaning_results_to_original_article_implementation():
assert_array_equal(np.sort(resampled_X[:X.shape[0]], axis=0), np.sort(original_cleaning_results, axis=0))


def test_radius_equal_to_energy_and_translations_equal_zero_when_majority_not_in_range():
    """Check radius and translations when no majority point is within reach.

    Both majority points lie farther than the energy (0.5) from the single
    minority point, so the expected radius is the energy itself and every
    expected translation is zero.
    """
    minority_examples = np.array([[0, 0]])
    majority_examples = np.array([[1, 1], [-1, -1]])
    expected_radius = np.array([0.5])
    expected_translations = np.array([[0, 0], [0, 0]])

    ccr = CCR(energy=0.5)
    radius, translations = ccr._calculate_radius_and_translations(
        minority_examples, majority_examples
    )

    assert_array_equal(radius, expected_radius)
    assert_array_equal(translations, expected_translations)

def test_radius_decreases_and_translation_nonequal_zero_when_majority_in_range():
    """Check radius and translations when a majority point is within reach.

    With energy 1 and majority points at distances 0.5 and 1 from the single
    minority point, the expected radius is 0.75 and only the nearer majority
    point is expected to move (by 0.25 along the x axis).
    """
    minority_examples = np.array([[0, 0]])
    majority_examples = np.array([[0.5, 0], [1, 0]])
    expected_radius = np.array([0.75])
    expected_translations = np.array([[0.25, 0], [0, 0]])

    ccr = CCR(energy=1)
    radius, translations = ccr._calculate_radius_and_translations(
        minority_examples, majority_examples
    )

    assert_array_equal(radius, expected_radius)
    assert_array_equal(translations, expected_translations)


def test_energy_cost_should_be_inversely_proportional_to_number_of_examples_in_radius():
    """Check radius growth when several majority points are absorbed in turn.

    One minority point at the origin faces five majority points spaced 0.5
    apart along the x axis. With energy 10 the expected radius is 3, and each
    majority point is expected to be translated by the gap between its
    position and that radius.
    """
    clf = CCR(energy=10)
    minority_examples = np.array([[0, 0]])
    majority_examples = np.array([[0.5, 0], [1, 0], [1.5, 0], [2, 0], [2.5, 0]])
    r, t = clf._calculate_radius_and_translations(minority_examples, majority_examples)

    # The leftover debug print was removed: it wrote to stdout and had no
    # effect on the assertions below.
    assert_array_equal(r, np.array([3]))
    assert_array_equal(t, np.array([[2.5, 0], [2, 0], [1.5, 0], [1, 0], [0.5, 0]]))


def test_translations_should_accumulate():
    """Check that translations from several minority points are combined.

    A single majority point sits midway between two minority points; the
    assertion expects its net translation to be zero (presumably because the
    two opposing pushes cancel out).
    """
    minority_examples = np.array([[0, 0], [2, 0]])
    majority_examples = np.array([[1, 0]])
    expected_translations = np.array([[0, 0]])

    ccr = CCR(energy=1)
    result = ccr._calculate_radius_and_translations(minority_examples, majority_examples)
    translations = result[1]

    assert_array_equal(translations, expected_translations)


def test_multiclass_ccr_call_count():
clf = MultiClassCCR(energy=0.5)

Expand Down
Loading