Skip to content

Commit

Permalink
fins src files
Browse files Browse the repository at this point in the history
adding files
  • Loading branch information
KCachel committed May 9, 2022
1 parent ab8f5ac commit 2a44cf8
Show file tree
Hide file tree
Showing 12 changed files with 78,973 additions and 0 deletions.
34,043 changes: 34,043 additions & 0 deletions NewZealand.csv

Large diffs are not rendered by default.

1,023 changes: 1,023 additions & 0 deletions auditing_sample.ipynb

Large diffs are not rendered by default.

42,933 changes: 42,933 additions & 0 deletions geocoded_burke_county.csv

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions src/fins/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from fins.utils_error_handling import *
from fins.balance_metric import *
from fins.conditioned_metrics import *
from fins.calibrated_metrics import *
from fins.qualified_metrics import *
from fins.relevance_parity import *
from fins.score_metrics import *
from fins.statistical_parity_metric import *
63 changes: 63 additions & 0 deletions src/fins/balance_metric.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Metrics to assess the balance fairness of a subset selection
References
----------
Kathleen Cachel and Elke Rundensteiner.
"FINS Auditing Framework: Group Fairness for Subset Selections"
in the proceedings of the AAAI/ACM conference on Artificial Intelligence,
Ethics, and Society (AIES 2022)
"""


# Authors: Kathleen Cachel <[email protected]>
# License:


import numpy as np
import fins as fins

def balance(pool_groups, subset_items, subset_groups):
    """Compute the balance fairness metric.

    Balance is the ratio between the smallest and the largest share of the
    subset held by any group that appears in the pool. A value of 1 means
    every pool group holds the same share of the subset; a value of 0 means
    at least one pool group is absent from the subset.

    Parameters
    ----------
    pool_groups : numpy array of shape = (n_pool_items)
        The group identity of the items in the pool. Its unique values define
        the set of groups that are audited.
    subset_items : numpy array of shape = (n_subset_items)
        The items in the subset. Only its length is used by the computation.
    subset_groups : numpy array of shape = (n_subset_items)
        The group identity of the items in the subset (corresponding to order
        of items in subset_items).

    Returns
    -------
    propOfS : numpy array of shape = (n_groups)
        Each group's proportion of the subset, in the order given by
        ``np.unique(pool_groups)`` (i.e. sorted group labels).
    bal_val : float
        The balance metric value (minimum proportion / maximum proportion).

    Examples
    --------
    >>> pool_groups = np.asarray([0, 0, 1, 1])
    >>> subset_items = np.asarray([1, 4])
    >>> subset_groups = np.asarray([0, 1])
    >>> propOfS, bal_val = balance(pool_groups, subset_items, subset_groups)
    >>> propOfS
    array([0.5, 0.5])
    >>> float(bal_val)
    1.0
    """
    fins.check_subset_items_groups(pool_groups, subset_items, subset_groups)  # error handling

    unique_grps = np.unique(pool_groups)
    # np.inf (lowercase) is the spelling that survives NumPy 2.0; the
    # sentinel is overwritten for every group in the loop below.
    propOfS = np.full((unique_grps.shape[0],), -np.inf)
    total_items_subset = subset_items.shape[0]

    # Index by position in unique_grps, not by the label itself, so that
    # group labels need not be the integers 0..n_groups-1.
    for grp_idx, grp in enumerate(unique_grps):
        num_grp_items_in_subset = np.count_nonzero(subset_groups == grp)
        propOfS[grp_idx] = num_grp_items_in_subset / total_items_subset

    # NOTE(review): if no pool group occurs in the subset the maximum is 0
    # and this ratio evaluates to nan -- confirm whether the validation
    # helper above already rules that out.
    bal_val = np.min(propOfS) / np.max(propOfS)

    return propOfS, bal_val
194 changes: 194 additions & 0 deletions src/fins/calibrated_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
"""Metrics to assess the calibrated balance fairness and the calibrated parity
fairness of a subset selection
References
----------
Kathleen Cachel and Elke Rundensteiner.
"FINS Auditing Framework: Group Fairness for Subset Selections"
in the proceedings of the AAAI/ACM conference on Artificial Intelligence,
Ethics, and Society (AIES 2022)
"""


# Authors: Kathleen Cachel <[email protected]>
# License:


import numpy as np
import fins as fins



def calibrated_parity(pool_items, pool_scores, pool_groups, subset_items, subset_scores, subset_groups, lb_bin, ub_bin):
    """Compute the calibrated parity.

    Items are partitioned into score bins. Within each bin, every group's
    selection rate (items of the group selected from the bin / items of the
    group in the pool within the bin) is computed. The metric is the worst
    (smallest) min/max ratio of these rates over all bins whose rates differ.

    Parameters
    ----------
    pool_items : numpy array of shape = (n_pool_items)
        The items in the pool.
    pool_scores : numpy array of shape = (n_pool_items)
        The scores of the items in the pool (same order as pool_items).
    pool_groups : numpy array of shape = (n_pool_items)
        The group identity of the items in the pool (same order as pool_items).
    subset_items : numpy array of shape = (n_subset_items)
        The items in the subset.
    subset_scores : numpy array of shape = (n_subset_items)
        The scores of the items in the subset (same order as subset_items).
    subset_groups : numpy array of shape = (n_subset_items)
        The group identity of the items in the subset (same order as subset_items).
    lb_bin : numpy array of shape = (n_bins)
        The lower bound score of each bin. The bound is exclusive: an item is
        in the bin only if its score is strictly greater than the lower bound.
    ub_bin : numpy array of shape = (n_bins)
        The upper bound score of each bin. The bound is inclusive: an item is
        in the bin only if its score is less than or equal to the upper bound.

    Returns
    -------
    bin_group_selectr : numpy array of shape = (n_bins, n_groups)
        The selection rate of each group in each bin. Columns follow the
        order of ``np.unique(pool_groups)``. A group with no pool items in a
        bin gets a rate of 0.
    cp_val : float
        Calibrated parity value; 1.0 when every bin has identical rates
        across groups.

    Examples
    --------
    >>> pool_items = np.asarray([1, 2, 3, 4])
    >>> pool_scores = np.asarray([100, 85, 54, 12])
    >>> pool_groups = np.asarray([0, 0, 1, 1])
    >>> subset_items = np.asarray([1, 4])
    >>> subset_scores = np.asarray([100, 12])
    >>> subset_groups = np.asarray([0, 1])
    >>> lb_bin = np.asarray([0, 50])
    >>> ub_bin = np.asarray([49, 100])
    >>> rates, cp_val = calibrated_parity(pool_items, pool_scores, pool_groups,
    ...                                   subset_items, subset_scores, subset_groups,
    ...                                   lb_bin, ub_bin)
    >>> rates
    array([[0. , 1. ],
           [0.5, 0. ]])
    >>> float(cp_val)
    0.0
    """
    fins.check_pool_subset_groups(pool_items, pool_scores, pool_groups, subset_items, subset_scores, subset_groups)  # error handling

    n_bins = lb_bin.shape[0]
    unique_grps = np.unique(pool_groups)
    # np.inf (lowercase) is the spelling that survives NumPy 2.0; every cell
    # is overwritten in the loops below.
    bin_group_selectr = np.full((n_bins, unique_grps.shape[0]), -np.inf)

    for bin_i in range(n_bins):
        lb = lb_bin[bin_i]
        ub = ub_bin[bin_i]
        # Bin membership is (lb, ub]: exclusive lower, inclusive upper bound.
        in_bin_pool = (pool_scores > lb) & (pool_scores <= ub)
        in_bin_subset = (subset_scores > lb) & (subset_scores <= ub)
        bin_pool_groups = pool_groups[in_bin_pool]
        bin_subset_groups = subset_groups[in_bin_subset]

        # Index by position in unique_grps so labels need not be 0..n-1.
        for grp_i, grp in enumerate(unique_grps):
            # Count membership-mask hits, not non-zero item ids: an item
            # whose id is 0 must still be counted.
            num_grp_bin_pool_items = np.count_nonzero(bin_pool_groups == grp)
            num_grp_bin_subset_items = np.count_nonzero(bin_subset_groups == grp)
            if num_grp_bin_pool_items == 0:
                bin_group_selectr[bin_i, grp_i] = 0.0
            else:
                bin_group_selectr[bin_i, grp_i] = num_grp_bin_subset_items / num_grp_bin_pool_items

    max_prop_each_bin = np.max(bin_group_selectr, axis=1)
    min_prop_each_bin = np.min(bin_group_selectr, axis=1)

    differing_bins = min_prop_each_bin != max_prop_each_bin
    if not np.any(differing_bins):
        cp_val = 1.0  # totally fair since max = min in all bins
    else:
        # Rates are non-negative, so max > min implies max > 0 and the
        # division below is safe for every bin that is kept by the mask.
        cp_val = np.min(min_prop_each_bin[differing_bins] / max_prop_each_bin[differing_bins])
    return bin_group_selectr, cp_val


def calibrated_balance(pool_items, pool_scores, pool_groups, subset_items, subset_scores, subset_groups, lb_bin, ub_bin):
    """Compute the calibrated balance.

    Items are partitioned into score bins. Within each bin, every group's
    share of the subset (items of the group selected from the bin / total
    number of subset items) is computed. The metric is the worst (smallest)
    min/max ratio of these shares over all bins whose shares differ.

    Parameters
    ----------
    pool_items : numpy array of shape = (n_pool_items)
        The items in the pool.
    pool_scores : numpy array of shape = (n_pool_items)
        The scores of the items in the pool (same order as pool_items).
    pool_groups : numpy array of shape = (n_pool_items)
        The group identity of the items in the pool (same order as pool_items).
    subset_items : numpy array of shape = (n_subset_items)
        The items in the subset.
    subset_scores : numpy array of shape = (n_subset_items)
        The scores of the items in the subset (same order as subset_items).
    subset_groups : numpy array of shape = (n_subset_items)
        The group identity of the items in the subset (same order as subset_items).
    lb_bin : numpy array of shape = (n_bins)
        The lower bound score of each bin. The bound is exclusive: an item is
        in the bin only if its score is strictly greater than the lower bound.
    ub_bin : numpy array of shape = (n_bins)
        The upper bound score of each bin. The bound is inclusive: an item is
        in the bin only if its score is less than or equal to the upper bound.

    Returns
    -------
    bin_group_proportions : numpy array of shape = (n_bins, n_groups)
        For each bin and group, the number of the group's subset items in the
        bin divided by the total number of subset items. Columns follow the
        order of ``np.unique(pool_groups)``. A group with no pool items in a
        bin gets a proportion of 0.
    cb_val : float
        Calibrated balance value; 1.0 when every bin has identical
        proportions across groups.

    Examples
    --------
    >>> pool_items = np.asarray([1, 2, 3, 4])
    >>> pool_scores = np.asarray([100, 85, 54, 12])
    >>> pool_groups = np.asarray([0, 0, 1, 1])
    >>> subset_items = np.asarray([2, 4])
    >>> subset_scores = np.asarray([85, 12])
    >>> subset_groups = np.asarray([0, 1])
    >>> lb_bin = np.asarray([0, 87])
    >>> ub_bin = np.asarray([86, 100])
    >>> props, cb_val = calibrated_balance(pool_items, pool_scores, pool_groups,
    ...                                    subset_items, subset_scores, subset_groups,
    ...                                    lb_bin, ub_bin)
    >>> props
    array([[0.5, 0.5],
           [0. , 0. ]])
    >>> float(cb_val)
    1.0
    """
    fins.check_pool_subset_groups(pool_items, pool_scores, pool_groups, subset_items, subset_scores,
                                  subset_groups)  # error handling

    n_bins = lb_bin.shape[0]
    unique_grps = np.unique(pool_groups)
    # np.inf (lowercase) is the spelling that survives NumPy 2.0; every cell
    # is overwritten in the loops below.
    bin_group_proportions = np.full((n_bins, unique_grps.shape[0]), -np.inf)

    # Size of the subset is loop-invariant. Use the array length rather than
    # counting non-zero ids, so an item whose id is 0 is still counted.
    num_subset_items = subset_items.shape[0]

    for bin_i in range(n_bins):
        lb = lb_bin[bin_i]
        ub = ub_bin[bin_i]
        # Bin membership is (lb, ub]: exclusive lower, inclusive upper bound.
        in_bin_pool = (pool_scores > lb) & (pool_scores <= ub)
        in_bin_subset = (subset_scores > lb) & (subset_scores <= ub)
        bin_pool_groups = pool_groups[in_bin_pool]
        bin_subset_groups = subset_groups[in_bin_subset]

        # Index by position in unique_grps so labels need not be 0..n-1.
        for grp_i, grp in enumerate(unique_grps):
            # Count membership-mask hits, not non-zero item ids.
            num_grp_bin_pool_items = np.count_nonzero(bin_pool_groups == grp)
            num_grp_bin_subset_items = np.count_nonzero(bin_subset_groups == grp)
            if num_grp_bin_pool_items == 0:
                bin_group_proportions[bin_i, grp_i] = 0.0
            else:
                bin_group_proportions[bin_i, grp_i] = num_grp_bin_subset_items / num_subset_items

    max_props_each_bin = np.max(bin_group_proportions, axis=1)
    min_props_each_bin = np.min(bin_group_proportions, axis=1)

    differing_bins = min_props_each_bin != max_props_each_bin
    if not np.any(differing_bins):
        cb_val = 1.0  # totally fair since max = min in all bins
    else:
        # Proportions are non-negative, so max > min implies max > 0 and the
        # division below is safe for every bin that is kept by the mask.
        cb_val = np.min(min_props_each_bin[differing_bins] / max_props_each_bin[differing_bins])
    return bin_group_proportions, cb_val

Loading

0 comments on commit 2a44cf8

Please sign in to comment.