Refactor evaluate.py. #10

Merged 16 commits on Apr 22, 2021

Changes from 1 commit
replace macro-f1 & micro-f1 with sklearn's

Eleven1Liu committed Apr 18, 2021
commit 7a60564a903314ab5500ac27b0e6b2e7eddf5ffd
evaluate.py (54 changes: 3 additions & 51 deletions)
@@ -2,7 +2,7 @@
 
 import numpy as np
 import pandas as pd
-from sklearn.metrics import f1_score, multilabel_confusion_matrix, ndcg_score
+from sklearn.metrics import f1_score
 from tqdm import tqdm
 
 from utils import log
@@ -95,11 +95,8 @@ def eval(self, y_true, y_pred, threshold=0.5):
         result['# Instance'] = len(target_y_true)
 
         # micro/macro f1 of the target groups
-        # micro_f1 = f1_score(y_true=target_y_true, y_pred=target_y_pred > threshold, average='micro')
-        # macro_f1 = f1_score(y_true=target_y_true, y_pred=target_y_pred > threshold, average='macro')
-
-        result['Micro-F1'] = micro_f1((target_y_pred > threshold).ravel(), target_y_true.ravel())
-        result['Macro-F1'] = macro_f1(target_y_pred > threshold, target_y_true)
+        result['Micro-F1'] = f1_score(y_true=target_y_true, y_pred=target_y_pred > threshold, average='micro')
+        result['Macro-F1'] = f1_score(y_true=target_y_true, y_pred=target_y_pred > threshold, average='macro')
 
         # find all metrics starting with P(Precision) or R(Recall)
         pattern = re.compile('(?:P|R)@\d+')
@@ -127,24 +124,6 @@ def __repr__(self):
         return df.to_markdown(index=False)
 
 
-def macro_precision(yhat, y):
-    num = intersect_size(yhat, y, 0) / (yhat.sum(axis=0) + 1e-10)
-    return np.mean(num)
-
-def macro_recall(yhat, y):
-    num = intersect_size(yhat, y, 0) / (y.sum(axis=0) + 1e-10)
-    return np.mean(num)
-
-def macro_f1(yhat, y):
-    prec = macro_precision(yhat, y)
-    rec = macro_recall(yhat, y)
-    if prec + rec == 0:
-        f1 = 0.
-    else:
-        f1 = 2*(prec*rec)/(prec+rec)
-    return f1
-
-
 def recall_at_k(yhat_raw, y, k):
     #num true labels in top k predictions / num true labels
     sortd = np.argsort(yhat_raw)[:,::-1]
@@ -177,30 +156,3 @@ def precision_at_k(yhat_raw, y, k):
             vals.append(num_true_in_top_k / float(denom))
 
     return np.mean(vals)
-
-
-# ##########################################################################
-# #MICRO METRICS: treat every prediction as an individual binary prediction
-# ##########################################################################
-
-# def micro_accuracy(yhatmic, ymic):
-#     return intersect_size(yhatmic, ymic, 0) / union_size(yhatmic, ymic, 0)
-
-def micro_precision(yhatmic, ymic):
-    return intersect_size(yhatmic, ymic, 0) / yhatmic.sum(axis=0)
-
-def micro_recall(yhatmic, ymic):
-    return intersect_size(yhatmic, ymic, 0) / ymic.sum(axis=0)
-
-def micro_f1(yhatmic, ymic):
-    prec = micro_precision(yhatmic, ymic)
-    rec = micro_recall(yhatmic, ymic)
-    if prec + rec == 0:
-        f1 = 0.
-    else:
-        f1 = 2*(prec*rec)/(prec+rec)
-    return f1
-
-def intersect_size(yhat, y, axis):
-    #axis=0 for label-level union (macro). axis=1 for instance-level
-    return np.logical_and(yhat, y).sum(axis=axis).astype(float)
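
For context, a minimal sketch (not part of this PR) of what the new code computes. sklearn's f1_score with average='micro' pools every label decision before computing precision and recall, which should agree with the removed micro_f1 helper up to its zero-division handling; average='macro' averages per-label F1 scores, whereas the removed macro_f1 took the harmonic mean of macro-averaged precision and recall, so the two macro values need not be identical. The toy arrays, the 0.5 threshold, and the variable names below are illustrative assumptions.

```python
import numpy as np
from sklearn.metrics import f1_score

# Toy multi-label data; the shapes and the 0.5 threshold are illustrative assumptions.
rng = np.random.default_rng(0)
y_true = rng.integers(0, 2, size=(8, 4))   # binary ground-truth label matrix
y_score = rng.random(size=(8, 4))          # predicted probabilities
y_pred = y_score > 0.5                     # binarize, as eval() does with `> threshold`

# Micro-F1: all label decisions are pooled first, so this should match the removed
# micro_f1(yhat.ravel(), y.ravel()) helper (up to zero-division handling).
micro = f1_score(y_true=y_true, y_pred=y_pred, average='micro')

# Macro-F1: sklearn averages the per-label F1 scores; the removed macro_f1 instead
# took the harmonic mean of macro-averaged precision and recall, so values may differ.
macro = f1_score(y_true=y_true, y_pred=y_pred, average='macro')

print(f"Micro-F1: {micro:.4f}  Macro-F1: {macro:.4f}")
```

Binarizing with y_score > 0.5 mirrors the target_y_pred > threshold expression in the updated eval(), which passes the boolean indicator matrix directly to f1_score.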