
Commit

return dict instead of array for easier maintenance
MaximilianFranz committed Jan 7, 2020
1 parent 3a793c3 commit 14353af
Showing 3 changed files with 30 additions and 47 deletions.
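
The change in a nutshell, as a minimal runnable sketch: calc_scores now returns a dict keyed by metric name instead of a positional np.array, so score rows can be appended to the results DataFrame by column name. The pehe_score body below is a simplified stand-in (root mean squared ITE error) and the DataFrame handling is illustrative, not the library's own evaluation loop; DataFrame.append as used in this commit requires pandas < 2.0, where it was later removed in favour of pd.concat.

import numpy as np
import pandas as pd


def pehe_score(true, pred):
    # simplified stand-in: root mean squared error of the ITE estimates
    return np.sqrt(np.mean((true - pred) ** 2))


def calc_scores(true, pred, metrics):
    # mirrors the new calc_scores in this commit: named scores instead of an array
    if not isinstance(metrics, list):
        metrics = [metrics]
    return {metric.__name__: metric(true, pred) for metric in metrics}


row = calc_scores(np.full(100, 1.0), np.full(100, 0.0), pehe_score)

# before this commit: positional assignment of an unlabeled array
#   test_scores.loc[len(test_scores)] = np.array(list(row.values()))
# after this commit: append the dict, so values land in their columns by name
test_scores = pd.DataFrame(columns=["pehe_score"])
test_scores = test_scores.append(row, ignore_index=True)
print(test_scores)  # one row, column "pehe_score" == 1.0
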
26 changes: 13 additions & 13 deletions notebooks/example_evaluation.ipynb
@@ -130,13 +130,13 @@
" train_ite, test_ite = weighted_slearner(train, test)\n",
"\n",
" # Calculate the scores and append them to a dataframe\n",
" test_scores.loc[len(test_scores)] = calc_scores(test[Col.ite],\n",
" test_ite,\n",
" metrics)\n",
" test_scores = test_scores.append(calc_scores(\n",
" test[Col.ite], test_ite, metrics\n",
" ), ignore_index=True)\n",
"\n",
" train_scores.loc[len(train_scores)] = calc_scores(train[Col.ite],\n",
" train_ite,\n",
" metrics)\n",
" train_scores = train_scores.append(calc_scores(\n",
" train[Col.ite], train_ite, metrics\n",
" ), ignore_index=True)\n",
"\n",
"# Summarize the scores and save them in a dataframe\n",
"train_result, test_result = summarize_scores(train_scores), summarize_scores(test_scores)\n",
@@ -273,13 +273,13 @@
" train_ite, test_ite = method(train, test)\n",
"\n",
" # Calculate the scores and append them to a dataframe\n",
" test_scores.loc[len(test_scores)] = calc_scores(test[Col.ite],\n",
" test_ite,\n",
" metrics)\n",
" test_scores = test_scores.append(calc_scores(\n",
" test[Col.ite], test_ite, metrics\n",
" ), ignore_index=True)\n",
"\n",
" train_scores.loc[len(train_scores)] = calc_scores(train[Col.ite],\n",
" train_ite,\n",
" metrics)\n",
" train_scores = train_scores.append(calc_scores(\n",
" train[Col.ite], train_ite, metrics\n",
" ), ignore_index=True)\n",
"\n",
" # Summarize the scores and save them in a dataframe\n",
" train_result, test_result = summarize_scores(train_scores), summarize_scores(test_scores)\n",
@@ -784,7 +784,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
"version": "3.7.6"
}
},
"nbformat": 4,
47 changes: 14 additions & 33 deletions src/justcause/evaluation.py
@@ -123,7 +123,7 @@ def _evaluate_single_method(
train_size=0.8,
random_state=None,
):
"""Helper to evaluate method with multiple metrics on the given replications
"""Helper to evaluate method with multiple metrics on the given replications.
This is the standard variant of an evaluation loop, which the user can implement
manually to modify parts of it. Here, only ITE prediction and evaluation is
@@ -149,12 +149,12 @@
else:
train_ite, test_ite = default_predictions(method, train, test)

-test_scores.loc[len(test_scores)] = calc_scores(
-test[Col.ite], test_ite, metrics
+test_scores = test_scores.append(
+calc_scores(test[Col.ite], test_ite, metrics), ignore_index=True
)

-train_scores.loc[len(train_scores)] = calc_scores(
-train[Col.ite], train_ite, metrics
+train_scores = train_scores.append(
+calc_scores(train[Col.ite], train_ite, metrics), ignore_index=True
)

train_results = summarize_scores(train_scores, formats)
@@ -163,25 +163,29 @@
return train_results, test_results


-def calc_scores(true: np.array, pred: np.array, metrics):
+def calc_scores(
+true: np.array, pred: np.array, metrics: Union[List[Metric], Metric]
+) -> dict:
"""Compare ground-truth to predictions with given metrics for one replication
Call for train and test separately
-TODO: Also replace np.array with dict
Args:
-true: True ITE
+true: true ITE
pred: predicted ITE
metrics: metrics to evaluate on the ITEs
-Returns: a list of scores with length == len(metrics), i.e. the row to be added to
-the scores dataframe
+Returns:
+dict: a dict of (score_name, scores) pairs with len(metrics) entries
"""
# ensure metrics and replications are lists, even if with just one element
if not isinstance(metrics, list):
metrics = [metrics]

-return np.array([metric(true, pred) for metric in metrics])
+return {metric.__name__: metric(true, pred) for metric in metrics}


def default_predictions(
@@ -211,29 +215,6 @@ def default_predictions(
return train_ite, test_ite


-def get_default_callable(method):
-"""Helper to get an evaluation callable for standard methods
-Args:
-method: Method to use for the standard callable
-Returns: Callable for evaluation in custom loop
-"""
-
-def default_callable(train, test):
-train_X, train_t, train_y = train.np.X, train.np.t, train.np.y
-test_X, test_t, test_y = test.np.X, test.np.t, test.np.y
-
-method.fit(train_X, train_t, train_y)
-
-train_ite = method.predict_ite(train_X, train_t, train_y)
-test_ite = method.predict_ite(test_X, test_t, test_y)
-
-return train_ite, test_ite
-
-return default_callable


def summarize_scores(
scores_df: pd.DataFrame,
formats: Union[List[Format], Format] = (np.mean, np.median, np.std),
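
One upside of named scores, sketched below: downstream summarizing can aggregate per metric by column name instead of by array position. summarize_scores_sketch is a hypothetical, simplified stand-in; the real summarize_scores in justcause.evaluation takes a formats argument (see the signature in the context lines above), but its output format is not shown in this diff.

import numpy as np
import pandas as pd


def summarize_scores_sketch(scores_df, formats=(np.mean, np.median, np.std)):
    # one summary entry per (metric column, format) pair, keyed by readable names
    return {
        f"{column}-{fmt.__name__}": fmt(scores_df[column])
        for column in scores_df.columns
        for fmt in formats
    }


scores = pd.DataFrame(
    [{"pehe_score": 1.2, "mean_absolute": 0.4},
     {"pehe_score": 0.9, "mean_absolute": 0.5}]
)
print(summarize_scores_sketch(scores))
# e.g. {'pehe_score-mean': 1.05, 'pehe_score-median': 1.05, 'pehe_score-std': 0.15, ...}
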
4 changes: 3 additions & 1 deletion tests/test_evaluation.py
@@ -41,7 +41,9 @@ def test_summary():
def test_calc_scores():
true = np.full(100, 1)
pred = np.full(100, 0)
-assert calc_scores(true, pred, pehe_score)[0] == 1
+score_dict = calc_scores(true, pred, pehe_score)
+assert list(score_dict.values())[0] == 1
+assert "pehe_score" in score_dict.keys()


def test_setup_df():
