"""Plotting and model-comparison utilities."""

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import (confusion_matrix, accuracy_score,
                             precision_score, recall_score, f1_score)


def plot_countplot(dados, x, titulo, label_x: str, figsize: tuple = (8, 5), hue=None, small: bool = False):
    """Plot a count plot with labeled bars and all axis spines hidden."""
    plt.figure(figsize=figsize)
    custom_params = {"axes.spines.right": False, "axes.spines.top": False,
                     "axes.spines.left": False, "axes.spines.bottom": False}
    sns.set_theme(style="ticks", rc=custom_params)
    ax = sns.countplot(x=x, hue=hue, data=dados, palette='viridis')
    ax.get_yaxis().set_visible(False)
    plt.title(titulo, fontsize=22, loc='left')
    plt.xlabel(label_x, fontsize=17)
    plt.xticks(fontsize=15)
    for container in ax.containers:
        if small:
            # Short bars: draw the count labels above the bars
            ax.bar_label(container, fontsize=15)
        else:
            # Tall bars: draw the count labels inside the bars, in white
            ax.bar_label(container, fontsize=15, padding=-17, color='white')
    plt.show()
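
# Example usage (a minimal sketch; the DataFrame `df` and its 'churn'
# column are hypothetical):
#
#   df = pd.DataFrame({'churn': ['sim', 'não', 'não', 'sim', 'não']})
#   plot_countplot(df, x='churn', titulo='Churn distribution',
#                  label_x='Churn', small=True)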

def heatmap_corr(df, figsize: tuple = (8, 6)):
    """Plot an annotated heatmap of the DataFrame's correlation matrix."""
    corr = df.corr()
    plt.figure(figsize=figsize)
    sns.heatmap(corr,
                xticklabels=corr.columns.values,
                yticklabels=corr.columns.values,
                annot=True,
                cmap='viridis')
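
# Example usage (a minimal sketch; assumes `df` holds the features --
# df.corr() only works on numeric columns, so non-numeric ones are
# dropped first):
#
#   heatmap_corr(df.select_dtypes(include='number'))
#   plt.show()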

def plot_matriz_confusao(y_true_teste, y_pred_teste, group_names=None,
                         categories='auto', count=True, cbar=True,
                         xyticks=True, sum_stats=True, figsize=None,
                         cmap='viridis', title=None):
    """Plot an annotated confusion matrix, optionally with summary metrics.

    Note: precision, recall and F1 use the sklearn defaults, which
    assume binary classification.
    """
    cf = confusion_matrix(y_true_teste, y_pred_teste)
    blanks = ['' for i in range(cf.size)]
    if group_names and len(group_names) == cf.size:
        group_labels = ["{}\n".format(value) for value in group_names]
    else:
        group_labels = blanks
    if count:
        group_counts = ["{0:0.0f}\n".format(value) for value in cf.flatten()]
    else:
        group_counts = blanks
    # Combine the optional name and count into one label per cell
    box_labels = [f"{v1}{v2}".strip()
                  for v1, v2 in zip(group_labels, group_counts)]
    box_labels = np.asarray(box_labels).reshape(cf.shape[0], cf.shape[1])
    if sum_stats:
        accuracy = accuracy_score(y_true_teste, y_pred_teste)
        precision = precision_score(y_true_teste, y_pred_teste)
        recall = recall_score(y_true_teste, y_pred_teste)
        f1_score_metric = f1_score(y_true_teste, y_pred_teste)
        stats_text = "\n\nAcurácia={:0.3f}\nPrecisão={:0.3f}\nRecall={:0.3f}\nF1 Score={:0.3f}".format(
            accuracy, precision, recall, f1_score_metric)
    else:
        stats_text = ""
    if figsize is None:
        figsize = plt.rcParams.get('figure.figsize')
    if not xyticks:
        categories = False
    plt.figure(figsize=figsize)
    sns.set(font_scale=1.4)  # for label size
    sns.heatmap(cf, annot=box_labels, fmt="", cmap=cmap, cbar=cbar,
                xticklabels=categories, yticklabels=categories)
    plt.ylabel('Valores verdadeiros', fontsize=16)
    plt.xlabel('Valores preditos' + stats_text, fontsize=16)
    if title:
        plt.title(title, fontsize=20)
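
# Example usage (a minimal sketch; `y_test` and `y_pred` are hypothetical
# binary label arrays from a fitted classifier):
#
#   plot_matriz_confusao(y_test, y_pred,
#                        group_names=['TN', 'FP', 'FN', 'TP'],
#                        categories=['Não', 'Sim'],
#                        title='Matriz de confusão')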

def compara_modelos_metricas(metrica, nomes_modelos, y_true_treino, y_pred_treinos, y_true_teste, y_pred_testes):
    """Compare models on several classification metrics.

    metrica: {'Acurácia Treino', 'Acurácia Teste', 'Precisão', 'Recall', 'F1-Score'}
        Column to sort the comparison table by.

    Returns:
        DataFrame sorted by the given metric, best model first.
    """
    acc = []
    precision = []
    recall = []
    f1 = []
    # Test-set metrics, one entry per model
    for y_pred_teste in y_pred_testes:
        acc.append(accuracy_score(y_true_teste, y_pred_teste))
        precision.append(precision_score(y_true_teste, y_pred_teste))
        recall.append(recall_score(y_true_teste, y_pred_teste))
        f1.append(f1_score(y_true_teste, y_pred_teste))
    # Training accuracy, useful for spotting overfitting
    acc_treino = []
    for y_pred_treino in y_pred_treinos:
        acc_treino.append(accuracy_score(y_true_treino, y_pred_treino))
    tabela = pd.DataFrame({'Modelo': nomes_modelos, 'Acurácia Treino': acc_treino,
                           'Acurácia Teste': acc, 'Precisão': precision, 'Recall': recall, 'F1-Score': f1})
    return tabela.sort_values(by=metrica, ascending=False).reset_index(drop=True)
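
# Example usage (a minimal sketch; the prediction arrays are hypothetical
# outputs of two fitted binary classifiers):
#
#   tabela = compara_modelos_metricas(
#       'F1-Score',
#       nomes_modelos=['Logistic Regression', 'Random Forest'],
#       y_true_treino=y_train, y_pred_treinos=[lr_pred_tr, rf_pred_tr],
#       y_true_teste=y_test, y_pred_testes=[lr_pred_te, rf_pred_te])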