-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmetric.py
107 lines (73 loc) · 5.24 KB
/
metric.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
## Local metrics implementation.
##https://www.kaggle.com/corochann/bengali-seresnext-training-with-pytorch
import numpy as np
import sklearn.metrics
import torch
def macro_recall(pred_y, y, n_grapheme=168, n_vowel=11, n_consonant=7):
    """Return the weighted macro-averaged recall over three prediction heads.

    ``pred_y`` is split along dim 1 into three consecutive chunks of widths
    ``n_grapheme``, ``n_vowel`` and ``n_consonant``; the argmax of each chunk
    along dim 1 is taken as the predicted class for that head. Column ``i`` of
    ``y`` supplies the true labels for head ``i``, in the same order.

    Args:
        pred_y: torch tensor of per-class scores; ``torch.split`` requires its
            dim 1 to have size ``n_grapheme + n_vowel + n_consonant``.
        y: torch tensor of true labels, indexed as ``y[:, 0]``, ``y[:, 1]``
            and ``y[:, 2]``.
        n_grapheme: width of the first chunk of ``pred_y``.
        n_vowel: width of the second chunk of ``pred_y``.
        n_consonant: width of the third chunk of ``pred_y``.

    Returns:
        The average of the three macro recalls, weighted 2 : 1 : 1 in the
        order (first head, second head, third head).
    """
    head_sizes = [n_grapheme, n_vowel, n_consonant]
    head_scores = torch.split(pred_y, head_sizes, dim=1)
    pred_labels = [torch.argmax(scores, dim=1).cpu().numpy() for scores in head_scores]
    true_labels = y.cpu().numpy()

    # recall_score's signature is (y_true, y_pred). Passing the predictions
    # first silently turns the result into a precision-style score, so the
    # ground truth must be the first argument.
    recalls = [
        sklearn.metrics.recall_score(true_labels[:, head], pred_labels[head], average='macro')
        for head in range(len(head_sizes))
    ]
    return np.average(recalls, weights=[2, 1, 1])
def macro_recall_multi(pred_graphemes, true_graphemes, pred_vowels, true_vowels, pred_consonants, true_consonants,
                       n_grapheme=168, n_vowel=11, n_consonant=7):
    """Return the weighted macro recall plus the per-head macro recalls.

    Each ``pred_*`` tensor is reduced with ``torch.argmax(..., dim=1)`` to get
    predicted class labels, which are scored against the matching ``true_*``
    tensor with macro-averaged recall.

    Args:
        pred_graphemes: torch tensor of per-class scores for the grapheme head.
        true_graphemes: torch tensor of true grapheme labels.
        pred_vowels: torch tensor of per-class scores for the vowel head.
        true_vowels: torch tensor of true vowel labels.
        pred_consonants: torch tensor of per-class scores for the consonant head.
        true_consonants: torch tensor of true consonant labels.
        n_grapheme: unused; kept so existing callers keep working.
        n_vowel: unused; kept so existing callers keep working.
        n_consonant: unused; kept so existing callers keep working.

    Returns:
        A tuple ``(final_score, recall_grapheme, recall_vowel,
        recall_consonant)`` where ``final_score`` is the average of the three
        recalls weighted 2 : 1 : 1 (grapheme, vowel, consonant).
    """

    def _head_recall(pred_scores, true_labels):
        # Move both sides to host memory as numpy arrays for scikit-learn.
        predicted = torch.argmax(pred_scores, dim=1).cpu().numpy()
        actual = true_labels.cpu().numpy()
        # recall_score's signature is (y_true, y_pred); the ground truth has
        # to come first or the result is no longer a recall.
        return sklearn.metrics.recall_score(actual, predicted, average='macro')

    recall_grapheme = _head_recall(pred_graphemes, true_graphemes)
    recall_vowel = _head_recall(pred_vowels, true_vowels)
    recall_consonant = _head_recall(pred_consonants, true_consonants)

    final_score = np.average(
        [recall_grapheme, recall_vowel, recall_consonant],
        weights=[2, 1, 1],
    )
    return final_score, recall_grapheme, recall_vowel, recall_consonant
def macro_recall_multi_mixup(pred_graphemes, true_graphemes1, true_graphemes2, alpha_graphemes,
                             pred_vowels, true_vowels1, true_vowels2, alpha_vowels,
                             pred_consonants, true_consonants1, true_consonants2, alpha_consonants,
                             n_grapheme=168, n_vowel=11, n_consonant=7):
    """Return alpha-blended macro recalls for three heads with two label sets each.

    For every head the predicted labels (argmax of ``pred_*`` along dim 1) are
    scored against two sets of true labels, and the two macro recalls are
    combined as ``alpha * recall_1 + (1 - alpha) * recall_2``.

    NOTE(review): the two label sets and ``alpha`` presumably come from a
    mixup-style augmentation in the caller -- confirm there.

    Args:
        pred_graphemes: torch tensor of per-class scores for the grapheme head.
        true_graphemes1: torch tensor, first set of true grapheme labels.
        true_graphemes2: torch tensor, second set of true grapheme labels.
        alpha_graphemes: weight given to the recall against ``true_graphemes1``.
        pred_vowels: torch tensor of per-class scores for the vowel head.
        true_vowels1: torch tensor, first set of true vowel labels.
        true_vowels2: torch tensor, second set of true vowel labels.
        alpha_vowels: weight given to the recall against ``true_vowels1``.
        pred_consonants: torch tensor of per-class scores for the consonant head.
        true_consonants1: torch tensor, first set of true consonant labels.
        true_consonants2: torch tensor, second set of true consonant labels.
        alpha_consonants: weight given to the recall against ``true_consonants1``.
        n_grapheme: unused; kept so existing callers keep working.
        n_vowel: unused; kept so existing callers keep working.
        n_consonant: unused; kept so existing callers keep working.

    Returns:
        A tuple ``(final_score, recall_grapheme, recall_vowel,
        recall_consonant)`` where ``final_score`` is the average of the three
        blended recalls weighted 2 : 1 : 1 (grapheme, vowel, consonant).
    """

    def _blended_recall(pred_scores, true_first, true_second, alpha):
        predicted = torch.argmax(pred_scores, dim=1).cpu().numpy()
        # recall_score's signature is (y_true, y_pred); the ground truth has
        # to come first or the result is no longer a recall.
        recall_first = sklearn.metrics.recall_score(
            true_first.cpu().numpy(), predicted, average='macro')
        recall_second = sklearn.metrics.recall_score(
            true_second.cpu().numpy(), predicted, average='macro')
        return alpha * recall_first + (1 - alpha) * recall_second

    recall_grapheme = _blended_recall(
        pred_graphemes, true_graphemes1, true_graphemes2, alpha_graphemes)
    recall_vowel = _blended_recall(
        pred_vowels, true_vowels1, true_vowels2, alpha_vowels)
    recall_consonant = _blended_recall(
        pred_consonants, true_consonants1, true_consonants2, alpha_consonants)

    final_score = np.average(
        [recall_grapheme, recall_vowel, recall_consonant],
        weights=[2, 1, 1],
    )
    return final_score, recall_grapheme, recall_vowel, recall_consonant
def calc_macro_recall(solution, submission):
    """Return the weighted macro recall computed from two dataframes.

    For each component name, the rows whose column named after the component
    equals the component name itself are selected, and the ``'target'`` values
    of those rows are scored with macro-averaged recall, using ``solution`` as
    the ground truth and ``submission`` as the prediction.

    NOTE(review): the row filter compares the column ``component`` with the
    literal string ``component``; confirm this matches the real table schema.

    Args:
        solution: dataframe holding the true targets.
        submission: dataframe holding the predicted targets.

    Returns:
        The average of the per-component recalls, weighted 2 : 1 : 1 in the
        order (grapheme_root, consonant_diacritic, vowel_diacritic).
    """
    component_names = ('grapheme_root', 'consonant_diacritic', 'vowel_diacritic')

    def _targets(frame, name):
        # Select the rows for this component and pull out their target values.
        return frame[frame[name] == name]['target'].values

    per_component = [
        sklearn.metrics.recall_score(
            _targets(solution, name), _targets(submission, name), average='macro')
        for name in component_names
    ]
    return np.average(per_component, weights=[2, 1, 1])