-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCCMetric.py
More file actions
246 lines (198 loc) · 7.29 KB
/
CCMetric.py
File metadata and controls
246 lines (198 loc) · 7.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
from CCScoreManager import *
import os
import pickle
from utils_gen import *
from AllenTool import *
class CCMetric:
    """
    similar to Openie6.metric.Conjunction

    This class does scoring for task="cc". Class ExMetric does scoring for
    task="ex".

    This class stores scores of model weights. It holds one scoring
    manager (an instance of CCScoreManager) per kind in SCORE_KINDS:
    `kind_to_manager[kind]`, where kind is in SCORE_KINDS. Each manager is
    constructed with its own kind.

    If `save` is True (its value is taken from the global CC_METRIC_SAVE),
    then for every sample seen by __call__() the sentence and the predicted
    and true ccnodes are appended to pickle files in the directory
    CC_METRIC_STORAGE_DIR. Files left over from a previous run are deleted
    by the constructor.

    Attributes
    ----------
    kind_to_manager: dict[str, CCScoreManager]
    save: bool
    score_d: dict[str, tuple]
        the zero value of every entry is the pair (0, 0)
    verbose: bool

    """

    # Names of the pickle files (inside CC_METRIC_STORAGE_DIR) that are
    # deleted by __init__() and appended to by __call__() when self.save
    # is True.
    _OSENT_PKL = "l_osent.pkl"
    _PRED_PKL = "l_pred_ccnodes.pkl"
    _TRUE_PKL = "l_true_ccnodes.pkl"

    def __init__(self, verbose=False):
        """
        Constructor

        Parameters
        ----------
        verbose: bool
        """
        self.kind_to_manager = {}
        for kind in SCORE_KINDS:
            self.kind_to_manager[kind] = CCScoreManager(kind)
        self.save = CC_METRIC_SAVE
        self.score_d = CCMetric.get_zero_score_d()
        self.verbose = verbose
        if self.save:
            print("CCMetric deleting previous pkl files.")
            di = CC_METRIC_STORAGE_DIR
            for fname in (self._OSENT_PKL, self._PRED_PKL, self._TRUE_PKL):
                try:
                    os.remove(di + '/' + fname)
                except FileNotFoundError:
                    # nothing to delete; this is the normal first-run case
                    pass

    @staticmethod
    def _append_pickle(fp, obj):
        """
        This private method appends the pickle of `obj` to the file `fp`.
        The file is opened with a context manager so that its handle is
        closed as soon as the dump is done.

        Parameters
        ----------
        fp: str
        obj: object

        Returns
        -------
        None

        """
        with open(fp, 'ab') as f:
            pickle.dump(obj, f)

    def __call__(self,
                 l_osent,  # Openie6.meta_data
                 lll_pred_ilabel,  # Openie6.predictions
                 lll_ilabel):  # Openie6.ground_truth
        """
        similar to Openie6.metric.Conjunction.__call__

        A __call__() method is a new chance to load attributes into the
        class after the __init__() has been called.

        Whereas __init__() is called only once, __call__() can be called
        multiple times for the same class instance. For CCMetric,
        this __call__() method is called for each batch of an epoch. Each
        time, every manager absorbs the new samples, so the scores in the
        managers grow.

        The number of samples processed is len(lll_ilabel); `l_osent` and
        `lll_pred_ilabel` are indexed with the same sample index.

        Parameters
        ----------
        l_osent: list[str]
        lll_pred_ilabel: list[list[list[int]]]
        lll_ilabel: list[list[list[int]]]

        Returns
        -------
        None

        """
        num_samples = len(lll_ilabel)
        if self.verbose:
            print("Entering CCMetric.__call__() method.")
            print("number of samples=", num_samples)
        for k in range(num_samples):
            pred_ccnodes = CCTree(l_osent[k],
                                  lll_pred_ilabel[k],
                                  calc_tree_struc=True).ccnodes
            true_ccnodes = CCTree(l_osent[k],
                                  lll_ilabel[k],
                                  calc_tree_struc=True).ccnodes
            for kind in SCORE_KINDS:
                self.kind_to_manager[kind].absorb_new_sample(
                    pred_ccnodes, true_ccnodes)

            if self.save:
                # we append to the pickle files for each sample.
                di = CC_METRIC_STORAGE_DIR
                self._append_pickle(di + '/' + self._OSENT_PKL,
                                    l_osent[k])
                self._append_pickle(di + '/' + self._PRED_PKL,
                                    pred_ccnodes)
                self._append_pickle(di + '/' + self._TRUE_PKL,
                                    true_ccnodes)

    def get_all_node_ccscore(self, kind='exact'):
        """
        Similar to Openie6.metric.Conjunction.get_overall_score().

        This method returns the `all_node_ccscore` of the manager for kind
        `kind`.

        Parameters
        ----------
        kind: str

        Returns
        -------
        CCScore

        """
        return self.kind_to_manager[kind].all_node_ccscore

    @staticmethod
    def get_zero_score_d():
        """
        This method returns a new copy of the `score_d` dictionary with all
        values equal to the zero pair (0, 0). There are two keys per kind
        in SCORE_KINDS: "acc_nsam_{kind}" and "acc_nsam_{kind}11".

        Returns
        -------
        dict[str, tuple[int, int]]

        """
        score_d = {}
        for kind in SCORE_KINDS:
            score_d[f"acc_nsam_{kind}"] = (0, 0)
            score_d[f"acc_nsam_{kind}11"] = (0, 0)
        return score_d

    def reset_score_d(self):
        """
        Unlike the method get_zero_score_d(), this method does not create a
        new `score_d` dictionary. Instead, it sets to (0, 0) all values of
        the existing `self.score_d`.

        Returns
        -------
        None

        """
        # only existing keys are re-assigned, so the dictionary does not
        # change size while we iterate over it.
        for name in self.score_d:
            self.score_d[name] = (0, 0)

    def reset_managers(self):
        """
        similar to Openie6.metric.Conjunction.reset()

        This method resets every manager in `self.kind_to_manager` (one
        per kind in SCORE_KINDS).

        Note that reset_managers() and reset_score_d() are separate methods.
        Openie6 lumps them together.

        Returns
        -------
        None

        """
        for kind in SCORE_KINDS:
            self.kind_to_manager[kind].reset()

    def get_score_d(self, ttt, do_reset=True):
        """
        similar to Openie6.metric.Conjunction.get_metric()

        This method builds a fresh `score_d` from the current state of the
        managers, stores a shallow copy of it in `self.score_d`, and returns
        it. If do_reset=True, `self.score_d` is then set back to zero via
        reset_score_d(); the returned dictionary is not affected by that.

        NOTE(review): this method does NOT reset the managers. A caller
        that wants to clear the cumulative manager scores (e.g. between
        epochs) must call reset_managers() itself.

        Parameters
        ----------
        ttt: str
            never used, except as placeholder.
            ExMetric.get_score_d() has same signature and uses it.
        do_reset: bool

        Returns
        -------
        dict[str, tuple]
            values are whatever CCScore.get_acc_nsam() and
            CCScore.get_acc_nsam11() return (presumably (accuracy,
            number of samples) pairs -- verify against CCScore)

        """
        if self.verbose:
            print("Entering CCMetric.get_score_d method.")
        score_d = {}
        for kind in SCORE_KINDS:
            score_d[f"acc_nsam_{kind}"] = \
                self.get_all_node_ccscore(kind).get_acc_nsam()
            score_d[f"acc_nsam_{kind}11"] = \
                self.get_all_node_ccscore(kind).get_acc_nsam11()

        # shallow copy, so that zeroing self.score_d below leaves the
        # returned dictionary untouched.
        self.score_d = dict(score_d)
        if do_reset:
            self.reset_score_d()
        return score_d
if __name__ == "__main__":
    def main():
        """
        Small self-test of CCMetric.

        Reads the file "tests/cc_ilabels.txt". A line whose first character
        is a letter is taken as a sentence; a line whose first character is
        a digit is split into words that are converted to a list of ints.
        Consecutive digit lines are grouped together, and a group is closed
        when the next sentence line (or the end of the file) is reached.
        The same ilabels are then passed to CCMetric as both the predicted
        and the true ones, and the resulting scores are printed.

        Returns
        -------
        None

        """
        metric = CCMetric(verbose=True)
        with open("tests/cc_ilabels.txt", "r", encoding="utf-8") as stream:
            lines = get_ascii(stream.readlines())

        sentences = []
        groups = []  # one list of int-lists per closed group
        pending = []  # int-lists collected since the last sentence line
        for line in lines:
            if not line:
                continue
            first_char = line[0]
            if first_char.isalpha():
                sentences.append(line.strip())
                # a new sentence closes the group collected so far
                if pending:
                    groups.append(pending)
                    pending = []
            elif first_char.isdigit():
                pending.append([int(word) for word in get_words(line)])
        # close the group of the last sentence
        if pending:
            groups.append(pending)

        # predictions == ground truth
        metric(sentences, groups, groups)
        scores = metric.get_score_d(ttt="train", do_reset=True)
        print(scores)
        exact_ccscore = metric.get_all_node_ccscore("exact")
        print("acc-nsam score:", exact_ccscore.get_acc_nsam())

    main()