-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.py
48 lines (42 loc) · 1.5 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import csv
import numpy as np
import pandas as pd
import urllib.request
from scipy.special import softmax
def preprocess(text):
    """Mask user mentions and links in ``text``.

    The text is split on single spaces. A token that starts with ``@`` and
    is longer than one character is replaced by ``@user``; a token that
    starts with ``http`` is replaced by ``http``. Every other token is kept
    as is, and the tokens are joined back together with single spaces.
    """

    def _mask(token):
        # A lone "@" is not treated as a mention and is left untouched.
        if len(token) > 1 and token.startswith('@'):
            return '@user'
        if token.startswith('http'):
            return 'http'
        return token

    return " ".join(_mask(token) for token in text.split(" "))
def download_label_mapping(task="sentiment"):
    """Download the label names for a TweetEval task.

    Fetches ``datasets/<task>/mapping.txt`` from the cardiffnlp/tweeteval
    GitHub repository, parses it as tab-separated values and returns the
    second column of every row that has at least two columns.

    Args:
        task: Name of the dataset directory inside the repository's
            ``datasets/`` folder. Defaults to ``"sentiment"``, the mapping
            this function previously hard-coded.

    Returns:
        A list of label strings in file order.

    Raises:
        urllib.error.URLError: If the mapping file cannot be fetched.
    """
    mapping_link = (
        "https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/"
        f"datasets/{task}/mapping.txt"
    )
    with urllib.request.urlopen(mapping_link) as response:
        lines = response.read().decode('utf-8').split("\n")
    # Blank lines (such as the one left by a trailing newline) parse to rows
    # with fewer than two fields and are skipped.
    return [row[1] for row in csv.reader(lines, delimiter='\t') if len(row) > 1]
def output_vector_to_labels(output, labels_map):
    """Convert a raw score vector into a label -> probability mapping.

    ``output[0][0]`` is read as the score vector; it is expected to be 1-D
    (presumably the logits of the first item in a batch; confirm against the
    caller). It may be a NumPy array, an object exposing ``detach()`` such as
    a PyTorch tensor, or anything ``np.asarray`` can convert (e.g. a list).

    Args:
        output: Nested container whose ``[0][0]`` element holds the scores.
        labels_map: Indexable mapping from class index to label name.

    Returns:
        A dict mapping each label to its softmax probability rounded to four
        decimals, inserted in order of decreasing probability.
    """
    logits = output[0][0]
    if not isinstance(logits, np.ndarray):
        if hasattr(logits, "detach"):
            # Move to host memory first: calling .numpy() on a tensor that
            # lives on an accelerator raises instead of copying.
            logits = logits.detach().cpu().numpy()
        else:
            logits = np.asarray(logits, dtype=float)
    scores = softmax(logits)
    # argsort is ascending; reverse it so the most likely label comes first.
    ranking = np.argsort(scores)[::-1]
    return {
        labels_map[idx]: np.round(float(scores[idx]), 4)
        for idx in ranking
    }
def measurements_to_dataframe(measurements, indices):
    """Build, print and return a DataFrame of per-sequence measurements.

    ``indices`` and ``measurements`` are paired element by element. Each pair
    becomes one row with the columns ``SequenceId``, ``TimeInSeconds`` and
    ``Mode``, where ``Mode`` is always the string ``'Unknown'``. The frame is
    printed to stdout before it is returned.
    """
    rows = [
        [seq_id, elapsed, 'Unknown']
        for seq_id, elapsed in zip(indices, measurements)
    ]
    frame = pd.DataFrame(rows, columns=['SequenceId', 'TimeInSeconds', 'Mode'])
    print(frame)
    return frame