I want to implement a Performance Benchmark Python module. The idea: for a task (e.g. text-classification), given models and a corresponding dataset, compute performance metrics, disk size, and latency. Here is the abstract base class:
from abc import ABC, abstractmethod
from pathlib import Path
from time import perf_counter

import numpy as np
import torch
import transformers
from datasets import load_metric


class PerformanceBenchmark(ABC):
    """Task-agnostic base class."""

    def __init__(self, pipeline):
        assert isinstance(pipeline, transformers.Pipeline)
        self.pipeline = pipeline
        self.dataset = None  # task specific

    @abstractmethod
    def compute_performance(self, dataset) -> dict:
        """Abstract method.

        Example:
            preds, labels = [], []
            for example in self.dataset:
                preds.append(self.pipeline(example['input']))
                labels.append(example['label'])
            score = self.metric.compute(predictions=preds, references=labels)
            return {self.metric.name: score}
        """

    def compute_size(self) -> dict:
        # Serialize the weights to a temporary file to measure on-disk size.
        state_dict = self.pipeline.model.state_dict()
        tmp = Path("model.pt")
        torch.save(state_dict, tmp)
        size_mb = tmp.stat().st_size / (1024 * 1024)
        tmp.unlink()
        print(f"Model size (MB) - {size_mb:.2f}")
        return {'size_mb': size_mb}

    def compute_time(self) -> dict:
        # Time 100 single-example passes through the pipeline.
        latencies = []
        for _ in range(100):
            start = perf_counter()
            _ = self.pipeline(self.dataset[0]['text'])
            latencies.append(perf_counter() - start)
        time_avg_ms = 1_000 * np.mean(latencies)
        time_std_ms = 1_000 * np.std(latencies)
        print(f"Average latency (ms) - {time_avg_ms:.2f} +/- {time_std_ms:.2f}")
        return {'time_avg_ms': time_avg_ms, 'time_std_ms': time_std_ms}

    def run_benchmark(self):
        metrics = {
            **self.compute_size(),
            **self.compute_time(),
            **self.compute_performance(self.dataset),
        }
        return metrics
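Before wiring up a task-specific subclass, the size and latency paths can be smoke-tested on their own. A minimal sketch, assuming a throwaway subclass is acceptable and the sst-2 checkpoint below is available (both are assumptions, not part of the module):

from transformers import pipeline

class _SmokeTestBenchmark(PerformanceBenchmark):
    # Hypothetical subclass, only here to satisfy the abstract method.
    def compute_performance(self, dataset) -> dict:
        return {}

pipe = pipeline('text-classification',
                model='distilbert-base-uncased-finetuned-sst-2-english')
bench = _SmokeTestBenchmark(pipe)
bench.dataset = [{'text': 'I love this movie!'}]  # compute_time reads dataset[0]['text']
print(bench.compute_size())   # {'size_mb': ...}
print(bench.compute_time())   # {'time_avg_ms': ..., 'time_std_ms': ...}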
Here is the task-specific class for text-classification:
from nlphub import PerformanceBenchmark
from nlphub.utils import rename_dataset_label_key
import datasets


class ClassificationBenchmark(PerformanceBenchmark):
    """Measure latency, disk size, and performance on the input dataset."""

    def __init__(self, pipeline, dataset):
        super().__init__(pipeline)
        self.dataset = dataset  # compute_time reads self.dataset[0]['text']
        # Parse the label key from the dataset features (e.g. 'label').
        self._label = list(dataset.features.keys())[1]
        self.features = dataset.features[self._label]

    def compute_performance(self, dataset) -> dict:
        assert isinstance(dataset, datasets.Dataset), 'dataset is not of type datasets.Dataset'
        rename_dataset_label_key(dataset)
        assert 'text' in dataset.column_names and 'label' in dataset.column_names, \
            "dataset doesn't contain 'text' or 'label' columns"
        preds, labels = [], []
        for example in dataset:
            # The pipeline returns a list with one dict per input,
            # e.g. [{'label': 'joy', 'score': 0.98}].
            pred = self.pipeline(example['text'])
            pred_int = self.features.str2int(pred[0]['label'])
            preds.append(pred_int)
            labels.append(example['label'])
        score = self.metric.compute(predictions=preds, references=labels)
        return {self.metric.name: score}
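For reference, a usage sketch (the 'emotion' dataset ships 'text' and 'label' columns; the checkpoint name below is a hypothetical placeholder, substitute any model fine-tuned on the same label set):

from datasets import load_dataset
from transformers import pipeline

dataset = load_dataset('emotion', split='test')
pipe = pipeline('text-classification',
                model='my-org/distilbert-finetuned-emotion')  # hypothetical checkpoint
bench = ClassificationBenchmark(pipe, dataset)
results = bench.run_benchmark()  # NOTE: fails until self.metric is defined, see below
print(results)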
As you have noticed, self.metric is not defined.
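One possible fix, a sketch only (the intended design may differ, and the default metric here is an assumption): define the metric in the subclass constructor with the load_metric helper the base module already imports.

from datasets import load_metric

class ClassificationBenchmark(PerformanceBenchmark):
    def __init__(self, pipeline, dataset, metric_name='accuracy'):
        super().__init__(pipeline)
        self.dataset = dataset
        self._label = list(dataset.features.keys())[1]
        self.features = dataset.features[self._label]
        # Assumption: accuracy as the default; exposes .compute() and .name
        self.metric = load_metric(metric_name)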