1
- import numpy as np
2
1
import matplotlib .pyplot as plt
3
- # https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python
4
- from timeit import default_timer as timer
2
+ import numpy as np
3
+ import pandas as pd
4
+ import seaborn as sns
5
5
6
6
import torch
7
7
from transformers import AutoTokenizer
8
8
from transformers import AutoModelForSequenceClassification
9
9
10
- from utils import preprocess , download_label_mapping , output_vector_to_labels
10
+ from onnxruntime import ExecutionMode , InferenceSession , SessionOptions
11
11
12
+ from inference import PythonInference , OnxxInference
13
+ from utils import preprocess , download_label_mapping , output_vector_to_labels , measurements_to_dataframe
12
14
13
15
def read_test_sequences(path: str):
    """Read the benchmark input sequences from a text file, one per line.

    Args:
        path: Location of the text file to read.

    Returns:
        A list with one string per line of the file, in file order, with
        trailing whitespace (including the line terminator) removed. Blank
        lines are kept as empty strings; an empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Pin the encoding: the platform default is not UTF-8 everywhere, and the
    # sequences may contain non-ASCII characters such as emoji.
    with open(path, 'r', encoding='utf-8') as f:
        # Iterating the handle streams lines instead of materialising them
        # first with readlines().
        return [line.rstrip() for line in f]
17
19
18
-
19
- def run_model (model , tokenized_input ):
20
- output = model (** tokenized_input )
21
- return output_vector_to_labels (output , download_label_mapping ())
22
-
23
-
24
- def check_inference_time (model , tokenized_input ):
25
- t = timer ()
26
- scores = run_model (model , tokenized_input )
27
- elapsed_time = timer ()- t
28
- return elapsed_time
29
-
30
-
31
20
def _benchmark(runner, tokenized_inputs, n_experiments, indices, mode):
    """Time one inference backend and return its measurements as a labelled frame.

    Args:
        runner: Object exposing ``check_inference_time_all(inputs, n)``.
        tokenized_inputs: Tokenized sequences handed to the runner unchanged.
        n_experiments: Number of repetitions requested from the runner.
        indices: Sequence id for every flattened measurement.
        mode: Value written to the ``'Mode'`` column of the result.

    Returns:
        The frame built by ``measurements_to_dataframe`` with an added
        ``'Mode'`` column.
    """
    measurements = runner.check_inference_time_all(tokenized_inputs, n_experiments)
    # NOTE(review): pairing with `indices` assumes the flattened measurements
    # are ordered experiment-by-experiment, sequence-by-sequence - confirm
    # against the `inference` module.
    frame = measurements_to_dataframe(measurements.flatten(), indices)
    frame['Mode'] = mode
    return frame


def main():
    """Benchmark eager, TorchScript and ONNX Runtime inference and plot the timings.

    Loads the sentiment classifier and its tokenizer, tokenizes every line of
    ``test_sequences.txt``, times the three backends over the same inputs, and
    shows a per-sequence scatter plot followed by a bar chart of the mean
    inference time per backend.

    Raises:
        SystemExit: If ``test_sequences.txt`` contains no sequences.
    """
    model_name = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(model_name, torchscript=True)
    clf = AutoModelForSequenceClassification.from_pretrained(model_name, torchscript=True)

    input_texts = [preprocess(x) for x in read_test_sequences("test_sequences.txt")]
    if not input_texts:
        # Tracing below needs one example input; fail with a clear message
        # instead of an IndexError.
        raise SystemExit("test_sequences.txt contains no sequences to benchmark")
    tokenized_inputs = [tokenizer(x, return_tensors='pt') for x in input_texts]

    n_experiments = 2
    # One sequence id per measurement: 0..N-1 repeated once per experiment.
    indices = np.tile(np.arange(len(input_texts)), n_experiments)

    # 1. Eager
    eager_model = PythonInference(clf)
    df_eager = _benchmark(eager_model, tokenized_inputs, n_experiments, indices, 'Eager')

    # 2. TorchScript (JIT)
    # The trace is recorded from the first tokenized sequence only.
    example = tokenized_inputs[0]
    traced_model = PythonInference(
        model=torch.jit.trace(clf, (example['input_ids'], example['attention_mask']))
    )
    df_script = _benchmark(traced_model, tokenized_inputs, n_experiments, indices, 'Script')

    # 3. ONNX Runtime
    onnx_model = OnxxInference(
        session=InferenceSession("onnx_model/twitter-roberta-base-sentiment-optimized-quantized.onnx")
    )
    df_onnx = _benchmark(onnx_model, tokenized_inputs, n_experiments, indices, 'ONNX')

    # Statistics
    # matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8" and
    # later releases dropped the old name; use whichever this install provides.
    style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
    plt.style.use(style_name)

    plt.figure()
    for frame, label in (
        (df_eager, 'Eager mode'),
        (df_script, 'Script mode'),
        (df_onnx, 'ONNX mode'),
    ):
        plt.scatter(x=frame['SequenceId'], y=frame['TimeInSeconds'], label=label)
    plt.xlabel('Sequence ID')
    plt.ylabel('Inference time [s]')
    plt.legend()
    plt.show()

    plt.figure()
    df_all = pd.concat([df_eager, df_script, df_onnx])
    # Select the column before averaging so other columns are never reduced.
    df_all.groupby('Mode')['TimeInSeconds'].mean().plot(kind='bar')
    plt.title('Avg. inference time in seconds')
    plt.ylabel('Inference time [s]')
    plt.show()

    # Box plots


if __name__ == "__main__":
    main()
0 commit comments