
Commit 2e38bf5

add TensorRT configuration to OrtProviderOptions (microsoft#6979)
* add TensorRT configurations in provider options
* Update ort_test_session.cc
* Update tensorrt_execution_provider.cc
* Update onnxruntime_pybind_state.cc
* Update main.cc
1 parent 783acb1 commit 2e38bf5

File tree

9 files changed: +221 -23 lines

include/onnxruntime/core/session/onnxruntime_c_api.h

Lines changed: 9 additions & 3 deletions
@@ -289,9 +289,15 @@ typedef struct OrtROCMProviderOptions {
 /// Options for the TensorRT provider that are passed to SessionOptionsAppendExecutionProvider_TensorRT
 /// </summary>
 typedef struct OrtTensorRTProviderOptions {
-  int device_id;
-  int has_user_compute_stream;
-  void* user_compute_stream;
+  int device_id;                                // CUDA device id.
+  int has_user_compute_stream;                  // indicator of a user-specified CUDA compute stream.
+  void* user_compute_stream;                    // user-specified CUDA compute stream.
+  int has_trt_options;                          // override environment variables with the following TensorRT settings at runtime.
+  size_t trt_max_workspace_size;                // maximum workspace size for TensorRT.
+  int trt_fp16_enable;                          // enable TensorRT FP16 precision. Default 0 = false, nonzero = true.
+  int trt_int8_enable;                          // enable TensorRT INT8 precision. Default 0 = false, nonzero = true.
+  const char* trt_int8_calibration_table_name;  // TensorRT INT8 calibration table name.
+  int trt_int8_use_native_calibration_table;    // use the native TensorRT-generated calibration table. Default 0 = false, nonzero = true.
 } OrtTensorRTProviderOptions;

 /// <summary>
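For reference, a minimal sketch of how a caller might fill the extended struct and hand it to a session through the C++ wrapper Ort::SessionOptions::AppendExecutionProvider_TensorRT (the same call the perftest change below uses). The model path is a placeholder:

// A minimal sketch, assuming onnxruntime_cxx_api.h from this tree is on the include path.
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "trt_options_example");
  Ort::SessionOptions session_options;

  OrtTensorRTProviderOptions trt_options{};      // zero-initialize: device 0, no user stream
  trt_options.has_trt_options = 1;               // take the fields below instead of env vars
  trt_options.trt_max_workspace_size = 1 << 30;  // 1 GiB workspace
  trt_options.trt_fp16_enable = 1;
  session_options.AppendExecutionProvider_TensorRT(trt_options);

  Ort::Session session(env, "model.onnx", session_options);  // "model.onnx" is a placeholder
  return 0;
}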

onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc

Lines changed: 35 additions & 15 deletions
@@ -404,30 +404,50 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
     min_subgraph_size_ = std::stoi(min_subgraph_size_env);
   }

-  const std::string max_workspace_size_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxWorkspaceSize);
-  if (!max_workspace_size_env.empty()) {
-    max_workspace_size_ = std::stoull(max_workspace_size_env);
+  if (info.has_trt_options) {
+    max_workspace_size_ = info.max_workspace_size;
+  } else {
+    const std::string max_workspace_size_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxWorkspaceSize);
+    if (!max_workspace_size_env.empty()) {
+      max_workspace_size_ = std::stoull(max_workspace_size_env);
+    }
   }

-  const std::string fp16_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kFP16Enable);
-  if (!fp16_enable_env.empty()) {
-    fp16_enable_ = (std::stoi(fp16_enable_env) == 0 ? false : true);
+  if (info.has_trt_options) {
+    fp16_enable_ = info.fp16_enable;
+  } else {
+    const std::string fp16_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kFP16Enable);
+    if (!fp16_enable_env.empty()) {
+      fp16_enable_ = (std::stoi(fp16_enable_env) == 0 ? false : true);
+    }
   }

-  const std::string int8_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8Enable);
-  if (!int8_enable_env.empty()) {
-    int8_enable_ = (std::stoi(int8_enable_env) == 0 ? false : true);
+  if (info.has_trt_options) {
+    int8_enable_ = info.int8_enable;
+  } else {
+    const std::string int8_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8Enable);
+    if (!int8_enable_env.empty()) {
+      int8_enable_ = (std::stoi(int8_enable_env) == 0 ? false : true);
+    }
   }

   if (int8_enable_) {
-    const std::string int8_calibration_cache_name_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8CalibrationTableName);
-    if (!int8_calibration_cache_name_env.empty()) {
-      int8_calibration_cache_name_ = int8_calibration_cache_name_env;
+    if (info.has_trt_options) {
+      int8_calibration_cache_name_ = info.int8_calibration_table_name;
+    } else {
+      const std::string int8_calibration_cache_name_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8CalibrationTableName);
+      if (!int8_calibration_cache_name_env.empty()) {
+        int8_calibration_cache_name_ = int8_calibration_cache_name_env;
+      }
     }

-    const std::string int8_use_native_tensorrt_calibration_table_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8UseNativeTensorrtCalibrationTable);
-    if (!int8_use_native_tensorrt_calibration_table_env.empty()) {
-      int8_use_native_tensorrt_calibration_table_ = (std::stoi(int8_use_native_tensorrt_calibration_table_env) == 0 ? false : true);
+    if (info.has_trt_options) {
+      int8_use_native_tensorrt_calibration_table_ = info.int8_use_native_calibration_table;
+    } else {
+      const std::string int8_use_native_tensorrt_calibration_table_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kINT8UseNativeTensorrtCalibrationTable);
+      if (!int8_use_native_tensorrt_calibration_table_env.empty()) {
+        int8_use_native_tensorrt_calibration_table_ = (std::stoi(int8_use_native_tensorrt_calibration_table_env) == 0 ? false : true);
+      }
     }
   }
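The pattern is identical for every field: when info.has_trt_options is set, the struct value wins outright; otherwise the provider keeps honoring the environment variables named by tensorrt_env_vars. A sketch of the two resulting configuration paths (the ORT_TENSORRT_* variable names and the POSIX setenv call are assumptions here, shown only for illustration):

#include <cstdlib>                // setenv (POSIX; assumption)
#include <onnxruntime_c_api.h>    // OrtTensorRTProviderOptions

// Path 1: leave has_trt_options at 0 and configure through the environment.
void configure_via_env() {
  setenv("ORT_TENSORRT_MAX_WORKSPACE_SIZE", "1073741824", 1);
  setenv("ORT_TENSORRT_FP16_ENABLE", "1", 1);
}

// Path 2: set has_trt_options; the struct fields then override the
// corresponding environment variables for this provider instance.
void configure_via_struct(OrtTensorRTProviderOptions& opts) {
  opts.has_trt_options = 1;
  opts.trt_max_workspace_size = 1073741824;  // same 1 GiB as above
  opts.trt_fp16_enable = 1;
}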

onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h

Lines changed: 6 additions & 0 deletions
@@ -71,6 +71,12 @@ struct TensorrtExecutionProviderInfo {
   int device_id{0};
   bool has_user_compute_stream{false};
   void* user_compute_stream{nullptr};
+  bool has_trt_options{false};
+  size_t max_workspace_size{1 << 30};
+  bool fp16_enable{false};
+  bool int8_enable{false};
+  std::string int8_calibration_table_name{""};
+  bool int8_use_native_calibration_table{false};
 };

 // Information to construct kernel function state.
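These in-tree defaults match the initializers used for the public struct elsewhere in this commit: 1 << 30 bytes (1 GiB) of TensorRT workspace, FP16 and INT8 off, no calibration table.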

onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc

Lines changed: 6 additions & 0 deletions
@@ -49,6 +49,12 @@ struct Tensorrt_Provider : Provider {
   info.device_id = options.device_id;
   info.has_user_compute_stream = options.has_user_compute_stream;
   info.user_compute_stream = options.user_compute_stream;
+  info.has_trt_options = options.has_trt_options;
+  info.max_workspace_size = options.trt_max_workspace_size;
+  info.fp16_enable = options.trt_fp16_enable;
+  info.int8_enable = options.trt_int8_enable;
+  info.int8_calibration_table_name = options.trt_int8_calibration_table_name == nullptr ? "" : options.trt_int8_calibration_table_name;
+  info.int8_use_native_calibration_table = options.trt_int8_use_native_calibration_table;
   return std::make_shared<TensorrtProviderFactory>(info);
 }
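Note the nullptr guard on trt_int8_calibration_table_name: a C caller can legally leave the pointer unset, so the factory maps nullptr to an empty std::string before the provider sees it. The remaining fields are plain value copies from the public C struct into the internal TensorrtExecutionProviderInfo.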

onnxruntime/python/onnxruntime_pybind_state.cc

Lines changed: 55 additions & 1 deletion
@@ -490,7 +490,61 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
                                            sess->GetSessionOptions().enable_cpu_mem_arena));
     } else if (type == kTensorrtExecutionProvider) {
 #ifdef USE_TENSORRT
-      OrtTensorRTProviderOptions params{0, 0, nullptr};
+      OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0};
+      std::string trt_int8_calibration_table_name;
+      auto it = provider_options_map.find(type);
+      if (it != provider_options_map.end()) {
+        for (auto option : it->second) {
+          if (option.first == "has_trt_options") {
+            if (option.second == "True" || option.second == "true") {
+              params.has_trt_options = true;
+            } else if (option.second == "False" || option.second == "false") {
+              params.has_trt_options = false;
+            } else {
+              ORT_THROW("[ERROR] [TensorRT] The value for the key 'has_trt_options' should be a boolean, i.e. 'True' or 'False'. Default value is 'False'.\n");
+            }
+          } else if (option.first == "trt_max_workspace_size") {
+            if (!option.second.empty()) {
+              params.trt_max_workspace_size = std::stoull(option.second);
+            } else {
+              ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number in bytes, e.g. '1073741824'.\n");
+            }
+          } else if (option.first == "trt_fp16_enable") {
+            if (option.second == "True" || option.second == "true") {
+              params.trt_fp16_enable = true;
+            } else if (option.second == "False" || option.second == "false") {
+              params.trt_fp16_enable = false;
+            } else {
+              ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean, i.e. 'True' or 'False'. Default value is 'False'.\n");
+            }
+          } else if (option.first == "trt_int8_enable") {
+            if (option.second == "True" || option.second == "true") {
+              params.trt_int8_enable = true;
+            } else if (option.second == "False" || option.second == "false") {
+              params.trt_int8_enable = false;
+            } else {
+              ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean, i.e. 'True' or 'False'. Default value is 'False'.\n");
+            }
+          } else if (option.first == "trt_int8_calibration_table_name") {
+            if (!option.second.empty()) {
+              trt_int8_calibration_table_name = option.second;
+              params.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str();
+            } else {
+              ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a file name, e.g. 'cal_table'.\n");
+            }
+          } else if (option.first == "trt_int8_use_native_calibration_table") {
+            if (option.second == "True" || option.second == "true") {
+              params.trt_int8_use_native_calibration_table = true;
+            } else if (option.second == "False" || option.second == "false") {
+              params.trt_int8_use_native_calibration_table = false;
+            } else {
+              ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean, i.e. 'True' or 'False'. Default value is 'False'.\n");
+            }
+          } else {
+            ORT_THROW("Invalid TensorRT EP option: ", option.first);
+          }
+        }
+      }
       RegisterExecutionProvider(sess, *onnxruntime::CreateExecutionProviderFactory_Tensorrt(&params));
 #endif
     } else if (type == kMIGraphXExecutionProvider) {
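Because these options arrive from Python as strings in provider_options_map, booleans are matched textually against 'True'/'true' and 'False'/'false' rather than parsed as integers. On the Python side, a call along the lines of sess.set_providers(['TensorrtExecutionProvider'], [{'has_trt_options': 'True', 'trt_fp16_enable': 'True'}]) would feed this path; the exact Python entry point is not part of this diff, so treat that call shape as an assumption. Also note the std::string local trt_int8_calibration_table_name: it exists to keep the c_str() pointer alive until RegisterExecutionProvider runs.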

onnxruntime/test/onnx/main.cc

Lines changed: 7 additions & 1 deletion
@@ -312,7 +312,13 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
   OrtTensorRTProviderOptions tensorrt_options{
       0,
       0,
-      nullptr};
+      nullptr,
+      0,
+      1 << 30,
+      0,
+      0,
+      nullptr,
+      0};

   OrtCUDAProviderOptions cuda_options{
       0,
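This aggregate initializer is positional, so the six appended values must track the new field order in onnxruntime_c_api.h exactly: has_trt_options = 0, trt_max_workspace_size = 1 << 30, trt_fp16_enable = 0, trt_int8_enable = 0, trt_int8_calibration_table_name = nullptr, trt_int8_use_native_calibration_table = 0.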

onnxruntime/test/perftest/command_args_parser.cc

Lines changed: 8 additions & 0 deletions
@@ -62,6 +62,14 @@ namespace perftest {
       "\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
       "\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
       "\t [Example] [For OpenVINO EP] -e openvino -i 'device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5'\n"
+      "\t [TensorRT only] [has_trt_options]: Overrides TensorRT environment variables (if any) with the following settings at runtime.\n"
+      "\t [TensorRT only] [trt_max_workspace_size]: Set the TensorRT maximum workspace size in bytes.\n"
+      "\t [TensorRT only] [trt_fp16_enable]: Enable TensorRT FP16 precision.\n"
+      "\t [TensorRT only] [trt_int8_enable]: Enable TensorRT INT8 precision.\n"
+      "\t [TensorRT only] [trt_int8_calibration_table_name]: Specify the INT8 calibration table name.\n"
+      "\t [TensorRT only] [trt_int8_use_native_calibration_table]: Use the native TensorRT-generated calibration table.\n"
+      "\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
+      "\t [Example] [For TensorRT EP] -e tensorrt -i 'has_trt_options|true trt_fp16_enable|true trt_int8_enable|true trt_int8_calibration_table_name|calibration.flatbuffers trt_int8_use_native_calibration_table|false'\n"
       "\t-h: help\n");
 }
 #ifdef _WIN32
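With these flags wired in, a perftest run can exercise the new code path directly, e.g. ./onnxruntime_perf_test -e tensorrt -i 'has_trt_options|true trt_fp16_enable|true' model.onnx (the binary name and model path shown are placeholders).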

onnxruntime/test/perftest/ort_test_session.cc

Lines changed: 94 additions & 2 deletions
@@ -62,8 +62,100 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
 #endif
   } else if (provider_name == onnxruntime::kTensorrtExecutionProvider) {
 #ifdef USE_TENSORRT
-    Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(session_options, 0));
-    Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));
+    bool has_trt_options = false;
+    size_t trt_max_workspace_size = 1 << 30;
+    bool trt_fp16_enable = false;
+    bool trt_int8_enable = false;
+    std::string trt_int8_calibration_table_name = "";
+    bool trt_int8_use_native_calibration_table = false;
+
+#ifdef _MSC_VER
+    std::string ov_string = ToMBString(performance_test_config.run_config.ep_runtime_config_string);
+#else
+    std::string ov_string = performance_test_config.run_config.ep_runtime_config_string;
+#endif
+    std::istringstream ss(ov_string);
+    std::string token;
+    while (ss >> token) {
+      if (token == "") {
+        continue;
+      }
+      auto pos = token.find("|");
+      if (pos == std::string::npos || pos == 0 || pos == token.length()) {
+        ORT_THROW("[ERROR] [TensorRT] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
+      }
+
+      auto key = token.substr(0, pos);
+      auto value = token.substr(pos + 1);
+      if (key == "has_trt_options") {
+        if (value == "true" || value == "True") {
+          has_trt_options = true;
+        } else if (value == "false" || value == "False") {
+          has_trt_options = false;
+        } else {
+          ORT_THROW("[ERROR] [TensorRT] The value for the key 'has_trt_options' should be a boolean, i.e. true or false. Default value is false.\n");
+        }
+      } else if (key == "trt_max_workspace_size") {
+        if (!value.empty()) {
+          trt_max_workspace_size = std::stoull(value);
+        } else {
+          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_max_workspace_size' should be a number in bytes.\n");
+        }
+      } else if (key == "trt_fp16_enable") {
+        if (value == "true" || value == "True") {
+          trt_fp16_enable = true;
+        } else if (value == "false" || value == "False") {
+          trt_fp16_enable = false;
+        } else {
+          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_fp16_enable' should be a boolean, i.e. true or false. Default value is false.\n");
+        }
+      } else if (key == "trt_int8_enable") {
+        if (value == "true" || value == "True") {
+          trt_int8_enable = true;
+        } else if (value == "false" || value == "False") {
+          trt_int8_enable = false;
+        } else {
+          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_enable' should be a boolean, i.e. true or false. Default value is false.\n");
+        }
+      } else if (key == "trt_int8_calibration_table_name") {
+        if (!value.empty()) {
+          trt_int8_calibration_table_name = value;
+        } else {
+          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_calibration_table_name' should be a non-empty string.\n");
+        }
+      } else if (key == "trt_int8_use_native_calibration_table") {
+        if (value == "true" || value == "True") {
+          trt_int8_use_native_calibration_table = true;
+        } else if (value == "false" || value == "False") {
+          trt_int8_use_native_calibration_table = false;
+        } else {
+          ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_int8_use_native_calibration_table' should be a boolean, i.e. true or false. Default value is false.\n");
+        }
+      } else {
+        ORT_THROW("[ERROR] [TensorRT] Unknown option key. Choose from the following run-time options available for TensorRT: ['has_trt_options', 'trt_max_workspace_size', 'trt_fp16_enable', 'trt_int8_enable', 'trt_int8_calibration_table_name', 'trt_int8_use_native_calibration_table'].\n");
+      }
+    }
+    OrtTensorRTProviderOptions tensorrt_options;
+    tensorrt_options.device_id = 0;
+    tensorrt_options.has_user_compute_stream = 0;
+    tensorrt_options.user_compute_stream = nullptr;
+    tensorrt_options.has_trt_options = has_trt_options;
+    tensorrt_options.trt_max_workspace_size = trt_max_workspace_size;
+    tensorrt_options.trt_fp16_enable = trt_fp16_enable;
+    tensorrt_options.trt_int8_enable = trt_int8_enable;
+    tensorrt_options.trt_int8_calibration_table_name = trt_int8_calibration_table_name.c_str();
+    tensorrt_options.trt_int8_use_native_calibration_table = trt_int8_use_native_calibration_table;
+    session_options.AppendExecutionProvider_TensorRT(tensorrt_options);
+
+    OrtCUDAProviderOptions cuda_options{
+        0,
+        static_cast<OrtCudnnConvAlgoSearch>(performance_test_config.run_config.cudnn_conv_algo),
+        std::numeric_limits<size_t>::max(),
+        0,
+        !performance_test_config.run_config.do_cuda_copy_in_separate_stream,
+        0,
+        nullptr};
+    session_options.AppendExecutionProvider_CUDA(cuda_options);
 #else
     ORT_THROW("TensorRT is not supported in this build\n");
 #endif
onnxruntime/test/util/default_providers.cc

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ std::unique_ptr<IExecutionProvider> DefaultCpuExecutionProvider(bool enable_aren

 std::unique_ptr<IExecutionProvider> DefaultTensorrtExecutionProvider() {
 #ifdef USE_TENSORRT
-  OrtTensorRTProviderOptions params{0, 0, nullptr};
+  OrtTensorRTProviderOptions params{0, 0, nullptr, 0, 1 << 30, 0, 0, nullptr, 0};
   if (auto factory = CreateExecutionProviderFactory_Tensorrt(&params))
     return factory->CreateProvider();
 #endif
