Commit 1edfb3c

Fix hang in reload during predict unit test
The issue was an improper order of destructor calls which, with specific timing, could lead to the returnStream() method being called on an object that had already been destroyed. This resulted in the program hanging on notify_one(). It was specific to this one test, which did not correctly mimic the predict-path behaviour: the OVInferRequestsQueue destructor could run first, and only afterwards was its returnStream() method called.

* Remove unnecessary *1 suffix

JIRA:CVS-34909
See merge request vmc-eip/OVMS/ovms-c!146
2 parents f7d2ae6 + 3c9c5e6 commit 1edfb3c
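
The failure mode described above can be illustrated with a minimal, self-contained sketch (not OVMS code; StreamQueue and StreamIdGuard below are hypothetical stand-ins for ovms::OVInferRequestsQueue and ovms::ExecutingStreamIdGuard). The guard hands its stream id back to the queue in its destructor, so if the queue can be destroyed before the guard, the destructor ends up calling returnStream() and notify_one() on a dead object. Keeping the guard in a std::unique_ptr and resetting it explicitly while the queue is still alive pins down the destruction order, which is the pattern the diff below applies to the test.

// Minimal sketch of the destructor-order hazard (hypothetical names, not OVMS code).
#include <condition_variable>
#include <memory>
#include <mutex>
#include <vector>

class StreamQueue {  // stand-in for ovms::OVInferRequestsQueue
public:
    explicit StreamQueue(int streams) {
        for (int i = 0; i < streams; ++i)
            ids.push_back(i);
    }
    int getIdleStream() {
        std::unique_lock<std::mutex> lock(mtx);
        cv.wait(lock, [this] { return !ids.empty(); });
        int id = ids.back();
        ids.pop_back();
        return id;
    }
    void returnStream(int id) {
        {
            std::lock_guard<std::mutex> lock(mtx);
            ids.push_back(id);
        }
        cv.notify_one();  // hang / undefined behaviour if the queue is already destroyed
    }
private:
    std::mutex mtx;
    std::condition_variable cv;
    std::vector<int> ids;
};

class StreamIdGuard {  // stand-in for ovms::ExecutingStreamIdGuard
public:
    explicit StreamIdGuard(StreamQueue& q) : queue(q), id(q.getIdleStream()) {}
    ~StreamIdGuard() { queue.returnStream(id); }  // touches the queue on destruction
    int getId() const { return id; }
private:
    StreamQueue& queue;
    int id;
};

int main() {
    auto queue = std::make_unique<StreamQueue>(1);
    auto streamGuard = std::make_unique<StreamIdGuard>(*queue);
    // ... run inference with streamGuard->getId() ...
    streamGuard.reset();  // return the stream while the queue is still alive
    queue.reset();        // only then tear the queue down
    return 0;
}

In the fixed test the same idea appears as executingStreamIdGuard.reset() being called before modelInstancePredictRequestsHandlesCountGuard.reset(), so the stream is returned before the blocked reload is allowed to proceed.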


src/test/prediction_service_test.cpp

Lines changed: 15 additions & 14 deletions
@@ -141,15 +141,15 @@ void TestPredict::performPredict(const std::string modelName,
         std::unique_ptr<std::future<void>> waitBeforePerformInference) {
     // only validation is skipped
     std::shared_ptr<ovms::ModelInstance> modelInstance;
-    std::unique_ptr<ovms::ModelInstancePredictRequestsHandlesCountGuard> modelInstancePredictRequestsHandlesCountGuard1;
+    std::unique_ptr<ovms::ModelInstancePredictRequestsHandlesCountGuard> modelInstancePredictRequestsHandlesCountGuard;
 
     if (waitBeforeGettingModelInstance) {
         std::cout << "Waiting before getModelInstance. Batch size:" << batchSize << std::endl;
         waitBeforeGettingModelInstance->get();
     }
-    ASSERT_EQ(getModelInstance(manager, modelName, modelVersion, modelInstance, modelInstancePredictRequestsHandlesCountGuard1),
+    ASSERT_EQ(getModelInstance(manager, modelName, modelVersion, modelInstance, modelInstancePredictRequestsHandlesCountGuard),
         ovms::StatusCode::OK);
-    ASSERT_EQ(assureModelInstanceLoadedWithProperBatchSize(*modelInstance, batchSize, modelInstancePredictRequestsHandlesCountGuard1),
+    ASSERT_EQ(assureModelInstanceLoadedWithProperBatchSize(*modelInstance, batchSize, modelInstancePredictRequestsHandlesCountGuard),
         ovms::StatusCode::OK);
     ovms::OVInferRequestsQueue& inferRequestsQueue = modelInstance->getInferRequestsQueue();
     ovms::ExecutingStreamIdGuard executingStreamIdGuard(inferRequestsQueue);
@@ -166,7 +166,6 @@ void TestPredict::performPredict(const std::string modelName,
     auto status = performInference(inferRequestsQueue, executingInferId, inferRequest);
     ASSERT_EQ(status, ovms::StatusCode::OK);
     serializeAndCheck(DUMMY_MODEL_OUTPUT_SIZE * batchSize, inferRequest);
-    modelInstancePredictRequestsHandlesCountGuard1.reset();
 }
 
 TEST_F(TestPredict, SuccesfullOnDummyModel) {
@@ -205,11 +204,11 @@ TEST_F(TestPredict, SuccesfullReloadWhen1InferRequestJustBeforePredict) {
     });
     std::this_thread::sleep_for(std::chrono::seconds(1));
     std::shared_ptr<ovms::ModelInstance> modelInstance;
-    std::unique_ptr<ovms::ModelInstancePredictRequestsHandlesCountGuard> modelInstancePredictRequestsHandlesCountGuard1;
-    ASSERT_EQ(getModelInstance(manager, config.getName(), config.getVersion(), modelInstance, modelInstancePredictRequestsHandlesCountGuard1),
+    std::unique_ptr<ovms::ModelInstancePredictRequestsHandlesCountGuard> modelInstancePredictRequestsHandlesCountGuard;
+    ASSERT_EQ(getModelInstance(manager, config.getName(), config.getVersion(), modelInstance, modelInstancePredictRequestsHandlesCountGuard),
         ovms::StatusCode::OK);
 
-    std::thread t2([modelInstance, newBatchSize, &modelInstancePredictRequestsHandlesCountGuard1](){modelInstance->reloadModel(newBatchSize, modelInstancePredictRequestsHandlesCountGuard1);});
+    std::thread t2([modelInstance, newBatchSize, &modelInstancePredictRequestsHandlesCountGuard](){modelInstance->reloadModel(newBatchSize, modelInstancePredictRequestsHandlesCountGuard);});
     std::this_thread::sleep_for(std::chrono::milliseconds(100));
     releaseWaitBeforePerformInference.set_value();
     t2.join();
@@ -247,11 +246,11 @@ TEST_F(TestPredict, SuccesfullReloadWhen1InferRequestJustBeforeGettingModelInsta
     });
     std::this_thread::sleep_for(std::chrono::seconds(1));
     std::shared_ptr<ovms::ModelInstance> modelInstance;
-    std::unique_ptr<ovms::ModelInstancePredictRequestsHandlesCountGuard> modelInstancePredictRequestsHandlesCountGuard1;
-    ASSERT_EQ(getModelInstance(manager, config.getName(), config.getVersion(), modelInstance, modelInstancePredictRequestsHandlesCountGuard1),
+    std::unique_ptr<ovms::ModelInstancePredictRequestsHandlesCountGuard> modelInstancePredictRequestsHandlesCountGuard;
+    ASSERT_EQ(getModelInstance(manager, config.getName(), config.getVersion(), modelInstance, modelInstancePredictRequestsHandlesCountGuard),
         ovms::StatusCode::OK);
-    std::thread assureProperBSLoadedThread([modelInstance, newBatchSize, &modelInstancePredictRequestsHandlesCountGuard1](){
-        auto status = assureModelInstanceLoadedWithProperBatchSize(*modelInstance, newBatchSize, modelInstancePredictRequestsHandlesCountGuard1);
+    std::thread assureProperBSLoadedThread([modelInstance, newBatchSize, &modelInstancePredictRequestsHandlesCountGuard](){
+        auto status = assureModelInstanceLoadedWithProperBatchSize(*modelInstance, newBatchSize, modelInstancePredictRequestsHandlesCountGuard);
         ASSERT_EQ(status, ovms::StatusCode::OK);
     });
     std::this_thread::sleep_for(std::chrono::milliseconds(100));
@@ -260,8 +259,9 @@ TEST_F(TestPredict, SuccesfullReloadWhen1InferRequestJustBeforeGettingModelInsta
     assureProperBSLoadedThread.join();
 
     ovms::OVInferRequestsQueue& inferRequestsQueue = modelInstance->getInferRequestsQueue();
-    ovms::ExecutingStreamIdGuard executingStreamIdGuard(inferRequestsQueue);
-    int executingInferId = executingStreamIdGuard.getId();
+    // exception from keeping the same as predict path - using unique_ptr to keep order of destructors the same
+    auto executingStreamIdGuard = std::make_unique<ovms::ExecutingStreamIdGuard>(inferRequestsQueue);
+    int executingInferId = executingStreamIdGuard->getId();
     InferenceEngine::InferRequest& inferRequest = inferRequestsQueue.getInferRequest(executingInferId);
 
     std::vector<float> input(DUMMY_MODEL_INPUT_SIZE * newBatchSize);
@@ -273,7 +273,8 @@ TEST_F(TestPredict, SuccesfullReloadWhen1InferRequestJustBeforeGettingModelInsta
     serializeAndCheck(DUMMY_MODEL_OUTPUT_SIZE * newBatchSize, inferRequest);
 
     std::cout << "Now releasing blockade from reloading." << std::endl;
-    modelInstancePredictRequestsHandlesCountGuard1.reset();
+    executingStreamIdGuard.reset();
+    modelInstancePredictRequestsHandlesCountGuard.reset();
     secondPredictRequest.join();
 }
 
