Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
475 changes: 267 additions & 208 deletions BasePreparedModel.cpp

Large diffs are not rendered by default.

22 changes: 13 additions & 9 deletions BasePreparedModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,16 @@ namespace android::hardware::neuralnetworks::nnhal {
template <class T>
using vec = std::vector<T>;
typedef uint8_t* memory;
extern bool mRemoteCheck;
extern std::shared_ptr<DetectionClient> mDetectionClient;

class BasePreparedModel : public V1_3::IPreparedModel {
public:
bool mRemoteCheck = false;
std::string mXmlFile;
std::string mBinFile;
BasePreparedModel(const IntelDeviceType device, const Model& model) : mTargetDevice(device) {
mModelInfo = std::make_shared<NnapiModelInfo>(model);
mXmlFile = std::string("/data/vendor/neuralnetworks/") + std::to_string(mFileId) + std::string(".xml");
mBinFile = std::string("/data/vendor/neuralnetworks/") + std::to_string(mFileId) + std::string(".bin");
mXmlFile = MODEL_DIR + std::to_string(mFileId) + std::string(".xml");
mBinFile = MODEL_DIR + std::to_string(mFileId) + std::string(".bin");
mFileId++;
}

Expand Down Expand Up @@ -89,27 +91,29 @@ class BasePreparedModel : public V1_3::IPreparedModel {

virtual bool initialize();
virtual bool checkRemoteConnection();
virtual bool loadRemoteModel(const std::string& ir_xml, const std::string& ir_bin);
virtual void loadRemoteModel(const std::string& ir_xml, const std::string& ir_bin);
virtual void setRemoteEnabled(bool flag);

std::shared_ptr<NnapiModelInfo> getModelInfo() { return mModelInfo; }

std::shared_ptr<NgraphNetworkCreator> getNgraphNwCreator() { return mNgraphNetCreator; }
size_t getInputTensorIndex(size_t input);
size_t getOutputTensorIndex(size_t output);

std::shared_ptr<IIENetwork> getPlugin() { return mPlugin; }

std::shared_ptr<ov::Model> modelPtr;
std::shared_ptr<DetectionClient> mDetectionClient;

protected:
virtual void deinitialize();

IntelDeviceType mTargetDevice;
std::shared_ptr<NnapiModelInfo> mModelInfo;
std::shared_ptr<NgraphNetworkCreator> mNgraphNetCreator;
std::shared_ptr<IIENetwork> mPlugin;
private:
static uint32_t mFileId;
std::string mXmlFile;
std::string mBinFile;
std::unordered_map<size_t, size_t> mInputsToTensorMap;
std::unordered_map<size_t, size_t> mOutputsToTensorMap;
};

class BaseFencedExecutionCallback : public V1_3::IFencedExecutionCallback {
Expand Down
112 changes: 99 additions & 13 deletions DetectionClient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

std::string DetectionClient::prepare(bool& flag) {
RequestString request;
request.set_value("");
request.mutable_token()->set_data(mToken);
ReplyStatus reply;
ClientContext context;
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(100);
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(10000);
context.set_deadline(deadline);

Status status = stub_->prepare(&context, request, &reply);
Expand All @@ -21,10 +21,27 @@ std::string DetectionClient::prepare(bool& flag) {
}
}

std::string DetectionClient::release(bool& flag) {
RequestString request;
request.mutable_token()->set_data(mToken);
ReplyStatus reply;
ClientContext context;

Status status = stub_->release(&context, request, &reply);

if (status.ok()) {
flag = reply.status();
return (flag ? "status True" : "status False");
} else {
return std::string(status.error_message());
}
}

Status DetectionClient::sendFile(std::string fileName,
std::unique_ptr<ClientWriter<RequestDataChunks> >& writer) {
RequestDataChunks request;
uint32_t CHUNK_SIZE = 1024 * 1024;
request.mutable_token()->set_data(mToken);
uint32_t CHUNK_SIZE = 10 * 1024 * 1024;
std::ifstream fin(fileName, std::ifstream::binary);
std::vector<char> buffer(CHUNK_SIZE, 0);
ALOGV("GRPC sendFile %s", fileName.c_str());
Expand All @@ -37,7 +54,7 @@ Status DetectionClient::sendFile(std::string fileName,
// ALOGI("GRPC sendFile read %d", s);
request.set_data(buffer.data(), s);
if (!writer->Write(request)) {
ALOGE("GRPC Broken Stream ");
ALOGE("GRPC broken stream ");
break;
}
}
Expand All @@ -47,6 +64,22 @@ Status DetectionClient::sendFile(std::string fileName,
return writer->Finish();
}

// Triggers model loading on the remote service and reports whether the server
// loaded the model successfully. Returns false on either a transport-level
// gRPC failure or a server-side load failure.
// NOTE(review): `fileName` is currently unused — the server appears to resolve
// the model from the session token (mToken); the parameter is kept for
// compatibility with the declaration in DetectionClient.h.
bool DetectionClient::isModelLoaded(std::string fileName) {
    (void)fileName;  // unused: model identity travels via mToken
    ReplyStatus reply;
    ClientContext context;
    RequestString request;
    request.mutable_token()->set_data(mToken);
    // Generous 5-minute deadline: remote model compilation can be slow.
    time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(300000);
    context.set_deadline(deadline);
    status = stub_->loadModel(&context, request, &reply);
    if (status.ok()) {
        return reply.status();
    }
    ALOGE("Model load failure: %s", status.error_message().c_str());
    return false;
}

std::string DetectionClient::sendIRs(bool& flag, const std::string& ir_xml, const std::string& ir_bin) {
ReplyStatus reply;
ClientContext context;
Expand All @@ -62,35 +95,84 @@ std::string DetectionClient::sendIRs(bool& flag, const std::string& ir_xml, cons
status = sendFile(ir_bin, writerBin);
if (status.ok()) {
flag = reply.status();
return (flag ? "status True" : "status False");
//if the model is sent successfully, trigger model loading
if (flag && isModelLoaded(ir_xml) ) {
flag = true;
return ("status True");
} else {
flag = false;
ALOGE("Model loading failed!!!");
return ("status False");
}
} else {
return ("status False");
}
}
return std::string(status.error_message());
}

void DetectionClient::add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size) {
void DetectionClient::add_input_data(std::string label, const uint8_t* buffer, std::vector<uint32_t> shape, uint32_t size, android::hardware::neuralnetworks::nnhal::OperandType operandType) {
const float* src;
size_t index;

DataTensor* input = request.add_data_tensors();
input->set_node_name(label);
switch(operandType) {
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_INT32: {
input->set_data_type(DataTensor::i32);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_FLOAT16: {
input->set_data_type(DataTensor::f16);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_FLOAT32: {
input->set_data_type(DataTensor::f32);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_BOOL8: {
input->set_data_type(DataTensor::boolean);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_ASYMM: {
input->set_data_type(DataTensor::u8);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_SYMM:
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_ASYMM_SIGNED: {
input->set_data_type(DataTensor::i8);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT16_SYMM: {
input->set_data_type(DataTensor::i16);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT16_ASYMM: {
input->set_data_type(DataTensor::u16);
break;
}
default: {
input->set_data_type(DataTensor::u8);
break;
}
}
for (index = 0; index < shape.size(); index++) {
input->add_tensor_shape(shape[index]);
}
input->set_data(buffer, size);
}

void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape) {
void DetectionClient::get_output_data(std::string label, uint8_t* buffer, uint32_t expectedLength) {
std::string src;
size_t index;
size_t size = 1;

for (index = 0; index < shape.size(); index++) {
size *= shape[index];
}
for (index = 0; index < reply.data_tensors_size(); index++) {
if (label.compare(reply.data_tensors(index).node_name()) == 0) {
src = reply.data_tensors(index).data();
if(expectedLength != src.length()) {
ALOGE("Length mismatch error: expected length %d , actual length %d", expectedLength, src.length());
}
memcpy(buffer, src.data(), src.length());
break;
}
Expand All @@ -104,12 +186,16 @@ void DetectionClient::clear_data() {

std::string DetectionClient::remote_infer() {
ClientContext context;
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(20000);
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(10000);
context.set_deadline(deadline);

request.mutable_token()->set_data(mToken);
status = stub_->getInferResult(&context, request, &reply);
if (status.ok()) {
if (reply.data_tensors_size() == 0) ALOGE("GRPC reply empty, ovms failure ?");
if (reply.data_tensors_size() == 0) {
ALOGE("GRPC reply empty, ovms failure ?");
return "Failure";
}
return "Success";
} else {
ALOGE("GRPC Error code: %d, message: %s", status.error_code(),
Expand Down
12 changes: 9 additions & 3 deletions DetectionClient.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <android/log.h>
#include <log/log.h>
#include <android-base/logging.h>
#include "Driver.h"
#include "nnhal_object_detection.grpc.pb.h"

using grpc::Channel;
Expand All @@ -22,19 +23,23 @@ using objectDetection::RequestDataTensors;
using objectDetection::RequestString;
using time_point = std::chrono::system_clock::time_point;

#define MODEL_DIR std::string("/data/vendor/neuralnetworks/")

class DetectionClient {
public:
DetectionClient(std::shared_ptr<Channel> channel) : stub_(Detection::NewStub(channel)){}
DetectionClient(std::shared_ptr<Channel> channel, uint32_t token) : stub_(Detection::NewStub(channel)), mToken(token) {}

std::string prepare(bool& flag);
std::string release(bool& flag);

Status sendFile(std::string fileName,
std::unique_ptr<ClientWriter<RequestDataChunks> >& writer);

std::string sendIRs(bool& flag, const std::string& ir_xml, const std::string& ir_bin);

void add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size);
void get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape);
bool isModelLoaded(std::string fileName);
void add_input_data(std::string label, const uint8_t* buffer, std::vector<uint32_t> shape, uint32_t size, android::hardware::neuralnetworks::nnhal::OperandType operandType);
void get_output_data(std::string label, uint8_t* buffer, uint32_t expectedLength);
void clear_data();
std::string remote_infer();
bool get_status();
Expand All @@ -44,6 +49,7 @@ class DetectionClient {
RequestDataTensors request;
ReplyDataTensors reply;
Status status;
uint32_t mToken;
};

#endif
8 changes: 4 additions & 4 deletions Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,8 @@ Return<void> Driver::getSupportedOperations_1_2(const V1_2_Model& model,
}

auto modelInfo = std::make_shared<NnapiModelInfo>(convertToV1_3(model));
NgraphNetworkCreator ngraphCreatorInst(modelInfo, mDeviceType);
ngraphCreatorInst.getSupportedOperations(supported);
std::shared_ptr<NgraphNetworkCreator> ngraphCreatorInst = std::make_shared<NgraphNetworkCreator>(modelInfo, mDeviceType);
ngraphCreatorInst->getSupportedOperations(supported);

cb(ErrorStatus::NONE, supported);
ALOGV("Exiting %s", __func__);
Expand Down Expand Up @@ -373,8 +373,8 @@ Return<void> Driver::getSupportedOperations_1_3(const Model& model,
}

auto modelInfo = std::make_shared<NnapiModelInfo>(model);
NgraphNetworkCreator ngraphCreatorInst(modelInfo, mDeviceType);
ngraphCreatorInst.getSupportedOperations(supported);
std::shared_ptr<NgraphNetworkCreator> ngraphCreatorInst = std::make_shared<NgraphNetworkCreator>(modelInfo, mDeviceType);
ngraphCreatorInst->getSupportedOperations(supported);

cb(V1_3::ErrorStatus::NONE, supported);
ALOGV("Exiting %s", __func__);
Expand Down
52 changes: 39 additions & 13 deletions IENetwork.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

namespace android::hardware::neuralnetworks::nnhal {

bool IENetwork::loadNetwork(const std::string& ir_xml, const std::string& ir_bin) {
bool IENetwork::createNetwork(std::shared_ptr<ov::Model> network, const std::string& ir_xml, const std::string& ir_bin) {
ALOGV("%s", __func__);

#if __ANDROID__
Expand All @@ -32,30 +32,56 @@ bool IENetwork::loadNetwork(const std::string& ir_xml, const std::string& ir_bin
break;
}

ALOGD("Creating infer request for Intel Device Type : %s", deviceStr.c_str());
ALOGD("creating infer request for Intel Device Type : %s", deviceStr.c_str());

if (mNetwork) {
compiled_model = ie.compile_model(mNetwork, deviceStr);
ALOGD("loadNetwork is done....");
if (!network) {
ALOGE("Invalid Network pointer");
return false;
} else {
ov::CompiledModel compiled_model = ie.compile_model(network, deviceStr);
ALOGD("createNetwork is done....");
#if __ANDROID__
ov::serialize(mNetwork, ir_xml, ir_bin,
ov::serialize(network, ir_xml, ir_bin,
ov::pass::Serialize::Version::IR_V11);
#else
ov::pass::Manager manager;
manager.register_pass<ov::pass::Serialize>("/tmp/model.xml", "/tmp/model.bin");
manager.run_passes(mNetwork);
manager.run_passes(network);
#endif
mInferRequest = compiled_model.create_infer_request();
ALOGD("CreateInferRequest is done....");

} else {
ALOGE("Invalid Network pointer");
return false;
}

return true;
}

void IENetwork::loadNetwork(const std::string& modelName) {
#if __ANDROID__
ov::Core ie(std::string("/vendor/etc/openvino/plugins.xml"));
#else
ov::Core ie(std::string("/usr/local/lib64/plugins.xml"));
#endif

std::string deviceStr;
switch (mTargetDevice) {
case IntelDeviceType::GNA:
deviceStr = "GNA";
break;
case IntelDeviceType::VPU:
deviceStr = "VPUX";
break;
case IntelDeviceType::CPU:
default:
deviceStr = "CPU";
break;
}

ALOGD("loading infer request for Intel Device Type : %s", deviceStr.c_str());

ov::CompiledModel compiled_model = ie.compile_model(modelName, deviceStr);
mInferRequest = compiled_model.create_infer_request();
isLoaded = true;
ALOGD("Load InferRequest is done....");
}

// Need to be called before loadnetwork.. But not sure whether need to be called for
// all the inputs in case multiple input / output
ov::Tensor IENetwork::getTensor(const std::string& outName) {
Expand Down
Loading