diff --git a/Android.bp b/Android.bp index c4d807098..35e1f6a4f 100644 --- a/Android.bp +++ b/Android.bp @@ -21,9 +21,7 @@ cc_library_shared { ], include_dirs: [ - "packages/modules/NeuralNetworks/common/include", - "packages/modules/NeuralNetworks/common/types/include", - "packages/modules/NeuralNetworks/runtime/include", + "frameworks/ml/nn/runtime/include/", "frameworks/native/libs/nativewindow/include", "external/mesa3d/include/android_stub", "external/grpc-grpc", @@ -168,9 +166,8 @@ cc_binary { srcs: ["service.cpp"], include_dirs: [ - "packages/modules/NeuralNetworks/common/include", - "packages/modules/NeuralNetworks/common/types/include", - "packages/modules/NeuralNetworks/runtime/include", + "frameworks/ml/nn/common/include", + "frameworks/ml/nn/runtime/include/", "frameworks/native/libs/nativewindow/include", "external/mesa3d/include/android_stub", ], @@ -186,7 +183,6 @@ cc_binary { shared_libs: [ "libhidlbase", - "libhidltransport", "libhidlmemory", "libutils", "liblog", diff --git a/BasePreparedModel.cpp b/BasePreparedModel.cpp index 31c04b49c..59e9d9737 100644 --- a/BasePreparedModel.cpp +++ b/BasePreparedModel.cpp @@ -33,18 +33,20 @@ namespace android::hardware::neuralnetworks::nnhal { using namespace android::nn; static const Timing kNoTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX}; -bool mRemoteCheck = false; -std::shared_ptr mDetectionClient; uint32_t BasePreparedModel::mFileId = 0; void BasePreparedModel::deinitialize() { ALOGV("Entering %s", __func__); + bool is_success = false; mModelInfo->unmapRuntimeMemPools(); auto ret_xml = std::remove(mXmlFile.c_str()); auto ret_bin = std::remove(mBinFile.c_str()); if ((ret_xml != 0) || (ret_bin != 0)) { ALOGW("%s Deletion status of xml:%d, bin:%d", __func__, ret_xml, ret_bin); } + auto reply = mDetectionClient->release(is_success); + ALOGI("GRPC release response is %d : %s", is_success, reply.c_str()); + setRemoteEnabled(false); ALOGV("Exiting %s", __func__); } @@ -62,7 +64,23 @@ bool 
BasePreparedModel::initialize() { ALOGE("Failed to initialize Model runtime parameters!!"); return false; } - checkRemoteConnection(); + + setRemoteEnabled(checkRemoteConnection()); + if (mRemoteCheck) { + for (auto i : mModelInfo->getModelInputIndexes()) { + auto& nnapiOperandType = mModelInfo->getOperand(i).type; + switch (nnapiOperandType) { + case OperandType::FLOAT32: + case OperandType::TENSOR_FLOAT32: + break; + default: + ALOGD("GRPC Remote Infer not enabled for %d", nnapiOperandType); + setRemoteEnabled(false); + break; + } + if (!mRemoteCheck) break; + } + } mNgraphNetCreator = std::make_shared(mModelInfo, mTargetDevice); if (!mNgraphNetCreator->validateOperations()) return false; @@ -95,8 +113,11 @@ bool BasePreparedModel::checkRemoteConnection() { bool is_success = false; if(getGrpcIpPort(grpc_prop)) { ALOGV("Attempting GRPC via TCP : %s", grpc_prop); + grpc::ChannelArguments args; + args.SetMaxReceiveMessageSize(INT_MAX); + args.SetMaxSendMessageSize(INT_MAX); mDetectionClient = std::make_shared( - grpc::CreateChannel(grpc_prop, grpc::InsecureChannelCredentials())); + grpc::CreateCustomChannel(grpc_prop, grpc::InsecureChannelCredentials(), args), mFileId); if(mDetectionClient) { auto reply = mDetectionClient->prepare(is_success); ALOGI("GRPC(TCP) prepare response is %d : %s", is_success, reply.c_str()); @@ -104,14 +125,17 @@ bool BasePreparedModel::checkRemoteConnection() { } if (!is_success && getGrpcSocketPath(grpc_prop)) { ALOGV("Attempting GRPC via unix : %s", grpc_prop); + grpc::ChannelArguments args; + args.SetMaxReceiveMessageSize(INT_MAX); + args.SetMaxSendMessageSize(INT_MAX); mDetectionClient = std::make_shared( - grpc::CreateChannel(std::string("unix:") + grpc_prop, grpc::InsecureChannelCredentials())); + grpc::CreateCustomChannel(std::string("unix:") + grpc_prop, grpc::InsecureChannelCredentials(), args), mFileId); if(mDetectionClient) { auto reply = mDetectionClient->prepare(is_success); ALOGI("GRPC(unix) prepare response is %d : %s", 
is_success, reply.c_str()); } } - mRemoteCheck = is_success; + setRemoteEnabled(is_success); return is_success; } @@ -121,14 +145,24 @@ bool BasePreparedModel::loadRemoteModel(const std::string& ir_xml, const std::st if(mDetectionClient) { auto reply = mDetectionClient->sendIRs(is_success, ir_xml, ir_bin); ALOGI("sendIRs response GRPC %d %s", is_success, reply.c_str()); + if (reply == "status False") { + ALOGE("%s Model Load Failed",__func__); + } } else { ALOGE("%s mDetectionClient is null",__func__); } - mRemoteCheck = is_success; + setRemoteEnabled(is_success); return is_success; } +void BasePreparedModel::setRemoteEnabled(bool flag) { + if(mRemoteCheck != flag) { + ALOGD("GRPC %s Remote Connection", flag ? "ACQUIRED" : "RELEASED"); + mRemoteCheck = flag; + } +} + static Return notify(const sp& callback, const ErrorStatus& status, const hidl_vec&, Timing) { return callback->notify(status); @@ -268,12 +302,12 @@ void asyncExecute(const Request& request, MeasureTiming measure, BasePreparedMod ALOGD("%s Run", __func__); if (measure == MeasureTiming::YES) deviceStart = now(); - if(mRemoteCheck) { + if(preparedModel->mRemoteCheck) { ALOGI("%s GRPC Remote Infer", __func__); - auto reply = mDetectionClient->remote_infer(); + auto reply = preparedModel->mDetectionClient->remote_infer(); ALOGI("***********GRPC server response************* %s", reply.c_str()); } - if (!mRemoteCheck || !mDetectionClient->get_status()){ + if (!preparedModel->mRemoteCheck || !preparedModel->mDetectionClient->get_status()){ try { plugin->infer(); } catch (const std::exception& ex) { @@ -332,9 +366,9 @@ void asyncExecute(const Request& request, MeasureTiming measure, BasePreparedMod return; } - if (mRemoteCheck && mDetectionClient && mDetectionClient->get_status()) { - mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr, - ngraphNw->getOutputShape(outIndex)); + if (preparedModel->mRemoteCheck && preparedModel->mDetectionClient && 
preparedModel->mDetectionClient->get_status()) { + preparedModel->mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr, + ngraphNw->getOutputShape(outIndex), expectedLength); } else { switch (operandType) { case OperandType::TENSOR_INT32: @@ -399,7 +433,7 @@ void asyncExecute(const Request& request, MeasureTiming measure, BasePreparedMod } static std::tuple, Timing> executeSynchronouslyBase( - const Request& request, MeasureTiming measure, BasePreparedModel* preparedModel, + const V1_3::Request& request, MeasureTiming measure, BasePreparedModel* preparedModel, time_point driverStart) { ALOGV("Entering %s", __func__); auto modelInfo = preparedModel->getModelInfo(); @@ -408,7 +442,7 @@ static std::tuple, Timing> executeSynch time_point driverEnd, deviceStart, deviceEnd; std::vector requestPoolInfos; auto errorStatus = modelInfo->setRunTimePoolInfosFromHidlMemories(request.pools); - if (errorStatus != ErrorStatus::NONE) { + if (errorStatus != V1_3::ErrorStatus::NONE) { ALOGE("Failed to set runtime pool info from HIDL memories"); return {ErrorStatus::GENERAL_FAILURE, {}, kNoTiming}; } @@ -427,8 +461,9 @@ static std::tuple, Timing> executeSynch ALOGV("Input index: %d layername : %s", inIndex, inputNodeName.c_str()); //check if remote infer is available //TODO: Need to add FLOAT16 support for remote inferencing - if(mRemoteCheck && mDetectionClient) { - mDetectionClient->add_input_data(std::to_string(i), (uint8_t*)srcPtr, ngraphNw->getOutputShape(inIndex), len); + if(preparedModel->mRemoteCheck && preparedModel->mDetectionClient) { + auto inOperandType = modelInfo->getOperandType(inIndex); + preparedModel->mDetectionClient->add_input_data(std::to_string(i), (uint8_t*)srcPtr, ngraphNw->getOutputShape(inIndex), len, inOperandType); } else { ov::Tensor destTensor; try { @@ -493,12 +528,15 @@ static std::tuple, Timing> executeSynch ALOGV("%s Run", __func__); if (measure == MeasureTiming::YES) deviceStart = now(); - if(mRemoteCheck) { + 
if(preparedModel->mRemoteCheck) { ALOGI("%s GRPC Remote Infer", __func__); - auto reply = mDetectionClient->remote_infer(); + auto reply = preparedModel->mDetectionClient->remote_infer(); ALOGI("***********GRPC server response************* %s", reply.c_str()); } - if (!mRemoteCheck || !mDetectionClient->get_status()){ + if (!preparedModel->mRemoteCheck || !preparedModel->mDetectionClient->get_status()){ + if(preparedModel->mRemoteCheck) { + preparedModel->setRemoteEnabled(false); + } try { ALOGV("%s Client Infer", __func__); plugin->infer(); @@ -555,9 +593,9 @@ static std::tuple, Timing> executeSynch } //copy output from remote infer //TODO: Add support for other OperandType - if (mRemoteCheck && mDetectionClient && mDetectionClient->get_status()) { - mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr, - ngraphNw->getOutputShape(outIndex)); + if (preparedModel->mRemoteCheck && preparedModel->mDetectionClient && preparedModel->mDetectionClient->get_status()) { + preparedModel->mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr, + ngraphNw->getOutputShape(outIndex), expectedLength); } else { switch (operandType) { case OperandType::TENSOR_INT32: @@ -606,8 +644,8 @@ static std::tuple, Timing> executeSynch ALOGE("Failed to update the request pool infos"); return {ErrorStatus::GENERAL_FAILURE, {}, kNoTiming}; } - if (mRemoteCheck && mDetectionClient && mDetectionClient->get_status()) { - mDetectionClient->clear_data(); + if (preparedModel->mRemoteCheck && preparedModel->mDetectionClient && preparedModel->mDetectionClient->get_status()) { + preparedModel->mDetectionClient->clear_data(); } if (measure == MeasureTiming::YES) { @@ -631,7 +669,7 @@ Return BasePreparedModel::executeSynchronously(const Request& request, Mea return Void(); } auto [status, outputShapes, timing] = - executeSynchronouslyBase(request, measure, this, driverStart); + executeSynchronouslyBase(convertToV1_3(request), measure, this, driverStart); cb(status, 
std::move(outputShapes), timing); ALOGV("Exiting %s", __func__); return Void(); @@ -646,12 +684,12 @@ Return BasePreparedModel::executeSynchronously_1_3(const V1_3::Request& re time_point driverStart; if (measure == MeasureTiming::YES) driverStart = now(); - if (!validateRequest(convertToV1_0(request), convertToV1_2(mModelInfo->getModel()))) { + if (!validateRequest(request, mModelInfo->getModel())) { cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming); return Void(); } auto [status, outputShapes, timing] = - executeSynchronouslyBase(convertToV1_0(request), measure, this, driverStart); + executeSynchronouslyBase(request, measure, this, driverStart); cb(convertToV1_3(status), std::move(outputShapes), timing); ALOGV("Exiting %s", __func__); return Void(); @@ -872,7 +910,7 @@ Return BasePreparedModel::executeFenced(const V1_3::Request& request1_3, if (mRemoteCheck && mDetectionClient && mDetectionClient->get_status()) { mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr, - mNgraphNetCreator->getOutputShape(outIndex)); + mNgraphNetCreator->getOutputShape(outIndex), expectedLength); } else { switch (operandType) { case OperandType::TENSOR_INT32: diff --git a/BasePreparedModel.h b/BasePreparedModel.h index 9fbdd1abd..ec8f5dcc6 100755 --- a/BasePreparedModel.h +++ b/BasePreparedModel.h @@ -49,14 +49,13 @@ namespace android::hardware::neuralnetworks::nnhal { template using vec = std::vector; typedef uint8_t* memory; -extern bool mRemoteCheck; -extern std::shared_ptr mDetectionClient; class BasePreparedModel : public V1_3::IPreparedModel { public: + bool mRemoteCheck = false; BasePreparedModel(const IntelDeviceType device, const Model& model) : mTargetDevice(device) { mModelInfo = std::make_shared(model); - mXmlFile = std::string("/data/vendor/neuralnetworks/") + std::to_string(mFileId) + std::string(".xml"); - mBinFile = std::string("/data/vendor/neuralnetworks/") + std::to_string(mFileId) + std::string(".bin"); + mXmlFile = MODEL_DIR + 
std::to_string(mFileId) + std::string(".xml"); + mBinFile = MODEL_DIR + std::to_string(mFileId) + std::string(".bin"); mFileId++; } @@ -90,6 +89,7 @@ class BasePreparedModel : public V1_3::IPreparedModel { virtual bool initialize(); virtual bool checkRemoteConnection(); virtual bool loadRemoteModel(const std::string& ir_xml, const std::string& ir_bin); + virtual void setRemoteEnabled(bool flag); std::shared_ptr getModelInfo() { return mModelInfo; } @@ -98,6 +98,7 @@ class BasePreparedModel : public V1_3::IPreparedModel { std::shared_ptr getPlugin() { return mPlugin; } std::shared_ptr modelPtr; + std::shared_ptr mDetectionClient; protected: virtual void deinitialize(); diff --git a/DetectionClient.cpp b/DetectionClient.cpp index 4d0716180..de08783db 100644 --- a/DetectionClient.cpp +++ b/DetectionClient.cpp @@ -5,7 +5,7 @@ std::string DetectionClient::prepare(bool& flag) { RequestString request; - request.set_value(""); + request.mutable_token()->set_data(mToken); ReplyStatus reply; ClientContext context; time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(100); @@ -21,9 +21,26 @@ std::string DetectionClient::prepare(bool& flag) { } } +std::string DetectionClient::release(bool& flag) { + RequestString request; + request.mutable_token()->set_data(mToken); + ReplyStatus reply; + ClientContext context; + + Status status = stub_->release(&context, request, &reply); + + if (status.ok()) { + flag = reply.status(); + return (flag ? 
"status True" : "status False"); + } else { + return std::string(status.error_message()); + } +} + Status DetectionClient::sendFile(std::string fileName, std::unique_ptr >& writer) { RequestDataChunks request; + request.mutable_token()->set_data(mToken); uint32_t CHUNK_SIZE = 1024 * 1024; std::ifstream fin(fileName, std::ifstream::binary); std::vector buffer(CHUNK_SIZE, 0); @@ -47,6 +64,22 @@ Status DetectionClient::sendFile(std::string fileName, return writer->Finish(); } +bool DetectionClient::isModelLoaded(std::string fileName) { + ReplyStatus reply; + ClientContext context; + RequestString request; + request.mutable_token()->set_data(mToken); + time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(20000); + context.set_deadline(deadline); + status = stub_->loadModel(&context, request, &reply); + if(status.ok()) { + return reply.status(); + } else { + ALOGE("Model Load failure: %s", status.error_message().c_str()); + } + return false; +} + std::string DetectionClient::sendIRs(bool& flag, const std::string& ir_xml, const std::string& ir_bin) { ReplyStatus reply; ClientContext context; @@ -62,25 +95,75 @@ std::string DetectionClient::sendIRs(bool& flag, const std::string& ir_xml, cons status = sendFile(ir_bin, writerBin); if (status.ok()) { flag = reply.status(); - return (flag ? 
"status True" : "status False"); + //if model is sent successfully trigger model loading + if (flag && isModelLoaded(ir_xml) ) { + flag = true; + return ("status True"); + } else { + flag = false; + ALOGE("Model Loading Failed!!!"); + return ("status False"); + } + } else { + return ("status False"); + } } return std::string(status.error_message()); } -void DetectionClient::add_input_data(std::string label, const uint8_t* buffer, std::vector shape, uint32_t size) { +void DetectionClient::add_input_data(std::string label, const uint8_t* buffer, std::vector shape, uint32_t size, android::hardware::neuralnetworks::nnhal::OperandType operandType) { const float* src; size_t index; DataTensor* input = request.add_data_tensors(); input->set_node_name(label); + switch(operandType) { + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_INT32: { + input->set_data_type(DataTensor::i32); + break; + } + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_FLOAT16: { + input->set_data_type(DataTensor::f16); + break; + } + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_FLOAT32: { + input->set_data_type(DataTensor::f32); + break; + } + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_BOOL8: { + input->set_data_type(DataTensor::boolean); + break; + } + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_ASYMM: { + input->set_data_type(DataTensor::u8); + break; + } + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_SYMM: + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL: + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_ASYMM_SIGNED: { + input->set_data_type(DataTensor::i8); + break; + } + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT16_SYMM: { + input->set_data_type(DataTensor::i16); + break; + } + case
android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT16_ASYMM: { + input->set_data_type(DataTensor::u16); + break; + } + default: { + input->set_data_type(DataTensor::u8); + break; + } + } for (index = 0; index < shape.size(); index++) { input->add_tensor_shape(shape[index]); } input->set_data(buffer, size); } -void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::vector shape) { +void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::vector shape, uint32_t expectedLength) { std::string src; size_t index; size_t size = 1; @@ -91,6 +174,9 @@ void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::v for (index = 0; index < reply.data_tensors_size(); index++) { if (label.compare(reply.data_tensors(index).node_name()) == 0) { src = reply.data_tensors(index).data(); + if(expectedLength != src.length()) { + ALOGE("Length Mismatch error: expected length %d , actual length %d", expectedLength, src.length()); + } memcpy(buffer, src.data(), src.length()); break; } @@ -104,9 +190,10 @@ void DetectionClient::clear_data() { std::string DetectionClient::remote_infer() { ClientContext context; - time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(20000); + time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(5000); context.set_deadline(deadline); + request.mutable_token()->set_data(mToken); status = stub_->getInferResult(&context, request, &reply); if (status.ok()) { if (reply.data_tensors_size() == 0) ALOGE("GRPC reply empty, ovms failure ?"); diff --git a/DetectionClient.h b/DetectionClient.h index dece36ae4..dcfe2ec70 100644 --- a/DetectionClient.h +++ b/DetectionClient.h @@ -8,6 +8,7 @@ #include #include #include "nnhal_object_detection.grpc.pb.h" +#include "Driver.h" using grpc::Channel; using grpc::ClientContext; @@ -22,19 +23,23 @@ using objectDetection::RequestDataTensors; using objectDetection::RequestString; using time_point 
= std::chrono::system_clock::time_point; +#define MODEL_DIR std::string("/data/vendor/neuralnetworks/") + class DetectionClient { public: - DetectionClient(std::shared_ptr channel) : stub_(Detection::NewStub(channel)){} + DetectionClient(std::shared_ptr channel, uint32_t token) : stub_(Detection::NewStub(channel)), mToken(token) {} std::string prepare(bool& flag); + std::string release(bool& flag); Status sendFile(std::string fileName, std::unique_ptr >& writer); std::string sendIRs(bool& flag, const std::string& ir_xml, const std::string& ir_bin); + bool isModelLoaded(std::string fileName); - void add_input_data(std::string label, const uint8_t* buffer, std::vector shape, uint32_t size); - void get_output_data(std::string label, uint8_t* buffer, std::vector shape); + void add_input_data(std::string label, const uint8_t* buffer, std::vector shape, uint32_t size, android::hardware::neuralnetworks::nnhal::OperandType operandType); + void get_output_data(std::string label, uint8_t* buffer, std::vector shape, uint32_t expectedLength); void clear_data(); std::string remote_infer(); bool get_status(); @@ -44,6 +49,7 @@ class DetectionClient { RequestDataTensors request; ReplyDataTensors reply; Status status; + uint32_t mToken; }; #endif \ No newline at end of file diff --git a/Driver.cpp b/Driver.cpp index 36fcec5a1..df1bc3729 100644 --- a/Driver.cpp +++ b/Driver.cpp @@ -403,6 +403,7 @@ Return Driver::prepareModel_1_3( // TODO: make asynchronous later sp driverPreparedModel = ModelFactory(mDeviceType, model); + for (auto& opn : model.main.operations) dumpOperation(opn); if (!driverPreparedModel->initialize()) { ALOGI("Failed to initialize prepared model"); cb->notify_1_3(convertToV1_3(ErrorStatus::INVALID_ARGUMENT), nullptr); diff --git a/ModelManager.cpp b/ModelManager.cpp index e162ec55a..e1d0e6ebe 100644 --- a/ModelManager.cpp +++ b/ModelManager.cpp @@ -233,6 +233,31 @@ void* NnapiModelInfo::getBlobFromMemoryPoolIn(const Request& request, uint32_t i return (r.buffer + 
arg.location.offset); } +void* NnapiModelInfo::getBlobFromMemoryPoolIn(const V1_3::Request& request, uint32_t index, + uint32_t& rBufferLength) { + RunTimeOperandInfo& operand = mOperands[mModel.main.inputIndexes[index]]; + const V1_0::RequestArgument& arg = request.inputs[index]; + auto poolIndex = arg.location.poolIndex; + nnAssert(poolIndex < mRequestPoolInfos.size()); + auto& r = mRequestPoolInfos[poolIndex]; + + if (arg.dimensions.size() > 0) { + // It's the responsibility of the caller to validate that + // from.dimensions only modifies the dimensions that were + // unspecified in the model. That's the case in SampleDriver.cpp + // with the call to validateRequest(). + operand.dimensions = arg.dimensions; + } + + operand.buffer = r.buffer + arg.location.offset; + operand.length = arg.location.length; + ALOGV("%s Operand length:%d pointer:%p offset:%d pool index: %d", __func__, operand.length, + (r.buffer + arg.location.offset), arg.location.offset, poolIndex); + rBufferLength = operand.length; + + return (r.buffer + arg.location.offset); +} + void* NnapiModelInfo::getBlobFromMemoryPoolOut(const Request& request, uint32_t index, uint32_t& rBufferLength) { RunTimeOperandInfo& operand = mOperands[mModel.main.outputIndexes[index]]; @@ -260,6 +285,33 @@ void* NnapiModelInfo::getBlobFromMemoryPoolOut(const Request& request, uint32_t return (r.buffer + arg.location.offset); } +void* NnapiModelInfo::getBlobFromMemoryPoolOut(const V1_3::Request& request, uint32_t index, + uint32_t& rBufferLength) { + RunTimeOperandInfo& operand = mOperands[mModel.main.outputIndexes[index]]; + const V1_0::RequestArgument& arg = request.outputs[index]; + auto poolIndex = arg.location.poolIndex; + nnAssert(poolIndex < mRequestPoolInfos.size()); + auto& r = mRequestPoolInfos[poolIndex]; + + ALOGV("%s lifetime:%d location offset:%d length:%d pool index:%d", __func__, operand.lifetime, + arg.location.offset, arg.location.length, poolIndex); + + if (arg.dimensions.size() > 0) { + // It's the 
responsibility of the caller to validate that + // from.dimensions only modifies the dimensions that were + // unspecified in the model. That's the case in SampleDriver.cpp + // with the call to validateRequest(). + operand.dimensions = arg.dimensions; + } + + operand.buffer = r.buffer + arg.location.offset; + operand.length = arg.location.length; + rBufferLength = operand.length; + ALOGV("%s Operand length:%d pointer:%p", __func__, operand.length, + (r.buffer + arg.location.offset)); + return (r.buffer + arg.location.offset); +} + bool NnapiModelInfo::isOmittedInput(int operationIndex, uint32_t index) { uint32_t inputIndex = mModel.main.operations[operationIndex].inputs[index]; const auto op = mModel.main.operands[inputIndex]; diff --git a/ModelManager.h b/ModelManager.h index 67e1a4b3d..cf853ab3e 100755 --- a/ModelManager.h +++ b/ModelManager.h @@ -133,7 +133,6 @@ class NnapiModelInfo { const auto value = GetConstOperand(inputIndex); ALOGV("Operation input index: %d, operand index: %d", index, inputIndex); ALOGV("Operation: %s", toString(mModel.main.operations[operationIndex]).c_str()); - printHelper::print(value, toString(operand).c_str()); return value; } @@ -167,7 +166,9 @@ class NnapiModelInfo { T GetConstFromBuffer(const uint8_t* buf, uint32_t len); void* getBlobFromMemoryPoolIn(const Request& request, uint32_t index, uint32_t& rBufferLength); + void* getBlobFromMemoryPoolIn(const V1_3::Request& request, uint32_t index, uint32_t& rBufferLength); void* getBlobFromMemoryPoolOut(const Request& request, uint32_t index, uint32_t& rBufferLength); + void* getBlobFromMemoryPoolOut(const V1_3::Request& request, uint32_t index, uint32_t& rBufferLength); Model getModel() { return mModel; } diff --git a/ngraph_creator/Android.bp b/ngraph_creator/Android.bp index 5a4175aa6..7f4f08397 100755 --- a/ngraph_creator/Android.bp +++ b/ngraph_creator/Android.bp @@ -99,9 +99,6 @@ cc_library_static { ], include_dirs: [ - "packages/modules/NeuralNetworks/common/include", - 
"packages/modules/NeuralNetworks/common/types/include", - "packages/modules/NeuralNetworks/runtime/include", "external/mesa3d/include/android_stub", ], diff --git a/proto/nnhal_object_detection.proto b/proto/nnhal_object_detection.proto index e0f14722a..d129ac575 100644 --- a/proto/nnhal_object_detection.proto +++ b/proto/nnhal_object_detection.proto @@ -1,16 +1,18 @@ -// Copyright 2015 gRPC authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +/* +* Copyright (c) 2022 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ syntax = "proto3"; @@ -27,16 +29,23 @@ service Detection { rpc getInferResult (RequestDataTensors) returns (ReplyDataTensors) {} rpc sendXml (stream RequestDataChunks) returns (ReplyStatus) {} rpc sendBin (stream RequestDataChunks) returns (ReplyStatus) {} + rpc loadModel(RequestString) returns (ReplyStatus) {} rpc prepare (RequestString) returns (ReplyStatus) {} //Placeholder for any future support : RequestString + rpc release (RequestString) returns (ReplyStatus) {} } +message Token { + uint32 data = 1; +} + message RequestDataChunks { bytes data = 1; + Token token = 2; } message RequestString { - string value = 1; + Token token = 1; } message ReplyStatus { bool status = 1; @@ -47,6 +56,25 @@ message DataTensor { bytes data = 1; string node_name = 2; repeated int32 tensor_shape = 3; + enum DATA_TYPE { + boolean = 0; + bf16 = 1; + f16 = 2; + f32 = 3; + f64 = 4; + i4 = 5; + i8 = 6; + i16 = 7; + i32 = 8; + i64 = 9; + u1 = 10; + u4 = 11; + u8 = 12; + u16 = 13; + u32 = 14; + u64 = 15; + } + DATA_TYPE data_type = 4; } // Reply message containing the Output Data Tensors(blobs) @@ -57,4 +85,5 @@ message ReplyDataTensors { // Request message containing the Input Data Tensors(blobs) message RequestDataTensors { repeated DataTensor data_tensors = 1; -} \ No newline at end of file + Token token = 2; +} diff --git a/utils.h b/utils.h index aea1cafc5..a67d051e9 100644 --- a/utils.h +++ b/utils.h @@ -96,12 +96,12 @@ enum PaddingScheme { #define dumpOperand(index, model) \ do { \ const auto op = model.operands[index]; \ - ALOGV("Operand (%zu) %s", index, toString(op).c_str()); \ + ALOGD("Operand (%zu) %s", index, toString(op).c_str()); \ } while (0) #define dumpOperation(operation) \ do { \ - ALOGV("Operation: %s", toString(operation).c_str()); \ + ALOGD("Operation: %s", toString(operation).c_str()); \ } while (0) #define WRONG_DIM (-1)