diff --git a/Android.bp b/Android.bp index c4d807098..35e1f6a4f 100644 --- a/Android.bp +++ b/Android.bp @@ -21,9 +21,7 @@ cc_library_shared { ], include_dirs: [ - "packages/modules/NeuralNetworks/common/include", - "packages/modules/NeuralNetworks/common/types/include", - "packages/modules/NeuralNetworks/runtime/include", + "frameworks/ml/nn/runtime/include/", "frameworks/native/libs/nativewindow/include", "external/mesa3d/include/android_stub", "external/grpc-grpc", @@ -168,9 +166,8 @@ cc_binary { srcs: ["service.cpp"], include_dirs: [ - "packages/modules/NeuralNetworks/common/include", - "packages/modules/NeuralNetworks/common/types/include", - "packages/modules/NeuralNetworks/runtime/include", + "frameworks/ml/nn/common/include", + "frameworks/ml/nn/runtime/include/", "frameworks/native/libs/nativewindow/include", "external/mesa3d/include/android_stub", ], @@ -186,7 +183,6 @@ cc_binary { shared_libs: [ "libhidlbase", - "libhidltransport", "libhidlmemory", "libutils", "liblog", diff --git a/BasePreparedModel.cpp b/BasePreparedModel.cpp index 31c04b49c..59e9d9737 100644 --- a/BasePreparedModel.cpp +++ b/BasePreparedModel.cpp @@ -33,18 +33,20 @@ namespace android::hardware::neuralnetworks::nnhal { using namespace android::nn; static const Timing kNoTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX}; -bool mRemoteCheck = false; -std::shared_ptr mDetectionClient; uint32_t BasePreparedModel::mFileId = 0; void BasePreparedModel::deinitialize() { ALOGV("Entering %s", __func__); + bool is_success = false; mModelInfo->unmapRuntimeMemPools(); auto ret_xml = std::remove(mXmlFile.c_str()); auto ret_bin = std::remove(mBinFile.c_str()); if ((ret_xml != 0) || (ret_bin != 0)) { ALOGW("%s Deletion status of xml:%d, bin:%d", __func__, ret_xml, ret_bin); } + auto reply = mDetectionClient->release(is_success); + ALOGI("GRPC release response is %d : %s", is_success, reply.c_str()); + setRemoteEnabled(false); ALOGV("Exiting %s", __func__); } @@ -62,7 +64,23 @@ bool 
BasePreparedModel::initialize() { ALOGE("Failed to initialize Model runtime parameters!!"); return false; } - checkRemoteConnection(); + + setRemoteEnabled(checkRemoteConnection()); + if (mRemoteCheck) { + for (auto i : mModelInfo->getModelInputIndexes()) { + auto& nnapiOperandType = mModelInfo->getOperand(i).type; + switch (nnapiOperandType) { + case OperandType::FLOAT32: + case OperandType::TENSOR_FLOAT32: + break; + default: + ALOGD("GRPC Remote Infer not enabled for %d", nnapiOperandType); + setRemoteEnabled(false); + break; + } + if (!mRemoteCheck) break; + } + } mNgraphNetCreator = std::make_shared(mModelInfo, mTargetDevice); if (!mNgraphNetCreator->validateOperations()) return false; @@ -95,8 +113,11 @@ bool BasePreparedModel::checkRemoteConnection() { bool is_success = false; if(getGrpcIpPort(grpc_prop)) { ALOGV("Attempting GRPC via TCP : %s", grpc_prop); + grpc::ChannelArguments args; + args.SetMaxReceiveMessageSize(INT_MAX); + args.SetMaxSendMessageSize(INT_MAX); mDetectionClient = std::make_shared( - grpc::CreateChannel(grpc_prop, grpc::InsecureChannelCredentials())); + grpc::CreateCustomChannel(grpc_prop, grpc::InsecureChannelCredentials(), args), mFileId); if(mDetectionClient) { auto reply = mDetectionClient->prepare(is_success); ALOGI("GRPC(TCP) prepare response is %d : %s", is_success, reply.c_str()); @@ -104,14 +125,17 @@ bool BasePreparedModel::checkRemoteConnection() { } if (!is_success && getGrpcSocketPath(grpc_prop)) { ALOGV("Attempting GRPC via unix : %s", grpc_prop); + grpc::ChannelArguments args; + args.SetMaxReceiveMessageSize(INT_MAX); + args.SetMaxSendMessageSize(INT_MAX); mDetectionClient = std::make_shared( - grpc::CreateChannel(std::string("unix:") + grpc_prop, grpc::InsecureChannelCredentials())); + grpc::CreateCustomChannel(std::string("unix:") + grpc_prop, grpc::InsecureChannelCredentials(), args), mFileId); if(mDetectionClient) { auto reply = mDetectionClient->prepare(is_success); ALOGI("GRPC(unix) prepare response is %d : %s", 
is_success, reply.c_str()); } } - mRemoteCheck = is_success; + setRemoteEnabled(is_success); return is_success; } @@ -121,14 +145,24 @@ bool BasePreparedModel::loadRemoteModel(const std::string& ir_xml, const std::st if(mDetectionClient) { auto reply = mDetectionClient->sendIRs(is_success, ir_xml, ir_bin); ALOGI("sendIRs response GRPC %d %s", is_success, reply.c_str()); + if (reply == "status False") { + ALOGE("%s Model Load Failed",__func__); + } } else { ALOGE("%s mDetectionClient is null",__func__); } - mRemoteCheck = is_success; + setRemoteEnabled(is_success); return is_success; } +void BasePreparedModel::setRemoteEnabled(bool flag) { + if(mRemoteCheck != flag) { + ALOGD("GRPC %s Remote Connection", flag ? "ACQUIRED" : "RELEASED"); + mRemoteCheck = flag; + } +} + static Return notify(const sp& callback, const ErrorStatus& status, const hidl_vec&, Timing) { return callback->notify(status); @@ -268,12 +302,12 @@ void asyncExecute(const Request& request, MeasureTiming measure, BasePreparedMod ALOGD("%s Run", __func__); if (measure == MeasureTiming::YES) deviceStart = now(); - if(mRemoteCheck) { + if(preparedModel->mRemoteCheck) { ALOGI("%s GRPC Remote Infer", __func__); - auto reply = mDetectionClient->remote_infer(); + auto reply = preparedModel->mDetectionClient->remote_infer(); ALOGI("***********GRPC server response************* %s", reply.c_str()); } - if (!mRemoteCheck || !mDetectionClient->get_status()){ + if (!preparedModel->mRemoteCheck || !preparedModel->mDetectionClient->get_status()){ try { plugin->infer(); } catch (const std::exception& ex) { @@ -332,9 +366,9 @@ void asyncExecute(const Request& request, MeasureTiming measure, BasePreparedMod return; } - if (mRemoteCheck && mDetectionClient && mDetectionClient->get_status()) { - mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr, - ngraphNw->getOutputShape(outIndex)); + if (preparedModel->mRemoteCheck && preparedModel->mDetectionClient && 
preparedModel->mDetectionClient->get_status()) { + preparedModel->mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr, + ngraphNw->getOutputShape(outIndex), expectedLength); } else { switch (operandType) { case OperandType::TENSOR_INT32: @@ -399,7 +433,7 @@ void asyncExecute(const Request& request, MeasureTiming measure, BasePreparedMod } static std::tuple, Timing> executeSynchronouslyBase( - const Request& request, MeasureTiming measure, BasePreparedModel* preparedModel, + const V1_3::Request& request, MeasureTiming measure, BasePreparedModel* preparedModel, time_point driverStart) { ALOGV("Entering %s", __func__); auto modelInfo = preparedModel->getModelInfo(); @@ -408,7 +442,7 @@ static std::tuple, Timing> executeSynch time_point driverEnd, deviceStart, deviceEnd; std::vector requestPoolInfos; auto errorStatus = modelInfo->setRunTimePoolInfosFromHidlMemories(request.pools); - if (errorStatus != ErrorStatus::NONE) { + if (errorStatus != V1_3::ErrorStatus::NONE) { ALOGE("Failed to set runtime pool info from HIDL memories"); return {ErrorStatus::GENERAL_FAILURE, {}, kNoTiming}; } @@ -427,8 +461,9 @@ static std::tuple, Timing> executeSynch ALOGV("Input index: %d layername : %s", inIndex, inputNodeName.c_str()); //check if remote infer is available //TODO: Need to add FLOAT16 support for remote inferencing - if(mRemoteCheck && mDetectionClient) { - mDetectionClient->add_input_data(std::to_string(i), (uint8_t*)srcPtr, ngraphNw->getOutputShape(inIndex), len); + if(preparedModel->mRemoteCheck && preparedModel->mDetectionClient) { + auto inOperandType = modelInfo->getOperandType(inIndex); + preparedModel->mDetectionClient->add_input_data(std::to_string(i), (uint8_t*)srcPtr, ngraphNw->getOutputShape(inIndex), len, inOperandType); } else { ov::Tensor destTensor; try { @@ -493,12 +528,15 @@ static std::tuple, Timing> executeSynch ALOGV("%s Run", __func__); if (measure == MeasureTiming::YES) deviceStart = now(); - if(mRemoteCheck) { + 
if(preparedModel->mRemoteCheck) { ALOGI("%s GRPC Remote Infer", __func__); - auto reply = mDetectionClient->remote_infer(); + auto reply = preparedModel->mDetectionClient->remote_infer(); ALOGI("***********GRPC server response************* %s", reply.c_str()); } - if (!mRemoteCheck || !mDetectionClient->get_status()){ + if (!preparedModel->mRemoteCheck || !preparedModel->mDetectionClient->get_status()){ + if(preparedModel->mRemoteCheck) { + preparedModel->setRemoteEnabled(false); + } try { ALOGV("%s Client Infer", __func__); plugin->infer(); @@ -555,9 +593,9 @@ static std::tuple, Timing> executeSynch } //copy output from remote infer //TODO: Add support for other OperandType - if (mRemoteCheck && mDetectionClient && mDetectionClient->get_status()) { - mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr, - ngraphNw->getOutputShape(outIndex)); + if (preparedModel->mRemoteCheck && preparedModel->mDetectionClient && preparedModel->mDetectionClient->get_status()) { + preparedModel->mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr, + ngraphNw->getOutputShape(outIndex), expectedLength); } else { switch (operandType) { case OperandType::TENSOR_INT32: @@ -606,8 +644,8 @@ static std::tuple, Timing> executeSynch ALOGE("Failed to update the request pool infos"); return {ErrorStatus::GENERAL_FAILURE, {}, kNoTiming}; } - if (mRemoteCheck && mDetectionClient && mDetectionClient->get_status()) { - mDetectionClient->clear_data(); + if (preparedModel->mRemoteCheck && preparedModel->mDetectionClient && preparedModel->mDetectionClient->get_status()) { + preparedModel->mDetectionClient->clear_data(); } if (measure == MeasureTiming::YES) { @@ -631,7 +669,7 @@ Return BasePreparedModel::executeSynchronously(const Request& request, Mea return Void(); } auto [status, outputShapes, timing] = - executeSynchronouslyBase(request, measure, this, driverStart); + executeSynchronouslyBase(convertToV1_3(request), measure, this, driverStart); cb(status, 
std::move(outputShapes), timing); ALOGV("Exiting %s", __func__); return Void(); @@ -646,12 +684,12 @@ Return BasePreparedModel::executeSynchronously_1_3(const V1_3::Request& re time_point driverStart; if (measure == MeasureTiming::YES) driverStart = now(); - if (!validateRequest(convertToV1_0(request), convertToV1_2(mModelInfo->getModel()))) { + if (!validateRequest(request, mModelInfo->getModel())) { cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming); return Void(); } auto [status, outputShapes, timing] = - executeSynchronouslyBase(convertToV1_0(request), measure, this, driverStart); + executeSynchronouslyBase(request, measure, this, driverStart); cb(convertToV1_3(status), std::move(outputShapes), timing); ALOGV("Exiting %s", __func__); return Void(); @@ -872,7 +910,7 @@ Return BasePreparedModel::executeFenced(const V1_3::Request& request1_3, if (mRemoteCheck && mDetectionClient && mDetectionClient->get_status()) { mDetectionClient->get_output_data(std::to_string(i), (uint8_t*)destPtr, - mNgraphNetCreator->getOutputShape(outIndex)); + mNgraphNetCreator->getOutputShape(outIndex), expectedLength); } else { switch (operandType) { case OperandType::TENSOR_INT32: diff --git a/BasePreparedModel.h b/BasePreparedModel.h index 9fbdd1abd..ec8f5dcc6 100755 --- a/BasePreparedModel.h +++ b/BasePreparedModel.h @@ -49,14 +49,13 @@ namespace android::hardware::neuralnetworks::nnhal { template using vec = std::vector; typedef uint8_t* memory; -extern bool mRemoteCheck; -extern std::shared_ptr mDetectionClient; class BasePreparedModel : public V1_3::IPreparedModel { public: + bool mRemoteCheck = false; BasePreparedModel(const IntelDeviceType device, const Model& model) : mTargetDevice(device) { mModelInfo = std::make_shared(model); - mXmlFile = std::string("/data/vendor/neuralnetworks/") + std::to_string(mFileId) + std::string(".xml"); - mBinFile = std::string("/data/vendor/neuralnetworks/") + std::to_string(mFileId) + std::string(".bin"); + mXmlFile = MODEL_DIR + 
std::to_string(mFileId) + std::string(".xml"); + mBinFile = MODEL_DIR + std::to_string(mFileId) + std::string(".bin"); mFileId++; } @@ -90,6 +89,7 @@ class BasePreparedModel : public V1_3::IPreparedModel { virtual bool initialize(); virtual bool checkRemoteConnection(); virtual bool loadRemoteModel(const std::string& ir_xml, const std::string& ir_bin); + virtual void setRemoteEnabled(bool flag); std::shared_ptr getModelInfo() { return mModelInfo; } @@ -98,6 +98,7 @@ class BasePreparedModel : public V1_3::IPreparedModel { std::shared_ptr getPlugin() { return mPlugin; } std::shared_ptr modelPtr; + std::shared_ptr mDetectionClient; protected: virtual void deinitialize(); diff --git a/DetectionClient.cpp b/DetectionClient.cpp index 4d0716180..de08783db 100644 --- a/DetectionClient.cpp +++ b/DetectionClient.cpp @@ -5,7 +5,7 @@ std::string DetectionClient::prepare(bool& flag) { RequestString request; - request.set_value(""); + request.mutable_token()->set_data(mToken); ReplyStatus reply; ClientContext context; time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(100); @@ -21,9 +21,26 @@ std::string DetectionClient::prepare(bool& flag) { } } +std::string DetectionClient::release(bool& flag) { + RequestString request; + request.mutable_token()->set_data(mToken); + ReplyStatus reply; + ClientContext context; + + Status status = stub_->release(&context, request, &reply); + + if (status.ok()) { + flag = reply.status(); + return (flag ? 
"status True" : "status False"); + } else { + return std::string(status.error_message()); + } +} + Status DetectionClient::sendFile(std::string fileName, std::unique_ptr >& writer) { RequestDataChunks request; + request.mutable_token()->set_data(mToken); uint32_t CHUNK_SIZE = 1024 * 1024; std::ifstream fin(fileName, std::ifstream::binary); std::vector buffer(CHUNK_SIZE, 0); @@ -47,6 +64,22 @@ Status DetectionClient::sendFile(std::string fileName, return writer->Finish(); } +bool DetectionClient::isModelLoaded(std::string fileName) { + ReplyStatus reply; + ClientContext context; + RequestString request; + request.mutable_token()->set_data(mToken); + time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(20000); + context.set_deadline(deadline); + status = stub_->loadModel(&context, request, &reply); + if(status.ok()) { + return reply.status(); + } else { + ALOGE("Model Load failure: %s", status.error_message().c_str()); + } + return false; +} + std::string DetectionClient::sendIRs(bool& flag, const std::string& ir_xml, const std::string& ir_bin) { ReplyStatus reply; ClientContext context; @@ -62,25 +95,75 @@ std::string DetectionClient::sendIRs(bool& flag, const std::string& ir_xml, cons status = sendFile(ir_bin, writerBin); if (status.ok()) { flag = reply.status(); - return (flag ? 
"status True" : "status False"); + //if model is sent successfully trigger model loading + if (flag && isModelLoaded(ir_xml) ) { + flag = true; + return ("status True"); + } else { + flag = false; + ALOGE("Model Loading Failed!!!"); + return ("status False"); + } + } else { + return ("status False"); + } } return std::string(status.error_message()); } -void DetectionClient::add_input_data(std::string label, const uint8_t* buffer, std::vector shape, uint32_t size) { +void DetectionClient::add_input_data(std::string label, const uint8_t* buffer, std::vector shape, uint32_t size, android::hardware::neuralnetworks::nnhal::OperandType operandType) { const float* src; size_t index; DataTensor* input = request.add_data_tensors(); input->set_node_name(label); + switch(operandType) { + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_INT32: { + input->set_data_type(DataTensor::i32); + break; + } + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_FLOAT16: { + input->set_data_type(DataTensor::f16); + break; + } + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_FLOAT32: { + input->set_data_type(DataTensor::f32); + break; + } + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_BOOL8: { + input->set_data_type(DataTensor::boolean); + break; + } + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_ASYMM: { + input->set_data_type(DataTensor::u8); + break; + } + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_SYMM: + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL: + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_ASYMM_SIGNED: { + input->set_data_type(DataTensor::i8); + break; + } + case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT16_SYMM: { + input->set_data_type(DataTensor::i16); + break; + } + case
android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT16_ASYMM: { + input->set_data_type(DataTensor::u16); + break; + } + default: { + input->set_data_type(DataTensor::u8); + break; + } + } for (index = 0; index < shape.size(); index++) { input->add_tensor_shape(shape[index]); } input->set_data(buffer, size); } -void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::vector shape) { +void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::vector shape, uint32_t expectedLength) { std::string src; size_t index; size_t size = 1; @@ -91,6 +174,9 @@ void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::v for (index = 0; index < reply.data_tensors_size(); index++) { if (label.compare(reply.data_tensors(index).node_name()) == 0) { src = reply.data_tensors(index).data(); + if(expectedLength != src.length()) { + ALOGE("Length Mismatch error: expected length %d , actual length %d", expectedLength, src.length()); + } memcpy(buffer, src.data(), src.length()); break; } @@ -104,9 +190,10 @@ void DetectionClient::clear_data() { std::string DetectionClient::remote_infer() { ClientContext context; - time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(20000); + time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(5000); context.set_deadline(deadline); + request.mutable_token()->set_data(mToken); status = stub_->getInferResult(&context, request, &reply); if (status.ok()) { if (reply.data_tensors_size() == 0) ALOGE("GRPC reply empty, ovms failure ?"); diff --git a/DetectionClient.h b/DetectionClient.h index dece36ae4..dcfe2ec70 100644 --- a/DetectionClient.h +++ b/DetectionClient.h @@ -8,6 +8,7 @@ #include #include #include "nnhal_object_detection.grpc.pb.h" +#include "Driver.h" using grpc::Channel; using grpc::ClientContext; @@ -22,19 +23,23 @@ using objectDetection::RequestDataTensors; using objectDetection::RequestString; using time_point 
= std::chrono::system_clock::time_point; +#define MODEL_DIR std::string("/data/vendor/neuralnetworks/") + class DetectionClient { public: - DetectionClient(std::shared_ptr channel) : stub_(Detection::NewStub(channel)){} + DetectionClient(std::shared_ptr channel, uint32_t token) : stub_(Detection::NewStub(channel)), mToken(token) {} std::string prepare(bool& flag); + std::string release(bool& flag); Status sendFile(std::string fileName, std::unique_ptr >& writer); std::string sendIRs(bool& flag, const std::string& ir_xml, const std::string& ir_bin); + bool isModelLoaded(std::string fileName); - void add_input_data(std::string label, const uint8_t* buffer, std::vector shape, uint32_t size); - void get_output_data(std::string label, uint8_t* buffer, std::vector shape); + void add_input_data(std::string label, const uint8_t* buffer, std::vector shape, uint32_t size, android::hardware::neuralnetworks::nnhal::OperandType operandType); + void get_output_data(std::string label, uint8_t* buffer, std::vector shape, uint32_t expectedLength); void clear_data(); std::string remote_infer(); bool get_status(); @@ -44,6 +49,7 @@ class DetectionClient { RequestDataTensors request; ReplyDataTensors reply; Status status; + uint32_t mToken; }; #endif \ No newline at end of file diff --git a/Driver.cpp b/Driver.cpp index 36fcec5a1..df1bc3729 100644 --- a/Driver.cpp +++ b/Driver.cpp @@ -403,6 +403,7 @@ Return Driver::prepareModel_1_3( // TODO: make asynchronous later sp driverPreparedModel = ModelFactory(mDeviceType, model); + for (auto& opn : model.main.operations) dumpOperation(opn); if (!driverPreparedModel->initialize()) { ALOGI("Failed to initialize prepared model"); cb->notify_1_3(convertToV1_3(ErrorStatus::INVALID_ARGUMENT), nullptr); diff --git a/ModelManager.cpp b/ModelManager.cpp index e162ec55a..e1d0e6ebe 100644 --- a/ModelManager.cpp +++ b/ModelManager.cpp @@ -233,6 +233,31 @@ void* NnapiModelInfo::getBlobFromMemoryPoolIn(const Request& request, uint32_t i return (r.buffer + 
arg.location.offset); } +void* NnapiModelInfo::getBlobFromMemoryPoolIn(const V1_3::Request& request, uint32_t index, + uint32_t& rBufferLength) { + RunTimeOperandInfo& operand = mOperands[mModel.main.inputIndexes[index]]; + const V1_0::RequestArgument& arg = request.inputs[index]; + auto poolIndex = arg.location.poolIndex; + nnAssert(poolIndex < mRequestPoolInfos.size()); + auto& r = mRequestPoolInfos[poolIndex]; + + if (arg.dimensions.size() > 0) { + // It's the responsibility of the caller to validate that + // from.dimensions only modifies the dimensions that were + // unspecified in the model. That's the case in SampleDriver.cpp + // with the call to validateRequest(). + operand.dimensions = arg.dimensions; + } + + operand.buffer = r.buffer + arg.location.offset; + operand.length = arg.location.length; + ALOGV("%s Operand length:%d pointer:%p offset:%d pool index: %d", __func__, operand.length, + (r.buffer + arg.location.offset), arg.location.offset, poolIndex); + rBufferLength = operand.length; + + return (r.buffer + arg.location.offset); +} + void* NnapiModelInfo::getBlobFromMemoryPoolOut(const Request& request, uint32_t index, uint32_t& rBufferLength) { RunTimeOperandInfo& operand = mOperands[mModel.main.outputIndexes[index]]; @@ -260,6 +285,33 @@ void* NnapiModelInfo::getBlobFromMemoryPoolOut(const Request& request, uint32_t return (r.buffer + arg.location.offset); } +void* NnapiModelInfo::getBlobFromMemoryPoolOut(const V1_3::Request& request, uint32_t index, + uint32_t& rBufferLength) { + RunTimeOperandInfo& operand = mOperands[mModel.main.outputIndexes[index]]; + const V1_0::RequestArgument& arg = request.outputs[index]; + auto poolIndex = arg.location.poolIndex; + nnAssert(poolIndex < mRequestPoolInfos.size()); + auto& r = mRequestPoolInfos[poolIndex]; + + ALOGV("%s lifetime:%d location offset:%d length:%d pool index:%d", __func__, operand.lifetime, + arg.location.offset, arg.location.length, poolIndex); + + if (arg.dimensions.size() > 0) { + // It's the 
responsibility of the caller to validate that + // from.dimensions only modifies the dimensions that were + // unspecified in the model. That's the case in SampleDriver.cpp + // with the call to validateRequest(). + operand.dimensions = arg.dimensions; + } + + operand.buffer = r.buffer + arg.location.offset; + operand.length = arg.location.length; + rBufferLength = operand.length; + ALOGV("%s Operand length:%d pointer:%p", __func__, operand.length, + (r.buffer + arg.location.offset)); + return (r.buffer + arg.location.offset); +} + bool NnapiModelInfo::isOmittedInput(int operationIndex, uint32_t index) { uint32_t inputIndex = mModel.main.operations[operationIndex].inputs[index]; const auto op = mModel.main.operands[inputIndex]; diff --git a/ModelManager.h b/ModelManager.h index 67e1a4b3d..cf853ab3e 100755 --- a/ModelManager.h +++ b/ModelManager.h @@ -133,7 +133,6 @@ class NnapiModelInfo { const auto value = GetConstOperand(inputIndex); ALOGV("Operation input index: %d, operand index: %d", index, inputIndex); ALOGV("Operation: %s", toString(mModel.main.operations[operationIndex]).c_str()); - printHelper::print(value, toString(operand).c_str()); return value; } @@ -167,7 +166,9 @@ class NnapiModelInfo { T GetConstFromBuffer(const uint8_t* buf, uint32_t len); void* getBlobFromMemoryPoolIn(const Request& request, uint32_t index, uint32_t& rBufferLength); + void* getBlobFromMemoryPoolIn(const V1_3::Request& request, uint32_t index, uint32_t& rBufferLength); void* getBlobFromMemoryPoolOut(const Request& request, uint32_t index, uint32_t& rBufferLength); + void* getBlobFromMemoryPoolOut(const V1_3::Request& request, uint32_t index, uint32_t& rBufferLength); Model getModel() { return mModel; } diff --git a/ngraph_creator/Android.bp b/ngraph_creator/Android.bp index 5a4175aa6..7f4f08397 100755 --- a/ngraph_creator/Android.bp +++ b/ngraph_creator/Android.bp @@ -99,9 +99,6 @@ cc_library_static { ], include_dirs: [ - "packages/modules/NeuralNetworks/common/include", - 
"packages/modules/NeuralNetworks/common/types/include", - "packages/modules/NeuralNetworks/runtime/include", "external/mesa3d/include/android_stub", ], diff --git a/proto/nnhal_object_detection.proto b/proto/nnhal_object_detection.proto index e0f14722a..d129ac575 100644 --- a/proto/nnhal_object_detection.proto +++ b/proto/nnhal_object_detection.proto @@ -1,16 +1,18 @@ -// Copyright 2015 gRPC authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +/* +* Copyright (c) 2022 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ syntax = "proto3"; @@ -27,16 +29,23 @@ service Detection { rpc getInferResult (RequestDataTensors) returns (ReplyDataTensors) {} rpc sendXml (stream RequestDataChunks) returns (ReplyStatus) {} rpc sendBin (stream RequestDataChunks) returns (ReplyStatus) {} + rpc loadModel(RequestString) returns (ReplyStatus) {} rpc prepare (RequestString) returns (ReplyStatus) {} //Placeholder for any future support : RequestString + rpc release (RequestString) returns (ReplyStatus) {} } +message Token { + uint32 data = 1; +} + message RequestDataChunks { bytes data = 1; + Token token = 2; } message RequestString { - string value = 1; + Token token = 1; } message ReplyStatus { bool status = 1; @@ -47,6 +56,25 @@ message DataTensor { bytes data = 1; string node_name = 2; repeated int32 tensor_shape = 3; + enum DATA_TYPE { + boolean = 0; + bf16 = 1; + f16 = 2; + f32 = 3; + f64 = 4; + i4 = 5; + i8 = 6; + i16 = 7; + i32 = 8; + i64 = 9; + u1 = 10; + u4 = 11; + u8 = 12; + u16 = 13; + u32 = 14; + u64 = 15; + } + DATA_TYPE data_type = 4; } // Reply message containing the Output Data Tensors(blobs) @@ -57,4 +85,5 @@ message ReplyDataTensors { // Request message containing the Input Data Tensors(blobs) message RequestDataTensors { repeated DataTensor data_tensors = 1; -} \ No newline at end of file + Token token = 2; +} diff --git a/utils.h b/utils.h index aea1cafc5..a67d051e9 100644 --- a/utils.h +++ b/utils.h @@ -96,12 +96,12 @@ enum PaddingScheme { #define dumpOperand(index, model) \ do { \ const auto op = model.operands[index]; \ - ALOGV("Operand (%zu) %s", index, toString(op).c_str()); \ + ALOGD("Operand (%zu) %s", index, toString(op).c_str()); \ } while (0) #define dumpOperation(operation) \ do { \ - ALOGV("Operation: %s", toString(operation).c_str()); \ + ALOGD("Operation: %s", toString(operation).c_str()); \ } while (0) #define WRONG_DIM (-1)