Skip to content

Commit ba27d5f

Browse files
Adding the new option and some code for converting between the two switches
1 parent 902cffe commit ba27d5f

File tree

4 files changed

+104
-0
lines changed

4 files changed

+104
-0
lines changed

src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1426,4 +1426,35 @@ struct USE_BASE_MODEL_SERIALIZER final : OptionBase<USE_BASE_MODEL_SERIALIZER, b
14261426
}
14271427
};
14281428

1429+
/**
 * @brief Option descriptor for the "ov::intel_npu::model_serializer_version" property.
 *
 * Values are converted to and from text through the stream operators defined for
 * "ov::intel_npu::ModelSerializerVersion".
 */
struct MODEL_SERIALIZER_VERSION final : OptionBase<MODEL_SERIALIZER_VERSION, ov::intel_npu::ModelSerializerVersion> {
    /** @return The configuration key, i.e. the name of the corresponding property. */
    static std::string_view key() {
        return ov::intel_npu::model_serializer_version.name();
    }

    /** @return The fully qualified name of the value type held by this option. */
    static constexpr std::string_view getTypeName() {
        return "ov::intel_npu::ModelSerializerVersion";
    }

    /** @return The value used when the option is not set explicitly ("AUTO"). */
    static ov::intel_npu::ModelSerializerVersion defaultValue() {
        return ov::intel_npu::ModelSerializerVersion::AUTO;
    }

    /**
     * @brief Converts the textual form of the option into its enum value.
     * @param val Text to convert; it is read through the enum's "operator>>".
     * @return The enum value extracted from the text.
     */
    static ov::intel_npu::ModelSerializerVersion parse(std::string_view val) {
        std::istringstream input{std::string(val)};
        ov::intel_npu::ModelSerializerVersion parsed;
        input >> parsed;
        return parsed;
    }

    /**
     * @brief Converts an enum value into its textual form.
     * @param val Value to convert; it is written through the enum's "operator<<".
     * @return The text produced by the stream operator.
     */
    static std::string toString(const ov::intel_npu::ModelSerializerVersion& val) {
        std::ostringstream output;
        output << val;
        return output.str();
    }

    /** @return The option mode, "OptionMode::CompileTime". */
    static OptionMode mode() {
        return OptionMode::CompileTime;
    }
};
1459+
14291460
} // namespace intel_npu

src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,50 @@ inline std::istream& operator>>(std::istream& is, WSVersion& wsVersion) {
188188
return is;
189189
}
190190

191+
/**
 * @brief [Only for NPU Plugin]
 * Selects which implementation of the VCL serializer is used.
 * Default is "AUTO".
 */
enum class ModelSerializerVersion {
    /** Let the plugin choose the implementation. */
    AUTO = 0,
    /** Presumably the implementation that copies all weights while serializing - TODO confirm. */
    ALL_WEIGHTS_COPY = 1,
    /** Presumably the implementation that avoids copying the weights - TODO confirm. */
    NO_WEIGHTS_COPY = 2
};
201+
202+
inline std::ostream& operator<<(std::ostream& out, const ModelSerializerVersion& modelSerializerVersion) {
203+
switch (modelSerializerVersion) {
204+
case ModelSerializerVersion::AUTO: {
205+
out << "AUTO";
206+
} break;
207+
case ModelSerializerVersion::ALL_WEIGHTS_COPY: {
208+
out << "ALL_WEIGHTS_COPY";
209+
} break;
210+
case ModelSerializerVersion::NO_WEIGHTS_COPY: {
211+
out << "NO_WEIGHTS_COPY";
212+
} break;
213+
default: {
214+
OPENVINO_THROW("Unsupported value for the model serializer version:", modelSerializerVersion);
215+
}
216+
}
217+
return out;
218+
}
219+
220+
inline std::istream& operator>>(std::istream& is, ModelSerializerVersion& modelSerializerVersion) {
221+
std::string str;
222+
is >> str;
223+
if (str == "AUTO") {
224+
modelSerializerVersion = ModelSerializerVersion::AUTO;
225+
} else if (str == "ALL_WEIGHTS_COPY") {
226+
modelSerializerVersion = ModelSerializerVersion::ALL_WEIGHTS_COPY;
227+
} else if (str == "NO_WEIGHTS_COPY") {
228+
modelSerializerVersion = ModelSerializerVersion::NO_WEIGHTS_COPY;
229+
} else {
230+
OPENVINO_THROW("Unsupported value for the model serializer version:", str);
231+
}
232+
return is;
233+
}
234+
191235
/**
192236
* @brief [Only for NPU Plugin]
193237
* Type: string, default is MODEL.
@@ -362,6 +406,21 @@ static constexpr ov::Property<bool> weightless_blob{"NPU_WEIGHTLESS_BLOB"};
362406
*/
363407
static constexpr ov::Property<bool> use_base_model_serializer{"NPU_USE_BASE_MODEL_SERIALIZER"};
364408

409+
/**
410+
* @brief [Only for NPU Plugin]
411+
* Type: enum ("AUTO", "ALL_WEIGHTS_COPY" or "NO_WEIGHTS_COPY"). Default is "AUTO".
412+
*
413+
* This config option concerns the algorithm used for serializing the "ov::Model" at compilation time in order to be
414+
* passed through the driver.
415+
*
416+
* The value chosen for this option will impact memory usage, since some versions clone the values of the weights in a
417+
* separate buffer. If this option is set to "AUTO", the plugin will use the latest version that is compatible with the
418+
* current compiler.
419+
*
420+
* @note This feature is a work-in-progress and may not yet work as intended.
421+
*/
422+
static constexpr ov::Property<ModelSerializerVersion> model_serializer_version{"NPU_MODEL_SERIALIZER_VERSION"};
423+
365424
/**
366425
* @brief [Experimental, only for NPU Plugin]
367426
* Type: integer.

src/plugins/intel_npu/src/plugin/src/plugin.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,7 @@ void Plugin::init_options() {
321321
REGISTER_OPTION(SEPARATE_WEIGHTS_VERSION);
322322
REGISTER_OPTION(WS_COMPILE_CALL_NUMBER);
323323
REGISTER_OPTION(USE_BASE_MODEL_SERIALIZER);
324+
REGISTER_OPTION(MODEL_SERIALIZER_VERSION);
324325

325326
if (_backend) {
326327
if (_backend->isCommandQueueExtSupported()) {
@@ -747,6 +748,18 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
747748
localConfig.update({{ov::intel_npu::weightless_blob.name(), cacheModeOptimizeSize ? "YES" : "NO"}});
748749
}
749750

751+
// There is an on-going migration from "USE_BASE_MODEL_SERIALIZER" to "MODEL_SERIALIZER_VERSION". Until done, make
752+
// sure both options have the same value if only one is defined.
753+
if (localConfig.has<MODEL_SERIALIZER_VERSION>() && !localConfig.has<USE_BASE_MODEL_SERIALIZER>()) {
754+
const bool useBaseModelSerializer =
755+
(localConfig.get<MODEL_SERIALIZER_VERSION>() == ov::intel_npu::ModelSerializerVersion::AUTO ||
756+
localConfig.get<MODEL_SERIALIZER_VERSION>() == ov::intel_npu::ModelSerializerVersion::ALL_WEIGHTS_COPY);
757+
localConfig.update({{ov::intel_npu::use_base_model_serializer.name(), useBaseModelSerializer ? "YES" : "NO"}});
758+
} else if (!localConfig.has<MODEL_SERIALIZER_VERSION>() && localConfig.has<USE_BASE_MODEL_SERIALIZER>()) {
759+
localConfig.update({{ov::intel_npu::model_serializer_version.name(),
760+
localConfig.get<USE_BASE_MODEL_SERIALIZER>() ? "ALL_WEIGHTS_COPY" : "NO_WEIGHTS_COPY"}});
761+
}
762+
750763
std::shared_ptr<intel_npu::IGraph> graph;
751764

752765
auto compileWithConfig = [&](const auto& modelToCompile, const auto& config) {

src/plugins/intel_npu/src/plugin/src/properties.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ void Properties::registerPluginProperties() {
385385
TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::weightless_blob, WEIGHTLESS_BLOB);
386386
TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::separate_weights_version, SEPARATE_WEIGHTS_VERSION);
387387
TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::use_base_model_serializer, USE_BASE_MODEL_SERIALIZER);
388+
TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::model_serializer_version, MODEL_SERIALIZER_VERSION);
388389

389390
TRY_REGISTER_CUSTOMFUNC_PROPERTY(ov::intel_npu::stepping, STEPPING, [&](const Config& config) {
390391
if (!config.has<STEPPING>()) {

0 commit comments

Comments
 (0)