
Commit e482870

test rls-v3.10 ci
1 parent cb7c4f2 commit e482870

13 files changed: +58 additions, -43 deletions

.gitmodules

Lines changed: 1 addition & 1 deletion

The onednn submodule is repointed from the openvinotoolkit organization to the azhai219 fork, which presumably carries the rls-v3.10 oneDNN API changes that the rest of this commit adapts the CPU plugin to.

@@ -1,6 +1,6 @@
 [submodule "src/plugins/intel_cpu/thirdparty/onednn"]
 	path = src/plugins/intel_cpu/thirdparty/onednn
-	url = https://github.com/openvinotoolkit/oneDNN.git
+	url = https://github.com/azhai219/oneDNN.git
 	ignore = dirty
 [submodule "thirdparty/xbyak"]
 	path = thirdparty/xbyak

src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_amx.cpp

Lines changed: 1 addition & 1 deletion

@@ -288,7 +288,7 @@ void BrgemmAMXKernelExecutor::execute_brgemm_copy_a_kernel(
     ctx.current_M_blk = M;
     ctx.zp_b_compensation_buffer_ptr = nullptr;
     ctx.zp_a_compensation_result_ptr = nullptr;
-    ctx.zp_b_neg_value_ptr = nullptr;
+    ctx.zp_b_neg_val_ptr = nullptr;
     ctx.zp_ab_comp_ptr = nullptr;
     ctx.src = src;
     ctx.tr_src = tr_src;

src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.cpp

Lines changed: 5 additions & 3 deletions

@@ -298,7 +298,7 @@ bool DnnlBlockedMemoryDesc::isCompatible(const BlockedMemoryDesc& rhs, CmpMask c

 bool DnnlBlockedMemoryDesc::isCompatible(const CpuBlockedMemoryDesc& rhs, CmpMask cmpMask) const {
     dnnl::impl::memory_desc_wrapper wrapped(desc.get());
-    return wrapped.extra().flags == dnnl_memory_extra_flag_none &&
+    return wrapped.extra().flags == dnnl::impl::memory_extra_flags_t::dnnl_memory_extra_flag_none &&
            BlockedMemoryDesc::isCompatibleInternal(rhs, cmpMask);
 }

@@ -470,11 +470,13 @@ static dnnl::memory::desc cloneDescWithNewDims(const dnnl::memory::desc& desc,
     dnnl::memory::desc clonedDesc(DnnlExtensionUtils::clone_desc(desc.get()));

     array_copy(clonedDesc.get()->dims, mklDims.data(), mklDims.size());
-    dnnl::memory::dims perm(convert_to_vector<dnnl::memory::dim, size_t>(order.data(), mklDims.size()));
+    std::vector<int> perm(convert_to_vector<int, size_t>(order.data(), mklDims.size()));
     auto innerBlks = clonedDesc.get_inner_blks();
     auto innerIdxs = clonedDesc.get_inner_idxs();
+    std::vector<int> innerBlksInt(innerBlks.begin(), innerBlks.end());
+    std::vector<int> innerIdxsInt(innerIdxs.begin(), innerIdxs.end());

-    auto retCode = dnnl::impl::fill_blocked(*clonedDesc.get(), perm, innerBlks, innerIdxs);
+    auto retCode = dnnl::impl::fill_blocked(*clonedDesc.get(), perm, innerBlksInt, innerIdxsInt);
     OPENVINO_ASSERT(retCode == dnnl::impl::status::success,
                     "Can not clone DnnlBlockedMemoryDesc with dims: ",
                     dims2str(dims));
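
The forked oneDNN's fill_blocked() now takes std::vector<int> where it previously accepted 64-bit dnnl::memory::dims, which is why the patch copies the permutation and inner-block arrays into int vectors first. A minimal sketch of a checked narrowing helper (hypothetical, not part of the patch, which constructs the int vectors directly and relies on blocking values being small):

#include <cstdint>
#include <limits>
#include <stdexcept>
#include <vector>

// Narrow 64-bit oneDNN dims to the int vector the new fill_blocked()
// signature expects, refusing to truncate silently.
static std::vector<int> to_int_vec(const std::vector<int64_t>& src) {
    std::vector<int> dst;
    dst.reserve(src.size());
    for (const int64_t v : src) {
        if (v > std::numeric_limits<int>::max() || v < std::numeric_limits<int>::min()) {
            throw std::out_of_range("dim value does not fit into int");
        }
        dst.push_back(static_cast<int>(v));
    }
    return dst;
}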

src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp

Lines changed: 1 addition & 1 deletion

@@ -99,7 +99,7 @@ dnnl::memory::format_kind DnnlMemoryDesc::getFormatKind() const {

 bool DnnlMemoryDesc::hasEmptyExtraData() const {
     dnnl::impl::memory_desc_wrapper wrapped(desc.get());
-    return wrapped.extra().flags == dnnl_memory_extra_flag_none;
+    return wrapped.extra().flags == dnnl::impl::dnnl_memory_extra_flag_none;
 }

 bool DnnlMemoryDesc::canComputeMemSizeZeroDims() const {

src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp

Lines changed: 17 additions & 17 deletions

@@ -165,20 +165,20 @@ class jit_convert_array : public jit_kernel {
           _dst_size(sizeof(dst_t)) {
         const auto type = get_f8_type<src_t, dst_t>();
         if (type == f8_type::f8e4m3) {
-            f8_e4m3_emu_ = std::make_shared<fp8_emulation_e4m3_t>(this,
-                                                                  fp8_emu_reserv_1_,
-                                                                  fp8_emu_reserv_2_,
-                                                                  fp8_emu_reserv_3_,
-                                                                  fp8_emu_reserv_4_,
-                                                                  fp8_emu_reserv_5_,
-                                                                  fp8_emu_scratch_);
+            f8_e4m3_emu_ = std::make_shared<fp8_conversion_e4m3_t>(this,
+                                                                   fp8_emu_reserv_1_,
+                                                                   fp8_emu_reserv_2_,
+                                                                   fp8_emu_reserv_3_,
+                                                                   fp8_emu_reserv_4_,
+                                                                   fp8_emu_reserv_5_,
+                                                                   fp8_emu_scratch_);
         } else if (type == f8_type::f8e5m2) {
-            f8_e5m2_emu_ = std::make_shared<fp8_emulation_e5m2_t>(this,
-                                                                  fp8_emu_reserv_1_,
-                                                                  fp8_emu_reserv_2_,
-                                                                  fp8_emu_reserv_3_,
-                                                                  fp8_emu_kmask_aux_,
-                                                                  fp8_emu_scratch_);
+            f8_e5m2_emu_ = std::make_shared<fp8_conversion_e5m2_t>(this,
+                                                                   fp8_emu_reserv_1_,
+                                                                   fp8_emu_reserv_2_,
+                                                                   fp8_emu_reserv_3_,
+                                                                   fp8_emu_kmask_aux_,
+                                                                   fp8_emu_scratch_);
         }
         const bool is_dst_bf16 = std::is_same_v<dst_t, ov::intel_cpu::bfloat16_t>;
         if (is_dst_bf16 && mayiuse(cpu_isa_t::avx512_core)) {

@@ -196,11 +196,11 @@ class jit_convert_array : public jit_kernel {
         return nullptr;
     }

-    std::shared_ptr<fp8_emulation_e4m3_t> get_f8_e4m3_emu() const {
+    std::shared_ptr<fp8_conversion_e4m3_t> get_f8_e4m3_emu() const {
         return f8_e4m3_emu_;
     }

-    std::shared_ptr<fp8_emulation_e5m2_t> get_f8_e5m2_emu() const {
+    std::shared_ptr<fp8_conversion_e5m2_t> get_f8_e5m2_emu() const {
         return f8_e5m2_emu_;
     }

@@ -213,8 +213,8 @@ class jit_convert_array : public jit_kernel {
     size_t _src_size;
     size_t _dst_size;

-    std::shared_ptr<fp8_emulation_e4m3_t> f8_e4m3_emu_;
-    std::shared_ptr<fp8_emulation_e5m2_t> f8_e5m2_emu_;
+    std::shared_ptr<fp8_conversion_e4m3_t> f8_e4m3_emu_;
+    std::shared_ptr<fp8_conversion_e5m2_t> f8_e5m2_emu_;
     std::shared_ptr<jit_uni_vcvtneps2bf16> uni_vcvtneps2bf16_;

     const Reg64 fp8_emu_scratch_ = rax;
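
This file is a pure rename: the fork now calls its fp8 JIT helpers fp8_conversion_e4m3_t / fp8_conversion_e5m2_t instead of fp8_emulation_e4m3_t / fp8_emulation_e5m2_t, with unchanged constructor arguments. A hypothetical compatibility alias (not in the patch; ONEDNN_FORK_HAS_FP8_CONVERSION is an assumed, illustrative macro) would let the plugin build against either naming:

// Assumed feature macro, illustrative only -- not provided by oneDNN itself.
#if defined(ONEDNN_FORK_HAS_FP8_CONVERSION)
using fp8_cvt_e4m3_t = dnnl::impl::cpu::x64::fp8_conversion_e4m3_t;
using fp8_cvt_e5m2_t = dnnl::impl::cpu::x64::fp8_conversion_e5m2_t;
#else
using fp8_cvt_e4m3_t = dnnl::impl::cpu::x64::fp8_emulation_e4m3_t;
using fp8_cvt_e5m2_t = dnnl::impl::cpu::x64::fp8_emulation_e5m2_t;
#endif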

src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp

Lines changed: 10 additions & 2 deletions

@@ -105,6 +105,7 @@ std::shared_ptr<DnnlFCPrimitive> DnnlFCPrimitive::create(const MemoryArgs& memor
                        dstDesc,
                        shapeAgnosticData->m_primAttrs.attr,
                        attrs.sparseWeights,
+                       attrs.sparseWeightsNonZeroSize,
                        attrs.modelType};

     auto builder = [&context](const Key& dnnlKey) {

@@ -305,6 +306,7 @@ static dnnl::inner_product_forward::primitive_desc createDescriptorInternal(cons
                                                                             const dnnl::primitive_attr& attr,
                                                                             const dnnl::engine& engine,
                                                                             const bool useSparseWeights,
+                                                                            const size_t useSparseWeightsNonZeroSize,
                                                                             const bool useWeightsDecompression) {
     const auto normalizedInputDesc = normalizeDescriptor(inputDesc);
     const auto normalizedOutputDesc = normalizeDescriptor(outputDesc);

@@ -331,8 +333,9 @@ static dnnl::inner_product_forward::primitive_desc createDescriptorInternal(cons
         wdt = memory::data_type::s8;
     }

+    // TODO: @Xiuchuan support the native sparse feature of stock oneDNN.
     const dnnl::memory::desc weightsDesc =
-        useSparseWeights ? dnnl::memory::desc().sparse_desc(normalizedWeightDesc.get_dims(), wdt)
+        useSparseWeights ? dnnl::memory::desc::packed(normalizedWeightDesc.get_dims(), wdt, useSparseWeightsNonZeroSize)
                          : dnnl::memory::desc(normalizedWeightDesc.get_dims(), wdt, memory::format_tag::any);

     return {engine,

@@ -352,6 +355,7 @@ static primitive_desc createPrimitiveDesc(const dnnl::memory::desc& inputDesc,
                                           const dnnl::engine& engine,
                                           const std::vector<impl_desc_type>& implPriorities,
                                           const bool useSparseWeights,
+                                          const size_t useSparseWeightsNonZeroSize,
                                           const bool useWeightsDecompression) {
     auto prim_desc = createDescriptorInternal(inputDesc,
                                               weightDesc,

@@ -360,6 +364,7 @@ static primitive_desc createPrimitiveDesc(const dnnl::memory::desc& inputDesc,
                                               attr,
                                               engine,
                                               useSparseWeights,
+                                              useSparseWeightsNonZeroSize,
                                               useWeightsDecompression);
     OPENVINO_ASSERT(prim_desc, "Failed to create inner_product primitive descriptor");
     auto first_desc = dnnl::inner_product_forward::primitive_desc(prim_desc.get());

@@ -444,6 +449,7 @@ DnnlShapeAgnosticDataPtr DnnlFCPrimitive::createShapeAgnosticData(const FCAttrs&
     const dnnl::memory::desc biaDnnlDesc = MemoryDescUtils::convertToDnnlMemoryDesc(biasDesc)->getDnnlDesc();

     const auto useSparseWeights = attrs.sparseWeights;
+    const auto useSparseWeightsNonZeroSize = attrs.sparseWeightsNonZeroSize;
     const auto primDesc = createPrimitiveDesc(srcDnnlDesc,
                                               weiDnnlDesc,
                                               biaDnnlDesc,

@@ -452,6 +458,7 @@ DnnlShapeAgnosticDataPtr DnnlFCPrimitive::createShapeAgnosticData(const FCAttrs&
                                               context->getEngine(),
                                               context->getImplPriorities(),
                                               useSparseWeights,
+                                              useSparseWeightsNonZeroSize,
                                               useWeightsDecompression);

     const auto weightsDesc = DnnlExtensionUtils::makeDescriptor(primDesc.weights_desc());

@@ -474,7 +481,7 @@ DnnlShapeAgnosticDataPtr DnnlFCPrimitive::createShapeAgnosticData(const FCAttrs&
 static impl_desc_type implTypeFromPrimDesc(const dnnl::primitive_desc& primDesc) {
     const auto implType = parse_impl_name(primDesc.impl_info_str());
     if (implType == ov::intel_cpu::brgemm_avx512_amx &&
-        primDesc.weights_desc().get_format_kind() == memory::format_kind::sparsed) {
+        primDesc.weights_desc().get_format_kind() == memory::format_kind::sparse) {
         return ov::intel_cpu::brgemm_sparse_avx512_amx;
     }

@@ -495,6 +502,7 @@ DnnlFCPrimitive::DnnlFCPrimitive(const Key& key,
                                  engine,
                                  implPriorities,
                                  key.sparseWeights,
+                                 key.sparseWeightsNonZeroSize,
                                  useWeightsDecompressionImpl(key.src->getPrecision(), key.wei->getPrecision(), key.modelType))),
       m_implType(implTypeFromPrimDesc(m_primDesc)),
       m_srcDesc(DnnlExtensionUtils::makeDescriptor(m_primDesc.src_desc())),
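
The fork replaces dnnl::memory::desc().sparse_desc(dims, dt) with a static dnnl::memory::desc::packed(dims, dt, nnz) factory that takes the non-zero count up front, which is why sparseWeightsNonZeroSize is threaded from FCAttrs through the cache Key down to createDescriptorInternal(). A short sketch of the new call shape as used above (values illustrative; nnz stands in for attrs.sparseWeightsNonZeroSize):

// Packed sparse int8 weights descriptor under the forked oneDNN API.
const dnnl::memory::dims weiDims = {1024, 1024};
const size_t nnz = 262144;  // e.g. 75% of the 1M weights are zero
const auto weiDesc = dnnl::memory::desc::packed(weiDims, dnnl::memory::data_type::s8, nnz);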

src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp

Lines changed: 1 addition & 0 deletions

@@ -31,6 +31,7 @@ class DnnlFCPrimitive {
         DnnlMemoryDescCPtr dst;
         dnnl::primitive_attr attr;
         bool sparseWeights;
+        size_t sparseWeightsNonZeroSize;
         Config::ModelType modelType;

         [[nodiscard]] size_t hash() const;
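
Since Key feeds the primitive cache, the new field also needs to take part in hash() and operator==; a sketch of the expected hash addition (assumed, since the method bodies are outside this hunk; hash_combine as used elsewhere in the plugin):

// Inside DnnlFCPrimitive::Key::hash() (assumed placement):
seed = hash_combine(seed, sparseWeights);
seed = hash_combine(seed, sparseWeightsNonZeroSize);  // new field; omitting it from both
                                                      // hash() and operator== would let two
                                                      // configs that differ only in non-zero
                                                      // count reuse the same cached primitive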

src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp

Lines changed: 1 addition & 1 deletion

@@ -577,7 +577,7 @@ DnnlShapeAgnosticDataPtr DnnlMatMulPrimitive::createShapeAgnosticData(const MatM
 static impl_desc_type implTypeFromPrimDesc(const dnnl::primitive_desc& primDesc) {
     const auto implType = parse_impl_name(primDesc.impl_info_str());
     if (implType == ov::intel_cpu::brgemm_avx512_amx &&
-        primDesc.weights_desc().get_format_kind() == memory::format_kind::sparsed) {
+        primDesc.weights_desc().get_format_kind() == memory::format_kind::sparse) {
         return ov::intel_cpu::brgemm_sparse_avx512_amx;
     }

src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp

Lines changed: 1 addition & 0 deletions

@@ -19,6 +19,7 @@ struct FCAttrs {
     bool withBias = false;
     bool weightsNonTransposed = false;
     bool sparseWeights = false;
+    size_t sparseWeightsNonZeroSize = 0;
     uint64_t dynamicQuantizationGroupSize = 0;
     bool constantWeights = true;

src/plugins/intel_cpu/src/nodes/fullyconnected.cpp

Lines changed: 15 additions & 12 deletions

@@ -502,35 +502,35 @@ const std::vector<impl_desc_type>& FullyConnected::getDefaultImplPriority() {
 }

 // @todo Should be moved to the transformations / optimization stages?
-static bool useSparseWeightsDecompression(const NodePtr& weightsInput,
-                                          const ov::element::Type inputType,
-                                          const float sparseWeiDecompressionRate) {
+static std::pair<bool, size_t> useSparseWeightsDecompression(const NodePtr& weightsInput,
+                                                             const ov::element::Type inputType,
+                                                             const float sparseWeiDecompressionRate) {
     const auto minSparseRate = sparseWeiDecompressionRate;

     if (minSparseRate == 1.F) {
-        return false;
+        return {false, 0};
     }

     if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
-        return false;
+        return {false, 0};
     }

     const auto constNode = std::dynamic_pointer_cast<Input>(weightsInput);
     if (!constNode) {
-        return false;
+        return {false, 0};
     }

     const auto weiMemory = constNode->getMemoryPtr();
     OPENVINO_ASSERT(weiMemory, "Cannot get const blob");

     const auto weiDims = weiMemory->getShape().getStaticDims();
     if (weiDims.size() != 2 || weiDims[0] % 64 != 0 || weiDims[1] % 64 != 0) {
-        return false;
+        return {false, 0};
     }

     const auto weightsType = weiMemory->getPrecision();
     if (none_of(inputType, u8, i8) || weightsType != i8) {
-        return false;
+        return {false, 0};
     }

     const auto* const weightsData = weiMemory->getDataAs<const int8_t>();

@@ -558,15 +558,18 @@ static bool useSparseWeightsDecompression(const NodePtr& weightsInput,
            "%, use sparse weights = ",
            sparseRate >= minSparseRate);

-    return sparseRate >= minSparseRate;
+    return {sparseRate >= minSparseRate, elementsCount - zerosCount};
 }

 void FullyConnected::initSupportedPrimitiveDescriptors() {
     attrs.withBias = getOriginalInputPrecisionAtPort(BIAS) != ov::element::dynamic;

-    attrs.sparseWeights = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS)->getParent(),
-                                                        getOriginalInputPrecisionAtPort(DATA),
-                                                        context->getConfig().fcSparseWeiDecompressionRate);
+    auto sparseAttr = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS)->getParent(),
+                                                    getOriginalInputPrecisionAtPort(DATA),
+                                                    context->getConfig().fcSparseWeiDecompressionRate);
+    attrs.sparseWeights = sparseAttr.first;
+    attrs.sparseWeightsNonZeroSize = sparseAttr.second;
+
     attrs.dynamicQuantizationGroupSize = context->getConfig().fcDynamicQuantizationGroupSize;
     attrs.modelType = context->getConfig().modelType;
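
The helper now reports the verdict and the non-zero count in a single pass over the constant int8 weights, so the caller can fill attrs.sparseWeights and attrs.sparseWeightsNonZeroSize together. A minimal self-contained sketch of that decision, mirroring the logic above (names illustrative; the real code also logs the sparse rate):

#include <cstddef>
#include <cstdint>
#include <utility>

// Scan int8 weights once; return {use sparse decompression, non-zero count}.
static std::pair<bool, size_t> sparseVerdict(const int8_t* weights, size_t count, float minSparseRate) {
    size_t zeros = 0;
    for (size_t i = 0; i < count; ++i) {
        zeros += (weights[i] == 0) ? 1 : 0;
    }
    const float sparseRate = static_cast<float>(zeros) / static_cast<float>(count);
    return {sparseRate >= minSparseRate, count - zeros};
}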