From 95b1f266f48941d9f087fd7caca8dd60d752f169 Mon Sep 17 00:00:00 2001
From: Xiuchuan Zhai
Date: Wed, 19 Nov 2025 16:49:12 +0800
Subject: [PATCH 1/3] test rls-v3.10 ci

---
 .gitmodules                                   |  2 +-
 .../x64/kernel_executors/brgemm_amx.cpp       |  2 +-
 .../memory_desc/dnnl_blocked_memory_desc.cpp  |  8 +++--
 .../src/memory_desc/dnnl_memory_desc.cpp      |  2 +-
 .../src/nodes/common/cpu_convert.cpp          | 34 +++++++++----------
 .../dnnl/dnnl_fullyconnected_primitive.cpp    | 12 +++++--
 .../dnnl/dnnl_fullyconnected_primitive.hpp    |  1 +
 .../executors/dnnl/dnnl_matmul_primitive.cpp  |  2 +-
 .../nodes/executors/fullyconnected_config.hpp |  1 +
 .../intel_cpu/src/nodes/fullyconnected.cpp    | 27 ++++++++-------
 .../src/nodes/kernels/x64/brgemm_kernel.cpp   |  6 ++--
 .../src/nodes/kernels/x64/registers_pool.hpp  |  2 ++
 .../tests/unit/dnnl_memory_desc_test.cpp      |  2 +-
 src/plugins/intel_cpu/thirdparty/onednn       |  2 +-
 14 files changed, 60 insertions(+), 43 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index d9733bc0d844c1..e811fc285b689d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "src/plugins/intel_cpu/thirdparty/onednn"]
 	path = src/plugins/intel_cpu/thirdparty/onednn
-	url = https://github.com/openvinotoolkit/oneDNN.git
+	url = https://github.com/azhai219/oneDNN.git
 	ignore = dirty
 [submodule "thirdparty/xbyak"]
 	path = thirdparty/xbyak
diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_amx.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_amx.cpp
index 1ce1cc749ea4b3..61afa03c57a6cc 100644
--- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_amx.cpp
+++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_amx.cpp
@@ -288,7 +288,7 @@ void BrgemmAMXKernelExecutor::execute_brgemm_copy_a_kernel(
     ctx.current_M_blk = M;
     ctx.zp_b_compensation_buffer_ptr = nullptr;
     ctx.zp_a_compensation_result_ptr = nullptr;
-    ctx.zp_b_neg_value_ptr = nullptr;
+    ctx.zp_b_neg_val_ptr = nullptr;
     ctx.zp_ab_comp_ptr = nullptr;
     ctx.src = src;
     ctx.tr_src = tr_src;
diff --git a/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.cpp
index 10da9c270a84da..5b6531bdf2feaf 100644
--- a/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.cpp
+++ b/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.cpp
@@ -298,7 +298,7 @@ bool DnnlBlockedMemoryDesc::isCompatible(const BlockedMemoryDesc& rhs, CmpMask c
 
 bool DnnlBlockedMemoryDesc::isCompatible(const CpuBlockedMemoryDesc& rhs, CmpMask cmpMask) const {
     dnnl::impl::memory_desc_wrapper wrapped(desc.get());
-    return wrapped.extra().flags == dnnl_memory_extra_flag_none &&
+    return wrapped.extra().flags == dnnl::impl::memory_extra_flags_t::dnnl_memory_extra_flag_none &&
            BlockedMemoryDesc::isCompatibleInternal(rhs, cmpMask);
 }
 
@@ -470,11 +470,13 @@ static dnnl::memory::desc cloneDescWithNewDims(const dnnl::memory::desc& desc,
     dnnl::memory::desc clonedDesc(DnnlExtensionUtils::clone_desc(desc.get()));
 
     array_copy(clonedDesc.get()->dims, mklDims.data(), mklDims.size());
-    dnnl::memory::dims perm(convert_to_vector(order.data(), mklDims.size()));
+    std::vector<int> perm(convert_to_vector(order.data(), mklDims.size()));
     auto innerBlks = clonedDesc.get_inner_blks();
     auto innerIdxs = clonedDesc.get_inner_idxs();
+    std::vector<int> innerBlksInt(innerBlks.begin(), innerBlks.end());
+    std::vector<int> innerIdxsInt(innerIdxs.begin(), innerIdxs.end());
 
-    auto retCode = dnnl::impl::fill_blocked(*clonedDesc.get(), perm, innerBlks, innerIdxs);
+    auto retCode = dnnl::impl::fill_blocked(*clonedDesc.get(), perm, innerBlksInt, innerIdxsInt);
     OPENVINO_ASSERT(retCode == dnnl::impl::status::success,
                     "Can not clone DnnlBlockedMemoryDesc with dims: ",
                     dims2str(dims));
diff --git a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp
index 380fa7e1f20b03..a44ab64464859b 100644
--- a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp
+++ b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp
@@ -99,7 +99,7 @@ dnnl::memory::format_kind DnnlMemoryDesc::getFormatKind() const {
 
 bool DnnlMemoryDesc::hasEmptyExtraData() const {
     dnnl::impl::memory_desc_wrapper wrapped(desc.get());
-    return wrapped.extra().flags == dnnl_memory_extra_flag_none;
+    return wrapped.extra().flags == dnnl::impl::dnnl_memory_extra_flag_none;
 }
 
 bool DnnlMemoryDesc::canComputeMemSizeZeroDims() const {
diff --git a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp
index 40ba976c80cf43..7c26308f97335e 100644
--- a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp
+++ b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp
@@ -165,20 +165,20 @@ class jit_convert_array : public jit_kernel {
           _dst_size(sizeof(dst_t)) {
         const auto type = get_f8_type();
         if (type == f8_type::f8e4m3) {
-            f8_e4m3_emu_ = std::make_shared<fp8_emulation_e4m3_t>(this,
-                                                                  fp8_emu_reserv_1_,
-                                                                  fp8_emu_reserv_2_,
-                                                                  fp8_emu_reserv_3_,
-                                                                  fp8_emu_reserv_4_,
-                                                                  fp8_emu_reserv_5_,
-                                                                  fp8_emu_scratch_);
+            f8_e4m3_emu_ = std::make_shared<fp8_conversion_e4m3_t>(this,
+                                                                   fp8_emu_reserv_1_,
+                                                                   fp8_emu_reserv_2_,
+                                                                   fp8_emu_reserv_3_,
+                                                                   fp8_emu_reserv_4_,
+                                                                   fp8_emu_reserv_5_,
+                                                                   fp8_emu_scratch_);
         } else if (type == f8_type::f8e5m2) {
-            f8_e5m2_emu_ = std::make_shared<fp8_emulation_e5m2_t>(this,
-                                                                  fp8_emu_reserv_1_,
-                                                                  fp8_emu_reserv_2_,
-                                                                  fp8_emu_reserv_3_,
-                                                                  fp8_emu_kmask_aux_,
-                                                                  fp8_emu_scratch_);
+            f8_e5m2_emu_ = std::make_shared<fp8_conversion_e5m2_t>(this,
+                                                                   fp8_emu_reserv_1_,
+                                                                   fp8_emu_reserv_2_,
+                                                                   fp8_emu_reserv_3_,
+                                                                   fp8_emu_kmask_aux_,
+                                                                   fp8_emu_scratch_);
         }
         const bool is_dst_bf16 = std::is_same_v<dst_t, ov::intel_cpu::bfloat16_t>;
         if (is_dst_bf16 && mayiuse(cpu_isa_t::avx512_core)) {
@@ -196,11 +196,11 @@ class jit_convert_array : public jit_kernel {
         return nullptr;
     }
 
-    std::shared_ptr<fp8_emulation_e4m3_t> get_f8_e4m3_emu() const {
+    std::shared_ptr<fp8_conversion_e4m3_t> get_f8_e4m3_emu() const {
         return f8_e4m3_emu_;
     }
 
-    std::shared_ptr<fp8_emulation_e5m2_t> get_f8_e5m2_emu() const {
+    std::shared_ptr<fp8_conversion_e5m2_t> get_f8_e5m2_emu() const {
         return f8_e5m2_emu_;
     }
@@ -213,8 +213,8 @@ class jit_convert_array : public jit_kernel {
     size_t _src_size;
     size_t _dst_size;
 
-    std::shared_ptr<fp8_emulation_e4m3_t> f8_e4m3_emu_;
-    std::shared_ptr<fp8_emulation_e5m2_t> f8_e5m2_emu_;
+    std::shared_ptr<fp8_conversion_e4m3_t> f8_e4m3_emu_;
+    std::shared_ptr<fp8_conversion_e5m2_t> f8_e5m2_emu_;
 
     std::shared_ptr<jit_uni_vcvtneps2bf16> uni_vcvtneps2bf16_;
 
     const Reg64 fp8_emu_scratch_ = rax;
diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp
index 1d502efc6ec9a3..39f126a4769a88 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp
@@ -105,6 +105,7 @@ std::shared_ptr<DnnlFCPrimitive> DnnlFCPrimitive::create(const MemoryArgs& memor
                                 dstDesc,
                                 shapeAgnosticData->m_primAttrs.attr,
                                 attrs.sparseWeights,
+                                attrs.sparseWeightsNonZeroSize,
                                 attrs.modelType};
 
     auto builder = [&context](const Key& dnnlKey) {
@@ -305,6 +306,7 @@ static dnnl::inner_product_forward::primitive_desc createDescriptorInternal(cons
                                                                             const dnnl::primitive_attr& attr,
                                                                             const dnnl::engine& engine,
const bool useSparseWeights, + const size_t useSparseWeightsNonZeroSize, const bool useWeightsDecompression) { const auto normalizedInputDesc = normalizeDescriptor(inputDesc); const auto normalizedOutputDesc = normalizeDescriptor(outputDesc); @@ -331,8 +333,9 @@ static dnnl::inner_product_forward::primitive_desc createDescriptorInternal(cons wdt = memory::data_type::s8; } + // TODO: @Xiuchuan support the native sparse feature of stock oneDNN. const dnnl::memory::desc weightsDesc = - useSparseWeights ? dnnl::memory::desc().sparse_desc(normalizedWeightDesc.get_dims(), wdt) + useSparseWeights ? dnnl::memory::desc::packed(normalizedWeightDesc.get_dims(), wdt, useSparseWeightsNonZeroSize) : dnnl::memory::desc(normalizedWeightDesc.get_dims(), wdt, memory::format_tag::any); return {engine, @@ -352,6 +355,7 @@ static primitive_desc createPrimitiveDesc(const dnnl::memory::desc& inputDesc, const dnnl::engine& engine, const std::vector& implPriorities, const bool useSparseWeights, + const size_t useSparseWeightsNonZeroSize, const bool useWeightsDecompression) { auto prim_desc = createDescriptorInternal(inputDesc, weightDesc, @@ -360,6 +364,7 @@ static primitive_desc createPrimitiveDesc(const dnnl::memory::desc& inputDesc, attr, engine, useSparseWeights, + useSparseWeightsNonZeroSize, useWeightsDecompression); OPENVINO_ASSERT(prim_desc, "Failed to create inner_product primitive descriptor"); auto first_desc = dnnl::inner_product_forward::primitive_desc(prim_desc.get()); @@ -444,6 +449,7 @@ DnnlShapeAgnosticDataPtr DnnlFCPrimitive::createShapeAgnosticData(const FCAttrs& const dnnl::memory::desc biaDnnlDesc = MemoryDescUtils::convertToDnnlMemoryDesc(biasDesc)->getDnnlDesc(); const auto useSparseWeights = attrs.sparseWeights; + const auto useSparseWeightsNonZeroSize = attrs.sparseWeightsNonZeroSize; const auto primDesc = createPrimitiveDesc(srcDnnlDesc, weiDnnlDesc, biaDnnlDesc, @@ -452,6 +458,7 @@ DnnlShapeAgnosticDataPtr DnnlFCPrimitive::createShapeAgnosticData(const FCAttrs& context->getEngine(), context->getImplPriorities(), useSparseWeights, + useSparseWeightsNonZeroSize, useWeightsDecompression); const auto weightsDesc = DnnlExtensionUtils::makeDescriptor(primDesc.weights_desc()); @@ -474,7 +481,7 @@ DnnlShapeAgnosticDataPtr DnnlFCPrimitive::createShapeAgnosticData(const FCAttrs& static impl_desc_type implTypeFromPrimDesc(const dnnl::primitive_desc& primDesc) { const auto implType = parse_impl_name(primDesc.impl_info_str()); if (implType == ov::intel_cpu::brgemm_avx512_amx && - primDesc.weights_desc().get_format_kind() == memory::format_kind::sparsed) { + primDesc.weights_desc().get_format_kind() == memory::format_kind::sparse) { return ov::intel_cpu::brgemm_sparse_avx512_amx; } @@ -495,6 +502,7 @@ DnnlFCPrimitive::DnnlFCPrimitive(const Key& key, engine, implPriorities, key.sparseWeights, + key.sparseWeightsNonZeroSize, useWeightsDecompressionImpl(key.src->getPrecision(), key.wei->getPrecision(), key.modelType))), m_implType(implTypeFromPrimDesc(m_primDesc)), m_srcDesc(DnnlExtensionUtils::makeDescriptor(m_primDesc.src_desc())), diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp index f8638e8b2efa53..788250f1aa0137 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp @@ -31,6 +31,7 @@ class DnnlFCPrimitive { DnnlMemoryDescCPtr dst; 
         dnnl::primitive_attr attr;
         bool sparseWeights;
+        size_t sparseWeightsNonZeroSize;
         Config::ModelType modelType;
 
         [[nodiscard]] size_t hash() const;
diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp
index dddb8dd863abf2..47f88086b067fa 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp
@@ -577,7 +577,7 @@ DnnlShapeAgnosticDataPtr DnnlMatMulPrimitive::createShapeAgnosticData(const MatM
 static impl_desc_type implTypeFromPrimDesc(const dnnl::primitive_desc& primDesc) {
     const auto implType = parse_impl_name(primDesc.impl_info_str());
     if (implType == ov::intel_cpu::brgemm_avx512_amx &&
-        primDesc.weights_desc().get_format_kind() == memory::format_kind::sparsed) {
+        primDesc.weights_desc().get_format_kind() == memory::format_kind::sparse) {
         return ov::intel_cpu::brgemm_sparse_avx512_amx;
     }
 
diff --git a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp
index 0f413463f0cf65..b4635f63226e8f 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp
@@ -16,6 +16,7 @@ namespace ov::intel_cpu {
 struct FCAttrs {
     bool weightsNonTransposed = false;
     bool sparseWeights = false;
+    size_t sparseWeightsNonZeroSize = 0;
     uint64_t dynamicQuantizationGroupSize = 0;
     bool constantWeights = true;
 
diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
index 8b495d66fb662a..014b05bc0b5145 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
@@ -502,22 +502,22 @@ const std::vector<impl_desc_type>& FullyConnected::getDefaultImplPriority() {
 }
 
 // @todo Should be moved to the transformations / optimization stages?
-static bool useSparseWeightsDecompression(const NodePtr& weightsInput,
-                                          const ov::element::Type inputType,
-                                          const float sparseWeiDecompressionRate) {
+static std::pair<bool, size_t> useSparseWeightsDecompression(const NodePtr& weightsInput,
+                                                             const ov::element::Type inputType,
+                                                             const float sparseWeiDecompressionRate) {
     const auto minSparseRate = sparseWeiDecompressionRate;
 
     if (minSparseRate == 1.F) {
-        return false;
+        return {false, 0};
     }
 
     if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
-        return false;
+        return {false, 0};
     }
 
     const auto constNode = std::dynamic_pointer_cast<node::Input>(weightsInput);
     if (!constNode) {
-        return false;
+        return {false, 0};
     }
 
     const auto weiMemory = constNode->getMemoryPtr();
@@ -525,12 +525,12 @@ static bool useSparseWeightsDecompression(const NodePtr& weightsInput,
     const auto weiDims = weiMemory->getShape().getStaticDims();
     if (weiDims.size() != 2 || weiDims[0] % 64 != 0 || weiDims[1] % 64 != 0) {
-        return false;
+        return {false, 0};
     }
 
     const auto weightsType = weiMemory->getPrecision();
     if (none_of(inputType, u8, i8) || weightsType != i8) {
-        return false;
+        return {false, 0};
     }
 
     const auto* const weightsData = weiMemory->getDataAs<int8_t>();
@@ -558,13 +558,16 @@ static bool useSparseWeightsDecompression(const NodePtr& weightsInput,
               "%, use sparse weights = ",
               sparseRate >= minSparseRate);
 
-    return sparseRate >= minSparseRate;
+    return {sparseRate >= minSparseRate, elementsCount - zerosCount};
 }
 
 void FullyConnected::initSupportedPrimitiveDescriptors() {
-    attrs.sparseWeights = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS)->getParent(),
-                                                        getOriginalInputPrecisionAtPort(DATA),
-                                                        context->getConfig().fcSparseWeiDecompressionRate);
+    auto sparseAttr = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS)->getParent(),
+                                                    getOriginalInputPrecisionAtPort(DATA),
+                                                    context->getConfig().fcSparseWeiDecompressionRate);
+    attrs.sparseWeights = sparseAttr.first;
+    attrs.sparseWeightsNonZeroSize = sparseAttr.second;
+
     attrs.dynamicQuantizationGroupSize = context->getConfig().fcDynamicQuantizationGroupSize;
     attrs.modelType = context->getConfig().modelType;
 
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp
index 2cb0ff856d6592..295febf70410be 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp
@@ -493,7 +493,7 @@ void BrgemmKernel::execute_without_scale(bool is_M_tail, void* a, void* b, void*
     ctx.current_M_blk = cur_M_blk;
     ctx.zp_b_compensation_buffer_ptr = nullptr;
     ctx.zp_a_compensation_result_ptr = nullptr;
-    ctx.zp_b_neg_value_ptr = nullptr;
+    ctx.zp_b_neg_val_ptr = nullptr;
     ctx.zp_ab_comp_ptr = nullptr;
     ctx.src = pCopyKernelIn;
     ctx.tr_src = pCopyKernelOut;
@@ -554,7 +554,7 @@ void BrgemmKernel::callBrgemm(brgemmCtx& ctx,
     }
     if (doPostops) {
         brgemm_post_ops_data_t post_ops_data;
-        post_ops_data.scales = bScale;
+        post_ops_data.wei_scales = bScale;
         brgemm_batch_element_t addr_batch;
         addr_batch.ptr.A = pin0;
         addr_batch.ptr.B = pin1;
@@ -620,7 +620,7 @@ void BrgemmKernelQuantized::executeGemm(bool is_M_tail,
     ctx.current_M_blk = cur_M_blk;
     ctx.zp_b_compensation_buffer_ptr = nullptr;
     ctx.zp_a_compensation_result_ptr = nullptr;
-    ctx.zp_b_neg_value_ptr = nullptr;
+    ctx.zp_b_neg_val_ptr = nullptr;
     ctx.zp_ab_comp_ptr = nullptr;
     ctx.src = pCopyKernelIn;
     ctx.tr_src = pCopyKernelOut;
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/registers_pool.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/registers_pool.hpp
index e9a593beaea954..25cb0625290778 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/x64/registers_pool.hpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/registers_pool.hpp
@@ -390,6 +390,8 @@ inline RegistersPool::Ptr RegistersPool::create(dnnl::impl::cpu::x64::cpu_isa_t
     case dnnl::impl::cpu::x64::amx_fp16:
     case dnnl::impl::cpu::x64::avx512_core_amx_fp16:
     case dnnl::impl::cpu::x64::isa_all:
+    case dnnl::impl::cpu::x64::avx10_2_512:
+    case dnnl::impl::cpu::x64::avx10_2_512_amx_2:
         OPENVINO_THROW("Invalid isa argument in RegistersPool::create()");
     }
     OPENVINO_THROW("Invalid isa argument in RegistersPool::create()");
diff --git a/src/plugins/intel_cpu/tests/unit/dnnl_memory_desc_test.cpp b/src/plugins/intel_cpu/tests/unit/dnnl_memory_desc_test.cpp
index 80c2a5833a268c..dcc5a1c34d51d4 100644
--- a/src/plugins/intel_cpu/tests/unit/dnnl_memory_desc_test.cpp
+++ b/src/plugins/intel_cpu/tests/unit/dnnl_memory_desc_test.cpp
@@ -399,7 +399,7 @@ TEST(MakeUndefinedDnnlDesc, extraData) {
         const auto& [fmt, dims] = item;
         memory::desc origin(dims, dataType, fmt);
 
-        origin.get()->extra.flags = dnnl_memory_extra_flag_compensation_conv_s8s8;
+        origin.get()->extra.flags = dnnl::impl::dnnl_memory_extra_flag_compensation_conv_s8s8;
         origin.get()->extra.compensation_mask = 1;
         origin.get()->extra.scale_adjust = 2.0f;
 
diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn
index 31dd31b5bcc410..42dac278351807 160000
--- a/src/plugins/intel_cpu/thirdparty/onednn
+++ b/src/plugins/intel_cpu/thirdparty/onednn
@@ -1 +1 @@
-Subproject commit 31dd31b5bcc410b87ca32764b458846d05c615cc
+Subproject commit 42dac2783518072d2c9c21dfd678f61f75f8587c

From 05ba78e3d40f114c3eb066296b9b5a77a9dfbaf0 Mon Sep 17 00:00:00 2001
From: Xiuchuan Zhai
Date: Mon, 24 Nov 2025 11:30:23 +0800
Subject: [PATCH 2/3] fix avgpool primitive mismatch

---
 src/plugins/intel_cpu/thirdparty/onednn | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn
index 42dac278351807..a5562e40063fc6 160000
--- a/src/plugins/intel_cpu/thirdparty/onednn
+++ b/src/plugins/intel_cpu/thirdparty/onednn
@@ -1 +1 @@
-Subproject commit 42dac2783518072d2c9c21dfd678f61f75f8587c
+Subproject commit a5562e40063fc6d7d5d549c873bb6b5ee31bb57b

From 3f550a0cc8a2ffea55dec63dae42119c4f3bd8c9 Mon Sep 17 00:00:00 2001
From: Xiuchuan Zhai
Date: Tue, 25 Nov 2025 15:53:44 +0800
Subject: [PATCH 3/3] update jit_gemm primitive for small K

---
 .../instances/x64/matmul.cpp                  | 112 +++++++++++++++---
 1 file changed, 93 insertions(+), 19 deletions(-)

diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp
index cca93304e3c835..cefeb78c5c4901 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp
@@ -164,6 +164,19 @@ std::vector<ov::AnyMap> filterAdditionalConfig_Brgemm() {
     return additionalConfig;
 }
 
+std::vector<ov::AnyMap> filterAdditionalConfig_Brgemm_Small_K() {
+#ifndef OV_CPU_WITH_MLAS
+    // FP32 precision is covered by MLAS
+    std::vector<ov::AnyMap> additionalConfig = {
+        ov::AnyMap{/* empty config */}
+    };
+#else
+    std::vector<ov::AnyMap> additionalConfig = {{}};
+#endif
+
+    return additionalConfig;
+}
+
 //For FP32 precision, FC has brgemm avx2 support but Matmul doesn't have brgemm avx2.
 //Need to specify tryBrgAVX2 based on test case.
 std::vector<CPUSpecificParams> filterSpecificParams_Brgemm(bool tryBrgAVX2 = false) {
@@ -177,6 +190,15 @@ std::vector<CPUSpecificParams> filterSpecificParams_Brgemm(bool tryBrgAVX2 = fal
     return specificParams;
 }
 
+std::vector<CPUSpecificParams> filterSpecificParams_Brgemm_Small_K() {
+    std::vector<CPUSpecificParams> specificParams;
+    if (with_cpu_x86_avx512_core()) {
+        specificParams.push_back(CPUSpecificParams{{}, {}, {"jit_gemm"}, "jit_gemm"});
+    }
+
+    return specificParams;
+}
+
 const std::vector<ShapeRelatedParams> IS2D_Brgemm_smoke = {
     // needed by 'IS2D_Brgconv1x1_smoke'
     {static_shapes_to_test_representation({{1, 120}, {120, 120}}), {true, false}},
@@ -278,6 +300,25 @@ const auto testParams2D_Brgemm_FP16_smoke = ::testing::Combine(fullyConnectedPar
 INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_Brgemm, MatMulLayerCPUTest, testParams2D_Brgemm_smoke, MatMulLayerCPUTest::getTestCaseName);
 INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_Brgemm_FP16, MatMulLayerCPUTest, testParams2D_Brgemm_FP16_smoke, MatMulLayerCPUTest::getTestCaseName);
 
+const std::vector<ShapeRelatedParams> IS_brgemm_small_k_smoke = {
+    {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {false, true}},
+    {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, true}},
+};
+
+const auto matMulBrgemmSmallKParams_smoke = ::testing::Combine(::testing::ValuesIn(IS_brgemm_small_k_smoke),
+                                                               ::testing::Values(ElementType::f32),
+                                                               ::testing::Values(ElementType::dynamic),
+                                                               ::testing::Values(ElementType::dynamic),
+                                                               ::testing::Values(utils::InputLayerType::PARAMETER),
+                                                               ::testing::Values(ov::test::utils::DEVICE_CPU),
+                                                               ::testing::ValuesIn(filterAdditionalConfig_Brgemm_Small_K()));
+
+const auto testBrgemmSmallKParams_smoke = ::testing::Combine(matMulBrgemmSmallKParams_smoke,
+                                                             ::testing::Values(MatMulNodeType::MatMul),
+                                                             ::testing::ValuesIn(matmulFusingParams()),
+                                                             ::testing::ValuesIn(filterSpecificParams_Brgemm_Small_K()));
+
+INSTANTIATE_TEST_SUITE_P(smoke_MM_Brgemm_Small_K_Static, MatMulLayerCPUTest, testBrgemmSmallKParams_smoke, MatMulLayerCPUTest::getTestCaseName);
+
 const std::vector<ShapeRelatedParams> IS_brgemm_smoke = {
     {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {false, false}},
     {static_shapes_to_test_representation({{1, 2, 32, 120}, {120, 5}}), {true, false}},
 
     {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {false, false}},
     {static_shapes_to_test_representation({{10, 10, 10}, {10, 10, 10}}), {true, false}},
-
-    {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {false, true}},
-    {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, true}},
 };
 
 const auto matMulBrgemmParams_smoke = ::testing::Combine(::testing::ValuesIn(IS_brgemm_smoke),
@@ -366,21 +404,39 @@ const auto testBrgemmParams_FP16_nightly = ::testing::Combine(matMulBrgemmParams
 
 INSTANTIATE_TEST_SUITE_P(nightly_MM_Brgemm_Static_FP16, MatMulLayerCPUTest, testBrgemmParams_FP16_nightly, MatMulLayerCPUTest::getTestCaseName);
 
+const std::vector<ShapeRelatedParams> IS_Brgemm_Small_K_Dynamic = {
+    {
+        {
+            {{-1, 256}, {{1, 256}}},
+            {{256, 384}, {{256, 384}}}
+        },
+        {false, false}
+    },
+    {
+        {
+            {{-1, -1}, {{55, 12}, {33, 7}}},
+            {{-1, -1}, {{12, 55}, {7, 33}}}
+        },
+        {false, false}
+    },
+};
+
+const auto matMulBrgemmSmallKParamsDynamic = ::testing::Combine(::testing::ValuesIn(IS_Brgemm_Small_K_Dynamic),
+                                                                ::testing::Values(ElementType::f32),
+                                                                ::testing::Values(ElementType::dynamic),
+                                                                ::testing::Values(ElementType::dynamic),
+                                                                ::testing::Values(utils::InputLayerType::PARAMETER),
+                                                                ::testing::Values(ov::test::utils::DEVICE_CPU),
+                                                                ::testing::ValuesIn(filterAdditionalConfig_Brgemm_Small_K()));
+
+const auto testBrgemmSmallKParamsDynamic = ::testing::Combine(matMulBrgemmSmallKParamsDynamic,
+                                                              ::testing::Values(MatMulNodeType::MatMul),
+                                                              ::testing::Values(emptyFusingSpec),
+                                                              ::testing::ValuesIn(filterSpecificParams_Brgemm_Small_K()));
+
+INSTANTIATE_TEST_SUITE_P(smoke_MM_Brgemm_Small_K_Dynamic, MatMulLayerCPUTest, testBrgemmSmallKParamsDynamic, MatMulLayerCPUTest::getTestCaseName);
+
 const std::vector<ShapeRelatedParams> IS_Brgemm_Dynamic = {
-    {
-        {
-            {{-1, 256}, {{1, 256}}},
-            {{256, 384}, {{256, 384}}}
-        },
-        {false, false}
-    },
-    {
-        {
-            {{-1, -1}, {{55, 12}, {33, 7}}},
-            {{-1, -1}, {{12, 55}, {7, 33}}}
-        },
-        {false, false}
-    },
     {
         {
             {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}},
@@ -431,7 +487,7 @@ const auto matMulBrgemmParamsDynamic = ::testing::Combine(::testing::ValuesIn(IS
                                                           ::testing::Values(ElementType::dynamic),
                                                           ::testing::Values(utils::InputLayerType::PARAMETER),
                                                           ::testing::Values(ov::test::utils::DEVICE_CPU),
-                                                          ::testing::ValuesIn(filterAdditionalConfig_Brgemm()));
+                                                          ::testing::ValuesIn(filterAdditionalConfig_Brgemm_Small_K()));
 
 const auto testBrgemmParamsDynamic = ::testing::Combine(matMulBrgemmParamsDynamic,
                                                         ::testing::Values(MatMulNodeType::MatMul),
@@ -455,7 +511,7 @@ const auto testBrgemmParamsDynamic_FP16 = ::testing::Combine(matMulBrgemmParamsD
 
 INSTANTIATE_TEST_SUITE_P(smoke_MM_Brgemm_Dynamic_FP16, MatMulLayerCPUTest, testBrgemmParamsDynamic_FP16, MatMulLayerCPUTest::getTestCaseName);
 
-const std::vector<ShapeRelatedParams> IS_Dynamic_Fusing = {
+const std::vector<ShapeRelatedParams> IS_Dynamic_Fusing_Small_K = {
     {
         { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...}
             {{-1, -1}, {{16, 12}, {33, 7}, {16, 12}}}, // input 0
            {{12, 5}, {{12, 5}, {7, 5}, {12, 5}}} // input 1
        },
        {false, false}
    },
+};
+
+const std::vector<ShapeRelatedParams> IS_Dynamic_Fusing = {
     {
         { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...}
             {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, // input 0
@@ -534,6 +593,21 @@ const auto testParamsDynamicFusing_FP16 = ::testing::Combine(matMulParamsDynamic
 
 INSTANTIATE_TEST_SUITE_P(smoke_MM_Dynamic_Fusing_FP16, MatMulLayerCPUTest, testParamsDynamicFusing_FP16, MatMulLayerCPUTest::getTestCaseName);
 
+const auto matMulParamsBrgemmSmallKDynamicFusing = ::testing::Combine(::testing::ValuesIn(IS_Dynamic_Fusing_Small_K),
+                                                                      ::testing::Values(ElementType::f32),
+                                                                      ::testing::Values(ElementType::dynamic),
+                                                                      ::testing::Values(ElementType::dynamic),
+                                                                      ::testing::Values(utils::InputLayerType::PARAMETER),
+                                                                      ::testing::Values(ov::test::utils::DEVICE_CPU),
+                                                                      ::testing::ValuesIn(filterAdditionalConfig_Brgemm()));
+
+const auto testParamsBrgemmSmallKDynamicFusing = ::testing::Combine(matMulParamsBrgemmSmallKDynamicFusing,
+                                                                    ::testing::Values(MatMulNodeType::MatMul),
+                                                                    ::testing::ValuesIn(matmulFusingParams()),
+                                                                    ::testing::ValuesIn(filterSpecificParams_Brgemm_Small_K()));
+
+INSTANTIATE_TEST_SUITE_P(smoke_MM_Brgemm_Small_K_Dynamic_Fusing, MatMulLayerCPUTest, testParamsBrgemmSmallKDynamicFusing, MatMulLayerCPUTest::getTestCaseName);
+
 const auto matMulParamsBrgemmDynamicFusing = ::testing::Combine(::testing::ValuesIn(IS_Dynamic_Fusing),
                                                                 ::testing::Values(ElementType::f32),
                                                                 ::testing::Values(ElementType::dynamic),
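
Note on the sparse-weights change in PATCH 1/3: the plumbing is easiest to see in isolation. The following is a minimal, standalone C++ sketch of the statistics that useSparseWeightsDecompression now returns as a std::pair<bool, size_t> — the bool selects the AMX sparse path, and the non-zero count ends up in attrs.sparseWeightsNonZeroSize, which the executor passes to dnnl::memory::desc::packed(). The helper name countSparseWeightStats and its raw-buffer interface are illustrative assumptions, not plugin API.

#include <cstddef>
#include <cstdint>
#include <utility>

// Hypothetical helper mirroring the pair returned by the patched
// useSparseWeightsDecompression(): {use sparse decompression, non-zero count}.
static std::pair<bool, size_t> countSparseWeightStats(const int8_t* weights,
                                                      size_t rows,
                                                      size_t cols,
                                                      float minSparseRate) {
    // The patch bails out when the configured rate disables the feature
    // (1.0F) or when the 2D weights are not 64-aligned in both dimensions.
    if (minSparseRate == 1.0F || rows % 64 != 0 || cols % 64 != 0) {
        return {false, 0};
    }
    const size_t elementsCount = rows * cols;
    size_t zerosCount = 0;
    for (size_t i = 0; i < elementsCount; i++) {
        if (weights[i] == 0) {
            zerosCount++;
        }
    }
    const auto sparseRate = static_cast<float>(zerosCount) / static_cast<float>(elementsCount);
    // elementsCount - zerosCount is what the patch stores in
    // attrs.sparseWeightsNonZeroSize for the packed sparse weights desc.
    return {sparseRate >= minSparseRate, elementsCount - zerosCount};
}

The extra size_t is needed because the packed() descriptor used after the oneDNN bump takes the number of non-zero elements up front, whereas the previous sparse_desc() call in the removed line did not.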