2 changes: 1 addition & 1 deletion .gitmodules
@@ -1,6 +1,6 @@
 [submodule "src/plugins/intel_cpu/thirdparty/onednn"]
 	path = src/plugins/intel_cpu/thirdparty/onednn
-	url = https://github.com/openvinotoolkit/oneDNN.git
+	url = https://github.com/azhai219/oneDNN.git
 	ignore = dirty
 [submodule "thirdparty/xbyak"]
 	path = thirdparty/xbyak

@@ -288,7 +288,7 @@ void BrgemmAMXKernelExecutor::execute_brgemm_copy_a_kernel(
     ctx.current_M_blk = M;
     ctx.zp_b_compensation_buffer_ptr = nullptr;
     ctx.zp_a_compensation_result_ptr = nullptr;
-    ctx.zp_b_neg_value_ptr = nullptr;
+    ctx.zp_b_neg_val_ptr = nullptr;
     ctx.zp_ab_comp_ptr = nullptr;
     ctx.src = src;
     ctx.tr_src = tr_src;

@@ -298,7 +298,7 @@ bool DnnlBlockedMemoryDesc::isCompatible(const BlockedMemoryDesc& rhs, CmpMask c

 bool DnnlBlockedMemoryDesc::isCompatible(const CpuBlockedMemoryDesc& rhs, CmpMask cmpMask) const {
     dnnl::impl::memory_desc_wrapper wrapped(desc.get());
-    return wrapped.extra().flags == dnnl_memory_extra_flag_none &&
+    return wrapped.extra().flags == dnnl::impl::memory_extra_flags_t::dnnl_memory_extra_flag_none &&
            BlockedMemoryDesc::isCompatibleInternal(rhs, cmpMask);
 }

@@ -470,11 +470,13 @@ static dnnl::memory::desc cloneDescWithNewDims(const dnnl::memory::desc& desc,
     dnnl::memory::desc clonedDesc(DnnlExtensionUtils::clone_desc(desc.get()));

     array_copy(clonedDesc.get()->dims, mklDims.data(), mklDims.size());
-    dnnl::memory::dims perm(convert_to_vector<dnnl::memory::dim, size_t>(order.data(), mklDims.size()));
+    std::vector<int> perm(convert_to_vector<int, size_t>(order.data(), mklDims.size()));
     auto innerBlks = clonedDesc.get_inner_blks();
     auto innerIdxs = clonedDesc.get_inner_idxs();
+    std::vector<int> innerBlksInt(innerBlks.begin(), innerBlks.end());
+    std::vector<int> innerIdxsInt(innerIdxs.begin(), innerIdxs.end());

-    auto retCode = dnnl::impl::fill_blocked(*clonedDesc.get(), perm, innerBlks, innerIdxs);
+    auto retCode = dnnl::impl::fill_blocked(*clonedDesc.get(), perm, innerBlksInt, innerIdxsInt);
     OPENVINO_ASSERT(retCode == dnnl::impl::status::success,
                     "Can not clone DnnlBlockedMemoryDesc with dims: ",
                     dims2str(dims));
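
Note: the perm/inner-block handling above adapts to a fill_blocked() signature in the forked oneDNN that takes std::vector<int> instead of dnnl::memory::dims (std::vector<int64_t>), so the arrays are copied element-wise into int vectors first. A minimal sketch of that narrowing step, with to_int_vector as an illustrative helper rather than code from this PR:

#include <cstdint>
#include <vector>

// Element-wise narrowing from oneDNN's 64-bit dims to the 32-bit vectors
// accepted by the updated fill_blocked() signature; the range constructor
// performs the per-element int64_t -> int conversion, mirroring what the
// diff does with innerBlksInt and innerIdxsInt.
std::vector<int> to_int_vector(const std::vector<std::int64_t>& dims) {
    return std::vector<int>(dims.begin(), dims.end());
}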

@@ -99,7 +99,7 @@ dnnl::memory::format_kind DnnlMemoryDesc::getFormatKind() const {

 bool DnnlMemoryDesc::hasEmptyExtraData() const {
     dnnl::impl::memory_desc_wrapper wrapped(desc.get());
-    return wrapped.extra().flags == dnnl_memory_extra_flag_none;
+    return wrapped.extra().flags == dnnl::impl::dnnl_memory_extra_flag_none;
 }

 bool DnnlMemoryDesc::canComputeMemSizeZeroDims() const {
34 changes: 17 additions & 17 deletions src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp

@@ -165,20 +165,20 @@ class jit_convert_array : public jit_kernel {
           _dst_size(sizeof(dst_t)) {
         const auto type = get_f8_type<src_t, dst_t>();
         if (type == f8_type::f8e4m3) {
-            f8_e4m3_emu_ = std::make_shared<fp8_emulation_e4m3_t>(this,
-                                                                  fp8_emu_reserv_1_,
-                                                                  fp8_emu_reserv_2_,
-                                                                  fp8_emu_reserv_3_,
-                                                                  fp8_emu_reserv_4_,
-                                                                  fp8_emu_reserv_5_,
-                                                                  fp8_emu_scratch_);
+            f8_e4m3_emu_ = std::make_shared<fp8_conversion_e4m3_t>(this,
+                                                                   fp8_emu_reserv_1_,
+                                                                   fp8_emu_reserv_2_,
+                                                                   fp8_emu_reserv_3_,
+                                                                   fp8_emu_reserv_4_,
+                                                                   fp8_emu_reserv_5_,
+                                                                   fp8_emu_scratch_);
         } else if (type == f8_type::f8e5m2) {
-            f8_e5m2_emu_ = std::make_shared<fp8_emulation_e5m2_t>(this,
-                                                                  fp8_emu_reserv_1_,
-                                                                  fp8_emu_reserv_2_,
-                                                                  fp8_emu_reserv_3_,
-                                                                  fp8_emu_kmask_aux_,
-                                                                  fp8_emu_scratch_);
+            f8_e5m2_emu_ = std::make_shared<fp8_conversion_e5m2_t>(this,
+                                                                   fp8_emu_reserv_1_,
+                                                                   fp8_emu_reserv_2_,
+                                                                   fp8_emu_reserv_3_,
+                                                                   fp8_emu_kmask_aux_,
+                                                                   fp8_emu_scratch_);
         }
         const bool is_dst_bf16 = std::is_same_v<dst_t, ov::intel_cpu::bfloat16_t>;
         if (is_dst_bf16 && mayiuse(cpu_isa_t::avx512_core)) {
@@ -196,11 +196,11 @@ class jit_convert_array : public jit_kernel {
         return nullptr;
     }

-    std::shared_ptr<fp8_emulation_e4m3_t> get_f8_e4m3_emu() const {
+    std::shared_ptr<fp8_conversion_e4m3_t> get_f8_e4m3_emu() const {
         return f8_e4m3_emu_;
     }

-    std::shared_ptr<fp8_emulation_e5m2_t> get_f8_e5m2_emu() const {
+    std::shared_ptr<fp8_conversion_e5m2_t> get_f8_e5m2_emu() const {
         return f8_e5m2_emu_;
     }
@@ -213,8 +213,8 @@ class jit_convert_array : public jit_kernel {
     size_t _src_size;
     size_t _dst_size;

-    std::shared_ptr<fp8_emulation_e4m3_t> f8_e4m3_emu_;
-    std::shared_ptr<fp8_emulation_e5m2_t> f8_e5m2_emu_;
+    std::shared_ptr<fp8_conversion_e4m3_t> f8_e4m3_emu_;
+    std::shared_ptr<fp8_conversion_e5m2_t> f8_e5m2_emu_;
     std::shared_ptr<jit_uni_vcvtneps2bf16> uni_vcvtneps2bf16_;

     const Reg64 fp8_emu_scratch_ = rax;

@@ -105,6 +105,7 @@ std::shared_ptr<DnnlFCPrimitive> DnnlFCPrimitive::create(const MemoryArgs& memor
                       dstDesc,
                       shapeAgnosticData->m_primAttrs.attr,
                       attrs.sparseWeights,
+                      attrs.sparseWeightsNonZeroSize,
                       attrs.modelType};

     auto builder = [&context](const Key& dnnlKey) {
@@ -305,6 +306,7 @@ static dnnl::inner_product_forward::primitive_desc createDescriptorInternal(cons
                                           const dnnl::primitive_attr& attr,
                                           const dnnl::engine& engine,
                                           const bool useSparseWeights,
+                                          const size_t useSparseWeightsNonZeroSize,
                                           const bool useWeightsDecompression) {
     const auto normalizedInputDesc = normalizeDescriptor(inputDesc);
     const auto normalizedOutputDesc = normalizeDescriptor(outputDesc);
@@ -331,8 +333,9 @@
         wdt = memory::data_type::s8;
     }

+    // TODO: @Xiuchuan support the native sparse feature of stock oneDNN.
     const dnnl::memory::desc weightsDesc =
-        useSparseWeights ? dnnl::memory::desc().sparse_desc(normalizedWeightDesc.get_dims(), wdt)
+        useSparseWeights ? dnnl::memory::desc::packed(normalizedWeightDesc.get_dims(), wdt, useSparseWeightsNonZeroSize)
                          : dnnl::memory::desc(normalizedWeightDesc.get_dims(), wdt, memory::format_tag::any);

     return {engine,
@@ -352,6 +355,7 @@ static primitive_desc createPrimitiveDesc(const dnnl::memory::desc& inputDesc,
                                           const dnnl::engine& engine,
                                           const std::vector<impl_desc_type>& implPriorities,
                                           const bool useSparseWeights,
+                                          const size_t useSparseWeightsNonZeroSize,
                                           const bool useWeightsDecompression) {
     auto prim_desc = createDescriptorInternal(inputDesc,
                                               weightDesc,
@@ -360,6 +364,7 @@ static primitive_desc createPrimitiveDesc(const dnnl::memory::desc& inputDesc,
                                               attr,
                                               engine,
                                               useSparseWeights,
+                                              useSparseWeightsNonZeroSize,
                                               useWeightsDecompression);
     OPENVINO_ASSERT(prim_desc, "Failed to create inner_product primitive descriptor");
     auto first_desc = dnnl::inner_product_forward::primitive_desc(prim_desc.get());
@@ -444,6 +449,7 @@ DnnlShapeAgnosticDataPtr DnnlFCPrimitive::createShapeAgnosticData(const FCAttrs&
     const dnnl::memory::desc biaDnnlDesc = MemoryDescUtils::convertToDnnlMemoryDesc(biasDesc)->getDnnlDesc();

     const auto useSparseWeights = attrs.sparseWeights;
+    const auto useSparseWeightsNonZeroSize = attrs.sparseWeightsNonZeroSize;
     const auto primDesc = createPrimitiveDesc(srcDnnlDesc,
                                               weiDnnlDesc,
                                               biaDnnlDesc,
@@ -452,6 +458,7 @@ DnnlShapeAgnosticDataPtr DnnlFCPrimitive::createShapeAgnosticData(const FCAttrs&
                                               context->getEngine(),
                                               context->getImplPriorities(),
                                               useSparseWeights,
+                                              useSparseWeightsNonZeroSize,
                                               useWeightsDecompression);

     const auto weightsDesc = DnnlExtensionUtils::makeDescriptor(primDesc.weights_desc());
@@ -474,7 +481,7 @@ DnnlShapeAgnosticDataPtr DnnlFCPrimitive::createShapeAgnosticData(const FCAttrs&
 static impl_desc_type implTypeFromPrimDesc(const dnnl::primitive_desc& primDesc) {
     const auto implType = parse_impl_name(primDesc.impl_info_str());
     if (implType == ov::intel_cpu::brgemm_avx512_amx &&
-        primDesc.weights_desc().get_format_kind() == memory::format_kind::sparsed) {
+        primDesc.weights_desc().get_format_kind() == memory::format_kind::sparse) {
         return ov::intel_cpu::brgemm_sparse_avx512_amx;
     }

@@ -495,6 +502,7 @@ DnnlFCPrimitive::DnnlFCPrimitive(const Key& key,
                                  engine,
                                  implPriorities,
                                  key.sparseWeights,
+                                 key.sparseWeightsNonZeroSize,
                                  useWeightsDecompressionImpl(key.src->getPrecision(), key.wei->getPrecision(), key.modelType))),
       m_implType(implTypeFromPrimDesc(m_primDesc)),
       m_srcDesc(DnnlExtensionUtils::makeDescriptor(m_primDesc.src_desc())),
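
Note: the weights-descriptor choice in createDescriptorInternal() maps onto oneDNN's experimental sparse memory API, where a packed descriptor is built from the dims, the data type, and the non-zero count collected while scanning the weights blob. A minimal sketch of that selection, assuming a oneDNN build with the experimental sparse API (memory::desc::packed) available; make_weights_desc is an illustrative helper, not code from this PR:

#include <oneapi/dnnl/dnnl.hpp>

using dnnl::memory;

// Packed sparse descriptor (carrying the non-zero count) when sparse weights
// decompression is enabled; otherwise a dense descriptor with format_tag::any
// so the primitive selects the blocked layout itself.
memory::desc make_weights_desc(const memory::dims& wei_dims,
                               memory::data_type wdt,
                               bool use_sparse,
                               memory::dim nnz) {
    return use_sparse ? memory::desc::packed(wei_dims, wdt, nnz)
                      : memory::desc(wei_dims, wdt, memory::format_tag::any);
}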

@@ -31,6 +31,7 @@ class DnnlFCPrimitive {
         DnnlMemoryDescCPtr dst;
         dnnl::primitive_attr attr;
         bool sparseWeights;
+        size_t sparseWeightsNonZeroSize;
         Config::ModelType modelType;

         [[nodiscard]] size_t hash() const;

@@ -577,7 +577,7 @@ DnnlShapeAgnosticDataPtr DnnlMatMulPrimitive::createShapeAgnosticData(const MatM
 static impl_desc_type implTypeFromPrimDesc(const dnnl::primitive_desc& primDesc) {
     const auto implType = parse_impl_name(primDesc.impl_info_str());
     if (implType == ov::intel_cpu::brgemm_avx512_amx &&
-        primDesc.weights_desc().get_format_kind() == memory::format_kind::sparsed) {
+        primDesc.weights_desc().get_format_kind() == memory::format_kind::sparse) {
         return ov::intel_cpu::brgemm_sparse_avx512_amx;
     }


@@ -16,6 +16,7 @@ namespace ov::intel_cpu {
 struct FCAttrs {
     bool weightsNonTransposed = false;
     bool sparseWeights = false;
+    size_t sparseWeightsNonZeroSize = 0;
     uint64_t dynamicQuantizationGroupSize = 0;
     bool constantWeights = true;

27 changes: 15 additions & 12 deletions src/plugins/intel_cpu/src/nodes/fullyconnected.cpp

@@ -502,35 +502,35 @@ const std::vector<impl_desc_type>& FullyConnected::getDefaultImplPriority() {
 }

 // @todo Should be moved to the transformations / optimization stages?
-static bool useSparseWeightsDecompression(const NodePtr& weightsInput,
-                                          const ov::element::Type inputType,
-                                          const float sparseWeiDecompressionRate) {
+static std::pair<bool, size_t> useSparseWeightsDecompression(const NodePtr& weightsInput,
+                                                             const ov::element::Type inputType,
+                                                             const float sparseWeiDecompressionRate) {
     const auto minSparseRate = sparseWeiDecompressionRate;

     if (minSparseRate == 1.F) {
-        return false;
+        return {false, 0};
     }

     if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
-        return false;
+        return {false, 0};
     }

     const auto constNode = std::dynamic_pointer_cast<Input>(weightsInput);
     if (!constNode) {
-        return false;
+        return {false, 0};
     }

     const auto weiMemory = constNode->getMemoryPtr();
     OPENVINO_ASSERT(weiMemory, "Cannot get const blob");

     const auto weiDims = weiMemory->getShape().getStaticDims();
     if (weiDims.size() != 2 || weiDims[0] % 64 != 0 || weiDims[1] % 64 != 0) {
-        return false;
+        return {false, 0};
     }

     const auto weightsType = weiMemory->getPrecision();
     if (none_of(inputType, u8, i8) || weightsType != i8) {
-        return false;
+        return {false, 0};
     }

     const auto* const weightsData = weiMemory->getDataAs<const int8_t>();
@@ -558,13 +558,16 @@ static bool useSparseWeightsDecompression(const NodePtr& weightsInput,
                    "%, use sparse weights = ",
                    sparseRate >= minSparseRate);

-    return sparseRate >= minSparseRate;
+    return {sparseRate >= minSparseRate, elementsCount - zerosCount};
 }

 void FullyConnected::initSupportedPrimitiveDescriptors() {
-    attrs.sparseWeights = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS)->getParent(),
-                                                        getOriginalInputPrecisionAtPort(DATA),
-                                                        context->getConfig().fcSparseWeiDecompressionRate);
+    auto sparseAttr = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS)->getParent(),
+                                                    getOriginalInputPrecisionAtPort(DATA),
+                                                    context->getConfig().fcSparseWeiDecompressionRate);
+    attrs.sparseWeights = sparseAttr.first;
+    attrs.sparseWeightsNonZeroSize = sparseAttr.second;

     attrs.dynamicQuantizationGroupSize = context->getConfig().fcDynamicQuantizationGroupSize;
     attrs.modelType = context->getConfig().modelType;
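
Note: the sparsity decision above reduces to counting zero elements in the int8 weights blob; the non-zero count is what the packed sparse descriptor later consumes. A self-contained sketch of that computation (sparse_decision is an illustrative name, not the node's actual helper):

#include <cstddef>
#include <cstdint>
#include <utility>

// Count zero elements in the int8 weights blob, derive the sparse rate, and
// return both the use-sparse verdict and the non-zero count, matching the
// {sparseRate >= minSparseRate, elementsCount - zerosCount} result above.
std::pair<bool, std::size_t> sparse_decision(const std::int8_t* weights,
                                             std::size_t elements_count,
                                             float min_sparse_rate) {
    if (elements_count == 0) {
        return {false, 0};  // avoid division by zero on an empty blob
    }
    std::size_t zeros_count = 0;
    for (std::size_t i = 0; i < elements_count; ++i) {
        zeros_count += (weights[i] == 0) ? 1 : 0;
    }
    const float sparse_rate =
        static_cast<float>(zeros_count) / static_cast<float>(elements_count);
    return {sparse_rate >= min_sparse_rate, elements_count - zeros_count};
}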

@@ -493,7 +493,7 @@ void BrgemmKernel::execute_without_scale(bool is_M_tail, void* a, void* b, void*
     ctx.current_M_blk = cur_M_blk;
     ctx.zp_b_compensation_buffer_ptr = nullptr;
     ctx.zp_a_compensation_result_ptr = nullptr;
-    ctx.zp_b_neg_value_ptr = nullptr;
+    ctx.zp_b_neg_val_ptr = nullptr;
     ctx.zp_ab_comp_ptr = nullptr;
     ctx.src = pCopyKernelIn;
     ctx.tr_src = pCopyKernelOut;
@@ -554,7 +554,7 @@ void BrgemmKernel::callBrgemm(brgemmCtx& ctx,
     }
     if (doPostops) {
         brgemm_post_ops_data_t post_ops_data;
-        post_ops_data.scales = bScale;
+        post_ops_data.wei_scales = bScale;
         brgemm_batch_element_t addr_batch;
         addr_batch.ptr.A = pin0;
         addr_batch.ptr.B = pin1;
@@ -620,7 +620,7 @@ void BrgemmKernelQuantized::executeGemm(bool is_M_tail,
     ctx.current_M_blk = cur_M_blk;
     ctx.zp_b_compensation_buffer_ptr = nullptr;
     ctx.zp_a_compensation_result_ptr = nullptr;
-    ctx.zp_b_neg_value_ptr = nullptr;
+    ctx.zp_b_neg_val_ptr = nullptr;
     ctx.zp_ab_comp_ptr = nullptr;
     ctx.src = pCopyKernelIn;
     ctx.tr_src = pCopyKernelOut;

@@ -390,6 +390,8 @@ inline RegistersPool::Ptr RegistersPool::create(dnnl::impl::cpu::x64::cpu_isa_t
     case dnnl::impl::cpu::x64::amx_fp16:
     case dnnl::impl::cpu::x64::avx512_core_amx_fp16:
     case dnnl::impl::cpu::x64::isa_all:
+    case dnnl::impl::cpu::x64::avx10_2_512:
+    case dnnl::impl::cpu::x64::avx10_2_512_amx_2:
         OPENVINO_THROW("Invalid isa argument in RegistersPool::create()");
     }
     OPENVINO_THROW("Invalid isa argument in RegistersPool::create()");

@@ -399,7 +399,7 @@ TEST(MakeUndefinedDnnlDesc, extraData) {
     const auto& [fmt, dims] = item;
     memory::desc origin(dims, dataType, fmt);

-    origin.get()->extra.flags = dnnl_memory_extra_flag_compensation_conv_s8s8;
+    origin.get()->extra.flags = dnnl::impl::dnnl_memory_extra_flag_compensation_conv_s8s8;
     origin.get()->extra.compensation_mask = 1;
     origin.get()->extra.scale_adjust = 2.0f;

2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/thirdparty/onednn
Submodule onednn updated 2599 files