Skip to content

Commit c734ebe

Browse files
committed
BrgemmExternalRepackingAdjuster: fixed src offsets calculation
1 parent f3f5f6d commit c734ebe

File tree

3 files changed

+45
-12
lines changed

3 files changed

+45
-12
lines changed

src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/external_repacking_adjuster.cpp

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -182,28 +182,48 @@ bool BrgemmExternalRepackingAdjuster::run(const snippets::lowered::LinearIR& lin
182182
}
183183

184184
const auto& config = static_cast<const BrgemmCopyBKernelConfig&>(executor->get_config());
185-
const auto desc = get_desc(planar_shape,
186-
prc,
187-
config.get_wei_K_blk(),
188-
config.get_wei_N_blk(),
189-
config.are_wei_blocked(),
190-
config.is_transposed_B());
185+
const auto dst_desc = get_desc(planar_shape,
186+
prc,
187+
config.get_wei_K_blk(),
188+
config.get_wei_N_blk(),
189+
config.are_wei_blocked(),
190+
config.is_transposed_B());
191+
192+
auto process_src_offsets = [&config](const ov::snippets::VectorDims& cpu_config_offsets) {
193+
const auto orig_size = dnnl_data_type_size(config.get_original_wei_dt());
194+
const auto wei_size = dnnl_data_type_size(config.get_wei_dt());
195+
if (orig_size == wei_size) {
196+
return cpu_config_offsets;
197+
}
198+
// Note: the original cpu config offsets are calculated for the repacked output.
199+
// If the repacked output precision differs from the original weights precision,
200+
// the offsets must be rescaled so that they point to the correct positions in the original memory.
201+
ov::snippets::VectorDims recalculated_offsets(cpu_config_offsets.size());
202+
std::transform(cpu_config_offsets.begin(),
203+
cpu_config_offsets.end(),
204+
recalculated_offsets.begin(),
205+
[orig_size, wei_size](size_t offset) {
206+
return offset / wei_size * orig_size;
207+
});
208+
return recalculated_offsets;
209+
};
191210

192211
// Save original input offsets for input before repacking.
193212
// If the shape has not been changed, it means that we already created `InputRepacker` for this input
194213
// on the previous pass call, and now `cpu_config->io_data_offsets[i]` contains offsets that are not for the original input -
195214
// they were updated for blocked shapes (or zeroed) during the previous initialization, so we cannot use them as original
196215
// offsets.
197-
const auto in_offsets =
198-
shape == cpu_config->latest_shapes[i] ? input_repacker.in_offsets() : cpu_config->io_data_offsets[i];
216+
const auto in_offsets = shape == cpu_config->latest_shapes[i]
217+
? input_repacker.in_offsets()
218+
: process_src_offsets(cpu_config->io_data_offsets[i]);
199219

200220
// In parallel case Kernel should not add offsets to repacked inputs because
201221
// they will be applied during repacking in execution stage
202222
if (is_impl_parallel) {
203223
auto& offsets = cpu_config->io_data_offsets[i];
204224
std::fill(offsets.begin(), offsets.end(), 0);
205225
} else {
206-
const auto blocked_dims = desc->getBlockDims();
226+
const auto blocked_dims = dst_desc->getBlockDims();
207227
const auto inner_blocks_num = blocked_dims.size() - planar_shape.size();
208228
const auto rank = in_offsets.size() + inner_blocks_num; // to align with src offsets rank
209229
OPENVINO_ASSERT(rank >= blocked_dims.size(), "Incorrect target rank for dst offsets");
@@ -218,7 +238,7 @@ bool BrgemmExternalRepackingAdjuster::run(const snippets::lowered::LinearIR& lin
218238
}
219239
const auto out_offsets = cpu_config->io_data_offsets[i];
220240

221-
input_repacker = InputRepacker(p.second->get_kernel(), desc, in_offsets, out_offsets);
241+
input_repacker = InputRepacker(p.second->get_kernel(), dst_desc, in_offsets, out_offsets);
222242
}
223243

224244
return true;

src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,17 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMul, MatMul,
8181
::testing::Values(CPUTestUtils::empty_plugin_config)),
8282
MatMul::getTestCaseName);
8383

84+
INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulEnforceBF16, MatMul,
85+
::testing::Combine(
86+
::testing::ValuesIn(input_shapes),
87+
::testing::ValuesIn(precision_f32(2)),
88+
::testing::Values(MatMulType::MatMul),
89+
::testing::Values(1), // MatMul
90+
::testing::Values(1), // Tokenized MatMul
91+
::testing::Values(ov::test::utils::DEVICE_CPU),
92+
::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)),
93+
MatMul::getTestCaseName);
94+
8495
std::vector<std::vector<ov::test::InputShape>> transpose_b_shapes{
8596
{ {{}, {{3, 3, 64, 64}}}, {{}, {{3, 3, 64, 64}}} },
8697
{ {{}, {{1, 1, 32, 128}}}, {{}, {{1, 1, 64, 128}}} },

src/tests/functional/plugin/shared/src/snippets/matmul.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,11 @@ void MatMulBase::generate_inputs(const std::vector<ov::Shape>& targetInputStatic
5555
const auto& model_input = model_inputs[i];
5656
ov::Tensor tensor;
5757
ov::test::utils::InputGenerateData in_data;
58+
const bool bf16_precision =
59+
configuration.at(ov::hint::inference_precision.name()).as<ov::element::Type>() == ov::element::bf16 ||
60+
model_input.get_element_type() == ov::element::bf16;
5861
// To avoid big relative errors in the vicinity of zero, only non-negative values are generated for bf16 precision
59-
in_data.start_from = model_input.get_element_type() == ov::element::bf16 ? 0 : -1;
62+
in_data.start_from = bf16_precision ? 0 : -1;
6063
in_data.range = 5;
6164
in_data.resolution = 256;
6265
tensor =
@@ -65,7 +68,6 @@ void MatMulBase::generate_inputs(const std::vector<ov::Shape>& targetInputStatic
6568
}
6669
}
6770

68-
6971
void MatMul::SetUp() {
7072
const auto& [input_shapes,
7173
elem_types,

0 commit comments

Comments
 (0)