@@ -182,28 +182,48 @@ bool BrgemmExternalRepackingAdjuster::run(const snippets::lowered::LinearIR& lin
182182 }
183183
184184 const auto & config = static_cast <const BrgemmCopyBKernelConfig&>(executor->get_config ());
185- const auto desc = get_desc (planar_shape,
186- prc,
187- config.get_wei_K_blk (),
188- config.get_wei_N_blk (),
189- config.are_wei_blocked (),
190- config.is_transposed_B ());
185+ const auto dst_desc = get_desc (planar_shape,
186+ prc,
187+ config.get_wei_K_blk (),
188+ config.get_wei_N_blk (),
189+ config.are_wei_blocked (),
190+ config.is_transposed_B ());
191+
192+ auto process_src_offsets = [&config](const ov::snippets::VectorDims& cpu_config_offsets) {
193+ const auto orig_size = dnnl_data_type_size (config.get_original_wei_dt ());
194+ const auto wei_size = dnnl_data_type_size (config.get_wei_dt ());
195+ if (orig_size == wei_size) {
196+ return cpu_config_offsets;
197+ }
198+ // Note: the original cpu config offsets are calculated for the repacked output.
199+ // If the repacked output precision is not equal to the original weights precision,
200+ // the offsets must be scaled accordingly so that they point to the correct positions in the original memory.
201+ ov::snippets::VectorDims recalculated_offsets (cpu_config_offsets.size ());
202+ std::transform (cpu_config_offsets.begin (),
203+ cpu_config_offsets.end (),
204+ recalculated_offsets.begin (),
205+ [orig_size, wei_size](size_t offset) {
206+ return offset / wei_size * orig_size;
207+ });
208+ return recalculated_offsets;
209+ };
191210
192211 // Save the original offsets of the input, i.e. its offsets before repacking.
193212 // If the shape has not changed, an `InputRepacker` has already been created for this input
194213 // on a previous pass call, and `cpu_config->io_data_offsets[i]` no longer contains offsets for the original input:
195214 // they were updated for blocked shapes (or zeroed) during the previous initialization, so we cannot use them as
196215 // original offsets.
197- const auto in_offsets =
198- shape == cpu_config->latest_shapes [i] ? input_repacker.in_offsets () : cpu_config->io_data_offsets [i];
216+ const auto in_offsets = shape == cpu_config->latest_shapes [i]
217+ ? input_repacker.in_offsets ()
218+ : process_src_offsets (cpu_config->io_data_offsets [i]);
199219
200220 // In the parallel case, the kernel should not add offsets to repacked inputs because
201221 // they will be applied during repacking at the execution stage.
202222 if (is_impl_parallel) {
203223 auto & offsets = cpu_config->io_data_offsets [i];
204224 std::fill (offsets.begin (), offsets.end (), 0 );
205225 } else {
206- const auto blocked_dims = desc ->getBlockDims ();
226+ const auto blocked_dims = dst_desc ->getBlockDims ();
207227 const auto inner_blocks_num = blocked_dims.size () - planar_shape.size ();
208228 const auto rank = in_offsets.size () + inner_blocks_num; // to align with src offsets rank
209229 OPENVINO_ASSERT (rank >= blocked_dims.size (), " Incorrect target rank for dst offsets" );
@@ -218,7 +238,7 @@ bool BrgemmExternalRepackingAdjuster::run(const snippets::lowered::LinearIR& lin
218238 }
219239 const auto out_offsets = cpu_config->io_data_offsets [i];
220240
221- input_repacker = InputRepacker (p.second ->get_kernel (), desc , in_offsets, out_offsets);
241+ input_repacker = InputRepacker (p.second ->get_kernel (), dst_desc , in_offsets, out_offsets);
222242 }
223243
224244 return true ;
0 commit comments