From 2e0294d8869fccc6edb01db1c5cdb08925601fd8 Mon Sep 17 00:00:00 2001
From: Adam Siemieniuk
Date: Mon, 13 Jan 2025 15:11:43 +0100
Subject: [PATCH] WIP update vnni matchers

---
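Notes (editor sketch, not part of the commit; git am ignores text between
the "---" marker and the diffstat): the updated checks stop hardcoding the
bf16 VNNI blocking factor and accept either supported factor, since it
depends on the target. For an operand packed along a dimension of size K
with factor v, tensor.pack rewrites that dimension as (K/v) x v. Assuming
v = 2 (the shape the old checks hardcoded; %empty and %arg1 are
illustrative names), the brgemm B operand packs as:

  %empty = tensor.empty() : tensor<32x2x4x2xbf16>
  %pack = tensor.pack %arg1 inner_dims_pos = [1] inner_tiles = [2]
      into %empty : tensor<32x4x4xbf16> -> tensor<32x2x4x2xbf16>

With v = 4 the same operand packs to tensor<32x1x4x4xbf16>, hence
alternations such as {{2|1}} and {{2|4}} in the patterns below.
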
 test/BF16/Integration/tpp-run-splat-shape.mlir |  2 +-
 test/BF16/brgemm-tpp.mlir                      |  8 ++++----
 test/BF16/brgemm-vnni.mlir                     | 16 ++++++++--------
 test/BF16/matmul-untiled-vnni.mlir             |  2 +-
 test/BF16/matmul-vnni.mlir                     | 16 ++++++++--------
 5 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/test/BF16/Integration/tpp-run-splat-shape.mlir b/test/BF16/Integration/tpp-run-splat-shape.mlir
index 935586599..4a865ab09 100644
--- a/test/BF16/Integration/tpp-run-splat-shape.mlir
+++ b/test/BF16/Integration/tpp-run-splat-shape.mlir
@@ -41,7 +41,7 @@ func.func @entry(%arg0: tensor<4x8x8x8xbf16>, %output: tensor<4x8x8x8xbf16>) ->
 // due to compile time packing.
 // CHECK-NOT: memref.global "private" constant @__constant_{{.*}}: memref<8x8xbf16>
 // CHECK-DAG: memref.global "private" constant @__constant_{{.*}}: memref<4x8x8x8xbf16>
-// CHECK-DAG: memref.global "private" constant @__constant_{{.*}}: memref<8x8x4x8x{{[2|4|8]}}xbf16>
+// CHECK-DAG: memref.global "private" constant @__constant_{{.*}}: memref<8x8x{{4|2}}x8x{{2|4}}xbf16>
 // CHECK: xsmm_brgemm_invoke
 // CHECK: xsmm_binary_invoke
 // CHECK: xsmm_unary_invoke
diff --git a/test/BF16/brgemm-tpp.mlir b/test/BF16/brgemm-tpp.mlir
index 78caff1ed..08408e57b 100644
--- a/test/BF16/brgemm-tpp.mlir
+++ b/test/BF16/brgemm-tpp.mlir
@@ -14,10 +14,10 @@ func.func @brgemm(%arg0: tensor<32x4x4xbf16>, %arg1: tensor<32x4x4xbf16>,
 // CHECK-LABEL: brgemm
 // CHECK-SAME: %[[ARG0:.+]]: tensor<32x4x4xbf16>, %[[ARG1:.+]]: tensor<32x4x4xbf16>,
 // CHECK-SAME: %[[ARG2:.+]]: tensor<4x4xbf16>
-// CHECK: %[[VNNI_A:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0], [1], [2, 3]] output_shape [32, 4, 2, 2] : tensor<32x4x4xbf16> into tensor<32x4x2x2xbf16>
-// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x2x4x2xbf16>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG1]] inner_dims_pos = [1] inner_tiles = [2]
-// CHECK-SAME: into %[[EMPTY]] : tensor<32x4x4xbf16> -> tensor<32x2x4x2xbf16>
+// CHECK: %[[VNNI_A:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0], [1], [2, 3]] output_shape{{.*}}: tensor<32x4x4xbf16> into tensor<32x4x{{2|1}}x{{2|4}}xbf16>
+// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x{{2|1}}x4x{{2|4}}xbf16>
+// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG1]] inner_dims_pos = [1] inner_tiles = [{{2|4}}]
+// CHECK-SAME: into %[[EMPTY]] : tensor<32x4x4xbf16> -> tensor<32x{{2|1}}x4x{{2|4}}xbf16>
 // CHECK: %{{.+}} = linalg.generic
 // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP1]], #[[MAP2]]]
 // CHECK-SAME: iterator_types = ["reduction", "parallel", "parallel", "reduction", "reduction"]
diff --git a/test/BF16/brgemm-vnni.mlir b/test/BF16/brgemm-vnni.mlir
index aa6d069d2..5970ebec4 100644
--- a/test/BF16/brgemm-vnni.mlir
+++ b/test/BF16/brgemm-vnni.mlir
@@ -14,11 +14,11 @@ func.func @brgemm(%arg0: tensor<32x4x4xbf16>, %arg1: tensor<32x4x4xbf16>,
 // CHECK-LABEL: brgemm
 // CHECK-SAME: %[[ARG0:.+]]: tensor<32x4x4xbf16>, %[[ARG1:.+]]: tensor<32x4x4xbf16>,
 // CHECK-SAME: %[[ARG2:.+]]: tensor<4x4xbf16>
-// CHECK: %[[VNNI_A:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0], [1], [2, 3]] output_shape [32, 4, 2, 2] : tensor<32x4x4xbf16> into tensor<32x4x2x2xbf16>
-// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x2x4x2xbf16>
+// CHECK: %[[VNNI_A:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0], [1], [2, 3]] output_shape{{.*}}: tensor<32x4x4xbf16> into tensor<32x4x{{2|1}}x{{2|4}}xbf16>
+// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x{{2|1}}x4x{{2|4}}xbf16>
 // CHECK: %[[PACK:.+]] = tensor.pack %[[ARG1]]
-// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [2] into %[[EMPTY]]
-// CHECK-SAME: : tensor<32x4x4xbf16> -> tensor<32x2x4x2xbf16>
+// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [{{2|4}}] into %[[EMPTY]]
+// CHECK-SAME: : tensor<32x4x4xbf16> -> tensor<32x{{2|1}}x4x{{2|4}}xbf16>
 // CHECK: linalg.generic
 // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP1]], #[[MAP2]]]
 // CHECK-SAME: iterator_types = ["reduction", "parallel", "parallel", "reduction", "reduction"]
@@ -69,10 +69,10 @@ func.func @prepacked_matmul(%pack: tensor<4x4x32x32xbf16>, %pack_0: tensor<4x4x3
 // CHECK-SAME: %[[ARG0:.+]]: tensor<4x4x32x32xbf16>, %[[ARG1:.+]]: tensor<4x4x32x32xbf16>,
 // CHECK-SAME: %[[ARG2:.+]]: tensor<4x4x32x32xbf16>
 // CHECK: %[[VNNI_A:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0], [1], [2], [3, 4]]
-// CHECK-SAME: output_shape [4, 4, 32, 16, 2] : tensor<4x4x32x32xbf16> into tensor<4x4x32x16x2xbf16>
-// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<4x4x16x32x2xbf16>
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG1]] inner_dims_pos = [2] inner_tiles = [2] into %[[EMPTY]]
-// CHECK-SAME: : tensor<4x4x32x32xbf16> -> tensor<4x4x16x32x2xbf16>
+// CHECK-SAME: output_shape{{.*}}: tensor<4x4x32x32xbf16> into tensor<4x4x32x{{16|8}}x{{2|4}}xbf16>
+// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<4x4x{{16|8}}x32x{{2|4}}xbf16>
+// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG1]] inner_dims_pos = [2] inner_tiles = [{{2|4}}] into %[[EMPTY]]
+// CHECK-SAME: : tensor<4x4x32x32xbf16> -> tensor<4x4x{{16|8}}x32x{{2|4}}xbf16>
 // CHECK: {{.+}} = linalg.generic
 // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP1]], #[[MAP2]]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction", "reduction"]
diff --git a/test/BF16/matmul-untiled-vnni.mlir b/test/BF16/matmul-untiled-vnni.mlir
index 2609ca90f..7a47d9b07 100644
--- a/test/BF16/matmul-untiled-vnni.mlir
+++ b/test/BF16/matmul-untiled-vnni.mlir
@@ -26,7 +26,7 @@ func.func @blocked_matmul(%arg0: tensor<32x64x4x4xbf16>, %arg1: tensor<128x64x4x
 // CHECK: %[[ARG0:.*]]: tensor<32x64x4x4xbf16>,
 // CHECK: %[[ARG1:.*]]: tensor<128x64x4x4xbf16>,
 // CHECK: %[[ARG2:.*]]: tensor<32x128x4x4xbf16>) -> tensor<32x128x4x4xbf16> {
-// CHECK: %[[PACKBUF:.*]] = tensor.empty() : tensor<128x64x2x4x2xbf16>
+// CHECK: %[[PACKBUF:.*]] = tensor.empty() : tensor<128x64x{{2|1}}x4x{{2|4}}xbf16>
 // CHECK: linalg.generic
 // CHECK: indexing_maps = [#[[MAP]], #[[MAP1]], #[[MAP2]]]
 // CHECK: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction", "reduction"]
diff --git a/test/BF16/matmul-vnni.mlir b/test/BF16/matmul-vnni.mlir
index 2d4a5ffda..24e83a8b3 100644
--- a/test/BF16/matmul-vnni.mlir
+++ b/test/BF16/matmul-vnni.mlir
@@ -25,17 +25,17 @@ func.func @matmul_static(
 // CHECK: %[[PACK_0:.+]] = tensor.pack %[[ARG1]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 32]
 // CHECK-SAME: into %{{.+}} : tensor<512x1024xbf16> -> tensor<32x16x32x32xbf16>
 // CHECK: %[[VNNI_A:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0], [1], [2], [3, 4]]
-// CHECK-SAME: output_shape [8, 16, 32, 16, 2] : tensor<8x16x32x32xbf16> into tensor<8x16x32x16x2xbf16>
-// CHECK: %[[EMPTY_2:.+]] = tensor.empty() : tensor<32x16x16x32x2xbf16>
-// CHECK: %[[PACK_1:.+]] = tensor.pack %[[PACK_0]] inner_dims_pos = [2] inner_tiles = [2] into %[[EMPTY_2]]
-// CHECK-SAME: : tensor<32x16x32x32xbf16> -> tensor<32x16x16x32x2xbf16>
+// CHECK-SAME: output_shape{{.*}}: tensor<8x16x32x32xbf16> into tensor<8x16x32x{{16|8}}x{{2|4}}xbf16>
+// CHECK: %[[EMPTY_2:.+]] = tensor.empty() : tensor<32x16x{{16|8}}x32x{{2|4}}xbf16>
+// CHECK: %[[PACK_1:.+]] = tensor.pack %[[PACK_0]] inner_dims_pos = [2] inner_tiles = [{{2|4}}] into %[[EMPTY_2]]
+// CHECK-SAME: : tensor<32x16x32x32xbf16> -> tensor<32x16x{{16|8}}x32x{{2|4}}xbf16>
 // CHECK: %{{.+}} = scf.forall (%[[ARG3:.+]], %[[ARG4:.+]]) in (8, 32) shared_outs(%[[ARG5:.+]] = %[[ARG2]])
 // CHECK: %[[APPLY:.+]] = affine.apply #[[MAP]](%[[ARG3]])
 // CHECK: %[[APPLY_1:.+]] = affine.apply #[[MAP]](%[[ARG4]])
-// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[VNNI_A]][%[[ARG3]], 0, 0, 0, 0] [1, 16, 32, 16, 2] [1, 1, 1, 1, 1]
-// CHECK-SAME: : tensor<8x16x32x16x2xbf16> to tensor<16x32x16x2xbf16>
-// CHECK: %[[SLICE_2:.+]] = tensor.extract_slice %[[PACK_1]][%[[ARG4]], 0, 0, 0, 0] [1, 16, 16, 32, 2] [1, 1, 1, 1, 1]
-// CHECK-SAME: : tensor<32x16x16x32x2xbf16> to tensor<16x16x32x2xbf16>
+// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[VNNI_A]][%[[ARG3]], 0, 0, 0, 0] [1, 16, 32, {{16|8}}, {{2|4}}] [1, 1, 1, 1, 1]
+// CHECK-SAME: : tensor<8x16x32x{{16|8}}x{{2|4}}xbf16> to tensor<16x32x{{16|8}}x{{2|4}}xbf16>
+// CHECK: %[[SLICE_2:.+]] = tensor.extract_slice %[[PACK_1]][%[[ARG4]], 0, 0, 0, 0] [1, 16, {{16|8}}, 32, {{2|4}}] [1, 1, 1, 1, 1]
+// CHECK-SAME: : tensor<32x16x{{16|8}}x32x{{2|4}}xbf16> to tensor<16x{{16|8}}x32x{{2|4}}xbf16>
 // CHECK: %[[SLICE_3:.+]] = tensor.extract_slice %[[ARG5]][%[[APPLY]], %[[APPLY_1]]] [32, 32] [1, 1]
 // CHECK-SAME: : tensor<256x1024xbf16> to tensor<32x32xbf16>
 // CHECK: %[[GEMM:.+]] = linalg.generic