Skip to content

Commit 04915be

Browse files
committed
Add vector registers to clobber list to prevent compiler optimization.
The SME-based SGEMMDIRECT kernel uses the vector (z) registers; adding them to a clobber list informs the compiler that these registers are modified, so it must not rely on values held in them.
1 parent f33943d commit 04915be

File tree

1 file changed

+18
-3
lines changed

1 file changed

+18
-3
lines changed

kernel/arm64/sgemm_direct_arm64_sme1.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
#include <stdlib.h>
88
#include <inttypes.h>
99
#include <math.h>
10-
1110
#if defined(HAVE_SME)
1211

1312
/* Function prototypes */
@@ -44,15 +43,31 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\
4443
m_mod = ceil((double)M/(double)vl_elms) * vl_elms;
4544

4645
float *A_mod = (float *) malloc(m_mod*K*sizeof(float));
47-
46+
47+
/* Clobber the predicate (p) and vector (z) registers so the compiler
48+
* reloads values from memory instead of relying on register contents.
49+
* */
50+
asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7",
51+
"p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15",
52+
"z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7",
53+
"z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15",
54+
"z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23",
55+
"z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31");
56+
4857
/* Pre-process the left matrix to make it suitable for
4958
matrix sum of outer-product calculation
5059
*/
5160
sgemm_direct_sme1_preprocess(M, K, A, A_mod);
5261

5362
/* Calculate C = A*B */
5463
sgemm_direct_sme1_2VLx2VL(M, K, N, A_mod, B, R);
55-
64+
65+
asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7",
66+
"p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15",
67+
"z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7",
68+
"z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15",
69+
"z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23",
70+
"z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31");
5671
free(A_mod);
5772
}
5873

0 commit comments

Comments
 (0)