Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,9 @@ ifeq ($(BUILD_BFLOAT16),1)
BF3= test_bgemm
B3 = test_sbgemm
endif
# When BUILD_HFLOAT16 is 1, H3 names the SHGEMM comparison test so that it
# becomes one of the prerequisites of the level3 target.
ifeq ($(BUILD_HFLOAT16),1)
H3 = test_shgemm
endif
ifeq ($(BUILD_SINGLE),1)
S3=sblat3
endif
Expand All @@ -257,9 +260,9 @@ endif


ifeq ($(SUPPORT_GEMM3M),1)
level3: $(BF3) $(B3) $(S3) $(D3) $(C3) $(Z3) level3_3m
level3: $(BF3) $(B3) $(H3) $(S3) $(D3) $(C3) $(Z3) level3_3m
else
level3: $(BF3) $(B3) $(S3) $(D3) $(C3) $(Z3)
level3: $(BF3) $(B3) $(H3) $(S3) $(D3) $(C3) $(Z3)
endif

ifneq ($(CROSS), 1)
Expand Down Expand Up @@ -454,6 +457,9 @@ test_sbgemv : compare_sgemv_sbgemv.c ../$(LIBNAME)
endif

# Comparison tests for the HFLOAT16 routines; only defined when
# BUILD_HFLOAT16 is 1.
ifeq ($(BUILD_HFLOAT16),1)
# test_shgemm: compiled from compare_sgemm_shgemm.c with -DIHFLOAT16 and linked
# against the library; rebuilt when test_helpers.h or the library changes.
test_shgemm : compare_sgemm_shgemm.c test_helpers.h ../$(LIBNAME)
$(CC) $(CLDFLAGS) -DIHFLOAT16 -o test_shgemm compare_sgemm_shgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

# test_shgemv: compiled from compare_sgemv_shgemv.c and linked against the
# library; rebuilt when the source or the library changes.
test_shgemv : compare_sgemv_shgemv.c ../$(LIBNAME)
$(CC) $(CLDFLAGS) -o test_shgemv compare_sgemv_shgemv.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
endif
Expand All @@ -475,7 +481,7 @@ clean:
@rm -f *.$(SUFFIX) *.$(PSUFFIX) gmon.$(SUFFIX)ut *.SUMM *.cxml *.exe *.pdb *.dwf \
sblat1 dblat1 cblat1 zblat1 \
sblat2 dblat2 cblat2 zblat2 \
test_bgemm test_bgemv test_sbgemm test_sbgemv test_shgemv sblat3 dblat3 cblat3 zblat3 \
test_bgemm test_bgemv test_sbgemm test_sbgemv test_shgemm test_shgemv sblat3 dblat3 cblat3 zblat3 \
sblat1p dblat1p cblat1p zblat1p \
sblat2p dblat2p cblat2p zblat2p \
sblat3p dblat3p cblat3p zblat3p \
Expand Down
147 changes: 147 additions & 0 deletions test/compare_sgemm_shgemm.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/***************************************************************************
Copyright (c) 2020,2025 The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdint.h>
#include "../common.h"

#include "test_helpers.h"

#define SGEMM BLASFUNC(sgemm)
#define SHGEMM BLASFUNC(shgemm)
#define SGEMV BLASFUNC(sgemv)
#define SHGEMV BLASFUNC(shgemv)
Comment on lines +35 to +36
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These shouldn't be necessary as shgemv is tested separately?

#define SHGEMM_LARGEST 256

/*
 * Test driver for SHGEMM.
 *
 * For a sweep of square problem sizes it runs SGEMM on float inputs and
 * SHGEMM on hfloat16 copies of those inputs, then checks the SHGEMM result
 * against both the SGEMM result and a naive triple-loop reference.
 *
 * Returns 0 when every comparison passes and 1 otherwise (including when a
 * buffer allocation fails). argc and argv are not used.
 */
int
main (int argc, char *argv[])
{
blasint m, n, k;                    /* matrix dimensions handed to the GEMM calls */
int i, j, l;                        /* element indices for fill and reference loops */
blasint x, y;                       /* x: current square size, y: transpose combination */
int ret = 0;                        /* running count of failed comparisons */
int loop = SHGEMM_LARGEST;          /* inclusive upper bound of the size sweep */
char transA = 'N', transB = 'N';    /* overwritten for each transpose combination */
float alpha = 1.0, beta = 0.0;      /* scaling factors passed to both GEMM calls */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given #5485, is it worth setting beta to 1.0 here, or testing both variations? Similarly with alpha, as 1.0 and 0.0 can have faster paths but in-between values usually don't.

I haven't got a good answer to this in the C tests, but we could try adding some Python-based tests with hypothesis or Rust tests with proptest.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth noting we also don't test anything but square, and it'd be nice to sample in the various dimensions - though we could maybe just randomly sample to start with?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree on the variations of alpha and beta, but we need to keep in mind that these tests are run on each and every build, so time will become a problem sooner or later, and I think non-trivial dependencies like Python or Rust are completely out of the question for a default build.
Perhaps it would make sense to put those in a completely separate project similar to BLAS-Tester.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we need to keep in mind that these tests are run on each and every build

Yeah, that's the reason I moved to a more randomly sampled approach, as it'll likely hit the relevant cases and shrink to the specific problem case quite quickly in such a small space. Otherwise, yes, the number of permutations becomes a bit tricky to manage 😿

I think non-trivial dependencies like python or rust are completely out of the question for a default build.

This is where I got to, I haven't found something that fits just yet.

Perhaps it would make sense to put those in a completely separate project similar to BLAS-Tester

Hmm, does that cause an issue that they're not run on the default build? And does it discourage people from adding tests for the default build?

It would be fun though, we could also potentially combine it with the benchmarking repo?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, something like that, which runs in CI but does not (necessarily) run as part of a normal build process. I'm not even totally averse to having something like a pytest subdirectory, as long as it doesn't become mandatory for user builds and doesn't take up too much space.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But short-term I'd like to get the code stable enough to do another release, and perhaps focus on my other work for a while so that I do not run myself out of business.


/*
 * Size sweep: for each tested square size x (m = n = k = x) build random
 * float operands A and B plus hfloat16 copies AA and BB, then for every
 * combination of transA/transB in {N, T}:
 *   - C  = SGEMM  on the float operands,
 *   - CC = SHGEMM on the hfloat16 operands,
 *   - DD = naive reference product computed from AA and BB,
 * and count every element where CC is not close to C or to DD.
 */
for (x = 0; x <= loop; x++)
  {
    /* Every size up to 100 is tested; above that only SHGEMM_LARGEST. */
    if ((x > 100) && (x != SHGEMM_LARGEST)) continue;
    m = k = n = x;

    float *A = (float *)malloc_safe(m * k * sizeof *A);
    float *B = (float *)malloc_safe(k * n * sizeof *B);
    float *C = (float *)malloc_safe(m * n * sizeof *C);
    hfloat16 *AA = (hfloat16 *)malloc_safe(m * k * sizeof *AA);
    hfloat16 *BB = (hfloat16 *)malloc_safe(k * n * sizeof *BB);
    float *DD = (float *)malloc_safe(m * n * sizeof *DD);
    float *CC = (float *)malloc_safe(m * n * sizeof *CC);
    if ((A == NULL) || (B == NULL) || (C == NULL) || (AA == NULL) ||
        (BB == NULL) || (DD == NULL) || (CC == NULL))
      {
        /* free(NULL) is a no-op, so release whatever was obtained. */
        free(A);
        free(B);
        free(C);
        free(AA);
        free(BB);
        free(DD);
        free(CC);
        return 1;
      }

    /* Random values in [0.5, 1.5]; AA mirrors A in hfloat16. */
    for (j = 0; j < m; j++)
      {
        for (i = 0; i < k; i++)
          {
            A[j * k + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
            AA[j * k + i] = (hfloat16) A[j * k + i];
          }
      }
    /* BB must mirror B (not A) so SGEMM and SHGEMM multiply the same data. */
    for (j = 0; j < n; j++)
      {
        for (i = 0; i < k; i++)
          {
            B[j * k + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
            BB[j * k + i] = (hfloat16) B[j * k + i];
          }
      }

    /* y = 0..3 enumerates (transA, transB) = NN, TN, NT, TT. */
    for (y = 0; y < 4; y++)
      {
        transA = ((y == 0) || (y == 2)) ? 'N' : 'T';
        transB = ((y == 0) || (y == 1)) ? 'N' : 'T';
        const int a_transposed = (transA == 'T');
        const int b_transposed = (transB == 'T');

        memset(CC, 0, m * n * sizeof *CC);
        memset(DD, 0, m * n * sizeof *DD);
        memset(C, 0, m * n * sizeof *C);

        /* m, k, m serve as the leading dimensions; all equal x here. */
        SGEMM (&transA, &transB, &m, &n, &k, &alpha, A,
               &m, B, &k, &beta, C, &m);
        SHGEMM (&transA, &transB, &m, &n, &k, &alpha, AA,
                &m, BB, &k, &beta, CC, &m);

        for (i = 0; i < n; i++)
          {
            for (j = 0; j < m; j++)
              {
                /* Reference dot product for output element (row j, column i). */
                for (l = 0; l < k; l++)
                  {
                    const float a_val =
                      (float) AA[a_transposed ? (k * j + l) : (l * m + j)];
                    const float b_val =
                      (float) BB[b_transposed ? (i + l * n) : (l + k * i)];
                    DD[i * m + j] += a_val * b_val;
                  }

                /* Each element can contribute up to two failures. */
                if (!is_close(CC[i * m + j], C[i * m + j], 0.01, 0.001)) {
                  ret++;
                }
                if (!is_close(CC[i * m + j], DD[i * m + j], 0.001, 0.0001)) {
                  ret++;
                }
              }
          }
      }

    free(A);
    free(B);
    free(C);
    free(AA);
    free(BB);
    free(DD);
    free(CC);
  }

/* Any failed comparison: report the total on stderr and exit with status 1. */
if (ret != 0) {
fprintf(stderr, "SHGEMM FAILURES: %d\n", ret);
return 1;
}

/* ret is 0 on this path, so this is the success exit status. */
return ret;
}
Loading