Skip to content

Commit

Permalink
Merge pull request #71 from amcamd/add_gemm_ex
Browse files Browse the repository at this point in the history
add hipblasGemmEx
  • Loading branch information
amcamd authored Sep 12, 2018
2 parents d5bb637 + 4d105e9 commit 7e8045b
Show file tree
Hide file tree
Showing 8 changed files with 1,161 additions and 13 deletions.
66 changes: 66 additions & 0 deletions clients/common/cblas_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@


#include <typeinfo>
#include <memory>
#include "hipblas.h"
#include "cblas_interface.h"
#include "cblas.h"
#include "utility.h"

/*!\file
* \brief Provides template function interfaces to the CBLAS C89 interfaces; it is used only for testing and is not part of the GPU library
Expand Down Expand Up @@ -385,6 +387,70 @@ extern "C" {

//gemm

// Reference GEMM for hipblasHalf operands, computed on the host through cblas_sgemm
// (column-major). cblas has no hipblasHalf routine, so alpha, beta, A, B and C are
// widened to float, the single-precision GEMM is run, and the result is narrowed
// back into C. The intermediate float arithmetic is more precise than half, which
// is acceptable for a testing reference.
template <>
void cblas_gemm<hipblasHalf>(hipblasOperation_t transA,
                             hipblasOperation_t transB,
                             int                m,
                             int                n,
                             int                k,
                             hipblasHalf        alpha,
                             hipblasHalf*       A,
                             int                lda,
                             hipblasHalf*       B,
                             int                ldb,
                             hipblasHalf        beta,
                             hipblasHalf*       C,
                             int                ldc)
{
    // Number of elements in each buffer: leading dimension times the number of
    // stored columns, which depends on whether the operand is transposed.
    const int countA = (transA == HIPBLAS_OP_N ? k : m) * lda;
    const int countB = (transB == HIPBLAS_OP_N ? n : k) * ldb;
    const int countC = n * ldc;

    // Allocates a zero-initialized float buffer and fills it with the widened input.
    auto widen = [](const hipblasHalf* src, int count) -> std::unique_ptr<float[]> {
        std::unique_ptr<float[]> dst(new float[count]());
        for(int idx = 0; idx < count; ++idx)
        {
            dst[idx] = half_to_float(src[idx]);
        }
        return dst;
    };

    std::unique_ptr<float[]> a_wide = widen(A, countA);
    std::unique_ptr<float[]> b_wide = widen(B, countB);
    std::unique_ptr<float[]> c_wide = widen(C, countC);

    const float* a_ptr = a_wide.get();
    const float* b_ptr = b_wide.get();
    float*       c_ptr = c_wide.get();

    // The hipblas operation enums are passed straight through as CBLAS_TRANSPOSE values.
    cblas_sgemm(CblasColMajor,
                static_cast<CBLAS_TRANSPOSE>(transA),
                static_cast<CBLAS_TRANSPOSE>(transB),
                m,
                n,
                k,
                half_to_float(alpha),
                a_ptr,
                lda,
                b_ptr,
                ldb,
                half_to_float(beta),
                c_ptr,
                ldc);

    // Narrow the float result back into the caller's half-precision C buffer.
    for(int idx = 0; idx < countC; ++idx)
    {
        C[idx] = float_to_half(c_ptr[idx]);
    }
}

template<>
void cblas_gemm<float>( hipblasOperation_t transA, hipblasOperation_t transB,
int m, int n, int k,
Expand Down
20 changes: 20 additions & 0 deletions clients/common/unit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "hipblas.h"
#include "unit.h"
#include "utility.h"

/* ========================================Gtest Unit Check ==================================================== */

Expand All @@ -13,6 +14,25 @@
// Do not put a wrapper over ASSERT_FLOAT_EQ, since the assert exits the current function, NOT the test case;
// a wrapper will cause the loop to keep going

// Element-wise comparison of two hipblasHalf matrices, visiting entries (i, j)
// for i < M, j < N at offset i + j * lda. Each pair is widened to float and
// compared with ASSERT_FLOAT_EQ; the comparison is only compiled in when
// GOOGLE_TEST is defined.
template <>
void unit_check_general(int M, int N, int lda, hipblasHalf* hCPU, hipblasHalf* hGPU)
{
#pragma unroll
    for(int col = 0; col < N; ++col)
    {
#pragma unroll
        for(int row = 0; row < M; ++row)
        {
#ifdef GOOGLE_TEST
            const int   offset    = row + col * lda;
            const float cpu_float = half_to_float(hCPU[offset]);
            const float gpu_float = half_to_float(hGPU[offset]);
            // A failed assertion returns from this function immediately.
            ASSERT_FLOAT_EQ(cpu_float, gpu_float);
#endif
        }
    }
}



template<>
Expand Down
10 changes: 9 additions & 1 deletion clients/gtest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ set(hipblas_test_source
gemv_gtest.cpp
ger_gtest.cpp
gemm_gtest.cpp
gemm_ex_gtest.cpp
gemm_strided_batched_gtest.cpp
gemm_batched_gtest.cpp
geam_gtest.cpp
Expand Down Expand Up @@ -92,11 +93,18 @@ if( NOT CUDA_FOUND )
get_target_property( HCC_AM_LOCATION hcc::hc_am IMPORTED_LOCATION_RELEASE )
target_link_libraries( hipblas-test PRIVATE ${HIP_HCC_LOCATION} ${HCC_AM_LOCATION} )


# Per-compiler compile options for the hipblas-test target.
if( CMAKE_CXX_COMPILER MATCHES ".*/hcc$|.*/hipcc$" )
# Remove the warning suppression below once hcc is fixed; hcc emits the following spurious warning:
# "clang-5.0: warning: argument unused during compilation: '-isystem /opt/rocm/include'"
# NOTE(review): the next two calls both pass -Wno-unused-command-line-argument; the first
# looks redundant with the second (which also adds -mf16c) -- confirm and drop one.
target_compile_options( hipblas-test PRIVATE -Wno-unused-command-line-argument )
target_compile_options( hipblas-test PRIVATE -Wno-unused-command-line-argument -mf16c)

elseif( CMAKE_COMPILER_IS_GNUCXX )
# GCC needs a specific flag (-mf16c) to turn on the f16c intrinsics
target_compile_options( hipblas-test PRIVATE -mf16c )

endif( )

if( CMAKE_CXX_COMPILER MATCHES ".*/hcc$|.*/hipcc$" )
# hip-clang needs specific flag to turn on pthread and m
target_link_libraries( hipblas-test PRIVATE -lpthread -lm )
Expand Down
Loading

0 comments on commit 7e8045b

Please sign in to comment.