Skip to content

Commit 8749677

Browse files
authored
Merge pull request #39 from GPUEngineering/f/min-and-max
Min/max methods
2 parents 39d3524 + fb831fb commit 8749677

File tree

4 files changed: +100 −6 lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ jobs:
1010
runs-on: ${{ matrix.runner }}
1111
strategy:
1212
matrix:
13-
runner: [orin, a40]
13+
runner: [a40, orin]
1414
steps:
1515
- name: checkout code
1616
uses: actions/checkout@v4

CHANGELOG.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

88

9+
<!-- ---------------------
10+
v1.1.0
11+
--------------------- -->
12+
## v1.1.0 - 03-08-2024
13+
14+
### Added
15+
16+
- Implementation and test of methods `.maxAbs()` and `.minAbs()` for any tensor.
17+
918
<!-- ---------------------
1019
v1.0.0
1120
--------------------- -->
@@ -21,7 +30,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2130
- Using a function `numBlocks` instead of the macro `DIM2BLOCKS`
2231
- Using `TEMPLATE_WITH_TYPE_T` and `TEMPLATE_CONSTRAINT_REQUIRES_FPX` for the code to run on both C++17 and C++20
2332

24-
2533
<!-- ---------------------
2634
v0.1.0
2735
--------------------- -->

include/tensor.cuh

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,19 @@ public:
377377
*/
378378
T sumAbs() const;
379379

380+
/**
381+
* Maximum of absolute of all elements.
382+
* Equivalent to inf-norm, max(|x_i|) for all i.
383+
* @return max of absolute as same data type
384+
*/
385+
T maxAbs() const;
386+
387+
/**
388+
* Minimum of absolute of all elements, min(|x_i|) for all i.
389+
* @return min of absolute as same data type
390+
*/
391+
T minAbs() const;
392+
380393
/**
381394
* Solves for the least squares solution of A \ b.
382395
* A is this tensor and b is the provided tensor.
@@ -405,7 +418,7 @@ public:
405418

406419
DTensor &operator=(const DTensor &other);
407420

408-
T operator()(size_t i, size_t j = 0, size_t k = 0);
421+
T operator()(size_t i, size_t j = 0, size_t k = 0) const;
409422

410423
DTensor &operator*=(T scalar);
411424

@@ -605,7 +618,6 @@ inline float DTensor<float>::normF() const {
605618
return the_norm;
606619
}
607620

608-
609621
template<>
610622
inline float DTensor<float>::sumAbs() const {
611623
float sumAbsAllElements;
@@ -622,6 +634,46 @@ inline double DTensor<double>::sumAbs() const {
622634
return sumAbsAllElements;
623635
}
624636

637+
template<>
638+
inline float DTensor<float>::maxAbs() const {
639+
int idx;
640+
float hostDst;
641+
gpuErrChk(cublasIsamax(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
642+
&idx));
643+
gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(float), cudaMemcpyDeviceToHost));
644+
return std::signbit(hostDst) ? -hostDst : hostDst;
645+
}
646+
647+
template<>
648+
inline double DTensor<double>::maxAbs() const {
649+
int idx;
650+
double hostDst;
651+
gpuErrChk(cublasIdamax(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
652+
&idx));
653+
gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(double), cudaMemcpyDeviceToHost));
654+
return std::signbit(hostDst) ? -hostDst : hostDst;
655+
}
656+
657+
template<>
658+
inline float DTensor<float>::minAbs() const {
659+
int idx;
660+
float hostDst;
661+
gpuErrChk(cublasIsamin(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
662+
&idx));
663+
gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(float), cudaMemcpyDeviceToHost));
664+
return std::signbit(hostDst) ? -hostDst : hostDst;
665+
}
666+
667+
template<>
668+
inline double DTensor<double>::minAbs() const {
669+
int idx;
670+
double hostDst;
671+
gpuErrChk(cublasIdamin(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
672+
&idx));
673+
gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(double), cudaMemcpyDeviceToHost));
674+
return std::signbit(hostDst) ? -hostDst : hostDst;
675+
}
676+
625677
template<typename T>
626678
inline bool DTensor<T>::allocateOnDevice(size_t size, bool zero) {
627679
if (size <= 0) return false;
@@ -772,7 +824,7 @@ inline DTensor<double> &DTensor<double>::operator-=(const DTensor<double> &rhs)
772824
}
773825

774826
template<typename T>
775-
inline T DTensor<T>::operator()(size_t i, size_t j, size_t k) {
827+
inline T DTensor<T>::operator()(size_t i, size_t j, size_t k) const {
776828
T hostDst;
777829
size_t offset = i + m_numRows * (j + m_numCols * k);
778830
gpuErrChk(cudaMemcpy(&hostDst, m_d_data + offset, sizeof(T), cudaMemcpyDeviceToHost));

test/testTensor.cu

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,11 +352,45 @@ void tensorSumAbs() {
352352
EXPECT_NEAR(112, tenz.sumAbs(), PRECISION_HIGH); // from MATLAB
353353
}
354354

355-
TEST_F(TensorTest, tensorNormFtensorSumAbs) {
355+
TEST_F(TensorTest, tensorSumAbs) {
356356
tensorSumAbs<float>();
357357
tensorSumAbs<double>();
358358
}
359359

360+
/* ---------------------------------------
361+
* Tensor: max of absolute of all elements
362+
* --------------------------------------- */
363+
364+
TEMPLATE_WITH_TYPE_T
365+
void tensorMax() {
366+
std::vector<T> data = TENSOR_DATA_234AMB;
367+
DTensor<T> tenz(data, 2, 3, 4);
368+
T m = tenz.maxAbs();
369+
EXPECT_EQ(27, m);
370+
}
371+
372+
TEST_F(TensorTest, tensorMax) {
373+
tensorMax<float>();
374+
tensorMax<double>();
375+
}
376+
377+
/* ---------------------------------------
378+
* Tensor: min of absolute of all elements
379+
* --------------------------------------- */
380+
381+
TEMPLATE_WITH_TYPE_T
382+
void tensorMin() {
383+
std::vector<T> data = TENSOR_DATA_234AMB;
384+
DTensor<T> tenz(data, 2, 3, 4);
385+
T m = tenz.minAbs();
386+
EXPECT_EQ(0, m);
387+
}
388+
389+
TEST_F(TensorTest, tensorMin) {
390+
tensorMin<float>();
391+
tensorMin<double>();
392+
}
393+
360394
/* ---------------------------------------
361395
* Tensor operator() to access element
362396
* e.g., t(2, 3, 4)

0 commit comments

Comments (0)