@@ -377,6 +377,19 @@ public:
      */
     T sumAbs() const;
 
+    /**
+     * Maximum absolute value of all elements.
+     * Equivalent to the infinity norm, max(|x_i|) over all i.
+     * @return maximum absolute value (same data type)
+     */
+    T maxAbs() const;
+
+    /**
+     * Minimum absolute value of all elements, min(|x_i|) over all i.
+     * @return minimum absolute value (same data type)
+     */
+    T minAbs() const;
+
     /**
      * Solves for the least squares solution of A \ b.
      * A is this tensor and b is the provided tensor.
@@ -405,7 +418,7 @@ public:
 
     DTensor &operator=(const DTensor &other);
 
-    T operator()(size_t i, size_t j = 0, size_t k = 0);
+    T operator()(size_t i, size_t j = 0, size_t k = 0) const;
 
     DTensor &operator*=(T scalar);
 
@@ -605,7 +618,6 @@ inline float DTensor<float>::normF() const {
     return the_norm;
 }
 
-
 template<>
 inline float DTensor<float>::sumAbs() const {
     float sumAbsAllElements;
@@ -622,6 +634,46 @@ inline double DTensor<double>::sumAbs() const {
     return sumAbsAllElements;
 }
 
+template<>
+inline float DTensor<float>::maxAbs() const {
+    int idx;
+    float hostDst;
+    gpuErrChk(cublasIsamax(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(float), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
+template<>
+inline double DTensor<double>::maxAbs() const {
+    int idx;
+    double hostDst;
+    gpuErrChk(cublasIdamax(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(double), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
+template<>
+inline float DTensor<float>::minAbs() const {
+    int idx;
+    float hostDst;
+    gpuErrChk(cublasIsamin(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(float), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
+template<>
+inline double DTensor<double>::minAbs() const {
+    int idx;
+    double hostDst;
+    gpuErrChk(cublasIdamin(Session::getInstance().cuBlasHandle(), m_numRows * m_numCols * m_numMats, m_d_data, 1,
+                           &idx));
+    gpuErrChk(cudaMemcpy(&hostDst, m_d_data + idx - 1, sizeof(double), cudaMemcpyDeviceToHost));
+    return std::signbit(hostDst) ? -hostDst : hostDst;
+}
+
 template<typename T>
 inline bool DTensor<T>::allocateOnDevice(size_t size, bool zero) {
     if (size <= 0) return false;
@@ -772,7 +824,7 @@ inline DTensor<double> &DTensor<double>::operator-=(const DTensor<double> &rhs)
 }
 
 template<typename T>
-inline T DTensor<T>::operator()(size_t i, size_t j, size_t k) {
+inline T DTensor<T>::operator()(size_t i, size_t j, size_t k) const {
     T hostDst;
     size_t offset = i + m_numRows * (j + m_numCols * k);
     gpuErrChk(cudaMemcpy(&hostDst, m_d_data + offset, sizeof(T), cudaMemcpyDeviceToHost));
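
For context, a minimal usage sketch of the members touched by this commit. This is not part of the diff; it only assumes that the header declaring DTensor is included, and shows that maxAbs(), minAbs() and the newly const-qualified operator() can all be called through a const reference.

// #include "tensor.cuh"   // assumed: whichever header declares DTensor
#include <iostream>

// Illustrative only: maxAbs(), minAbs() and the const operator() are the members
// added/changed in this commit; nothing here mutates the tensor.
void printAbsSummary(const DTensor<float> &x) {
    float largest  = x.maxAbs();   // infinity norm: max(|x_i|) over all elements
    float smallest = x.minAbs();   // min(|x_i|) over all elements
    float first    = x(0, 0, 0);   // valid on a const tensor now that operator() is const
    std::cout << largest << " " << smallest << " " << first << std::endl;
}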