diff --git a/.github/workflows/PR-5.x.yaml b/.github/workflows/PR-5.x.yaml index 03162e9f3e3..e1e12deeee8 100644 --- a/.github/workflows/PR-5.x.yaml +++ b/.github/workflows/PR-5.x.yaml @@ -14,6 +14,9 @@ jobs: Ubuntu2004-ARM64: uses: opencv/ci-gha-workflow/.github/workflows/OCV-Contrib-PR-5.x-ARM64.yaml@main + Ubuntu2004-ARM64-FastCV: + uses: opencv/ci-gha-workflow/.github/workflows/OCV-Contrib-PR-4.x-ARM64-FastCV.yaml@main + Ubuntu2004-x64-CUDA: uses: opencv/ci-gha-workflow/.github/workflows/OCV-Contrib-PR-5.x-U20-Cuda.yaml@main diff --git a/modules/bgsegm/tutorials/bgsegm_bg_subtraction/bgsegm_bg_subtraction.markdown b/modules/bgsegm/tutorials/bgsegm_bg_subtraction/bgsegm_bg_subtraction.markdown index 4340b3654ee..07bfffb2325 100644 --- a/modules/bgsegm/tutorials/bgsegm_bg_subtraction/bgsegm_bg_subtraction.markdown +++ b/modules/bgsegm/tutorials/bgsegm_bg_subtraction/bgsegm_bg_subtraction.markdown @@ -30,8 +30,8 @@ In the following, we will have a look at two algorithms from the `bgsegm` module ### BackgroundSubtractorMOG It is a Gaussian Mixture-based Background/Foreground Segmentation Algorithm. It was introduced in -the paper "An improved adaptive background mixture model for real-time tracking with shadow -detection" by P. KadewTraKuPong and R. Bowden in 2001. It uses a method to model each background +the paper "An Improved Adaptive Background Mixture Model for Realtime Tracking with Shadow +Detection" by P. KaewTraKulPong and R. Bowden in 2001. It uses a method to model each background pixel by a mixture of K Gaussian distributions (K = 3 to 5). The weights of the mixture represent the time proportions that those colours stay in the scene. The probable background colours are the ones which stay longer and more static. 
diff --git a/modules/cudaimgproc/src/color.cpp b/modules/cudaimgproc/src/color.cpp index cad5dcd394d..94ffe90fa2f 100644 --- a/modules/cudaimgproc/src/color.cpp +++ b/modules/cudaimgproc/src/color.cpp @@ -70,8 +70,8 @@ namespace cv { namespace cuda { template void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - template - void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); + template + void MHCdemosaic(PtrStepSz src, int2 sourceOffset, PtrStepSz dst, int2 firstRed, cudaStream_t stream); } }} @@ -2136,7 +2136,7 @@ void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, GpuMat src = _src.getGpuMat(); const int depth = _src.depth(); - CV_Assert( depth == CV_8U ); + CV_Assert( depth == CV_8U || depth == CV_16U); CV_Assert( src.channels() == 1 ); CV_Assert( dcn == 3 || dcn == 4 ); @@ -2148,16 +2148,27 @@ void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, Size wholeSize; Point ofs; src.locateROI(wholeSize, ofs); - PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step); const int2 firstRed = make_int2(code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGB2BGR_MHT ? 0 : 1, code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 
0 : 1); - if (dcn == 3) - cv::cuda::device::MHCdemosaic<3>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); - else - cv::cuda::device::MHCdemosaic<4>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); - + if (dcn == 3) { + if (depth == CV_8U) { + PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step); + cv::cuda::device::MHCdemosaic<3, uchar>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + } else { + PtrStepSz srcWhole(wholeSize.height, wholeSize.width, src.ptr(), src.step); + cv::cuda::device::MHCdemosaic<3, ushort>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + } + } else { + if (depth == CV_8U) { + PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step); + cv::cuda::device::MHCdemosaic<4, uchar>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + } else { + PtrStepSz srcWhole(wholeSize.height, wholeSize.width, src.ptr(), src.step); + cv::cuda::device::MHCdemosaic<4, ushort>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + } + } break; } @@ -2166,7 +2177,7 @@ void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, GpuMat src = _src.getGpuMat(); const int depth = _src.depth(); - CV_Assert( depth == CV_8U ); + CV_Assert( depth == CV_8U || depth == CV_16U); _dst.create(_src.size(), CV_MAKE_TYPE(depth, 1)); GpuMat dst = _dst.getGpuMat(); @@ -2176,12 +2187,17 @@ void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, Size wholeSize; Point ofs; src.locateROI(wholeSize, ofs); - PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step); const int2 firstRed = make_int2(code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGB2BGR_MHT ? 0 : 1, code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 
0 : 1); - cv::cuda::device::MHCdemosaic<1>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + if (depth == CV_8U) { + PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step); + cv::cuda::device::MHCdemosaic<1, uchar>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + } else { + PtrStepSz srcWhole(wholeSize.height, wholeSize.width, src.ptr(), src.step); + cv::cuda::device::MHCdemosaic<1, ushort>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + } break; } diff --git a/modules/cudaimgproc/src/cuda/debayer.cu b/modules/cudaimgproc/src/cuda/debayer.cu index bfe4b6f5ea8..dfd3b9aa11d 100644 --- a/modules/cudaimgproc/src/cuda/debayer.cu +++ b/modules/cudaimgproc/src/cuda/debayer.cu @@ -390,6 +390,17 @@ namespace cv { namespace cuda { namespace device // // ported to CUDA + template __device__ + typename TypeVec::vec_type make_3(Depth x, Depth y, Depth z); + + template<> __device__ TypeVec::vec_type make_3(uchar x, uchar y, uchar z) { + return make_uchar3(x, y, z); + } + + template<> __device__ TypeVec::vec_type make_3(ushort x, ushort y, ushort z) { + return make_ushort3(x, y, z); + } + template __global__ void MHCdemosaic(PtrStepSz dst, Ptr2D src, const int2 firstRed) { @@ -506,34 +517,36 @@ namespace cv { namespace cuda { namespace device alternate.x = (x + firstRed.x) % 2; alternate.y = (y + firstRed.y) % 2; - // in BGR sequence; - uchar3 pixelColor = + typedef typename VecTraits::elem_type SrcElemType; + typedef typename TypeVec::vec_type SrcType; + + SrcType pixelColor = (alternate.y == 0) ? ((alternate.x == 0) ? 
- make_uchar3(saturate_cast(PATTERN.y), saturate_cast(PATTERN.x), saturate_cast(C)) : - make_uchar3(saturate_cast(PATTERN.w), saturate_cast(C), saturate_cast(PATTERN.z))) : + make_3(saturate_cast(PATTERN.y), saturate_cast(PATTERN.x), saturate_cast(C)) : + make_3(saturate_cast(PATTERN.w), saturate_cast(C), saturate_cast(PATTERN.z))) : ((alternate.x == 0) ? - make_uchar3(saturate_cast(PATTERN.z), saturate_cast(C), saturate_cast(PATTERN.w)) : - make_uchar3(saturate_cast(C), saturate_cast(PATTERN.x), saturate_cast(PATTERN.y))); + make_3(saturate_cast(PATTERN.z), saturate_cast(C), saturate_cast(PATTERN.w)) : + make_3(saturate_cast(C), saturate_cast(PATTERN.x), saturate_cast(PATTERN.y))); dst(y, x) = toDst(pixelColor); } - template - void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream) + template + void MHCdemosaic(PtrStepSz src, int2 sourceOffset, PtrStepSz dst, int2 firstRed, cudaStream_t stream) { - typedef typename TypeVec::vec_type dst_t; + typedef typename TypeVec::vec_type dst_t; const dim3 block(32, 8); const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y)); if (sourceOffset.x || sourceOffset.y) { - cv::cudev::TextureOff texSrc(src, sourceOffset.y, sourceOffset.x); - MHCdemosaic><<>>((PtrStepSz)dst, texSrc, firstRed); + cv::cudev::TextureOff texSrc(src, sourceOffset.y, sourceOffset.x); + MHCdemosaic><<>>((PtrStepSz)dst, texSrc, firstRed); } else { - cv::cudev::Texture texSrc(src); - MHCdemosaic><<>>((PtrStepSz)dst, texSrc, firstRed); + cv::cudev::Texture texSrc(src); + MHCdemosaic><<>>((PtrStepSz)dst, texSrc, firstRed); } cudaSafeCall( cudaGetLastError() ); @@ -542,9 +555,12 @@ namespace cv { namespace cuda { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - template void MHCdemosaic<1>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); - template void MHCdemosaic<3>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t 
stream); - template void MHCdemosaic<4>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); + template void MHCdemosaic<1, uchar>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); + template void MHCdemosaic<3, uchar>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); + template void MHCdemosaic<4, uchar>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); + template void MHCdemosaic<1, ushort>(PtrStepSz src, int2 sourceOffset, PtrStepSz dst, int2 firstRed, cudaStream_t stream); + template void MHCdemosaic<3, ushort>(PtrStepSz src, int2 sourceOffset, PtrStepSz dst, int2 firstRed, cudaStream_t stream); + template void MHCdemosaic<4, ushort>(PtrStepSz src, int2 sourceOffset, PtrStepSz dst, int2 firstRed, cudaStream_t stream); }}} #endif /* CUDA_DISABLER */ diff --git a/modules/cudev/test/CMakeLists.txt b/modules/cudev/test/CMakeLists.txt index ff936cad54c..d8768ab6c56 100644 --- a/modules/cudev/test/CMakeLists.txt +++ b/modules/cudev/test/CMakeLists.txt @@ -20,9 +20,16 @@ if(OCV_DEPENDENCIES_FOUND) ocv_check_windows_crt_linkage() set(target_libs ${target_libs} ${CUDA_LIBRARIES}) if(CUDA_VERSION VERSION_LESS "11.0") - ocv_update(OPENCV_CUDA_OPTIONS_opencv_test_cudev "-std=c++11") + # Windows version does not support --std option + if(UNIX OR APPLE) + ocv_update(OPENCV_CUDA_OPTIONS_opencv_test_cudev "-std=c++11") + endif() else() - ocv_update(OPENCV_CUDA_OPTIONS_opencv_test_cudev "-std=c++14") + if(CUDA_VERSION VERSION_LESS "12.8") + ocv_update(OPENCV_CUDA_OPTIONS_opencv_test_cudev "-std=c++14") + else() + ocv_update(OPENCV_CUDA_OPTIONS_opencv_test_cudev "-std=c++17") + endif() ocv_warnings_disable(CMAKE_CXX_FLAGS -Wdeprecated-declarations) endif() CUDA_ADD_EXECUTABLE(${the_target} ${OPENCV_TEST_${the_module}_SOURCES} OPTIONS ${OPENCV_CUDA_OPTIONS_opencv_test_cudev}) diff --git a/modules/fastcv/include/opencv2/fastcv.hpp 
b/modules/fastcv/include/opencv2/fastcv.hpp index af188dfcb09..292e83a2dc3 100644 --- a/modules/fastcv/include/opencv2/fastcv.hpp +++ b/modules/fastcv/include/opencv2/fastcv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ @@ -11,6 +11,7 @@ #include "opencv2/fastcv/arithm.hpp" #include "opencv2/fastcv/bilateralFilter.hpp" #include "opencv2/fastcv/blur.hpp" +#include "opencv2/fastcv/channel.hpp" #include "opencv2/fastcv/cluster.hpp" #include "opencv2/fastcv/draw.hpp" #include "opencv2/fastcv/edges.hpp" diff --git a/modules/fastcv/include/opencv2/fastcv/arithm.hpp b/modules/fastcv/include/opencv2/fastcv/arithm.hpp index 5a0c43b2408..29f5fdfe1f9 100644 --- a/modules/fastcv/include/opencv2/fastcv/arithm.hpp +++ b/modules/fastcv/include/opencv2/fastcv/arithm.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ @@ -8,6 +8,10 @@ #include +#define FCV_CMP_EQ(val1,val2) (fabs(val1 - val2) < FLT_EPSILON) + +#define FCV_OPTYPE(depth,op) ((depth<<3) + op) + namespace cv { namespace fastcv { @@ -26,6 +30,59 @@ CV_EXPORTS_W void matmuls8s32(InputArray src1, InputArray src2, OutputArray dst) //! @} +//! @addtogroup fastcv +//! @{ + +/** + * @brief Arithmetic add and subtract operations for two matrices + * It is optimized for Qualcomm's processors + * @param src1 First source matrix, can be of type CV_8U, CV_16S, CV_32F. 
+ * Note: CV_32F not supported for subtract + * @param src2 Second source matrix of same type and size as src1 + * @param dst Resulting matrix of type as src mats + * @param op type of operation - 0 for add and 1 for subtract + */ +CV_EXPORTS_W void arithmetic_op(InputArray src1, InputArray src2, OutputArray dst, int op); + +//! @} + +//! @addtogroup fastcv +//! @{ + +/** + * @brief Matrix multiplication of two float type matrices + * R = a*A*B + b*C where A,B,C,R are matrices and a,b are constants + * It is optimized for Qualcomm's processors + * @param src1 First source matrix of type CV_32F + * @param src2 Second source matrix of type CV_32F with same rows as src1 cols + * @param dst Resulting matrix of type CV_32F + * @param alpha multiplying factor for src1 and src2 + * @param src3 Optional third matrix of type CV_32F to be added to matrix product + * @param beta multiplying factor for src3 + */ +CV_EXPORTS_W void gemm(InputArray src1, InputArray src2, OutputArray dst, float alpha = 1.0, + InputArray src3 = noArray(), float beta = 0.0); + +//! @} + +//! @addtogroup fastcv +//! @{ + +/** + * @brief Integral of a YCbCr420 image. + * Note: Input height should be multiple of 2. Input width and stride should be multiple of 16. + * Output stride should be multiple of 8. + * It is optimized for Qualcomm's processors + * @param Y Input Y component of 8UC1 YCbCr420 image. + * @param CbCr Input CbCr component(interleaved) of 8UC1 YCbCr420 image. + * @param IY Output Y integral of CV_32S one channel, size (Y height + 1)*(Y width + 1) + * @param ICb Output Cb integral of CV_32S one channel, size (Y height/2 + 1)*(Y width/2 + 1) + * @param ICr Output Cr integral of CV_32S one channel, size (Y height/2 + 1)*(Y width/2 + 1) + */ +CV_EXPORTS_W void integrateYUV(InputArray Y, InputArray CbCr, OutputArray IY, OutputArray ICb, OutputArray ICr); + +//! 
@} + } // fastcv:: } // cv:: diff --git a/modules/fastcv/include/opencv2/fastcv/blur.hpp b/modules/fastcv/include/opencv2/fastcv/blur.hpp index 99d1cd3d655..fdb2326d993 100644 --- a/modules/fastcv/include/opencv2/fastcv/blur.hpp +++ b/modules/fastcv/include/opencv2/fastcv/blur.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ @@ -58,6 +58,22 @@ CV_EXPORTS_W void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputA CV_EXPORTS_W void sepFilter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernelX, InputArray _kernelY); //! @} +//! @addtogroup fastcv +//! @{ + +/** + * @brief Calculates the local subtractive and contrastive normalization of the image. + * Each pixel of the image is normalized by the mean and standard deviation of the patch centred at the pixel. + * It is optimized for Qualcomm's processors. + * @param _src Input image, should have one channel CV_8U or CV_32F + * @param _dst Output array, should be one channel, CV_8S if src of type CV_8U, or CV_32F if src of CV_32F + * @param pSize Patch size for mean and std dev calculation + * @param useStdDev If 1, both mean and std dev will be used for normalization, if 0, only mean used + */ +CV_EXPORTS_W void normalizeLocalBox(InputArray _src, OutputArray _dst, Size pSize, bool useStdDev); + +//! @} + } // fastcv:: } // cv:: diff --git a/modules/fastcv/include/opencv2/fastcv/channel.hpp b/modules/fastcv/include/opencv2/fastcv/channel.hpp new file mode 100644 index 00000000000..7b911a15f71 --- /dev/null +++ b/modules/fastcv/include/opencv2/fastcv/channel.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#ifndef OPENCV_FASTCV_CHANNEL_HPP +#define OPENCV_FASTCV_CHANNEL_HPP + +#include + +namespace cv { +namespace fastcv { + +//! @addtogroup fastcv +//! @{ + +/** + * @brief Creates one multi-channel mat out of several single-channel CV_8U mats. + * Optimized for Qualcomm's processors + * @param mv input vector of matrices to be merged; all the matrices in mv must be of CV_8UC1 and have the same size + * Note: the number of mats can be 2, 3 or 4. + * @param dst output array of depth CV_8U and same size as mv[0]; The number of channels + * will be the total number of matrices in the matrix array + */ +CV_EXPORTS_W void merge(InputArrayOfArrays mv, OutputArray dst); + +//! @} + +//! @addtogroup fastcv +//! @{ + +/** + * @brief Splits a CV_8U multi-channel mat into several CV_8UC1 mats + * Optimized for Qualcomm's processors + * @param src input 2,3 or 4 channel mat of depth CV_8U + * @param mv output vector of size src.channels() of CV_8UC1 mats + */ +CV_EXPORTS_W void split(InputArray src, OutputArrayOfArrays mv); + +//! @} + +} // fastcv:: +} // cv:: + +#endif // OPENCV_FASTCV_CHANNEL_HPP diff --git a/modules/fastcv/include/opencv2/fastcv/pyramid.hpp b/modules/fastcv/include/opencv2/fastcv/pyramid.hpp index 6c20a21ab78..962730f40f9 100644 --- a/modules/fastcv/include/opencv2/fastcv/pyramid.hpp +++ b/modules/fastcv/include/opencv2/fastcv/pyramid.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ @@ -16,7 +16,7 @@ namespace fastcv { /** * @brief Creates a gradient pyramid from an image pyramid - * + * Note: The borders are ignored during gradient calculation. * @param pyr Input pyramid of 1-channel 8-bit images. Only continuous images are supported. 
* @param dx Horizontal Sobel gradient pyramid of the same size as pyr * @param dy Verical Sobel gradient pyramid of the same size as pyr diff --git a/modules/fastcv/perf/perf_arithm.cpp b/modules/fastcv/perf/perf_arithm.cpp new file mode 100644 index 00000000000..c1cf04a5438 --- /dev/null +++ b/modules/fastcv/perf/perf_arithm.cpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "perf_precomp.hpp" + +namespace opencv_test { + +typedef perf::TestBaseWithParam> IntegrateYUVPerfTest; + +PERF_TEST_P(IntegrateYUVPerfTest, run, + ::testing::Combine(::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p), // image size + ::testing::Values(CV_8U) // image depth + ) + ) +{ + cv::Size srcSize = get<0>(GetParam()); + int depth = get<1>(GetParam()); + + cv::Mat Y(srcSize, depth), CbCr(srcSize.height/2, srcSize.width, depth); + cv::Mat IY, ICb, ICr; + RNG& rng = cv::theRNG(); + cvtest::randUni(rng, Y, Scalar::all(0), Scalar::all(255)); + cvtest::randUni(rng, CbCr, Scalar::all(0), Scalar::all(255)); + + TEST_CYCLE() cv::fastcv::integrateYUV(Y, CbCr, IY, ICb, ICr); + + SANITY_CHECK_NOTHING(); +} + +} // namespace \ No newline at end of file diff --git a/modules/fastcv/perf/perf_blur.cpp b/modules/fastcv/perf/perf_blur.cpp index bca8f80974a..8c8fd59b66a 100644 --- a/modules/fastcv/perf/perf_blur.cpp +++ b/modules/fastcv/perf/perf_blur.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -120,4 +120,29 @@ PERF_TEST_P(SepFilter2DPerfTest, run, SANITY_CHECK_NOTHING(); } +typedef perf::TestBaseWithParam> NormalizeLocalBoxPerfTest; + +PERF_TEST_P(NormalizeLocalBoxPerfTest, run, + ::testing::Combine(::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p), // image size + ::testing::Values(CV_8U,CV_32F), // src image depth + ::testing::Values(Size(3,3),Size(5,5)), // patch size + ::testing::Values(0,1) // use std dev or not + ) + ) +{ + cv::Size srcSize = get<0>(GetParam()); + int depth = get<1>(GetParam()); + Size sz = get<2>(GetParam()); + bool useStdDev = get<3>(GetParam()); + + cv::Mat src(srcSize, depth); + cv::Mat dst; + RNG& rng = cv::theRNG(); + cvtest::randUni(rng, src, Scalar::all(0), Scalar::all(255)); + + TEST_CYCLE() cv::fastcv::normalizeLocalBox(src, dst, sz, useStdDev); + + SANITY_CHECK_NOTHING(); +} + } // namespace \ No newline at end of file diff --git a/modules/fastcv/perf/perf_matmul.cpp b/modules/fastcv/perf/perf_matmul.cpp index 83af7618b31..a8e4f314b55 100644 --- a/modules/fastcv/perf/perf_matmul.cpp +++ b/modules/fastcv/perf/perf_matmul.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -10,6 +10,9 @@ namespace opencv_test { typedef std::tuple MatMulPerfParams; typedef perf::TestBaseWithParam MatMulPerfTest; +typedef std::tuple MatMulGemmPerfParams; +typedef perf::TestBaseWithParam MatMulGemmPerfTest; + PERF_TEST_P(MatMulPerfTest, run, ::testing::Combine(::testing::Values(8, 16, 128, 256), // rows1 ::testing::Values(8, 16, 128, 256), // cols1 @@ -37,4 +40,34 @@ PERF_TEST_P(MatMulPerfTest, run, SANITY_CHECK_NOTHING(); } +PERF_TEST_P(MatMulGemmPerfTest, run, + ::testing::Combine(::testing::Values(8, 16, 128, 256), // rows1 + ::testing::Values(8, 16, 128, 256), // cols1 + ::testing::Values(8, 16, 128, 256), // cols2 + ::testing::Values(2.5, 5.8)) // alpha + ) +{ + auto p = GetParam(); + int rows1 = std::get<0>(p); + int cols1 = std::get<1>(p); + int cols2 = std::get<2>(p); + float alpha = std::get<3>(p); + + RNG& rng = cv::theRNG(); + Mat src1(rows1, cols1, CV_32FC1), src2(cols1, cols2, CV_32FC1); + cvtest::randUni(rng, src1, Scalar::all(-128.0), Scalar::all(128.0)); + cvtest::randUni(rng, src2, Scalar::all(-128.0), Scalar::all(128.0)); + + Mat dst; + + while (next()) + { + startTimer(); + cv::fastcv::gemm(src1, src2, dst, alpha, noArray(), 0); + stopTimer(); + } + + SANITY_CHECK_NOTHING(); +} + } // namespace diff --git a/modules/fastcv/src/arithm.cpp b/modules/fastcv/src/arithm.cpp index bf8077cbe7b..8b9a4be1f48 100644 --- a/modules/fastcv/src/arithm.cpp +++ b/modules/fastcv/src/arithm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -32,5 +32,177 @@ void matmuls8s32(InputArray _src1, InputArray _src2, OutputArray _dst) (int32_t*)dst.data, dst.step); } +void arithmetic_op(InputArray _src1, InputArray _src2, OutputArray _dst, int op) +{ + CV_Assert(!_src1.empty() && (_src1.depth() == CV_8U || _src1.depth() == CV_16S || _src1.depth() == CV_32F)); + CV_Assert(!_src2.empty() && _src2.type() == _src1.type()); + CV_Assert(_src2.size() == _src1.size()); + + Mat src1 = _src1.getMat(); + Mat src2 = _src2.getMat(); + + _dst.create(_src1.rows(), _src1.cols(), _src1.type()); + Mat dst = _dst.getMat(); + + INITIALIZATION_CHECK; + + fcvConvertPolicy policy = FASTCV_CONVERT_POLICY_SATURATE; + + int nStripes = cv::getNumThreads(); + + int func = FCV_OPTYPE(_src1.depth(), op); + switch(func) + { + case FCV_OPTYPE(CV_8U, 0): + cv::parallel_for_(cv::Range(0, src1.rows), [&](const cv::Range &range){ + int rangeHeight = range.end - range.start; + const uchar* yS1 = src1.data + static_cast(range.start)*src1.step[0]; + const uchar* yS2 = src2.data + static_cast(range.start)*src2.step[0]; + uchar* yD = dst.data + static_cast(range.start)*dst.step[0]; + fcvAddu8(yS1, src1.cols, rangeHeight, src1.step[0], + yS2, src2.step[0], policy, yD, dst.step[0]); + }, nStripes); + break; + case FCV_OPTYPE(CV_16S, 0): + cv::parallel_for_(cv::Range(0, src1.rows), [&](const cv::Range &range){ + int rangeHeight = range.end - range.start; + const short* yS1 = (short*)src1.data + static_cast(range.start)*(src1.step[0]/sizeof(short)); + const short* yS2 = (short*)src2.data + static_cast(range.start)*(src2.step[0]/sizeof(short)); + short* yD = (short*)dst.data + static_cast(range.start)*(dst.step[0]/sizeof(short)); + fcvAdds16_v2(yS1, src1.cols, rangeHeight, src1.step[0], + yS2, src2.step[0], policy, yD, dst.step[0]); + }, nStripes); + break; + case FCV_OPTYPE(CV_32F, 0): + cv::parallel_for_(cv::Range(0, src1.rows), [&](const cv::Range &range){ + int rangeHeight = range.end - range.start; + const 
float* yS1 = (float*)src1.data + static_cast(range.start)*(src1.step[0]/sizeof(float)); + const float* yS2 = (float*)src2.data + static_cast(range.start)*(src2.step[0]/sizeof(float)); + float* yD = (float*)dst.data + static_cast(range.start)*(dst.step[0]/sizeof(float)); + fcvAddf32(yS1, src1.cols, rangeHeight, src1.step[0], + yS2, src2.step[0], yD, dst.step[0]); + }, nStripes); + break; + case FCV_OPTYPE(CV_8U, 1): + cv::parallel_for_(cv::Range(0, src1.rows), [&](const cv::Range &range){ + int rangeHeight = range.end - range.start; + const uchar* yS1 = src1.data + static_cast(range.start)*src1.step[0]; + const uchar* yS2 = src2.data + static_cast(range.start)*src2.step[0]; + uchar* yD = dst.data + static_cast(range.start)*dst.step[0]; + fcvSubtractu8(yS1, src1.cols, rangeHeight, src1.step[0], + yS2, src2.step[0], policy, yD, dst.step[0]); + }, nStripes); + break; + case FCV_OPTYPE(CV_16S, 1): + cv::parallel_for_(cv::Range(0, src1.rows), [&](const cv::Range &range){ + int rangeHeight = range.end - range.start; + const short* yS1 = (short*)src1.data + static_cast(range.start)*(src1.step[0]/sizeof(short)); + const short* yS2 = (short*)src2.data + static_cast(range.start)*(src2.step[0]/sizeof(short)); + short* yD = (short*)dst.data + static_cast(range.start)*(dst.step[0]/sizeof(short)); + fcvSubtracts16(yS1, src1.cols, rangeHeight, src1.step[0], + yS2, src2.step[0], policy, yD, dst.step[0]); + }, nStripes); + break; + default: + CV_Error(cv::Error::StsBadArg, cv::format("op type is not supported")); + break; + } +} + + +void gemm(InputArray _src1, InputArray _src2, OutputArray _dst, float alpha, InputArray _src3, float beta) +{ + CV_Assert(!_src1.empty() && _src1.type() == CV_32FC1); + CV_Assert(_src1.cols() == _src2.rows()); + Mat src1 = _src1.getMat(); + + CV_Assert(!_src2.empty() && _src2.type() == CV_32FC1); + Mat src2 = _src2.getMat(); + + bool isSrc3 = !_src3.empty(); + + Mat src3 = _src3.getMat(); + + _dst.create(_src1.rows(), _src2.cols(), CV_32FC1); + + Mat 
dst = _dst.getMat(); + + CV_Assert(!FCV_CMP_EQ(alpha,0)); + + cv::Mat dst_temp1, dst_temp2; + float *dstp = NULL; + bool inplace = false; + size_t dst_stride; + fcvStatus status = FASTCV_SUCCESS; + + int n = src1.cols, m = src1.rows, k = src2.cols; + + INITIALIZATION_CHECK; + + if(src1.data == dst.data || src2.data == dst.data || (isSrc3 && (src3.data == dst.data))) + { + dst_temp1 = cv::Mat(m, k, CV_32FC1); + dstp = dst_temp1.ptr(); + inplace = true; + dst_stride = dst_temp1.step[0]; + } + else + { + dstp = (float32_t*)dst.data; + dst_stride = dst.step[0]; + } + float32_t *dstp1 = dstp; + status = fcvMatrixMultiplyf32_v2((float32_t*)src1.data, n, m, src1.step[0], (float32_t*)src2.data, k, + src2.step[0], dstp, dst_stride); + + bool isAlpha = !(FCV_CMP_EQ(alpha,0) || FCV_CMP_EQ(alpha,1)); + if(isAlpha && status == FASTCV_SUCCESS) + { + status = fcvMultiplyScalarf32(dstp, k, m, dst_stride, alpha, dstp1, dst_stride); + } + + if(isSrc3 && (!FCV_CMP_EQ(beta,0)) && status == FASTCV_SUCCESS) + { + cv::Mat dst3 = cv::Mat(m, k, CV_32FC1); + if(!FCV_CMP_EQ(beta,1)) + { + status = fcvMultiplyScalarf32((float32_t*)src3.data, k, m, src3.step[0], beta, (float32_t*)dst3.data, dst3.step[0]); + if(status == FASTCV_SUCCESS) + fcvAddf32_v2(dstp, k, m, dst_stride, (float32_t*)dst3.data, dst3.step[0], dstp1, dst_stride); + } + else + fcvAddf32_v2(dstp, k, m, dst_stride, (float32_t*)src3.data, src3.step[0], dstp1, dst_stride); + } + + if(inplace == true) + { + dst_temp1(cv::Rect(0, 0, k, m)).copyTo(dst(cv::Rect(0, 0, k, m))); + } +} + +void integrateYUV(InputArray _Y, InputArray _CbCr, OutputArray _IY, OutputArray _ICb, OutputArray _ICr) +{ + CV_Assert(!_Y.empty() && !_CbCr.empty()); + CV_Assert(_Y.type() == _CbCr.type() && _Y.type() == CV_8UC1); + Mat Y = _Y.getMat(); + Mat CbCr = _CbCr.getMat(); + int Ywidth = Y.cols; + int Yheight = Y.rows; + + INITIALIZATION_CHECK; + + _IY.create(Yheight + 1, Ywidth + 1, CV_32SC1); + _ICb.create(Yheight/2 + 1, Ywidth/2 + 1, CV_32SC1); + 
_ICr.create(Yheight/2 + 1, Ywidth/2 + 1, CV_32SC1); + + Mat IY_ = _IY.getMat(); + Mat ICb_ = _ICb.getMat(); + Mat ICr_ = _ICr.getMat(); + + fcvIntegrateImageYCbCr420PseudoPlanaru8(Y.data, CbCr.data, Ywidth, Yheight, Y.step[0], + CbCr.step[0], (uint32_t*)IY_.data, (uint32_t*)ICb_.data, (uint32_t*)ICr_.data, + IY_.step[0], ICb_.step[0], ICr_.step[0]); +} + } // fastcv:: } // cv:: diff --git a/modules/fastcv/src/blur.cpp b/modules/fastcv/src/blur.cpp index 66058a37b5a..3ce22e07375 100644 --- a/modules/fastcv/src/blur.cpp +++ b/modules/fastcv/src/blur.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ @@ -361,5 +361,26 @@ void sepFilter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kern } } +void normalizeLocalBox(InputArray _src, OutputArray _dst, Size pSize, bool useStdDev) +{ + CV_Assert(!_src.empty()); + int type = _src.type(); + CV_Assert(type == CV_8UC1 || type == CV_32FC1); + + Size size = _src.size(); + int dst_type = type == CV_8UC1 ? CV_8SC1 : CV_32FC1; + _dst.create(size, dst_type); + + Mat src = _src.getMat(); + Mat dst = _dst.getMat(); + + if(type == CV_8UC1) + fcvNormalizeLocalBoxu8(src.data, src.cols, src.rows, src.step[0], + pSize.width, pSize.height, useStdDev, (int8_t*)dst.data, dst.step[0]); + else if(type == CV_32FC1) + fcvNormalizeLocalBoxf32((float*)src.data, src.cols, src.rows, src.step[0], + pSize.width, pSize.height, useStdDev, (float*)dst.data, dst.step[0]); +} + } // fastcv:: } // cv:: \ No newline at end of file diff --git a/modules/fastcv/src/channel.cpp b/modules/fastcv/src/channel.cpp new file mode 100644 index 00000000000..0283ac4ab7c --- /dev/null +++ b/modules/fastcv/src/channel.cpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "precomp.hpp" + +namespace cv { +namespace fastcv { + +void merge(InputArrayOfArrays _mv, OutputArray _dst) +{ + CV_Assert(!_mv.empty()); + std::vector mv; + _mv.getMatVector(mv); + int count = mv.size(); + + CV_Assert(!mv.empty()); + + CV_Assert(count == 2 || count == 3 || count == 4); + CV_Assert(!mv[0].empty()); + CV_Assert(mv[0].dims <= 2); + + for(int i = 0; i < count; i++ ) + { + CV_Assert(mv[i].size == mv[0].size && mv[i].step[0] == mv[0].step[0] && mv[i].type() == CV_8UC1); + } + + _dst.create(mv[0].dims, mv[0].size, CV_MAKE_TYPE(CV_8U,count)); + Mat dst = _dst.getMat(); + + INITIALIZATION_CHECK; + + int nStripes = cv::getNumThreads(); + + switch(count) + { + case 2: + cv::parallel_for_(cv::Range(0, mv[0].rows), [&](const cv::Range &range){ + int height_ = range.end - range.start; + const uchar* yS1 = mv[0].data + static_cast(range.start) * mv[0].step[0]; + const uchar* yS2 = mv[1].data + static_cast(range.start) * mv[1].step[0]; + uchar* yD = dst.data + static_cast(range.start) * dst.step[0]; + fcvChannelCombine2Planesu8(yS1, mv[0].cols, height_, mv[0].step[0], yS2, mv[1].step[0], yD, dst.step[0]); + }, nStripes); + + break; + + case 3: + cv::parallel_for_(cv::Range(0, mv[0].rows), [&](const cv::Range &range){ + int height_ = range.end - range.start; + const uchar* yS1 = mv[0].data + static_cast(range.start) * mv[0].step[0]; + const uchar* yS2 = mv[1].data + static_cast(range.start) * mv[1].step[0]; + const uchar* yS3 = mv[2].data + static_cast(range.start) * mv[2].step[0]; + uchar* yD = dst.data + static_cast(range.start) * dst.step[0]; + fcvChannelCombine3Planesu8(yS1, mv[0].cols, height_, mv[0].step[0], yS2, mv[1].step[0], yS3, mv[2].step[0], yD, dst.step[0]); + }, nStripes); + + break; + + case 4: + cv::parallel_for_(cv::Range(0, mv[0].rows), [&](const cv::Range &range){ + int height_ = range.end - range.start; + const uchar* yS1 = mv[0].data + static_cast(range.start) * mv[0].step[0]; + const 
uchar* yS2 = mv[1].data + static_cast(range.start) * mv[1].step[0]; + const uchar* yS3 = mv[2].data + static_cast(range.start) * mv[2].step[0]; + const uchar* yS4 = mv[3].data + static_cast(range.start) * mv[3].step[0]; + uchar* yD = dst.data + static_cast(range.start) * dst.step[0]; + fcvChannelCombine4Planesu8(yS1, mv[0].cols, height_, mv[0].step[0], yS2, mv[1].step[0], yS3, mv[2].step[0], yS4, mv[3].step[0], yD, dst.step[0]); + }, nStripes); + + break; + + default: + CV_Error(cv::Error::StsBadArg, cv::format("count is not supported")); + break; + } +} + +void split(InputArray _src, OutputArrayOfArrays _mv) +{ + CV_Assert(!_src.empty()); + Mat src = _src.getMat(); + + int depth = src.depth(), cn = src.channels(); + + CV_Assert(depth == CV_8U && (cn == 2 || cn == 3 || cn == 4)); + CV_Assert(src.dims <= 2); + _mv.create(cn, 1, depth); + for( int k = 0; k < cn; k++ ) + { + _mv.create(src.dims, src.size, depth, k); + } + + std::vector mv(cn); + _mv.getMatVector(mv); + + INITIALIZATION_CHECK; + + int nStripes = cv::getNumThreads(); + + if(src.rows * src.cols < 640 * 480) + if(cn == 3 || cn == 4) + nStripes = 1; + + if(cn == 2) + { + cv::parallel_for_(cv::Range(0, src.rows), [&](const cv::Range &range){ + int height_ = range.end - range.start; + const uchar* yS = src.data + static_cast(range.start) * src.step[0]; + uchar* y1D = mv[0].data + static_cast(range.start) * mv[0].step[0]; + uchar* y2D = mv[1].data + static_cast(range.start) * mv[1].step[0]; + fcvDeinterleaveu8(yS, src.cols, height_, src.step[0], y1D, mv[0].step[0], y2D, mv[1].step[0]); + }, nStripes); + } + else if(cn == 3) + { + for(int i=0; i(range.start) * src.step[0]; + uchar* yD = mv[i].data + static_cast(range.start) * mv[i].step[0]; + fcvChannelExtractu8(yS, src.cols, height_, src.step[0], NULL, 0, NULL, 0, (fcvChannelType)i, (fcvImageFormat)FASTCV_RGB, yD, mv[i].step[0]); + }, nStripes); + } + } + else if(cn == 4) + { + for(int i=0; i(range.start) * src.step[0]; + uchar* yD = mv[i].data + 
static_cast(range.start) * mv[i].step[0]; + fcvChannelExtractu8(yS, src.cols, height_, src.step[0], NULL, 0, NULL, 0, (fcvChannelType)i, (fcvImageFormat)FASTCV_RGBX, yD, mv[i].step[0]); + }, nStripes); + } + } +} + +} // fastcv:: +} // cv:: diff --git a/modules/fastcv/test/test_arithm.cpp b/modules/fastcv/test/test_arithm.cpp index 39979908136..216faa7282c 100644 --- a/modules/fastcv/test/test_arithm.cpp +++ b/modules/fastcv/test/test_arithm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ @@ -10,6 +10,9 @@ namespace opencv_test { namespace { typedef std::tuple MatMulTestParams; class MatMulTest : public ::testing::TestWithParam {}; +typedef std::tuple ArithmOpTestParams; +class ArithmOpTest : public ::testing::TestWithParam {}; + TEST_P(MatMulTest, accuracy) { auto p = GetParam(); @@ -48,9 +51,75 @@ TEST_P(MatMulTest, accuracy) } } +TEST_P(ArithmOpTest, accuracy) +{ + auto p = GetParam(); + Size sz = std::get<0>(p); + int depth = std::get<1>(p); + int op = std::get<2>(p); + RNG& rng = cv::theRNG(); + Mat src1(sz, depth), src2(sz, depth); + + cvtest::randUni(rng, src1, Scalar::all(0), Scalar::all(128)); + cvtest::randUni(rng, src2, Scalar::all(0), Scalar::all(128)); + + Mat dst; + cv::fastcv::arithmetic_op(src1, src2, dst, op); + + Mat ref; + if(op == 0) + cv::add(src1, src2, ref); + else if(op == 1) + cv::subtract(src1, src2, ref); + + double normInf = cvtest::norm(ref, dst, cv::NORM_INF); + double normL2 = cvtest::norm(ref, dst, cv::NORM_L2); + + EXPECT_EQ(normInf, 0); + EXPECT_EQ(normL2, 0); +} + +typedef testing::TestWithParam> IntegrateYUVTest; + +TEST_P(IntegrateYUVTest, accuracy) +{ + auto p = GetParam(); + Size srcSize = std::get<0>(p); + int depth = CV_8U; + + cv::Mat Y(srcSize, depth), CbCr(srcSize.height/2, srcSize.width, depth); + cv::Mat IY, ICb, ICr; + RNG& rng = cv::theRNG(); + 
cvtest::randUni(rng, Y, Scalar::all(0), Scalar::all(255)); + cvtest::randUni(rng, CbCr, Scalar::all(0), Scalar::all(255)); + + cv::fastcv::integrateYUV(Y, CbCr, IY, ICb, ICr); + + CbCr = CbCr.reshape(2,0); + std::vector ref; + cv::fastcv::split(CbCr, ref); + + cv::Mat IY_ref, ICb_ref, ICr_ref; + cv::integral(Y,IY_ref,CV_32S); + cv::integral(ref[0],ICb_ref,CV_32S); + cv::integral(ref[1],ICr_ref,CV_32S); + + EXPECT_EQ(IY_ref.at(IY_ref.rows - 1, IY_ref.cols - 1), IY.at(IY.rows - 1, IY.cols - 1)); + EXPECT_EQ(ICb_ref.at(ICb_ref.rows - 1, ICb_ref.cols - 1), ICb.at(ICb.rows - 1, ICb.cols - 1)); + EXPECT_EQ(ICr_ref.at(ICr_ref.rows - 1, ICr_ref.cols - 1), ICr.at(ICr.rows - 1, ICr.cols - 1)); +} + INSTANTIATE_TEST_CASE_P(FastCV_Extension, MatMulTest, ::testing::Combine(::testing::Values(8, 16, 128, 256), // rows1 ::testing::Values(8, 16, 128, 256), // cols1 ::testing::Values(8, 16, 128, 256))); // cols2 +INSTANTIATE_TEST_CASE_P(FastCV_Extension, ArithmOpTest, + ::testing::Combine(::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p), // sz + ::testing::Values(CV_8U, CV_16S), // depth + ::testing::Values(0,1))); // op type + +INSTANTIATE_TEST_CASE_P(FastCV_Extension, IntegrateYUVTest, + Values(perf::szVGA, perf::sz720p, perf::sz1080p)); // sz + }} // namespaces opencv_test, :: diff --git a/modules/fastcv/test/test_blur.cpp b/modules/fastcv/test/test_blur.cpp index 1dde0261f28..dd7aaacf54f 100644 --- a/modules/fastcv/test/test_blur.cpp +++ b/modules/fastcv/test/test_blur.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -108,6 +108,24 @@ TEST_P(SepFilter2DTest, accuracy) EXPECT_LT(num_diff_pixels, (src.rows+src.cols)*ksize); } +typedef testing::TestWithParam> NormalizeLocalBoxTest; + +TEST_P(NormalizeLocalBoxTest, accuracy) +{ + bool use_stddev = get<0>(GetParam()); + cv::Mat src, dst; + src = imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE); + + cv::fastcv::normalizeLocalBox(src, dst, Size(5,5), use_stddev); + Scalar s = cv::mean(dst); + + if(use_stddev) + EXPECT_LT(s[0],1); + else + EXPECT_LT(s[0],50); +} + + INSTANTIATE_TEST_CASE_P(FastCV_Extension, GaussianBlurTest, Combine( /*image size*/ ::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p), /*image depth*/ ::testing::Values(CV_8U,CV_16S,CV_32S), @@ -126,4 +144,7 @@ INSTANTIATE_TEST_CASE_P(FastCV_Extension, SepFilter2DTest, Combine( /*kernel size*/ Values(3, 5, 7, 9, 11) )); +INSTANTIATE_TEST_CASE_P(FastCV_Extension, NormalizeLocalBoxTest, Values(0,1)); + + }} // namespaces opencv_test, :: \ No newline at end of file diff --git a/modules/fastcv/test/test_channel.cpp b/modules/fastcv/test/test_channel.cpp new file mode 100644 index 00000000000..b3ed86ecfed --- /dev/null +++ b/modules/fastcv/test/test_channel.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +typedef std::tuple ChannelMergeTestParams; +class ChannelMergeTest : public ::testing::TestWithParam {}; + +typedef std::tuple ChannelSplitTestParams; +class ChannelSplitTest : public ::testing::TestWithParam {}; + +TEST_P(ChannelMergeTest, accuracy) +{ + int depth = CV_8UC1; + Size sz = std::get<0>(GetParam()); + int count = std::get<1>(GetParam()); + std::vector src_mats; + + RNG& rng = cv::theRNG(); + + for(int i = 0; i < count; i++) + { + Mat tmp(sz, depth); + src_mats.push_back(tmp); + cvtest::randUni(rng, src_mats[i], Scalar::all(0), Scalar::all(127)); + } + + Mat dst; + cv::fastcv::merge(src_mats, dst); + + Mat ref; + cv::merge(src_mats, ref); + + double normInf = cvtest::norm(ref, dst, cv::NORM_INF); + + EXPECT_EQ(normInf, 0); +} + +TEST_P(ChannelSplitTest, accuracy) +{ + Size sz = std::get<0>(GetParam()); + int cn = std::get<1>(GetParam()); + std::vector dst_mats(cn), ref_mats(cn); + + RNG& rng = cv::theRNG(); + Mat src(sz, CV_MAKE_TYPE(CV_8U,cn)); + cvtest::randUni(rng, src, Scalar::all(0), Scalar::all(127)); + + cv::fastcv::split(src, dst_mats); + + cv::split(src, ref_mats); + + for(int i=0; i &detections, if (tracking_per_class_ && (detections[d].class_label != tracklets[t]->label)) continue; + // Check if RGB features are available + auto t_rgb_features = tracklets[t]->GetRgbFeatures(); + if (!t_rgb_features || t_rgb_features->empty()) { + continue; // Skip if no RGB features are available + } + // Find best match in rgb feature history float min_dist = 1000.0f; for (const auto &t_rgb_feature : *(tracklets[t]->GetRgbFeatures())) { diff --git a/modules/gapi/src/3rdparty/vasot/src/components/ot/tracklet.cpp b/modules/gapi/src/3rdparty/vasot/src/components/ot/tracklet.cpp index 62e8d10cf67..7327325a4a7 100644 --- a/modules/gapi/src/3rdparty/vasot/src/components/ot/tracklet.cpp +++ 
b/modules/gapi/src/3rdparty/vasot/src/components/ot/tracklet.cpp @@ -7,13 +7,15 @@ #include "tracklet.hpp" #include +#include namespace vas { namespace ot { Tracklet::Tracklet() : id(0), label(-1), association_idx(kNoMatchDetection), status(ST_DEAD), age(0), confidence(0.f), - occlusion_ratio(0.f), association_delta_t(0.f), association_fail_count(0) { + occlusion_ratio(0.f), association_delta_t(0.f), association_fail_count(0), + rgb_features_(std::make_shared>()) { } Tracklet::~Tracklet() { @@ -45,12 +47,13 @@ void Tracklet::RenewTrajectory(const cv::Rect2f &bounding_box) { trajectory_filtered.push_back(bounding_box); } -#define DEFINE_STRING_VAR(var_name, value) \ - std::stringstream __##var_name; \ - __##var_name << value; \ - std::string var_name = __##var_name.str(); +std::deque *Tracklet::GetRgbFeatures() { + return rgb_features_.get(); // Return the raw pointer from the shared_ptr +} -#define ROUND_F(value, scale) (round((value)*scale) / scale) +void Tracklet::AddRgbFeature(const cv::Mat &feature) { + rgb_features_->push_back(feature); +} std::string Tracklet::Serialize() const { #ifdef DUMP_OTAV @@ -97,10 +100,6 @@ std::string Tracklet::Serialize() const { #endif } -std::deque *Tracklet::GetRgbFeatures() { - return nullptr; -} - ZeroTermImagelessTracklet::ZeroTermImagelessTracklet() : Tracklet(), birth_count(1) { } diff --git a/modules/gapi/src/3rdparty/vasot/src/components/ot/tracklet.hpp b/modules/gapi/src/3rdparty/vasot/src/components/ot/tracklet.hpp index 762e3f6ea65..5282a183357 100644 --- a/modules/gapi/src/3rdparty/vasot/src/components/ot/tracklet.hpp +++ b/modules/gapi/src/3rdparty/vasot/src/components/ot/tracklet.hpp @@ -13,6 +13,7 @@ #include #include +#include namespace vas { namespace ot { @@ -45,6 +46,7 @@ class Tracklet { virtual void RenewTrajectory(const cv::Rect2f &bounding_box); virtual std::deque *GetRgbFeatures(); + void AddRgbFeature(const cv::Mat &feature); virtual std::string Serialize() const; // Returns key:value with comma separated 
format public: @@ -63,6 +65,9 @@ class Tracklet { std::deque trajectory_filtered; cv::Rect2f predicted; // Result from Kalman prediction. It is for debugging (OTAV) mutable std::vector otav_msg; // Messages for OTAV + +private: + std::shared_ptr> rgb_features_; }; class ZeroTermImagelessTracklet : public Tracklet { diff --git a/modules/matlab/CMakeLists.txt b/modules/matlab/CMakeLists.txt index 17fd8427445..0bc89cb1ea6 100644 --- a/modules/matlab/CMakeLists.txt +++ b/modules/matlab/CMakeLists.txt @@ -113,6 +113,8 @@ ocv_add_module(matlab BINDINGS opencv_calib opencv_3d opencv_stitching opencv_superres opencv_xfeatures2d + opencv_optflow + opencv_xphoto ) # get the commit information @@ -156,6 +158,8 @@ endforeach() # add extra headers by hand list(APPEND opencv_extra_hdrs "core=${OPENCV_MODULE_opencv_core_LOCATION}/include/opencv2/core/base.hpp") list(APPEND opencv_extra_hdrs "video=${OPENCV_MODULE_opencv_video_LOCATION}/include/opencv2/video/tracking.hpp") +list(APPEND opencv_extra_hdrs "optflow=${OPENCV_MODULE_opencv_optflow_LOCATION}/include/opencv2/optflow.hpp") + # pass the OPENCV_CXX_EXTRA_FLAGS through to the mex compiler # remove the visibility modifiers, so the mex gateway is visible diff --git a/modules/matlab/generator/parse_tree.py b/modules/matlab/generator/parse_tree.py index 0a7ef364841..41d6d24d123 100644 --- a/modules/matlab/generator/parse_tree.py +++ b/modules/matlab/generator/parse_tree.py @@ -1,4 +1,10 @@ -import collections +import sys +if sys.version_info >= (3, 10): + import collections.abc + IterableType = collections.abc.Iterable +else: + import collections + IterableType = collections.Iterable from textwrap import fill from filters import * try: @@ -371,7 +377,7 @@ def todict(obj): return obj elif isinstance(obj, dict): return dict((key, todict(val)) for key, val in obj.items()) - elif isinstance(obj, collections.Iterable): + elif isinstance(obj, IterableType): return [todict(val) for val in obj] elif hasattr(obj, '__dict__'): return 
todict(vars(obj)) diff --git a/modules/matlab/include/opencv2/matlab/bridge.hpp b/modules/matlab/include/opencv2/matlab/bridge.hpp index c6fe605b2a4..f0e947a7da0 100644 --- a/modules/matlab/include/opencv2/matlab/bridge.hpp +++ b/modules/matlab/include/opencv2/matlab/bridge.hpp @@ -55,6 +55,21 @@ #include #include #include +#include +#include + +/* This 'using' line was added in order to fix the following Error. + * Failed to compile currentUIFramework: + * modules/matlab/src/currentUIFramework.cpp: + * In function void mexFunction(int, mxArray**, int, const mxArray**) + * error: string was not declared in this scope + * string retval; in line 41 + * + * This error happens at the last stage of opencv build, when compiling the mex bindings + * TODO: This is NOT the optimal fix, and needs to be addressed + */ +using std::string; + namespace cv { namespace bridge { @@ -85,17 +100,21 @@ typedef cv::Ptr Ptr_AlignMTB; typedef cv::Ptr Ptr_CalibrateDebevec; typedef cv::Ptr Ptr_CalibrateRobertson; typedef cv::Ptr Ptr_DenseOpticalFlow; -typedef cv::Ptr Ptr_DualTVL1OpticalFlow; +typedef cv::Ptr Ptr_DualTVL1OpticalFlow; typedef cv::Ptr Ptr_MergeDebevec; typedef cv::Ptr Ptr_MergeMertens; typedef cv::Ptr Ptr_MergeRobertson; typedef cv::Ptr Ptr_Stitcher; typedef cv::Ptr Ptr_Tonemap; typedef cv::Ptr Ptr_TonemapDrago; -typedef cv::Ptr Ptr_TonemapDurand; +typedef cv::Ptr Ptr_TonemapDurand; typedef cv::Ptr Ptr_TonemapMantiuk; typedef cv::Ptr Ptr_TonemapReinhard; typedef cv::Ptr Ptr_float; +typedef cv::Ptr Ptr_GeneralizedHoughBallard; +typedef cv::Ptr Ptr_GeneralizedHoughGuil; + + // ---------------------------------------------------------------------------- // PREDECLARATIONS @@ -527,6 +546,15 @@ class Bridge { Bridge& operator=(const Ptr_float& ) { return *this; } Ptr_float toPtrFloat() { return Ptr_float(); } operator Ptr_float() { return toPtrFloat(); } + + // --------------------------- Ptr_GeneralizedHoughBallard -------------- + Bridge& operator=(const 
Ptr_GeneralizedHoughBallard& obj) { return *this; } + operator Ptr_GeneralizedHoughBallard() { return Ptr_GeneralizedHoughBallard(); } + + // --------------------------- Ptr_GeneralizedHoughGuil ---------------------- + Bridge& operator=(const Ptr_GeneralizedHoughGuil& obj) { return *this; } + operator Ptr_GeneralizedHoughGuil() { return Ptr_GeneralizedHoughGuil(); } + }; // class Bridge