From b66fe3b6d1496cdba4e0a5fcbd86e02f0b79e61c Mon Sep 17 00:00:00 2001 From: svcbuild Date: Mon, 8 Dec 2025 14:35:36 -0800 Subject: [PATCH 1/3] Added ConvertTo operator --- include/core/detail/casting.hpp | 2 +- include/core/image_format.hpp | 6 + include/kernels/device/convert_to_device.hpp | 47 +++ include/kernels/host/convert_to_host.hpp | 47 +++ include/op_convert_to.hpp | 74 +++++ include/roccv_operators.hpp | 3 +- python/include/operators/py_op_convert_to.hpp | 40 +++ python/src/main.cpp | 2 + python/src/operators/py_op_convert_to.cpp | 85 ++++++ src/op_convert_to.cpp | 156 ++++++++++ tests/roccv/cpp/test_op_convert_to.cpp | 271 ++++++++++++++++++ tests/roccv/python/test_op_convert_to.py | 50 ++++ 12 files changed, 781 insertions(+), 2 deletions(-) create mode 100644 include/kernels/device/convert_to_device.hpp create mode 100644 include/kernels/host/convert_to_host.hpp create mode 100644 include/op_convert_to.hpp create mode 100644 python/include/operators/py_op_convert_to.hpp create mode 100644 python/src/operators/py_op_convert_to.cpp create mode 100644 src/op_convert_to.cpp create mode 100644 tests/roccv/cpp/test_op_convert_to.cpp create mode 100644 tests/roccv/python/test_op_convert_to.py diff --git a/include/core/detail/casting.hpp b/include/core/detail/casting.hpp index 391c457a..ea21b0bc 100644 --- a/include/core/detail/casting.hpp +++ b/include/core/detail/casting.hpp @@ -61,7 +61,7 @@ __device__ __host__ T ScalarSaturateCast(U v) { // Any integral signed -> Any integral unsigned, big -> small return v <= static_cast(std::numeric_limits::min()) ? std::numeric_limits::min() - : (v >= static_cast(std::numeric_limits::max()) ? std::numeric_limits::max + : (v >= static_cast(std::numeric_limits::max()) ? std::numeric_limits::max() : static_cast(v)); } else { // All other cases fall into this diff --git a/include/core/image_format.hpp b/include/core/image_format.hpp index 66227da5..48dbc486 100644 --- a/include/core/image_format.hpp +++ b/include/core/image_format.hpp @@ -114,6 +114,12 @@ constexpr ImageFormat FMT_RGB32(eDataType::DATA_TYPE_U32, 3, eSwizzle::XYZW); // Single plane with interleaved RGBA 32-bit channel. constexpr ImageFormat FMT_RGBA32(eDataType::DATA_TYPE_U32, 4, eSwizzle::XYZW); +// Single plane with interleaved RGB signed 32-bit channel. +constexpr ImageFormat FMT_RGBs32(eDataType::DATA_TYPE_S32, 3, eSwizzle::XYZW); + +// Single plane with interleaved RGBA signed 32-bit channel. +constexpr ImageFormat FMT_RGBAs32(eDataType::DATA_TYPE_S32, 4, eSwizzle::XYZW); + // Single plane with interleaved RGB float32 channel. constexpr ImageFormat FMT_RGBf32(eDataType::DATA_TYPE_F32, 3, eSwizzle::XYZW); diff --git a/include/kernels/device/convert_to_device.hpp b/include/kernels/device/convert_to_device.hpp new file mode 100644 index 00000000..b2144218 --- /dev/null +++ b/include/kernels/device/convert_to_device.hpp @@ -0,0 +1,47 @@ +/** +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include "core/detail/casting.hpp" +#include +#include + +namespace Kernels { +namespace Device { +template +__global__ void convert_to(SrcWrapper input, DstWrapper output, DT_AB alpha, DT_AB beta) { + using namespace roccv::detail; // For RangeCast, NumElements, etc. + using dst_type = typename DstWrapper::ValueType; + + const int x = threadIdx.x + blockIdx.x * blockDim.x; + const int y = threadIdx.y + blockIdx.y * blockDim.y; + const int batch = blockIdx.z; + + if (x >= output.width() || y >= output.height() || batch >= output.batches()) return; + + output.at(batch, y, x, 0) = SaturateCast(alpha * (input.at(batch, y, x, 0)) + beta); + +} +} // namespace Device +} // namespace Kernels \ No newline at end of file diff --git a/include/kernels/host/convert_to_host.hpp b/include/kernels/host/convert_to_host.hpp new file mode 100644 index 00000000..9c368d21 --- /dev/null +++ b/include/kernels/host/convert_to_host.hpp @@ -0,0 +1,47 @@ +/** +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include "core/detail/casting.hpp" +#include +#include + +namespace Kernels { +namespace Host { +template +void convert_to(SrcWrapper input, DstWrapper output, DT_AB alpha, DT_AB beta) { + using namespace roccv::detail; // For RangeCast, NumElements, etc. + using dst_type = typename DstWrapper::ValueType; + + for (int batch = 0; batch < output.batches(); batch++) { +#pragma omp parallel for + for (int y = 0; y < output.height(); y++) { + for (int x = 0; x < output.width(); x++) { + output.at(batch, y, x, 0) = SaturateCast(alpha * (input.at(batch, y, x, 0)) + beta); + } + } + } +} +} // namespace Host +} // namespace Kernels \ No newline at end of file diff --git a/include/op_convert_to.hpp b/include/op_convert_to.hpp new file mode 100644 index 00000000..baddab00 --- /dev/null +++ b/include/op_convert_to.hpp @@ -0,0 +1,74 @@ +/** +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#pragma once +#include +#include + +#include + +#include "core/tensor.hpp" + +namespace roccv { +/** + * @brief Class for managing the Warp Perspective operator. + * + */ +class ConvertTo final : public IOperator { + public: + /** + * @brief Construct a new Op Convert To object. The object can be used + * to convert the datatype of an image. + * outputs(x,y) = SaturateCast(alpha * inputs(x,y) + beta) + * + * Limitations: + * + * Input: + * Supported TensorLayout(s): [HWC, NHWC] + * Channels: [1, 2, 3, 4] + * Supported DataType(s): [U8, S8, U16, S16, S32, F32, F64] + * + * Output: + * Supported TensorLayout(s): [HWC, NHWC] + * Channels: [1, 2, 3, 4] + * Supported DataType(s): [U8, S8, U16, S16, S32, F32, F64] + * + * Input/Output dependency: + * + * Property | Input == Output + * -------------- | ------------- + * TensorLayout | Yes + * DataType | No + * Channels | Yes + * Width | Yes + * Height | Yes + * + * @param[in] stream The HIP stream to run this operator on. + * @param[in] input Input tensor with image data. + * @param[out] output Output tensor for storing modified image data. + * @param[in] alpha Scalar for output data. + * @param[in] beta Offset for the data. + * @param[in] device The device to run this operator on. (Default: GPU) + */ + void operator()(hipStream_t stream, const roccv::Tensor &input, const roccv::Tensor &output, + const double alpha, const double beta, const eDeviceType device = eDeviceType::GPU) const; +}; +} // namespace roccv \ No newline at end of file diff --git a/include/roccv_operators.hpp b/include/roccv_operators.hpp index 1a015a46..68621a7f 100644 --- a/include/roccv_operators.hpp +++ b/include/roccv_operators.hpp @@ -39,4 +39,5 @@ THE SOFTWARE. #include "op_rotate.hpp" #include "op_thresholding.hpp" #include "op_warp_affine.hpp" -#include "op_warp_perspective.hpp" \ No newline at end of file +#include "op_warp_perspective.hpp" +#include "op_convert_to.hpp" \ No newline at end of file diff --git a/python/include/operators/py_op_convert_to.hpp b/python/include/operators/py_op_convert_to.hpp new file mode 100644 index 00000000..3d0de16d --- /dev/null +++ b/python/include/operators/py_op_convert_to.hpp @@ -0,0 +1,40 @@ +/** +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include + +#include "py_stream.hpp" +#include "py_tensor.hpp" + +namespace py = pybind11; + +class PyOpConvertTo { + public: + static void Export(py::module& m); + static PyTensor Execute(PyTensor& input, eDataType dtype, double alpha, double beta, + std::optional> stream, eDeviceType device); + static void ExecuteInto(PyTensor& output, PyTensor& input, double alpha, double beta, + std::optional> stream, eDeviceType device); +}; \ No newline at end of file diff --git a/python/src/main.cpp b/python/src/main.cpp index ef697dab..a26c0f2b 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -41,6 +41,7 @@ THE SOFTWARE. #include "operators/py_op_thresholding.hpp" #include "operators/py_op_warp_affine.hpp" #include "operators/py_op_warp_perspective.hpp" +#include "operators/py_op_convert_to.hpp" #include "py_enums.hpp" #include "py_exception.hpp" #include "py_stream.hpp" @@ -76,4 +77,5 @@ PYBIND11_MODULE(rocpycv, m) { PyOpCopyMakeBorder::Export(m); PyOpCenterCrop::Export(m); PyOpHistogram::Export(m); + PyOpConvertTo::Export(m); } \ No newline at end of file diff --git a/python/src/operators/py_op_convert_to.cpp b/python/src/operators/py_op_convert_to.cpp new file mode 100644 index 00000000..bc2092d8 --- /dev/null +++ b/python/src/operators/py_op_convert_to.cpp @@ -0,0 +1,85 @@ +/** +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "operators/py_op_convert_to.hpp" + +#include + +PyTensor PyOpConvertTo::Execute(PyTensor& input, eDataType dtype, double alpha, double beta, + std::optional> stream, eDeviceType device) { + hipStream_t hipStream = stream.has_value() ? stream.value().get().getStream() : nullptr; + auto inputTensor = input.getTensor(); + auto outputTensor = std::make_shared(inputTensor->shape(), roccv::DataType(dtype), device); + + roccv::ConvertTo op; + op(hipStream, *inputTensor, *outputTensor, alpha, beta, device); + return PyTensor(outputTensor); +} + +void PyOpConvertTo::ExecuteInto(PyTensor& output, PyTensor& input, double alpha, double beta, + std::optional> stream, eDeviceType device) { + hipStream_t hipStream = stream.has_value() ? stream.value().get().getStream() : nullptr; + roccv::ConvertTo op; + op(hipStream, *input.getTensor(), *output.getTensor(), alpha, beta, device); +} + +void PyOpConvertTo::Export(py::module& m) { + using namespace py::literals; + m.def("convert_to", &PyOpConvertTo::Execute, "src"_a, "dtype"_a, "alpha"_a, "beta"_a, + "stream"_a = nullptr, "device"_a = eDeviceType::GPU, R"pbdoc( + + Executes the Convert To operation on the given HIP stream. + + See also: + Refer to the rocCV C++ API reference for more information on this operation. + + Args: + src (rocpycv.Tensor): Input tensor containing one or more images. + dtype (eDataType): Datatype of the output tensor. + alpha (double): Scalar for output data. + beta (double): Offset for the data. + stream (rocpycv.Stream, optional): HIP stream to run this operation on. + device (rocpycv.Device, optional): The device to run this operation on. Defaults to GPU. + + Returns: + rocpycv.Tensor: The output tensor. + )pbdoc"); + m.def("convert_to_into", &PyOpConvertTo::ExecuteInto, "dst"_a, "src"_a, "alpha"_a, "beta"_a, + "stream"_a = nullptr, "device"_a = eDeviceType::GPU, R"pbdoc( + + Executes the Convert To operation on the given HIP stream. + + See also: + Refer to the rocCV C++ API reference for more information on this operation. + + Args: + dst (rocpycv.Tensor): The output tensor with gamma correction applied. + src (rocpycv.Tensor): Input tensor containing one or more images. + alpha (double): Scalar for output data. + beta (double): Offset for the data. + stream (rocpycv.Stream, optional): HIP stream to run this operation on. + device (rocpycv.Device, optional): The device to run this operation on. Defaults to GPU. + + Returns: + None + )pbdoc"); +} \ No newline at end of file diff --git a/src/op_convert_to.cpp b/src/op_convert_to.cpp new file mode 100644 index 00000000..8aca15f6 --- /dev/null +++ b/src/op_convert_to.cpp @@ -0,0 +1,156 @@ +/** +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "op_convert_to.hpp" + +#include + +#include +#include "core/wrappers/image_wrapper.hpp" +#include "common/validation_helpers.hpp" +#include "core/detail/casting.hpp" +#include "core/detail/type_traits.hpp" +#include "kernels/device/convert_to_device.hpp" +#include "kernels/host/convert_to_host.hpp" + +namespace roccv { + +template +void dispatch_convert_to_channels(hipStream_t stream, const Tensor &input, const Tensor &output, + const double alpha, const double beta, const eDeviceType device) { + + using SRC_DT_NC = detail::MakeType; + using DST_DT_NC = detail::MakeType; + + ImageWrapper inputWrapper(input); + ImageWrapper outputWrapper(output); + + using SRC_BT = detail::BaseType; + using DST_BT = detail::BaseType; + + using DT_AB = decltype(float() * SRC_BT() * DST_BT()); + + DT_AB alpha_ab = detail::SaturateCast(alpha); + DT_AB beta_ab = detail::SaturateCast(beta); + + // Launch CPU/GPU kernel depending on requested device type. + switch (device) { + case eDeviceType::GPU: { + dim3 block(64, 16); + dim3 grid((outputWrapper.width() + block.x - 1) / block.x, (outputWrapper.height() + block.y - 1) / block.y, + outputWrapper.batches()); + Kernels::Device::convert_to<<>>(inputWrapper, outputWrapper, alpha_ab, beta_ab); + break; + } + case eDeviceType::CPU: { + Kernels::Host::convert_to(inputWrapper, outputWrapper, alpha_ab, beta_ab); + break; + } + } +} + +template +void dispatch_convert_to_output_dtype(hipStream_t stream, const Tensor &input, const Tensor &output, + const double alpha, const double beta, const eDeviceType device) { + + int64_t channels = output.shape(output.layout().channels_index()); + // Select kernel dispatcher based on number of channels. + // clang-format off + static const std::array, 4> + funcs = {dispatch_convert_to_channels, dispatch_convert_to_channels, dispatch_convert_to_channels, dispatch_convert_to_channels}; + + + // clang-format on + + auto func = funcs.at(channels - 1); + if (func == 0) throw Exception("Not mapped to a defined function.", eStatusType::INVALID_OPERATION); + func(stream, input, output, alpha, beta, device); +} + +template +void dispatch_convert_to_input_dtype(hipStream_t stream, const Tensor &input, const Tensor &output, + const double alpha, const double beta, const eDeviceType device) { + + eDataType output_dtype = output.dtype().etype(); + + // Select kernel dispatcher based on a base input datatype. 
+ // clang-format off + static const std::unordered_map> + funcs = { + {eDataType::DATA_TYPE_U8, dispatch_convert_to_output_dtype}, + {eDataType::DATA_TYPE_S8, dispatch_convert_to_output_dtype}, + {eDataType::DATA_TYPE_U16, dispatch_convert_to_output_dtype}, + {eDataType::DATA_TYPE_S16, dispatch_convert_to_output_dtype}, + {eDataType::DATA_TYPE_S32, dispatch_convert_to_output_dtype}, + {eDataType::DATA_TYPE_F32, dispatch_convert_to_output_dtype}, + {eDataType::DATA_TYPE_F64, dispatch_convert_to_output_dtype} + }; + // clang-format on + // std make pair possibly needed + auto func = funcs.at(output_dtype); + if (func == 0) throw Exception("Not mapped to a defined function.", eStatusType::INVALID_OPERATION); + func(stream, input, output, alpha, beta, device); + +} + +void ConvertTo::operator()(hipStream_t stream, const Tensor &input, const Tensor &output, + const double alpha, const double beta, const eDeviceType device) const { + + // Validate input tensor + CHECK_TENSOR_DEVICE(input, device); + CHECK_TENSOR_DATATYPES(input, DATA_TYPE_S8, DATA_TYPE_U8, DATA_TYPE_U16, DATA_TYPE_S16, + DATA_TYPE_S32, DATA_TYPE_F32, DATA_TYPE_F64); + CHECK_TENSOR_DATATYPES(output, DATA_TYPE_S8, DATA_TYPE_U8, DATA_TYPE_U16, DATA_TYPE_S16, + DATA_TYPE_S32, DATA_TYPE_F32, DATA_TYPE_F64); + CHECK_TENSOR_LAYOUT(input, TENSOR_LAYOUT_HWC, TENSOR_LAYOUT_NHWC); + CHECK_TENSOR_CHANNELS(input, 1, 2, 3, 4); + + eDataType input_dtype = input.dtype().etype(); + int64_t channels = input.shape(input.layout().channels_index()); + + // Validate output tensor + CHECK_TENSOR_COMPARISON(input.device() == output.device()); + CHECK_TENSOR_COMPARISON(output.shape(output.layout().channels_index()) == channels); + CHECK_TENSOR_COMPARISON(output.layout() == input.layout()); + if (output.layout().batch_index() != -1) { + CHECK_TENSOR_COMPARISON(output.shape(output.layout().batch_index()) == + input.shape(input.layout().batch_index())); + } + + // Select kernel dispatcher based on a base input datatype. + // clang-format off + static const std::unordered_map> + funcs = { + {eDataType::DATA_TYPE_U8, dispatch_convert_to_input_dtype}, + {eDataType::DATA_TYPE_S8, dispatch_convert_to_input_dtype}, + {eDataType::DATA_TYPE_U16, dispatch_convert_to_input_dtype}, + {eDataType::DATA_TYPE_S16, dispatch_convert_to_input_dtype}, + {eDataType::DATA_TYPE_S32, dispatch_convert_to_input_dtype}, + {eDataType::DATA_TYPE_F32, dispatch_convert_to_input_dtype}, + {eDataType::DATA_TYPE_F64, dispatch_convert_to_input_dtype} + }; + // clang-format on + // std make pair possibly needed + auto func = funcs.at(input_dtype); + if (func == 0) throw Exception("Not mapped to a defined function.", eStatusType::INVALID_OPERATION); + func(stream, input, output, alpha, beta, device); +} +} // namespace roccv \ No newline at end of file diff --git a/tests/roccv/cpp/test_op_convert_to.cpp b/tests/roccv/cpp/test_op_convert_to.cpp new file mode 100644 index 00000000..c354d0dd --- /dev/null +++ b/tests/roccv/cpp/test_op_convert_to.cpp @@ -0,0 +1,271 @@ +/** +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include +#include +#include "test_helpers.hpp" + +using namespace roccv; +using namespace roccv::tests; + +// Keep all non-entrypoint functions in an anonymous namespace to prevent redefinition errors across translation units. +namespace { + +/** + * @brief Verified golden C++ model for the ConvertTo operation. + * + * @tparam T Vectorized datatype of the image's pixels. + * @tparam BT Base type of the image's data. + * @param[in] input An input vector containing image data. + * @param[in] batchSize The number of images in the batch. + * @param[in] width Image width. + * @param[in] height Image height. + * @param[in] channels Number of channels in the image. + * @param[in] alpha Scalar for output data. + * @param[in] beta Offset for the data. + * @return Vector containing the results of the operation. + */ +template ,typename BT_DEST = detail::BaseType> +std::vector GoldenConvertTo(std::vector& input, int32_t batchSize, int32_t width, int32_t height, double alpha, double beta) { + // Create an output vector the same size as the input vector + std::vector output(input.size()); + + // Wrap input/output vectors for simplified data access + ImageWrapper src(input, batchSize, width, height); + ImageWrapper dst(output, batchSize, width, height); + + using AB_DT = decltype(float() * BT_SRC() * BT_DEST()); + + AB_DT alpha_dt = detail::SaturateCast(alpha); + AB_DT beta_dt = detail::SaturateCast(beta); + + for (int b = 0; b < batchSize; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + dst.at(b, y, x, 0) = detail::SaturateCast(alpha_dt * (src.at(b, y, x, 0)) + beta_dt); + } + } + } + return output; +} + +template , typename BT_DEST = detail::BaseType> +void TestCorrectness(int batchSize, int width, int height, ImageFormat inFormat, ImageFormat outFormat, double alpha, double beta, eDeviceType device) { + // Create input and output tensor based on test parameters + Tensor input(batchSize, {width, height}, inFormat, device); + Tensor output(batchSize, {width, height}, outFormat, device); + + // Create a vector and fill it with random data. 
+ std::vector inputData(input.shape().size()); + FillVector(inputData); + + // Copy generated input data into input tensor + CopyVectorIntoTensor(input, inputData); + + // Calculate golden output reference + std::vector ref = GoldenConvertTo(inputData, batchSize, width, height, alpha, beta); + + // Run roccv::Convert To operator to obtain actual results + hipStream_t stream; + HIP_VALIDATE_NO_ERRORS(hipStreamCreate(&stream)); + + ConvertTo op; + op(stream, input, output, alpha, beta, device); + HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(stream)); + HIP_VALIDATE_NO_ERRORS(hipStreamDestroy(stream)); + + // Copy data from output tensor into a host allocated vector + std::vector result(output.shape().size()); + CopyTensorIntoVector(result, output); + + // Compare data in actual output versus the generated golden reference image + CompareVectors(result, ref); +} + +} // namespace + +eTestStatusType test_op_convert_to(int argc, char** argv) { + TEST_CASES_BEGIN(); + + // CPU correctness tests + // 1 Channel + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_U8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_S8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_S8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_U8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_U16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_S16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_U8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_S8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_S8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_U8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_U16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_S16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_S16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_U16, 1.2, 10.2, eDeviceType::CPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_S32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_S32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S32, FMT_U8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S32, FMT_S8, 1.2, 10.2, eDeviceType::CPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_F32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_F32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_F32, FMT_U8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_F32, FMT_S8, 1.2, 10.2, eDeviceType::CPU))); + + // 3 Channels + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGB8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGBs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGBs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGB8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGB16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 
360, FMT_RGBs8, FMT_RGBs16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGB8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGBs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGBs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGB8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGB16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGBs16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGBs16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGB16, 1.2, 10.2, eDeviceType::CPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGBs32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGBs32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs32, FMT_RGB8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs32, FMT_RGBs8, 1.2, 10.2, eDeviceType::CPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGBf32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGBf32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBf32, FMT_RGB8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBf32, FMT_RGBs8, 1.2, 10.2, eDeviceType::CPU))); + + // 4 Channels + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBA8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBAs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBAs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBA8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBA16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBAs16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBA8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBAs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBAs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBA8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBA16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBAs16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBAs16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBA16, 1.2, 10.2, eDeviceType::CPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBAs32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBAs32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs32, FMT_RGBA8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs32, FMT_RGBAs8, 1.2, 10.2, eDeviceType::CPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBAf32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, 
FMT_RGBAf32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAf32, FMT_RGBA8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAf32, FMT_RGBAs8, 1.2, 10.2, eDeviceType::CPU))); + + // GPU Correctness Tests + // 1 Channels + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_U8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_S8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_S8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_U8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_U16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_S16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_U8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_S8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_S8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_U8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_U16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_S16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_S16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_U16, 1.2, 10.2, eDeviceType::GPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_S32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_S32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S32, FMT_U8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S32, FMT_S8, 1.2, 10.2, eDeviceType::GPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_F32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_F32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_F32, FMT_U8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_F32, FMT_S8, 1.2, 10.2, eDeviceType::GPU))); + + // 3 Channels + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGB8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGBs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGBs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGB8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGB16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGBs16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGB8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGBs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGBs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGB8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGB16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGBs16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGBs16, 1.2, 10.2, 
eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGB16, 1.2, 10.2, eDeviceType::GPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGBs32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGBs32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs32, FMT_RGB8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs32, FMT_RGBs8, 1.2, 10.2, eDeviceType::GPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGBf32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGBf32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBf32, FMT_RGB8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBf32, FMT_RGBs8, 1.2, 10.2, eDeviceType::GPU))); + + // 4 Channels + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBA8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBAs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBAs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBA8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBA16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBAs16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBA8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBAs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBAs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBA8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBA16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBAs16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBAs16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBA16, 1.2, 10.2, eDeviceType::GPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBAs32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBAs32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs32, FMT_RGBA8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs32, FMT_RGBAs8, 1.2, 10.2, eDeviceType::GPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBAf32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBAf32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAf32, FMT_RGBA8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAf32, FMT_RGBAs8, 1.2, 10.2, eDeviceType::GPU))); + + TEST_CASES_END(); +} \ No newline at end of file diff --git a/tests/roccv/python/test_op_convert_to.py b/tests/roccv/python/test_op_convert_to.py new file mode 100644 index 00000000..1a4dfad8 --- /dev/null +++ b/tests/roccv/python/test_op_convert_to.py @@ -0,0 +1,50 @@ +# ############################################################################## +# Copyright (c) - 2025 Advanced Micro Devices, Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# ############################################################################## + +import pytest +import rocpycv + +from test_helpers import generate_tensor, compare_tensors + + +@pytest.mark.parametrize("device", [rocpycv.eDeviceType.GPU, rocpycv.eDeviceType.CPU]) +@pytest.mark.parametrize("dtype", [rocpycv.eDataType.U8, rocpycv.eDataType.S8, rocpycv.eDataType.U16, rocpycv.eDataType.S16, rocpycv.eDataType.S32, rocpycv.eDataType.F32]) +@pytest.mark.parametrize("out_dtype", [rocpycv.eDataType.U8, rocpycv.eDataType.S8, rocpycv.eDataType.U16, rocpycv.eDataType.S16, rocpycv.eDataType.S32, rocpycv.eDataType.F32]) +@pytest.mark.parametrize("channels", [1, 3, 4]) +@pytest.mark.parametrize("alpha", [1.2]) +@pytest.mark.parametrize("beta", [10.2]) +@pytest.mark.parametrize("samples,height,width", [ + (1, 45, 23), + (3, 67, 85), + (7, 25, 95) +]) +def test_op_convert_to(samples, height, width, channels, device, dtype, out_dtype, alpha, beta): + input = generate_tensor(samples, width, height, channels, dtype, device) + output_golden = rocpycv.Tensor([samples, height, width, channels], rocpycv.eTensorLayout.NHWC, out_dtype, device) + + stream = rocpycv.Stream() + rocpycv.convert_to_into(output_golden, input, alpha, beta, stream, device) + output = rocpycv.convert_to(input, out_dtype, alpha, beta, stream, device) + stream.synchronize() + + compare_tensors(output, output_golden) From 9360b53f5d6bec00e56edbc378c224f4d514b4ee Mon Sep 17 00:00:00 2001 From: svcbuild Date: Mon, 15 Dec 2025 13:02:03 -0800 Subject: [PATCH 2/3] added default arguments for alpha and beta --- include/op_convert_to.hpp | 6 +++--- python/src/operators/py_op_convert_to.cpp | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/op_convert_to.hpp b/include/op_convert_to.hpp index baddab00..3f4452b0 100644 --- a/include/op_convert_to.hpp +++ b/include/op_convert_to.hpp @@ -64,11 +64,11 @@ class ConvertTo final : public IOperator { * @param[in] stream The HIP stream to run this operator on. * @param[in] input Input tensor with image data. * @param[out] output Output tensor for storing modified image data. - * @param[in] alpha Scalar for output data. - * @param[in] beta Offset for the data. + * @param[in] alpha Scalar for output data. (Default: 1.0) + * @param[in] beta Offset for the data. (Default: 0.0) * @param[in] device The device to run this operator on. 
(Default: GPU) */ void operator()(hipStream_t stream, const roccv::Tensor &input, const roccv::Tensor &output, - const double alpha, const double beta, const eDeviceType device = eDeviceType::GPU) const; + const double alpha = 1.0, const double beta = 0.0, const eDeviceType device = eDeviceType::GPU) const; }; } // namespace roccv \ No newline at end of file diff --git a/python/src/operators/py_op_convert_to.cpp b/python/src/operators/py_op_convert_to.cpp index bc2092d8..e8828ec8 100644 --- a/python/src/operators/py_op_convert_to.cpp +++ b/python/src/operators/py_op_convert_to.cpp @@ -44,7 +44,7 @@ void PyOpConvertTo::ExecuteInto(PyTensor& output, PyTensor& input, double alpha, void PyOpConvertTo::Export(py::module& m) { using namespace py::literals; - m.def("convert_to", &PyOpConvertTo::Execute, "src"_a, "dtype"_a, "alpha"_a, "beta"_a, + m.def("convert_to", &PyOpConvertTo::Execute, "src"_a, "dtype"_a, "alpha"_a = 1.0, "beta"_a = 0.0, "stream"_a = nullptr, "device"_a = eDeviceType::GPU, R"pbdoc( Executes the Convert To operation on the given HIP stream. @@ -55,15 +55,15 @@ void PyOpConvertTo::Export(py::module& m) { Args: src (rocpycv.Tensor): Input tensor containing one or more images. dtype (eDataType): Datatype of the output tensor. - alpha (double): Scalar for output data. - beta (double): Offset for the data. + alpha (double, optional): Scalar for output data. Defaults to 1.0. + beta (double, optional): Offset for the data. Defaults to 0.0. stream (rocpycv.Stream, optional): HIP stream to run this operation on. device (rocpycv.Device, optional): The device to run this operation on. Defaults to GPU. Returns: rocpycv.Tensor: The output tensor. )pbdoc"); - m.def("convert_to_into", &PyOpConvertTo::ExecuteInto, "dst"_a, "src"_a, "alpha"_a, "beta"_a, + m.def("convert_to_into", &PyOpConvertTo::ExecuteInto, "dst"_a, "src"_a, "alpha"_a = 1.0, "beta"_a = 0.0, "stream"_a = nullptr, "device"_a = eDeviceType::GPU, R"pbdoc( Executes the Convert To operation on the given HIP stream. @@ -74,8 +74,8 @@ void PyOpConvertTo::Export(py::module& m) { Args: dst (rocpycv.Tensor): The output tensor with gamma correction applied. src (rocpycv.Tensor): Input tensor containing one or more images. - alpha (double): Scalar for output data. - beta (double): Offset for the data. + alpha (double): Scalar for output data. Defaults to 1.0. + beta (double): Offset for the data. Defaults to 0.0. stream (rocpycv.Stream, optional): HIP stream to run this operation on. device (rocpycv.Device, optional): The device to run this operation on. Defaults to GPU. From 184b90f4c5da22545b408a9eaf115ac24af81604 Mon Sep 17 00:00:00 2001 From: svcbuild Date: Mon, 15 Dec 2025 13:06:52 -0800 Subject: [PATCH 3/3] minor fix --- python/src/operators/py_op_convert_to.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/src/operators/py_op_convert_to.cpp b/python/src/operators/py_op_convert_to.cpp index e8828ec8..483903a1 100644 --- a/python/src/operators/py_op_convert_to.cpp +++ b/python/src/operators/py_op_convert_to.cpp @@ -74,8 +74,8 @@ void PyOpConvertTo::Export(py::module& m) { Args: dst (rocpycv.Tensor): The output tensor with gamma correction applied. src (rocpycv.Tensor): Input tensor containing one or more images. - alpha (double): Scalar for output data. Defaults to 1.0. - beta (double): Offset for the data. Defaults to 0.0. + alpha (double, optional): Scalar for output data. Defaults to 1.0. + beta (double, optional): Offset for the data. Defaults to 0.0. 
stream (rocpycv.Stream, optional): HIP stream to run this operation on.
device (rocpycv.Device, optional): The device to run this operation on. Defaults to GPU.
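
For reviewers, a minimal usage sketch of the Python binding added by this series. It only uses calls that appear in the patch itself (rocpycv.Tensor, rocpycv.Stream, rocpycv.convert_to); the image shape, dtypes, and alpha/beta values are illustrative, and the input tensor is left unfilled for brevity:

import rocpycv

# One 480x360 RGB U8 image in NHWC layout (contents left uninitialized in this sketch).
src = rocpycv.Tensor([1, 360, 480, 3], rocpycv.eTensorLayout.NHWC,
                     rocpycv.eDataType.U8, rocpycv.eDeviceType.GPU)

stream = rocpycv.Stream()

# dst(x, y) = SaturateCast<F32>(alpha * src(x, y) + beta); here alpha = 1/255 rescales U8 data into [0, 1].
dst = rocpycv.convert_to(src, rocpycv.eDataType.F32, 1.0 / 255.0, 0.0,
                         stream, rocpycv.eDeviceType.GPU)
stream.synchronize()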