From b66fe3b6d1496cdba4e0a5fcbd86e02f0b79e61c Mon Sep 17 00:00:00 2001 From: svcbuild Date: Mon, 8 Dec 2025 14:35:36 -0800 Subject: [PATCH 1/3] Added ConvertTo operator --- include/core/detail/casting.hpp | 2 +- include/core/image_format.hpp | 6 + include/kernels/device/convert_to_device.hpp | 47 +++ include/kernels/host/convert_to_host.hpp | 47 +++ include/op_convert_to.hpp | 74 +++++ include/roccv_operators.hpp | 3 +- python/include/operators/py_op_convert_to.hpp | 40 +++ python/src/main.cpp | 2 + python/src/operators/py_op_convert_to.cpp | 85 ++++++ src/op_convert_to.cpp | 156 ++++++++++ tests/roccv/cpp/test_op_convert_to.cpp | 271 ++++++++++++++++++ tests/roccv/python/test_op_convert_to.py | 50 ++++ 12 files changed, 781 insertions(+), 2 deletions(-) create mode 100644 include/kernels/device/convert_to_device.hpp create mode 100644 include/kernels/host/convert_to_host.hpp create mode 100644 include/op_convert_to.hpp create mode 100644 python/include/operators/py_op_convert_to.hpp create mode 100644 python/src/operators/py_op_convert_to.cpp create mode 100644 src/op_convert_to.cpp create mode 100644 tests/roccv/cpp/test_op_convert_to.cpp create mode 100644 tests/roccv/python/test_op_convert_to.py diff --git a/include/core/detail/casting.hpp b/include/core/detail/casting.hpp index 391c457a..ea21b0bc 100644 --- a/include/core/detail/casting.hpp +++ b/include/core/detail/casting.hpp @@ -61,7 +61,7 @@ __device__ __host__ T ScalarSaturateCast(U v) { // Any integral signed -> Any integral unsigned, big -> small return v <= static_cast(std::numeric_limits::min()) ? std::numeric_limits::min() - : (v >= static_cast(std::numeric_limits::max()) ? std::numeric_limits::max + : (v >= static_cast(std::numeric_limits::max()) ? std::numeric_limits::max() : static_cast(v)); } else { // All other cases fall into this diff --git a/include/core/image_format.hpp b/include/core/image_format.hpp index 66227da5..48dbc486 100644 --- a/include/core/image_format.hpp +++ b/include/core/image_format.hpp @@ -114,6 +114,12 @@ constexpr ImageFormat FMT_RGB32(eDataType::DATA_TYPE_U32, 3, eSwizzle::XYZW); // Single plane with interleaved RGBA 32-bit channel. constexpr ImageFormat FMT_RGBA32(eDataType::DATA_TYPE_U32, 4, eSwizzle::XYZW); +// Single plane with interleaved RGB signed 32-bit channel. +constexpr ImageFormat FMT_RGBs32(eDataType::DATA_TYPE_S32, 3, eSwizzle::XYZW); + +// Single plane with interleaved RGBA signed 32-bit channel. +constexpr ImageFormat FMT_RGBAs32(eDataType::DATA_TYPE_S32, 4, eSwizzle::XYZW); + // Single plane with interleaved RGB float32 channel. constexpr ImageFormat FMT_RGBf32(eDataType::DATA_TYPE_F32, 3, eSwizzle::XYZW); diff --git a/include/kernels/device/convert_to_device.hpp b/include/kernels/device/convert_to_device.hpp new file mode 100644 index 00000000..b2144218 --- /dev/null +++ b/include/kernels/device/convert_to_device.hpp @@ -0,0 +1,47 @@ +/** +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include "core/detail/casting.hpp" +#include +#include + +namespace Kernels { +namespace Device { +template +__global__ void convert_to(SrcWrapper input, DstWrapper output, DT_AB alpha, DT_AB beta) { + using namespace roccv::detail; // For RangeCast, NumElements, etc. + using dst_type = typename DstWrapper::ValueType; + + const int x = threadIdx.x + blockIdx.x * blockDim.x; + const int y = threadIdx.y + blockIdx.y * blockDim.y; + const int batch = blockIdx.z; + + if (x >= output.width() || y >= output.height() || batch >= output.batches()) return; + + output.at(batch, y, x, 0) = SaturateCast(alpha * (input.at(batch, y, x, 0)) + beta); + +} +} // namespace Device +} // namespace Kernels \ No newline at end of file diff --git a/include/kernels/host/convert_to_host.hpp b/include/kernels/host/convert_to_host.hpp new file mode 100644 index 00000000..9c368d21 --- /dev/null +++ b/include/kernels/host/convert_to_host.hpp @@ -0,0 +1,47 @@ +/** +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include "core/detail/casting.hpp" +#include +#include + +namespace Kernels { +namespace Host { +template +void convert_to(SrcWrapper input, DstWrapper output, DT_AB alpha, DT_AB beta) { + using namespace roccv::detail; // For RangeCast, NumElements, etc. + using dst_type = typename DstWrapper::ValueType; + + for (int batch = 0; batch < output.batches(); batch++) { +#pragma omp parallel for + for (int y = 0; y < output.height(); y++) { + for (int x = 0; x < output.width(); x++) { + output.at(batch, y, x, 0) = SaturateCast(alpha * (input.at(batch, y, x, 0)) + beta); + } + } + } +} +} // namespace Host +} // namespace Kernels \ No newline at end of file diff --git a/include/op_convert_to.hpp b/include/op_convert_to.hpp new file mode 100644 index 00000000..baddab00 --- /dev/null +++ b/include/op_convert_to.hpp @@ -0,0 +1,74 @@ +/** +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#pragma once +#include +#include + +#include + +#include "core/tensor.hpp" + +namespace roccv { +/** + * @brief Class for managing the Warp Perspective operator. + * + */ +class ConvertTo final : public IOperator { + public: + /** + * @brief Construct a new Op Convert To object. The object can be used + * to convert the datatype of an image. + * outputs(x,y) = SaturateCast(alpha * inputs(x,y) + beta) + * + * Limitations: + * + * Input: + * Supported TensorLayout(s): [HWC, NHWC] + * Channels: [1, 2, 3, 4] + * Supported DataType(s): [U8, S8, U16, S16, S32, F32, F64] + * + * Output: + * Supported TensorLayout(s): [HWC, NHWC] + * Channels: [1, 2, 3, 4] + * Supported DataType(s): [U8, S8, U16, S16, S32, F32, F64] + * + * Input/Output dependency: + * + * Property | Input == Output + * -------------- | ------------- + * TensorLayout | Yes + * DataType | No + * Channels | Yes + * Width | Yes + * Height | Yes + * + * @param[in] stream The HIP stream to run this operator on. + * @param[in] input Input tensor with image data. + * @param[out] output Output tensor for storing modified image data. + * @param[in] alpha Scalar for output data. + * @param[in] beta Offset for the data. + * @param[in] device The device to run this operator on. (Default: GPU) + */ + void operator()(hipStream_t stream, const roccv::Tensor &input, const roccv::Tensor &output, + const double alpha, const double beta, const eDeviceType device = eDeviceType::GPU) const; +}; +} // namespace roccv \ No newline at end of file diff --git a/include/roccv_operators.hpp b/include/roccv_operators.hpp index 1a015a46..68621a7f 100644 --- a/include/roccv_operators.hpp +++ b/include/roccv_operators.hpp @@ -39,4 +39,5 @@ THE SOFTWARE. #include "op_rotate.hpp" #include "op_thresholding.hpp" #include "op_warp_affine.hpp" -#include "op_warp_perspective.hpp" \ No newline at end of file +#include "op_warp_perspective.hpp" +#include "op_convert_to.hpp" \ No newline at end of file diff --git a/python/include/operators/py_op_convert_to.hpp b/python/include/operators/py_op_convert_to.hpp new file mode 100644 index 00000000..3d0de16d --- /dev/null +++ b/python/include/operators/py_op_convert_to.hpp @@ -0,0 +1,40 @@ +/** +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include + +#include "py_stream.hpp" +#include "py_tensor.hpp" + +namespace py = pybind11; + +class PyOpConvertTo { + public: + static void Export(py::module& m); + static PyTensor Execute(PyTensor& input, eDataType dtype, double alpha, double beta, + std::optional> stream, eDeviceType device); + static void ExecuteInto(PyTensor& output, PyTensor& input, double alpha, double beta, + std::optional> stream, eDeviceType device); +}; \ No newline at end of file diff --git a/python/src/main.cpp b/python/src/main.cpp index ef697dab..a26c0f2b 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -41,6 +41,7 @@ THE SOFTWARE. #include "operators/py_op_thresholding.hpp" #include "operators/py_op_warp_affine.hpp" #include "operators/py_op_warp_perspective.hpp" +#include "operators/py_op_convert_to.hpp" #include "py_enums.hpp" #include "py_exception.hpp" #include "py_stream.hpp" @@ -76,4 +77,5 @@ PYBIND11_MODULE(rocpycv, m) { PyOpCopyMakeBorder::Export(m); PyOpCenterCrop::Export(m); PyOpHistogram::Export(m); + PyOpConvertTo::Export(m); } \ No newline at end of file diff --git a/python/src/operators/py_op_convert_to.cpp b/python/src/operators/py_op_convert_to.cpp new file mode 100644 index 00000000..bc2092d8 --- /dev/null +++ b/python/src/operators/py_op_convert_to.cpp @@ -0,0 +1,85 @@ +/** +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "operators/py_op_convert_to.hpp" + +#include + +PyTensor PyOpConvertTo::Execute(PyTensor& input, eDataType dtype, double alpha, double beta, + std::optional> stream, eDeviceType device) { + hipStream_t hipStream = stream.has_value() ? stream.value().get().getStream() : nullptr; + auto inputTensor = input.getTensor(); + auto outputTensor = std::make_shared(inputTensor->shape(), roccv::DataType(dtype), device); + + roccv::ConvertTo op; + op(hipStream, *inputTensor, *outputTensor, alpha, beta, device); + return PyTensor(outputTensor); +} + +void PyOpConvertTo::ExecuteInto(PyTensor& output, PyTensor& input, double alpha, double beta, + std::optional> stream, eDeviceType device) { + hipStream_t hipStream = stream.has_value() ? stream.value().get().getStream() : nullptr; + roccv::ConvertTo op; + op(hipStream, *input.getTensor(), *output.getTensor(), alpha, beta, device); +} + +void PyOpConvertTo::Export(py::module& m) { + using namespace py::literals; + m.def("convert_to", &PyOpConvertTo::Execute, "src"_a, "dtype"_a, "alpha"_a, "beta"_a, + "stream"_a = nullptr, "device"_a = eDeviceType::GPU, R"pbdoc( + + Executes the Convert To operation on the given HIP stream. + + See also: + Refer to the rocCV C++ API reference for more information on this operation. + + Args: + src (rocpycv.Tensor): Input tensor containing one or more images. + dtype (eDataType): Datatype of the output tensor. + alpha (double): Scalar for output data. + beta (double): Offset for the data. + stream (rocpycv.Stream, optional): HIP stream to run this operation on. + device (rocpycv.Device, optional): The device to run this operation on. Defaults to GPU. + + Returns: + rocpycv.Tensor: The output tensor. + )pbdoc"); + m.def("convert_to_into", &PyOpConvertTo::ExecuteInto, "dst"_a, "src"_a, "alpha"_a, "beta"_a, + "stream"_a = nullptr, "device"_a = eDeviceType::GPU, R"pbdoc( + + Executes the Convert To operation on the given HIP stream. + + See also: + Refer to the rocCV C++ API reference for more information on this operation. + + Args: + dst (rocpycv.Tensor): The output tensor with gamma correction applied. + src (rocpycv.Tensor): Input tensor containing one or more images. + alpha (double): Scalar for output data. + beta (double): Offset for the data. + stream (rocpycv.Stream, optional): HIP stream to run this operation on. + device (rocpycv.Device, optional): The device to run this operation on. Defaults to GPU. + + Returns: + None + )pbdoc"); +} \ No newline at end of file diff --git a/src/op_convert_to.cpp b/src/op_convert_to.cpp new file mode 100644 index 00000000..8aca15f6 --- /dev/null +++ b/src/op_convert_to.cpp @@ -0,0 +1,156 @@ +/** +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "op_convert_to.hpp" + +#include + +#include +#include "core/wrappers/image_wrapper.hpp" +#include "common/validation_helpers.hpp" +#include "core/detail/casting.hpp" +#include "core/detail/type_traits.hpp" +#include "kernels/device/convert_to_device.hpp" +#include "kernels/host/convert_to_host.hpp" + +namespace roccv { + +template +void dispatch_convert_to_channels(hipStream_t stream, const Tensor &input, const Tensor &output, + const double alpha, const double beta, const eDeviceType device) { + + using SRC_DT_NC = detail::MakeType; + using DST_DT_NC = detail::MakeType; + + ImageWrapper inputWrapper(input); + ImageWrapper outputWrapper(output); + + using SRC_BT = detail::BaseType; + using DST_BT = detail::BaseType; + + using DT_AB = decltype(float() * SRC_BT() * DST_BT()); + + DT_AB alpha_ab = detail::SaturateCast(alpha); + DT_AB beta_ab = detail::SaturateCast(beta); + + // Launch CPU/GPU kernel depending on requested device type. + switch (device) { + case eDeviceType::GPU: { + dim3 block(64, 16); + dim3 grid((outputWrapper.width() + block.x - 1) / block.x, (outputWrapper.height() + block.y - 1) / block.y, + outputWrapper.batches()); + Kernels::Device::convert_to<<>>(inputWrapper, outputWrapper, alpha_ab, beta_ab); + break; + } + case eDeviceType::CPU: { + Kernels::Host::convert_to(inputWrapper, outputWrapper, alpha_ab, beta_ab); + break; + } + } +} + +template +void dispatch_convert_to_output_dtype(hipStream_t stream, const Tensor &input, const Tensor &output, + const double alpha, const double beta, const eDeviceType device) { + + int64_t channels = output.shape(output.layout().channels_index()); + // Select kernel dispatcher based on number of channels. + // clang-format off + static const std::array, 4> + funcs = {dispatch_convert_to_channels, dispatch_convert_to_channels, dispatch_convert_to_channels, dispatch_convert_to_channels}; + + + // clang-format on + + auto func = funcs.at(channels - 1); + if (func == 0) throw Exception("Not mapped to a defined function.", eStatusType::INVALID_OPERATION); + func(stream, input, output, alpha, beta, device); +} + +template +void dispatch_convert_to_input_dtype(hipStream_t stream, const Tensor &input, const Tensor &output, + const double alpha, const double beta, const eDeviceType device) { + + eDataType output_dtype = output.dtype().etype(); + + // Select kernel dispatcher based on a base input datatype. 
+ // clang-format off + static const std::unordered_map> + funcs = { + {eDataType::DATA_TYPE_U8, dispatch_convert_to_output_dtype}, + {eDataType::DATA_TYPE_S8, dispatch_convert_to_output_dtype}, + {eDataType::DATA_TYPE_U16, dispatch_convert_to_output_dtype}, + {eDataType::DATA_TYPE_S16, dispatch_convert_to_output_dtype}, + {eDataType::DATA_TYPE_S32, dispatch_convert_to_output_dtype}, + {eDataType::DATA_TYPE_F32, dispatch_convert_to_output_dtype}, + {eDataType::DATA_TYPE_F64, dispatch_convert_to_output_dtype} + }; + // clang-format on + // std make pair possibly needed + auto func = funcs.at(output_dtype); + if (func == 0) throw Exception("Not mapped to a defined function.", eStatusType::INVALID_OPERATION); + func(stream, input, output, alpha, beta, device); + +} + +void ConvertTo::operator()(hipStream_t stream, const Tensor &input, const Tensor &output, + const double alpha, const double beta, const eDeviceType device) const { + + // Validate input tensor + CHECK_TENSOR_DEVICE(input, device); + CHECK_TENSOR_DATATYPES(input, DATA_TYPE_S8, DATA_TYPE_U8, DATA_TYPE_U16, DATA_TYPE_S16, + DATA_TYPE_S32, DATA_TYPE_F32, DATA_TYPE_F64); + CHECK_TENSOR_DATATYPES(output, DATA_TYPE_S8, DATA_TYPE_U8, DATA_TYPE_U16, DATA_TYPE_S16, + DATA_TYPE_S32, DATA_TYPE_F32, DATA_TYPE_F64); + CHECK_TENSOR_LAYOUT(input, TENSOR_LAYOUT_HWC, TENSOR_LAYOUT_NHWC); + CHECK_TENSOR_CHANNELS(input, 1, 2, 3, 4); + + eDataType input_dtype = input.dtype().etype(); + int64_t channels = input.shape(input.layout().channels_index()); + + // Validate output tensor + CHECK_TENSOR_COMPARISON(input.device() == output.device()); + CHECK_TENSOR_COMPARISON(output.shape(output.layout().channels_index()) == channels); + CHECK_TENSOR_COMPARISON(output.layout() == input.layout()); + if (output.layout().batch_index() != -1) { + CHECK_TENSOR_COMPARISON(output.shape(output.layout().batch_index()) == + input.shape(input.layout().batch_index())); + } + + // Select kernel dispatcher based on a base input datatype. + // clang-format off + static const std::unordered_map> + funcs = { + {eDataType::DATA_TYPE_U8, dispatch_convert_to_input_dtype}, + {eDataType::DATA_TYPE_S8, dispatch_convert_to_input_dtype}, + {eDataType::DATA_TYPE_U16, dispatch_convert_to_input_dtype}, + {eDataType::DATA_TYPE_S16, dispatch_convert_to_input_dtype}, + {eDataType::DATA_TYPE_S32, dispatch_convert_to_input_dtype}, + {eDataType::DATA_TYPE_F32, dispatch_convert_to_input_dtype}, + {eDataType::DATA_TYPE_F64, dispatch_convert_to_input_dtype} + }; + // clang-format on + // std make pair possibly needed + auto func = funcs.at(input_dtype); + if (func == 0) throw Exception("Not mapped to a defined function.", eStatusType::INVALID_OPERATION); + func(stream, input, output, alpha, beta, device); +} +} // namespace roccv \ No newline at end of file diff --git a/tests/roccv/cpp/test_op_convert_to.cpp b/tests/roccv/cpp/test_op_convert_to.cpp new file mode 100644 index 00000000..c354d0dd --- /dev/null +++ b/tests/roccv/cpp/test_op_convert_to.cpp @@ -0,0 +1,271 @@ +/** +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include +#include +#include "test_helpers.hpp" + +using namespace roccv; +using namespace roccv::tests; + +// Keep all non-entrypoint functions in an anonymous namespace to prevent redefinition errors across translation units. +namespace { + +/** + * @brief Verified golden C++ model for the ConvertTo operation. + * + * @tparam T Vectorized datatype of the image's pixels. + * @tparam BT Base type of the image's data. + * @param[in] input An input vector containing image data. + * @param[in] batchSize The number of images in the batch. + * @param[in] width Image width. + * @param[in] height Image height. + * @param[in] channels Number of channels in the image. + * @param[in] alpha Scalar for output data. + * @param[in] beta Offset for the data. + * @return Vector containing the results of the operation. + */ +template ,typename BT_DEST = detail::BaseType> +std::vector GoldenConvertTo(std::vector& input, int32_t batchSize, int32_t width, int32_t height, double alpha, double beta) { + // Create an output vector the same size as the input vector + std::vector output(input.size()); + + // Wrap input/output vectors for simplified data access + ImageWrapper src(input, batchSize, width, height); + ImageWrapper dst(output, batchSize, width, height); + + using AB_DT = decltype(float() * BT_SRC() * BT_DEST()); + + AB_DT alpha_dt = detail::SaturateCast(alpha); + AB_DT beta_dt = detail::SaturateCast(beta); + + for (int b = 0; b < batchSize; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + dst.at(b, y, x, 0) = detail::SaturateCast(alpha_dt * (src.at(b, y, x, 0)) + beta_dt); + } + } + } + return output; +} + +template , typename BT_DEST = detail::BaseType> +void TestCorrectness(int batchSize, int width, int height, ImageFormat inFormat, ImageFormat outFormat, double alpha, double beta, eDeviceType device) { + // Create input and output tensor based on test parameters + Tensor input(batchSize, {width, height}, inFormat, device); + Tensor output(batchSize, {width, height}, outFormat, device); + + // Create a vector and fill it with random data. 
+ std::vector inputData(input.shape().size()); + FillVector(inputData); + + // Copy generated input data into input tensor + CopyVectorIntoTensor(input, inputData); + + // Calculate golden output reference + std::vector ref = GoldenConvertTo(inputData, batchSize, width, height, alpha, beta); + + // Run roccv::Convert To operator to obtain actual results + hipStream_t stream; + HIP_VALIDATE_NO_ERRORS(hipStreamCreate(&stream)); + + ConvertTo op; + op(stream, input, output, alpha, beta, device); + HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(stream)); + HIP_VALIDATE_NO_ERRORS(hipStreamDestroy(stream)); + + // Copy data from output tensor into a host allocated vector + std::vector result(output.shape().size()); + CopyTensorIntoVector(result, output); + + // Compare data in actual output versus the generated golden reference image + CompareVectors(result, ref); +} + +} // namespace + +eTestStatusType test_op_convert_to(int argc, char** argv) { + TEST_CASES_BEGIN(); + + // CPU correctness tests + // 1 Channel + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_U8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_S8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_S8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_U8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_U16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_S16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_U8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_S8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_S8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_U8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_U16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_S16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_S16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_U16, 1.2, 10.2, eDeviceType::CPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_S32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_S32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S32, FMT_U8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S32, FMT_S8, 1.2, 10.2, eDeviceType::CPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_F32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_F32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_F32, FMT_U8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_F32, FMT_S8, 1.2, 10.2, eDeviceType::CPU))); + + // 3 Channels + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGB8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGBs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGBs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGB8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGB16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 
360, FMT_RGBs8, FMT_RGBs16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGB8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGBs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGBs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGB8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGB16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGBs16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGBs16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGB16, 1.2, 10.2, eDeviceType::CPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGBs32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGBs32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs32, FMT_RGB8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs32, FMT_RGBs8, 1.2, 10.2, eDeviceType::CPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGBf32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGBf32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBf32, FMT_RGB8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBf32, FMT_RGBs8, 1.2, 10.2, eDeviceType::CPU))); + + // 4 Channels + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBA8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBAs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBAs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBA8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBA16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBAs16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBA8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBAs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBAs8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBA8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBA16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBAs16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBAs16, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBA16, 1.2, 10.2, eDeviceType::CPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBAs32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBAs32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs32, FMT_RGBA8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs32, FMT_RGBAs8, 1.2, 10.2, eDeviceType::CPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBAf32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, 
FMT_RGBAf32, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAf32, FMT_RGBA8, 1.2, 10.2, eDeviceType::CPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAf32, FMT_RGBAs8, 1.2, 10.2, eDeviceType::CPU))); + + // GPU Correctness Tests + // 1 Channels + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_U8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_S8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_S8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_U8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_U16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_S16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_U8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_S8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_S8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_U8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_U16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_S16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U16, FMT_S16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S16, FMT_U16, 1.2, 10.2, eDeviceType::GPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_S32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_S32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S32, FMT_U8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S32, FMT_S8, 1.2, 10.2, eDeviceType::GPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_U8, FMT_F32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_S8, FMT_F32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_F32, FMT_U8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_F32, FMT_S8, 1.2, 10.2, eDeviceType::GPU))); + + // 3 Channels + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGB8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGBs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGBs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGB8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGB16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGBs16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGB8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGBs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGBs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGB8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGB16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGBs16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB16, FMT_RGBs16, 1.2, 10.2, 
eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs16, FMT_RGB16, 1.2, 10.2, eDeviceType::GPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGBs32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGBs32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs32, FMT_RGB8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs32, FMT_RGBs8, 1.2, 10.2, eDeviceType::GPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGB8, FMT_RGBf32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBs8, FMT_RGBf32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBf32, FMT_RGB8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBf32, FMT_RGBs8, 1.2, 10.2, eDeviceType::GPU))); + + // 4 Channels + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBA8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBAs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBAs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBA8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBA16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBAs16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBA8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBAs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBAs8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBA8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBA16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBAs16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA16, FMT_RGBAs16, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs16, FMT_RGBA16, 1.2, 10.2, eDeviceType::GPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBAs32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBAs32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs32, FMT_RGBA8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs32, FMT_RGBAs8, 1.2, 10.2, eDeviceType::GPU))); + + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBA8, FMT_RGBAf32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAs8, FMT_RGBAf32, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAf32, FMT_RGBA8, 1.2, 10.2, eDeviceType::GPU))); + TEST_CASE((TestCorrectness(1, 480, 360, FMT_RGBAf32, FMT_RGBAs8, 1.2, 10.2, eDeviceType::GPU))); + + TEST_CASES_END(); +} \ No newline at end of file diff --git a/tests/roccv/python/test_op_convert_to.py b/tests/roccv/python/test_op_convert_to.py new file mode 100644 index 00000000..1a4dfad8 --- /dev/null +++ b/tests/roccv/python/test_op_convert_to.py @@ -0,0 +1,50 @@ +# ############################################################################## +# Copyright (c) - 2025 Advanced Micro Devices, Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# ############################################################################## + +import pytest +import rocpycv + +from test_helpers import generate_tensor, compare_tensors + + +@pytest.mark.parametrize("device", [rocpycv.eDeviceType.GPU, rocpycv.eDeviceType.CPU]) +@pytest.mark.parametrize("dtype", [rocpycv.eDataType.U8, rocpycv.eDataType.S8, rocpycv.eDataType.U16, rocpycv.eDataType.S16, rocpycv.eDataType.S32, rocpycv.eDataType.F32]) +@pytest.mark.parametrize("out_dtype", [rocpycv.eDataType.U8, rocpycv.eDataType.S8, rocpycv.eDataType.U16, rocpycv.eDataType.S16, rocpycv.eDataType.S32, rocpycv.eDataType.F32]) +@pytest.mark.parametrize("channels", [1, 3, 4]) +@pytest.mark.parametrize("alpha", [1.2]) +@pytest.mark.parametrize("beta", [10.2]) +@pytest.mark.parametrize("samples,height,width", [ + (1, 45, 23), + (3, 67, 85), + (7, 25, 95) +]) +def test_op_convert_to(samples, height, width, channels, device, dtype, out_dtype, alpha, beta): + input = generate_tensor(samples, width, height, channels, dtype, device) + output_golden = rocpycv.Tensor([samples, height, width, channels], rocpycv.eTensorLayout.NHWC, out_dtype, device) + + stream = rocpycv.Stream() + rocpycv.convert_to_into(output_golden, input, alpha, beta, stream, device) + output = rocpycv.convert_to(input, out_dtype, alpha, beta, stream, device) + stream.synchronize() + + compare_tensors(output, output_golden) From 9360b53f5d6bec00e56edbc378c224f4d514b4ee Mon Sep 17 00:00:00 2001 From: svcbuild Date: Mon, 15 Dec 2025 13:02:03 -0800 Subject: [PATCH 2/3] added default arguments for alpha and beta --- include/op_convert_to.hpp | 6 +++--- python/src/operators/py_op_convert_to.cpp | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/op_convert_to.hpp b/include/op_convert_to.hpp index baddab00..3f4452b0 100644 --- a/include/op_convert_to.hpp +++ b/include/op_convert_to.hpp @@ -64,11 +64,11 @@ class ConvertTo final : public IOperator { * @param[in] stream The HIP stream to run this operator on. * @param[in] input Input tensor with image data. * @param[out] output Output tensor for storing modified image data. - * @param[in] alpha Scalar for output data. - * @param[in] beta Offset for the data. + * @param[in] alpha Scalar for output data. (Default: 1.0) + * @param[in] beta Offset for the data. (Default: 0.0) * @param[in] device The device to run this operator on. 
(Default: GPU) */ void operator()(hipStream_t stream, const roccv::Tensor &input, const roccv::Tensor &output, - const double alpha, const double beta, const eDeviceType device = eDeviceType::GPU) const; + const double alpha = 1.0, const double beta = 0.0, const eDeviceType device = eDeviceType::GPU) const; }; } // namespace roccv \ No newline at end of file diff --git a/python/src/operators/py_op_convert_to.cpp b/python/src/operators/py_op_convert_to.cpp index bc2092d8..e8828ec8 100644 --- a/python/src/operators/py_op_convert_to.cpp +++ b/python/src/operators/py_op_convert_to.cpp @@ -44,7 +44,7 @@ void PyOpConvertTo::ExecuteInto(PyTensor& output, PyTensor& input, double alpha, void PyOpConvertTo::Export(py::module& m) { using namespace py::literals; - m.def("convert_to", &PyOpConvertTo::Execute, "src"_a, "dtype"_a, "alpha"_a, "beta"_a, + m.def("convert_to", &PyOpConvertTo::Execute, "src"_a, "dtype"_a, "alpha"_a = 1.0, "beta"_a = 0.0, "stream"_a = nullptr, "device"_a = eDeviceType::GPU, R"pbdoc( Executes the Convert To operation on the given HIP stream. @@ -55,15 +55,15 @@ void PyOpConvertTo::Export(py::module& m) { Args: src (rocpycv.Tensor): Input tensor containing one or more images. dtype (eDataType): Datatype of the output tensor. - alpha (double): Scalar for output data. - beta (double): Offset for the data. + alpha (double, optional): Scalar for output data. Defaults to 1.0. + beta (double, optional): Offset for the data. Defaults to 0.0. stream (rocpycv.Stream, optional): HIP stream to run this operation on. device (rocpycv.Device, optional): The device to run this operation on. Defaults to GPU. Returns: rocpycv.Tensor: The output tensor. )pbdoc"); - m.def("convert_to_into", &PyOpConvertTo::ExecuteInto, "dst"_a, "src"_a, "alpha"_a, "beta"_a, + m.def("convert_to_into", &PyOpConvertTo::ExecuteInto, "dst"_a, "src"_a, "alpha"_a = 1.0, "beta"_a = 0.0, "stream"_a = nullptr, "device"_a = eDeviceType::GPU, R"pbdoc( Executes the Convert To operation on the given HIP stream. @@ -74,8 +74,8 @@ void PyOpConvertTo::Export(py::module& m) { Args: dst (rocpycv.Tensor): The output tensor with gamma correction applied. src (rocpycv.Tensor): Input tensor containing one or more images. - alpha (double): Scalar for output data. - beta (double): Offset for the data. + alpha (double): Scalar for output data. Defaults to 1.0. + beta (double): Offset for the data. Defaults to 0.0. stream (rocpycv.Stream, optional): HIP stream to run this operation on. device (rocpycv.Device, optional): The device to run this operation on. Defaults to GPU. From 184b90f4c5da22545b408a9eaf115ac24af81604 Mon Sep 17 00:00:00 2001 From: svcbuild Date: Mon, 15 Dec 2025 13:06:52 -0800 Subject: [PATCH 3/3] minor fix --- python/src/operators/py_op_convert_to.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/src/operators/py_op_convert_to.cpp b/python/src/operators/py_op_convert_to.cpp index e8828ec8..483903a1 100644 --- a/python/src/operators/py_op_convert_to.cpp +++ b/python/src/operators/py_op_convert_to.cpp @@ -74,8 +74,8 @@ void PyOpConvertTo::Export(py::module& m) { Args: dst (rocpycv.Tensor): The output tensor with gamma correction applied. src (rocpycv.Tensor): Input tensor containing one or more images. - alpha (double): Scalar for output data. Defaults to 1.0. - beta (double): Offset for the data. Defaults to 0.0. + alpha (double, optional): Scalar for output data. Defaults to 1.0. + beta (double, optional): Offset for the data. Defaults to 0.0. 
stream (rocpycv.Stream, optional): HIP stream to run this operation on.
device (rocpycv.Device, optional): The device to run this operation on. Defaults to GPU.
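
For reviewers, a minimal usage sketch of the Python binding added by this series. It only uses calls that appear in the patch itself (rocpycv.Tensor, rocpycv.Stream, rocpycv.convert_to); the image shape, dtypes, and alpha/beta values are illustrative, and the input tensor is left unfilled for brevity:

import rocpycv

# One 480x360 RGB U8 image in NHWC layout (contents left uninitialized in this sketch).
src = rocpycv.Tensor([1, 360, 480, 3], rocpycv.eTensorLayout.NHWC,
                     rocpycv.eDataType.U8, rocpycv.eDeviceType.GPU)

stream = rocpycv.Stream()

# dst(x, y) = SaturateCast<F32>(alpha * src(x, y) + beta); here alpha = 1/255 rescales U8 data into [0, 1].
dst = rocpycv.convert_to(src, rocpycv.eDataType.F32, 1.0 / 255.0, 0.0,
                         stream, rocpycv.eDeviceType.GPU)
stream.synchronize()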