From c8691ab99254af3a26f634b845ef15ac19415521 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Tue, 28 May 2024 12:50:25 +0200
Subject: [PATCH 01/23] Add Table::ToTensor and bindings to Python with Python
 tests

---
 cpp/src/arrow/record_batch.h         |   1 -
 cpp/src/arrow/table.cc               | 210 +++++++++++++++++++
 cpp/src/arrow/table.h                |  13 ++
 python/pyarrow/includes/libarrow.pxd |   3 +
 python/pyarrow/table.pxi             |  85 +++++++-
 python/pyarrow/tests/test_table.py   | 289 +++++++++++++++++++++++++++
 6 files changed, 598 insertions(+), 3 deletions(-)
diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h
index 0d1d2d4ac359..4601b1ba9d6a 100644
--- a/cpp/src/arrow/record_batch.h
+++ b/cpp/src/arrow/record_batch.h
@@ -94,7 +94,6 @@ class ARROW_EXPORT RecordBatch {
   ///
   /// Create a Tensor object with shape (number of rows, number of columns) and
   /// strides (type size in bytes, type size in bytes * number of rows).
-  /// Generated Tensor will have column-major layout.
   ///
   /// \param[in] null_to_nan if true, convert nulls to NaN
   /// \param[in] row_major if true, create row-major Tensor else column-major Tensor
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 68a8a1951f1c..c7e357ee03b9 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -36,11 +36,14 @@
 #include "arrow/record_batch.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
+#include "arrow/tensor.h"
 #include "arrow/type.h"
 #include "arrow/type_fwd.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging_internal.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/unreachable.h"
 #include "arrow/util/vector.h"
 
 namespace arrow {
@@ -346,6 +349,213 @@ Result<std::shared_ptr<Table>> Table::FromChunkedStructArray(
                      array->length());
 }
 
+template <typename Out>
+struct ConvertChunksToTensorVisitor {
+  Out*& out_values;
+  const ArrayData& in_data;
+
+  template <typename T>
+  Status Visit(const T&) {
+    if constexpr (is_numeric(T::type_id)) {
+      using In = typename T::c_type;
+      auto in_values = ArraySpan(in_data).GetSpan<In>(1, in_data.length);
+
+      if (in_data.null_count == 0) {
+        if constexpr (std::is_same_v<In, Out>) {
+          memcpy(out_values, in_values.data(), in_values.size_bytes());
+          out_values += in_values.size();
+        } else {
+          for (In in_value : in_values) {
+            *out_values++ = static_cast<Out>(in_value);
+          }
+        }
+      } else {
+        for (int64_t i = 0; i < in_data.length; ++i) {
+          *out_values++ =
+              in_data.IsNull(i) ? static_cast<Out>(NAN) : static_cast<Out>(in_values[i]);
+        }
+      }
+      return Status::OK();
+    }
+    Unreachable();
+  }
+};
+
+template <typename Out>
+struct ConvertChunksToTensorRowMajorVisitor {
+  Out*& out_values;
+  const ArrayData& in_data;
+  int num_cols;
+  int col_idx;
+  int64_t chunk_idx;  // first output row of this chunk (64-bit row offset)
+
+  template <typename T>
+  Status Visit(const T&) {
+    if constexpr (is_numeric(T::type_id)) {
+      using In = typename T::c_type;
+      auto in_values = ArraySpan(in_data).GetSpan<In>(1, in_data.length);
+
+      if (in_data.null_count == 0) {
+        for (int64_t data_idx = 0; data_idx < in_data.length; ++data_idx) {
+          out_values[(data_idx + chunk_idx) * num_cols + col_idx] =
+              static_cast<Out>(in_values[data_idx]);
+        }
+      } else {
+        for (int64_t data_idx = 0; data_idx < in_data.length; ++data_idx) {
+          out_values[(data_idx + chunk_idx) * num_cols + col_idx] =
+              in_data.IsNull(data_idx) ? static_cast<Out>(NAN)
+                                       : static_cast<Out>(in_values[data_idx]);
+        }
+      }
+      return Status::OK();
+    }
+    Unreachable();
+  }
+};
+
+template <typename DataType>
+inline void ConvertColumnsToTensor(const Table& table, uint8_t* out, bool row_major) {
+  using CType = typename arrow::TypeTraits<DataType>::CType;
+  auto* out_values = reinterpret_cast<CType*>(out);
+  // Column-major: the visitor advances out_values; row-major: it indexes from the base.
+  int i = 0;  // column index
+  for (const auto& column : table.columns()) {
+    int64_t j = 0;  // rows of this column already written by previous chunks
+    for (const auto& chunk : column->chunks()) {
+      if (row_major) {
+        ConvertChunksToTensorRowMajorVisitor<CType> visitor{out_values, *chunk->data(),
+                                                            table.num_columns(), i, j};
+        DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
+        j += chunk->length();  // keep the full 64-bit length, no truncation to int
+      } else {
+        ConvertChunksToTensorVisitor<CType> visitor{out_values, *chunk->data()};
+        DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
+      }
+    }
+    i++;
+  }
+}
+
+Result<std::shared_ptr<Tensor>> Table::ToTensor(bool null_to_nan, bool row_major,
+                                                MemoryPool* pool) const {
+  if (num_columns() == 0) {
+    return Status::TypeError(
+        "Conversion to Tensor for Table without columns/schema is not supported.");
+  }
+  // Check for no validity bitmap of each field
+  // if null_to_nan conversion is set to false
+  for (int i = 0; i < num_columns(); ++i) {
+    if (column(i)->null_count() > 0 && !null_to_nan) {
+      return Status::TypeError(
+          "Can only convert a Table with no nulls. Set null_to_nan to true to "
+          "convert nulls to NaN");
+    }
+  }
+
+  // Check for supported data types and merge fields
+  // to get the resulting uniform data type
+  if (!is_integer(column(0)->type()->id()) && !is_floating(column(0)->type()->id())) {
+    return Status::TypeError("DataType is not supported: ",
+                             column(0)->type()->ToString());
+  }
+  std::shared_ptr<Field> result_field = schema_->field(0);
+  std::shared_ptr<DataType> result_type = result_field->type();
+
+  Field::MergeOptions options;
+  options.promote_integer_to_float = true;
+  options.promote_integer_sign = true;
+  options.promote_numeric_width = true;
+
+  if (num_columns() > 1) {
+    for (int i = 1; i < num_columns(); ++i) {
+      if (!is_numeric(column(i)->type()->id())) {
+        return Status::TypeError("DataType is not supported: ",
+                                 column(i)->type()->ToString());
+      }
+
+      // Casting of float16 is not supported, throw an error in this case
+      if ((column(i)->type()->id() == Type::HALF_FLOAT ||
+           result_field->type()->id() == Type::HALF_FLOAT) &&
+          column(i)->type()->id() != result_field->type()->id()) {
+        return Status::NotImplemented("Casting from or to halffloat is not supported.");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(
+          result_field, result_field->MergeWith(
+                            schema_->field(i)->WithName(result_field->name()), options));
+    }
+    result_type = result_field->type();
+  }
+
+  // Check if result_type is signed or unsigned integer and null_to_nan is set to true
+  // Then all columns should be promoted to float type
+  if (is_integer(result_type->id()) && null_to_nan) {
+    ARROW_ASSIGN_OR_RAISE(
+        result_field,
+        result_field->MergeWith(arrow::field(result_field->name(), float32()), options));
+    result_type = result_field->type();
+  }
+
+  // Allocate the output buffer, sized in bytes (not bits): one value per table cell
+  ARROW_ASSIGN_OR_RAISE(
+      std::shared_ptr<Buffer> result,
+      AllocateBuffer(num_rows() * num_columns() * result_type->byte_width(), pool));
+  // Copy data, casting every column to the merged result type
+  switch (result_type->id()) {
+    case Type::UINT8:
+      ConvertColumnsToTensor<UInt8Type>(*this, result->mutable_data(), row_major);
+      break;
+    case Type::UINT16:
+    case Type::HALF_FLOAT:  // NOTE(review): nulls become static_cast<uint16_t>(NAN)
+      ConvertColumnsToTensor<UInt16Type>(*this, result->mutable_data(), row_major);
+      break;
+    case Type::UINT32:
+      ConvertColumnsToTensor<UInt32Type>(*this, result->mutable_data(), row_major);
+      break;
+    case Type::UINT64:
+      ConvertColumnsToTensor<UInt64Type>(*this, result->mutable_data(), row_major);
+      break;
+    case Type::INT8:
+      ConvertColumnsToTensor<Int8Type>(*this, result->mutable_data(), row_major);
+      break;
+    case Type::INT16:
+      ConvertColumnsToTensor<Int16Type>(*this, result->mutable_data(), row_major);
+      break;
+    case Type::INT32:
+      ConvertColumnsToTensor<Int32Type>(*this, result->mutable_data(), row_major);
+      break;
+    case Type::INT64:
+      ConvertColumnsToTensor<Int64Type>(*this, result->mutable_data(), row_major);
+      break;
+    case Type::FLOAT:
+      ConvertColumnsToTensor<FloatType>(*this, result->mutable_data(), row_major);
+      break;
+    case Type::DOUBLE:
+      ConvertColumnsToTensor<DoubleType>(*this, result->mutable_data(), row_major);
+      break;
+    default:
+      return Status::TypeError("DataType is not supported: ", result_type->ToString());
+  }
+
+  // Construct Tensor object
+  const auto& fixed_width_type =
+      internal::checked_cast<const FixedWidthType&>(*result_type);
+  std::vector<int64_t> shape = {num_rows(), num_columns()};
+  std::vector<int64_t> strides;
+  std::shared_ptr<Tensor> tensor;
+
+  if (row_major) {
+    ARROW_RETURN_NOT_OK(
+        internal::ComputeRowMajorStrides(fixed_width_type, shape, &strides));
+  } else {
+    ARROW_RETURN_NOT_OK(
+        internal::ComputeColumnMajorStrides(fixed_width_type, shape, &strides));
+  }
+  ARROW_ASSIGN_OR_RAISE(tensor,
+                        Tensor::Make(result_type, std::move(result), shape, strides));
+  return tensor;
+}
+
 std::vector<std::string> Table::ColumnNames() const {
   std::vector<std::string> names(num_columns());
   for (int i = 0; i < num_columns(); ++i) {
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index dee6f6fdd3cb..f57e23aaf5dd 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -102,6 +102,19 @@ class ARROW_EXPORT Table {
   static Result<std::shared_ptr<Table>> FromChunkedStructArray(
       const std::shared_ptr<ChunkedArray>& array);
 
+  /// \brief Convert a table with numeric columns to a Tensor
+  ///
+  /// Create a Tensor object with shape (number of rows, number of columns). Strides
+  /// are (type size * number of columns, type size) in bytes for a row-major Tensor
+  /// and (type size, type size * number of rows) for a column-major Tensor.
+  /// \param[in] null_to_nan if true, convert nulls to NaN
+  /// \param[in] row_major if true, create row-major Tensor else column-major Tensor
+  /// \param[in] pool the memory pool to allocate the tensor buffer
+  /// \return the resulting Tensor
+  Result<std::shared_ptr<Tensor>> ToTensor(
+      bool null_to_nan = false, bool row_major = true,
+      MemoryPool* pool = default_memory_pool()) const;
+
   /// \brief Return the table schema
   const std::shared_ptr<Schema>& schema() const { return schema_; }
 
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index e96a7d84696d..767e21f01bda 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1139,6 +1139,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
             const shared_ptr[CSchema]& schema,
             const vector[shared_ptr[CRecordBatch]]& batches)
 
+        CResult[shared_ptr[CTensor]] ToTensor(c_bool null_to_nan, c_bool row_major,
+                                              CMemoryPool* pool) const
+
         int num_columns()
         int64_t num_rows()
 
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 2e04fa75b8b7..3299ccae9997 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -2292,7 +2292,8 @@ cdef class _Tabular(_PandasConvertible):
         >>> animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"])
         >>> table = pa.Table.from_arrays([n_legs, animals], names=["n_legs", "animals"])
         >>> table.to_pydict()
-        {'n_legs': [2, 2, 4, 4, 5, 100], 'animals': ['Flamingo', 'Parrot', ..., 'Centipede']}
+        {'n_legs': [2, 2, 4, 4, 5, 100], 'animals': [
+            'Flamingo', 'Parrot', ..., 'Centipede']}
         """
         entries = []
         for i in range(self.num_columns):
@@ -4989,7 +4990,8 @@ cdef class Table(_Tabular):
         animals: string
         ----
         n_legs: [[2,4,5,100],[2,4,5,100]]
-        animals: [["Flamingo","Horse","Brittle stars","Centipede"],["Flamingo","Horse","Brittle stars","Centipede"]]
+        animals: [["Flamingo","Horse","Brittle stars","Centipede"],
+            ["Flamingo","Horse","Brittle stars","Centipede"]]
         """
         cdef:
             vector[shared_ptr[CRecordBatch]] c_batches
@@ -5084,6 +5086,85 @@ cdef class Table(_Tabular):
 
         return result
 
+    def to_tensor(self, c_bool null_to_nan=False, c_bool row_major=True, MemoryPool memory_pool=None):
+        """
+        Convert to a :class:`~pyarrow.Tensor`.
+
+        Tables that can be converted have fields of type signed or unsigned integer or float,
+        including all bit-widths.
+
+        ``null_to_nan`` is ``False`` by default and this method will raise an error in case
+        any nulls are present. Tables with nulls can be converted with ``null_to_nan`` set to
+        ``True``. In this case null values are converted to ``NaN`` and integer type arrays are
+        promoted to the appropriate float type.
+
+        Parameters
+        ----------
+        null_to_nan : bool, default False
+            Whether to write null values in the result as ``NaN``.
+        row_major : bool, default True
+            Whether resulting Tensor is row-major or column-major
+        memory_pool : MemoryPool, default None
+            For memory allocations, if required, otherwise use default pool
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> table = pa.table(
+        ...    [
+        ...       pa.chunked_array([[1, 2], [3, 4, None]], type=pa.int32()),
+        ...       pa.chunked_array([[10, 20, 30], [40, None]], type=pa.float32()),
+        ...    ], names = ["a", "b"]
+        ... )
+
+        >>> table
+        pyarrow.Table
+        a: int32
+        b: float
+        ----
+        a: [[1,2],[3,4,null]]
+        b: [[10,20,30],[40,null]]
+
+        Convert a Table to row-major Tensor with null values written as ``NaN``s:
+
+        >>> table.to_tensor(null_to_nan=True)
+        <pyarrow.Tensor>
+        type: double
+        shape: (5, 2)
+        strides: (16, 8)
+        >>> table.to_tensor(null_to_nan=True).to_numpy()
+        array([[ 1., 10.],
+               [ 2., 20.],
+               [ 3., 30.],
+               [ 4., 40.],
+               [nan, nan]])
+
+        Convert a Table to column-major Tensor
+
+        >>> table.to_tensor(null_to_nan=True, row_major=False)
+        <pyarrow.Tensor>
+        type: double
+        shape: (5, 2)
+        strides: (8, 40)
+        >>> table.to_tensor(null_to_nan=True, row_major=False).to_numpy()
+        array([[ 1., 10.],
+               [ 2., 20.],
+               [ 3., 30.],
+               [ 4., 40.],
+               [nan, nan]])
+        """
+        cdef:
+            shared_ptr[CTable] c_table
+            shared_ptr[CTensor] c_tensor
+            CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
+
+        c_table = pyarrow_unwrap_table(self)
+        with nogil:
+            c_tensor = GetResultValue(
+                <CResult[shared_ptr[CTensor]]>deref(c_table).ToTensor(null_to_nan,
+                                                                      row_major, pool))
+        return pyarrow_wrap_tensor(c_tensor)
+
     def to_reader(self, max_chunksize=None):
         """
         Convert the Table to a RecordBatchReader.
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index b65fb7d952c8..d12a61063bef 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -1269,6 +1269,295 @@ def test_recordbatch_to_tensor_unsupported():
         batch.to_tensor()
 
 
+@pytest.mark.parametrize('typ', [
+    np.uint8, np.uint16, np.uint32, np.uint64,
+    np.int8, np.int16, np.int32, np.int64,
+    np.float32, np.float64,
+])
+def test_table_to_tensor_uniform_type(typ):
+    arr1 = [[1, 2, 3], [4, 5, 6, 7, 8, 9]]
+    arr2 = [[10, 20], [30, 40, 50, 60, 70, 80, 90]]
+    arr3 = [[100, 100, 100, 100, 100, 100], [100, 100, 100]]
+    table = pa.Table.from_arrays(
+        [
+            pa.chunked_array(arr1, type=pa.from_numpy_dtype(typ)),
+            pa.chunked_array(arr2, type=pa.from_numpy_dtype(typ)),
+            pa.chunked_array(arr3, type=pa.from_numpy_dtype(typ)),
+        ], ["a", "b", "c"]
+    )
+
+    arr1_f = [1, 2, 3, 4, 5, 6, 7, 8, 9]
+    arr2_f = [10, 20, 30, 40, 50, 60, 70, 80, 90]
+    arr3_f = [100, 100, 100, 100, 100, 100, 100, 100, 100]
+
+    result = table.to_tensor(row_major=False)
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ, order="F")
+    expected = pa.Tensor.from_numpy(x)
+    check_tensors(result, expected, pa.from_numpy_dtype(typ), 27)
+
+    result = table.to_tensor()
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ, order="C")
+    expected = pa.Tensor.from_numpy(x)
+    check_tensors(result, expected, pa.from_numpy_dtype(typ), 27)
+
+    # Test offset
+    table1 = table.slice(1)
+    arr1_f = [2, 3, 4, 5, 6, 7, 8, 9]
+    arr2_f = [20, 30, 40, 50, 60, 70, 80, 90]
+    arr3_f = [100, 100, 100, 100, 100, 100, 100, 100]
+
+    result = table1.to_tensor(row_major=False)
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ, order="F")
+    expected = pa.Tensor.from_numpy(x)
+    check_tensors(result, expected, pa.from_numpy_dtype(typ), 24)
+
+    result = table1.to_tensor()
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ, order="C")
+    expected = pa.Tensor.from_numpy(x)
+    check_tensors(result, expected, pa.from_numpy_dtype(typ), 24)
+
+    table2 = table.slice(1, 5)
+    arr1_f = [2, 3, 4, 5, 6]
+    arr2_f = [20, 30, 40, 50, 60]
+    arr3_f = [100, 100, 100, 100, 100]
+
+    result = table2.to_tensor(row_major=False)
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ, order="F")
+    expected = pa.Tensor.from_numpy(x)
+    check_tensors(result, expected, pa.from_numpy_dtype(typ), 15)
+
+    result = table2.to_tensor()
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ, order="C")
+    expected = pa.Tensor.from_numpy(x)
+    check_tensors(result, expected, pa.from_numpy_dtype(typ), 15)
+
+
+def test_table_to_tensor_uniform_float_16():
+    arr1 = [np.array([1, 2, 3], dtype=np.float16),
+            np.array([4, 5, 6, 7, 8, 9], dtype=np.float16)]
+    arr2 = [np.array([10, 20], dtype=np.float16),
+            np.array([30, 40, 50, 60, 70, 80, 90], dtype=np.float16)]
+    arr3 = [np.array([100, 100, 100, 100, 100, 100], dtype=np.float16),
+            np.array([100, 100, 100], dtype=np.float16)]
+    table = pa.Table.from_arrays(
+        [
+            pa.chunked_array(arr1, type=pa.float16()),
+            pa.chunked_array(arr2, type=pa.float16()),
+            pa.chunked_array(arr3, type=pa.float16()),
+        ], ["a", "b", "c"]
+    )
+
+    arr1_f = [1, 2, 3, 4, 5, 6, 7, 8, 9]
+    arr2_f = [10, 20, 30, 40, 50, 60, 70, 80, 90]
+    arr3_f = [100, 100, 100, 100, 100, 100, 100, 100, 100]
+
+    result = table.to_tensor(row_major=False)
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(np.float16, order="F")
+    expected = pa.Tensor.from_numpy(x)
+    check_tensors(result, expected, pa.float16(), 27)
+
+    result = table.to_tensor()
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(np.float16, order="C")
+    expected = pa.Tensor.from_numpy(x)
+    check_tensors(result, expected, pa.float16(), 27)
+
+
+def test_table_to_tensor_mixed_type():
+    # uint16 + int16 = int32
+    arr1 = [[1, 2, 3], [4, 5, 6, 7, 8, 9]]
+    arr2 = [[10, 20], [30, 40, 50, 60, 70, 80, 90]]
+    arr3 = [[100, 200, 300, np.nan, 500, 600], [700, 800, 900]]
+    table = pa.Table.from_arrays(
+        [
+            pa.chunked_array(arr1, type=pa.uint16()),
+            pa.chunked_array(arr2, type=pa.int16()),
+        ], ["a", "b"]
+    )
+
+    arr1_f = [1, 2, 3, 4, 5, 6, 7, 8, 9]
+    arr2_f = [10, 20, 30, 40, 50, 60, 70, 80, 90]
+    arr3_f = [100, 200, 300, np.nan, 500, 600, 700, 800, 900]
+
+    result = table.to_tensor(row_major=False)
+    x = np.column_stack([arr1_f, arr2_f]).astype(np.int32, order="F")
+    expected = pa.Tensor.from_numpy(x)
+    check_tensors(result, expected, pa.int32(), 18)
+
+    result = table.to_tensor()
+    x = np.column_stack([arr1_f, arr2_f]).astype(np.int32, order="C")
+    expected = pa.Tensor.from_numpy(x)
+    check_tensors(result, expected, pa.int32(), 18)
+
+    # uint16 + int16 + float32 = float64
+    table = pa.Table.from_arrays(
+        [
+            pa.chunked_array(arr1, type=pa.uint16()),
+            pa.chunked_array(arr2, type=pa.int16()),
+            pa.chunked_array(arr3, type=pa.float32()),
+        ], ["a", "b", "c"]
+    )
+    result = table.to_tensor(row_major=False)
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(np.float64, order="F")
+    expected = pa.Tensor.from_numpy(x)
+
+    np.testing.assert_equal(result.to_numpy(), x)
+    assert result.size == 27
+    assert result.type == pa.float64()
+    assert result.shape == expected.shape
+    assert result.strides == expected.strides
+
+    result = table.to_tensor()
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(np.float64, order="C")
+    expected = pa.Tensor.from_numpy(x)
+
+    np.testing.assert_equal(result.to_numpy(), x)
+    assert result.size == 27
+    assert result.type == pa.float64()
+    assert result.shape == expected.shape
+    assert result.strides == expected.strides
+
+
+def test_table_to_tensor_unsupported_mixed_type_with_float16():
+    arr1 = [[1, 2, 3], [4, 5, 6, 7, 8, 9]]
+    arr2 = [10, 20, 30, 40, 50, 60, 70, 80, 90]
+    arr3 = [[100, 200, 300, 400, 500, 600], [700, 800, 900]]
+    table = pa.Table.from_arrays(
+        [
+            pa.chunked_array(arr1, type=pa.uint16()),
+            pa.chunked_array([np.array(arr2, dtype=np.float16)], type=pa.float16()),
+            pa.chunked_array(arr3, type=pa.float32()),
+        ], ["a", "b", "c"]
+    )
+
+    with pytest.raises(
+        NotImplementedError,
+        match="Casting from or to halffloat is not supported."
+    ):
+        table.to_tensor()
+
+
+def test_table_to_tensor_nan():
+    arr1 = [[1, 2, 3], [4, np.nan, 6, 7, 8, 9]]
+    arr2 = [[10, 20], [30, 40, 50, 60, 70, np.nan, 90]]
+    table = pa.Table.from_arrays(
+        [
+            pa.chunked_array(arr1, type=pa.float32()),
+            pa.chunked_array(arr2, type=pa.float32()),
+        ], ["a", "b"]
+    )
+
+    arr1_f = [1, 2, 3, 4, np.nan, 6, 7, 8, 9]
+    arr2_f = [10, 20, 30, 40, 50, 60, 70, np.nan, 90]
+
+    result = table.to_tensor(row_major=False)
+    x = np.column_stack([arr1_f, arr2_f]).astype(np.float32, order="F")
+    expected = pa.Tensor.from_numpy(x)
+
+    np.testing.assert_equal(result.to_numpy(), x)
+    assert result.size == 18
+    assert result.type == pa.float32()
+    assert result.shape == expected.shape
+    assert result.strides == expected.strides
+
+
+def test_table_to_tensor_null():
+    arr1 = [[1, 2, 3], [4, None, 6, 7, 8, 9]]
+    arr2 = [[10, 20], [30, 40, 50, 60, 70, None, 90]]
+    table = pa.Table.from_arrays(
+        [
+            pa.chunked_array(arr1, type=pa.int32()),
+            pa.chunked_array(arr2, type=pa.float32()),
+        ], ["a", "b"]
+    )
+    with pytest.raises(
+        pa.ArrowTypeError,
+        match="Can only convert a Table with no nulls."
+    ):
+        table.to_tensor()
+
+    arr1_f = [1, 2, 3, 4, np.nan, 6, 7, 8, 9]
+    arr2_f = [10, 20, 30, 40, 50, 60, 70, np.nan, 90]
+
+    result = table.to_tensor(null_to_nan=True, row_major=False)
+    x = np.column_stack([arr1_f, arr2_f]).astype(np.float64, order="F")
+    expected = pa.Tensor.from_numpy(x)
+
+    np.testing.assert_equal(result.to_numpy(), x)
+    assert result.size == 18
+    assert result.type == pa.float64()
+    assert result.shape == expected.shape
+    assert result.strides == expected.strides
+
+    # int32 -> float64
+    table = pa.Table.from_arrays(
+        [
+            pa.chunked_array(arr1, type=pa.int32()),
+            pa.chunked_array(arr2, type=pa.int32()),
+        ], ["a", "b"]
+    )
+
+    result = table.to_tensor(null_to_nan=True, row_major=False)
+
+    np.testing.assert_equal(result.to_numpy(), x)
+    assert result.size == 18
+    assert result.type == pa.float64()
+    assert result.shape == expected.shape
+    assert result.strides == expected.strides
+
+    # int8 -> float32
+    table = pa.Table.from_arrays(
+        [
+            pa.chunked_array(arr1, type=pa.int8()),
+            pa.chunked_array(arr2, type=pa.int8()),
+        ], ["a", "b"]
+    )
+
+    result = table.to_tensor(null_to_nan=True, row_major=False)
+    x = np.column_stack([arr1_f, arr2_f]).astype(np.float32, order="F")
+    expected = pa.Tensor.from_numpy(x)
+
+    np.testing.assert_equal(result.to_numpy(), x)
+    assert result.size == 18
+    assert result.type == pa.float32()
+    assert result.shape == expected.shape
+    assert result.strides == expected.strides
+
+
+def test_table_to_tensor_empty():
+    table = pa.Table.from_arrays(
+        [
+            pa.chunked_array([], type=pa.float32()),
+            pa.chunked_array([], type=pa.float32()),
+        ], ["a", "b"]
+    )
+    result = table.to_tensor()
+
+    x = np.column_stack([[], []]).astype(np.float32, order="F")
+    expected = pa.Tensor.from_numpy(x)
+
+    assert result.size == expected.size
+    assert result.type == pa.float32()
+    assert result.shape == expected.shape
+    assert result.strides == (4, 4)
+
+
+def test_table_to_tensor_unsupported():
+    arr1 = [[1, 2, 3], [4, 5, 6, 7, 8, 9]]
+    # Unsupported data type
+    arr2 = [["a", "b", "c", "a"], ["b", "c", "a", "b", "c"]]
+    table = pa.Table.from_arrays(
+        [
+            pa.chunked_array(arr1, type=pa.int32()),
+            pa.chunked_array(arr2, type=pa.utf8()),
+        ], ["a", "b"]
+    )
+    with pytest.raises(
+        pa.ArrowTypeError,
+        match="DataType is not supported"
+    ):
+        table.to_tensor()
+
+
 def _table_like_slice_tests(factory):
     data = [
         pa.array(range(5)),

From cd74794fa3b4c342592c33d336a1721b348cc4f4 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Wed, 29 May 2024 09:16:36 +0200
Subject: [PATCH 02/23] Add C++ tests

---
 cpp/src/arrow/table.cc      |   2 +-
 cpp/src/arrow/table_test.cc | 529 ++++++++++++++++++++++++++++++++++++
 2 files changed, 530 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index c7e357ee03b9..42fc9d0ce894 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -440,7 +440,7 @@ Result<std::shared_ptr<Tensor>> Table::ToTensor(bool null_to_nan, bool row_major
                                                 MemoryPool* pool) const {
   if (num_columns() == 0) {
     return Status::TypeError(
-        "Conversion to Tensor for Table without columns/schema is not supported.");
+        "Conversion to Tensor for Tables without columns/schema is not supported.");
   }
   // Check for no validity bitmap of each field
   // if null_to_nan conversion is set to false
diff --git a/cpp/src/arrow/table_test.cc b/cpp/src/arrow/table_test.cc
index 692671910b89..c08a5dca81c2 100644
--- a/cpp/src/arrow/table_test.cc
+++ b/cpp/src/arrow/table_test.cc
@@ -33,6 +33,7 @@
 #include "arrow/compute/cast.h"
 #include "arrow/record_batch.h"
 #include "arrow/status.h"
+#include "arrow/tensor.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
 #include "arrow/type.h"
@@ -520,6 +521,534 @@ TEST_F(TestTable, ConcatenateTables) {
   ASSERT_RAISES(Invalid, ConcatenateTables({t1, t3}));
 }
 
+TEST_F(TestTable, ToTensorUnsupportedType) {
+  auto f0 = field("f0", int32());
+  // utf8 column: expected to be rejected as an unsupported element type
+  auto f1 = field("f1", utf8());
+
+  std::vector<std::shared_ptr<Field>> fields = {f0, f1};
+  auto schema = ::arrow::schema(fields);
+
+  auto a0 = ChunkedArrayFromJSON(int32(), {"[1, 2, 3]", "[4, 5, 6, 7, 8, 9]"});
+  auto a1 = ChunkedArrayFromJSON(
+      utf8(), {R"(["a", "b", "c", "a", "b"])", R"(["c", "a", "b", "c"])"});
+
+  auto table = Table::Make(schema, {a0, a1});
+  // The expected message ends with the string form of the offending column type.
+  ASSERT_RAISES_WITH_MESSAGE(
+      TypeError, "Type error: DataType is not supported: " + a1->type()->ToString(),
+      table->ToTensor());
+
+  // A boolean column next to an int32 one is expected to be rejected the same way
+  auto f2 = field("f2", boolean());
+
+  std::vector<std::shared_ptr<Field>> fields2 = {f0, f2};
+  auto schema2 = ::arrow::schema(fields2);
+  auto a2 = ChunkedArrayFromJSON(
+      boolean(), {"[true, false, true, true, false, true, false, true, true]"});
+  auto table2 = Table::Make(schema2, {a0, a2});
+
+  ASSERT_RAISES_WITH_MESSAGE(
+      TypeError, "Type error: DataType is not supported: " + a2->type()->ToString(),
+      table2->ToTensor());
+}
+
+TEST_F(TestTable, ToTensorUnsupportedMissing) {
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", int32());
+
+  std::vector<std::shared_ptr<Field>> fields = {f0, f1};
+  auto schema = ::arrow::schema(fields);
+  // a1 holds a null in its second chunk; a0 has no nulls.
+  auto a0 = ChunkedArrayFromJSON(int32(), {"[1, 2, 3]", "[4, 5, 6, 7, 8, 9]"});
+  auto a1 = ChunkedArrayFromJSON(int32(), {"[10, 20]", "[30, 40, null, 60, 70, 80, 90]"});
+
+  auto table = Table::Make(schema, {a0, a1});
+  // ToTensor() is called with default arguments, so the null must surface as an error.
+  ASSERT_RAISES_WITH_MESSAGE(TypeError,
+                             "Type error: Can only convert a Table with no nulls. Set "
+                             "null_to_nan to true to convert nulls to NaN",
+                             table->ToTensor());
+}
+
+TEST_F(TestTable, ToTensorEmptyTable) {
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", int32());
+
+  std::vector<std::shared_ptr<Field>> fields = {f0, f1};
+  auto schema = ::arrow::schema(fields);
+  // Case 1: a table that has a schema (two int32 columns) but zero rows.
+  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Table> empty, Table::MakeEmpty(schema));
+
+  ASSERT_OK_AND_ASSIGN(auto tensor_column,
+                       empty->ToTensor(/*null_to_nan=*/false, /*row_major=*/false));
+  ASSERT_OK(tensor_column->Validate());
+
+  ASSERT_OK_AND_ASSIGN(auto tensor_row, empty->ToTensor());
+  ASSERT_OK(tensor_row->Validate());
+  // Both layouts are expected to report the same shape and strides for zero rows.
+  const std::vector<int64_t> strides = {4, 4};
+  const std::vector<int64_t> shape = {0, 2};
+
+  EXPECT_EQ(strides, tensor_column->strides());
+  EXPECT_EQ(shape, tensor_column->shape());
+  EXPECT_EQ(strides, tensor_row->strides());
+  EXPECT_EQ(shape, tensor_row->shape());
+
+  // Case 2: a table without any columns. The conversion is expected to fail with
+  // a TypeError instead of returning a tensor.
+  auto table_no_columns =
+      Table::Make(::arrow::schema({}), std::vector<std::shared_ptr<Array>>{});
+
+  ASSERT_RAISES_WITH_MESSAGE(TypeError,
+                             "Type error: Conversion to Tensor for Tables without "
+                             "columns/schema is not supported.",
+                             table_no_columns->ToTensor());
+}
+
+template <typename DataType>
+void CheckTensor(const std::shared_ptr<Tensor>& tensor, const int size,
+                 const std::vector<int64_t> shape, const std::vector<int64_t> f_strides) {
+  EXPECT_EQ(size, tensor->size());
+  EXPECT_EQ(TypeTraits<DataType>::type_singleton(), tensor->type());
+  EXPECT_EQ(shape, tensor->shape());
+  EXPECT_EQ(f_strides, tensor->strides());
+  EXPECT_FALSE(tensor->is_row_major());  // this helper is for column-major tensors
+  EXPECT_TRUE(tensor->is_column_major());
+  EXPECT_TRUE(tensor->is_contiguous());  // no gaps between elements are accepted
+}
+
+template <typename DataType>
+void CheckTensorRowMajor(const std::shared_ptr<Tensor>& tensor, const int size,
+                         const std::vector<int64_t> shape,
+                         const std::vector<int64_t> strides) {
+  EXPECT_EQ(size, tensor->size());
+  EXPECT_EQ(TypeTraits<DataType>::type_singleton(), tensor->type());
+  EXPECT_EQ(shape, tensor->shape());
+  EXPECT_EQ(strides, tensor->strides());
+  EXPECT_TRUE(tensor->is_row_major());  // this helper is for row-major tensors
+  EXPECT_FALSE(tensor->is_column_major());
+  EXPECT_TRUE(tensor->is_contiguous());  // no gaps between elements are accepted
+}
+
+TEST_F(TestTable, ToTensorSupportedNaN) {
+  auto f0 = field("f0", float32());
+  auto f1 = field("f1", float32());
+
+  std::vector<std::shared_ptr<Field>> fields = {f0, f1};
+  auto schema = ::arrow::schema(fields);
+  // Each column holds a NaN value (not a null), so null_to_nan can stay false.
+  auto a0 = ChunkedArrayFromJSON(float32(), {"[NaN, 2, 3]", "[4, 5, 6, 7, 8, 9]"});
+  auto a1 =
+      ChunkedArrayFromJSON(float32(), {"[10, 20]", "[30, 40, NaN, 60, 70, 80, 90]"});
+
+  auto table = Table::Make(schema, {a0, a1});
+
+  ASSERT_OK_AND_ASSIGN(auto tensor,
+                       table->ToTensor(/*null_to_nan=*/false, /*row_major=*/false));
+  ASSERT_OK(tensor->Validate());
+  // The tensors only compare equal when nans_equal(true) is set; both cases are checked.
+  std::vector<int64_t> shape = {9, 2};
+  const int64_t f32_size = sizeof(float);
+  std::vector<int64_t> f_strides = {f32_size, f32_size * shape[0]};
+  std::shared_ptr<Tensor> tensor_expected = TensorFromJSON(
+      float32(), "[NaN, 2,  3,  4,  5, 6, 7, 8, 9, 10, 20, 30, 40, NaN, 60, 70, 80, 90]",
+      shape, f_strides);
+
+  EXPECT_FALSE(tensor_expected->Equals(*tensor));
+  EXPECT_TRUE(tensor_expected->Equals(*tensor, EqualOptions().nans_equal(true)));
+  CheckTensor<FloatType>(tensor, 18, shape, f_strides);
+}
+
+TEST_F(TestTable, ToTensorSupportedNullToNan) {
+  // int32 + float32 columns: the expected tensor type is float64
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", float32());
+
+  std::vector<std::shared_ptr<Field>> fields = {f0, f1};
+  auto schema = ::arrow::schema(fields);
+  // Both columns contain exactly one null.
+  auto a0 = ChunkedArrayFromJSON(int32(), {"[null, 2, 3]", "[4, 5, 6, 7, 8, 9]"});
+  auto a1 =
+      ChunkedArrayFromJSON(float32(), {"[10, 20]", "[30, 40, null, 60, 70, 80, 90]"});
+
+  auto table = Table::Make(schema, {a0, a1});
+
+  ASSERT_OK_AND_ASSIGN(auto tensor,
+                       table->ToTensor(/*null_to_nan=*/true, /*row_major=*/false));
+  ASSERT_OK(tensor->Validate());
+  // Expected column-major tensor, with NaN at the positions of the input nulls.
+  std::vector<int64_t> shape = {9, 2};
+  const int64_t f64_size = sizeof(double);
+  std::vector<int64_t> f_strides = {f64_size, f64_size * shape[0]};
+  std::shared_ptr<Tensor> tensor_expected = TensorFromJSON(
+      float64(), "[NaN, 2,  3,  4,  5, 6, 7, 8, 9, 10, 20, 30, 40, NaN, 60, 70, 80, 90]",
+      shape, f_strides);
+
+  EXPECT_FALSE(tensor_expected->Equals(*tensor));
+  EXPECT_TRUE(tensor_expected->Equals(*tensor, EqualOptions().nans_equal(true)));
+
+  CheckTensor<DoubleType>(tensor, 18, shape, f_strides);
+
+  ASSERT_OK_AND_ASSIGN(auto tensor_row, table->ToTensor(/*null_to_nan=*/true));
+  ASSERT_OK(tensor_row->Validate());
+  // Expected row-major tensor (row_major is left at its default in the call above).
+  std::vector<int64_t> strides = {f64_size * shape[1], f64_size};
+  std::shared_ptr<Tensor> tensor_expected_row = TensorFromJSON(
+      float64(), "[NaN, 10, 2,  20, 3, 30,  4, 40, 5, NaN, 6, 60, 7, 70, 8, 80, 9, 90]",
+      shape, strides);
+
+  EXPECT_FALSE(tensor_expected_row->Equals(*tensor_row));
+  EXPECT_TRUE(tensor_expected_row->Equals(*tensor_row, EqualOptions().nans_equal(true)));
+
+  CheckTensorRowMajor<DoubleType>(tensor_row, 18, shape, strides);
+
+  // int32 -> float64
+  auto f2 = field("f2", int32());
+
+  std::vector<std::shared_ptr<Field>> fields1 = {f0, f2};
+  auto schema1 = ::arrow::schema(fields1);
+  // a2 has the same values and null position as a1, but is typed int32.
+  auto a2 = ChunkedArrayFromJSON(int32(), {"[10, 20]", "[30, 40, null, 60, 70, 80, 90]"});
+  auto table1 = Table::Make(schema1, {a0, a2});
+
+  ASSERT_OK_AND_ASSIGN(auto tensor1,
+                       table1->ToTensor(/*null_to_nan=*/true, /*row_major=*/false));
+  ASSERT_OK(tensor1->Validate());
+
+  EXPECT_FALSE(tensor_expected->Equals(*tensor1));
+  EXPECT_TRUE(tensor_expected->Equals(*tensor1, EqualOptions().nans_equal(true)));
+
+  CheckTensor<DoubleType>(tensor1, 18, shape, f_strides);
+
+  ASSERT_OK_AND_ASSIGN(auto tensor1_row, table1->ToTensor(/*null_to_nan=*/true));
+  ASSERT_OK(tensor1_row->Validate());
+
+  EXPECT_FALSE(tensor_expected_row->Equals(*tensor1_row));
+  EXPECT_TRUE(tensor_expected_row->Equals(*tensor1_row, EqualOptions().nans_equal(true)));
+
+  CheckTensorRowMajor<DoubleType>(tensor1_row, 18, shape, strides);
+
+  // int8 -> float32
+  auto f3 = field("f3", int8());
+  auto f4 = field("f4", int8());
+
+  std::vector<std::shared_ptr<Field>> fields2 = {f3, f4};
+  auto schema2 = ::arrow::schema(fields2);
+
+  auto a3 = ChunkedArrayFromJSON(int8(), {"[null, 2, 3]", "[4, 5, 6, 7, 8, 9]"});
+  auto a4 = ChunkedArrayFromJSON(int8(), {"[10, 20]", "[30, 40, null, 60, 70, 80, 90]"});
+  auto table2 = Table::Make(schema2, {a3, a4});
+
+  ASSERT_OK_AND_ASSIGN(auto tensor2,
+                       table2->ToTensor(/*null_to_nan=*/true, /*row_major=*/false));
+  ASSERT_OK(tensor2->Validate());
+  // Same values as before, but the expected element type is float32 this time.
+  const int64_t f32_size = sizeof(float);
+  std::vector<int64_t> f_strides_2 = {f32_size, f32_size * shape[0]};
+  std::shared_ptr<Tensor> tensor_expected_2 = TensorFromJSON(
+      float32(), "[NaN, 2,  3,  4,  5, 6, 7, 8, 9, 10, 20, 30, 40, NaN, 60, 70, 80, 90]",
+      shape, f_strides_2);
+
+  EXPECT_FALSE(tensor_expected_2->Equals(*tensor2));
+  EXPECT_TRUE(tensor_expected_2->Equals(*tensor2, EqualOptions().nans_equal(true)));
+
+  CheckTensor<FloatType>(tensor2, 18, shape, f_strides_2);
+
+  ASSERT_OK_AND_ASSIGN(auto tensor2_row, table2->ToTensor(/*null_to_nan=*/true));
+  ASSERT_OK(tensor2_row->Validate());
+
+  std::vector<int64_t> strides_2 = {f32_size * shape[1], f32_size};
+  std::shared_ptr<Tensor> tensor2_expected_row = TensorFromJSON(
+      float32(), "[NaN, 10, 2,  20, 3, 30,  4, 40, 5, NaN, 6, 60, 7, 70, 8, 80, 9, 90]",
+      shape, strides_2);
+
+  EXPECT_FALSE(tensor2_expected_row->Equals(*tensor2_row));
+  EXPECT_TRUE(
+      tensor2_expected_row->Equals(*tensor2_row, EqualOptions().nans_equal(true)));
+
+  CheckTensorRowMajor<FloatType>(tensor2_row, 18, shape, strides_2);
+}
+
+TEST_F(TestTable, ToTensorSupportedTypesMixed) {
+  auto f0 = field("f0", uint16());
+  auto f1 = field("f1", int16());
+  auto f2 = field("f2", float32());
+  // Three 9-value columns of different types, each split into two chunks.
+  auto a0 = ChunkedArrayFromJSON(uint16(), {"[1, 2, 3]", "[4, 5, 6, 7, 8, 9]"});
+  auto a1 = ChunkedArrayFromJSON(int16(), {"[10, 20]", "[30, 40, 50, 60, 70, 80, 90]"});
+  auto a2 = ChunkedArrayFromJSON(float32(),
+                                 {"[100, 200, 300, NaN, 500, 600]", "[700, 800, 900]"});
+
+  // Single column
+  std::vector<std::shared_ptr<Field>> fields = {f0};
+  auto schema = ::arrow::schema(fields);
+  auto table = Table::Make(schema, {a0});
+
+  ASSERT_OK_AND_ASSIGN(auto tensor,
+                       table->ToTensor(/*null_to_nan=*/false, /*row_major=*/false));
+  ASSERT_OK(tensor->Validate());
+  // With a single column the expected tensor keeps the column type (uint16).
+  std::vector<int64_t> shape = {9, 1};
+  const int64_t uint16_size = sizeof(uint16_t);
+  std::vector<int64_t> f_strides = {uint16_size, uint16_size * shape[0]};
+  std::shared_ptr<Tensor> tensor_expected =
+      TensorFromJSON(uint16(), "[1, 2, 3, 4, 5, 6, 7, 8, 9]", shape, f_strides);
+
+  EXPECT_TRUE(tensor_expected->Equals(*tensor));
+  CheckTensor<UInt16Type>(tensor, 9, shape, f_strides);
+
+  // uint16 + int16 = int32
+  std::vector<std::shared_ptr<Field>> fields1 = {f0, f1};
+  auto schema1 = ::arrow::schema(fields1);
+  auto table1 = Table::Make(schema1, {a0, a1});
+
+  ASSERT_OK_AND_ASSIGN(auto tensor1,
+                       table1->ToTensor(/*null_to_nan=*/false, /*row_major=*/false));
+  ASSERT_OK(tensor1->Validate());
+  // Column-major expectation: all values of f0 are followed by all values of f1.
+  std::vector<int64_t> shape1 = {9, 2};
+  const int64_t int32_size = sizeof(int32_t);
+  std::vector<int64_t> f_strides_1 = {int32_size, int32_size * shape1[0]};
+  std::shared_ptr<Tensor> tensor_expected_1 = TensorFromJSON(
+      int32(), "[1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 20, 30, 40, 50, 60, 70, 80, 90]",
+      shape1, f_strides_1);
+
+  EXPECT_TRUE(tensor_expected_1->Equals(*tensor1));
+
+  CheckTensor<Int32Type>(tensor1, 18, shape1, f_strides_1);
+
+  ASSERT_EQ(tensor1->type()->bit_width(), tensor_expected_1->type()->bit_width());
+
+  ASSERT_EQ(1, tensor1->Value<Int32Type>({0, 0}));   // spot-check the converted
+  ASSERT_EQ(2, tensor1->Value<Int32Type>({1, 0}));   // tensor itself, indexed as
+  ASSERT_EQ(10, tensor1->Value<Int32Type>({0, 1}));  // {row, column}
+
+  // uint16 + int16 + float32 = float64
+  std::vector<std::shared_ptr<Field>> fields2 = {f0, f1, f2};
+  auto schema2 = ::arrow::schema(fields2);
+  auto table2 = Table::Make(schema2, {a0, a1, a2});
+
+  ASSERT_OK_AND_ASSIGN(auto tensor2,
+                       table2->ToTensor(/*null_to_nan=*/false, /*row_major=*/false));
+  ASSERT_OK(tensor2->Validate());
+  // The NaN stored in the float32 column is expected to appear in the float64 result.
+  std::vector<int64_t> shape2 = {9, 3};
+  const int64_t f64_size = sizeof(double);
+  std::vector<int64_t> f_strides_2 = {f64_size, f64_size * shape2[0]};
+  std::shared_ptr<Tensor> tensor_expected_2 =
+      TensorFromJSON(float64(),
+                     "[1,   2,   3,   4,   5,  6,  7,  8,   9,   10,  20, 30,  40,  50,"
+                     "60,  70, 80, 90, 100, 200, 300, NaN, 500, 600, 700, 800, 900]",
+                     shape2, f_strides_2);
+
+  EXPECT_FALSE(tensor_expected_2->Equals(*tensor2));
+  EXPECT_TRUE(tensor_expected_2->Equals(*tensor2, EqualOptions().nans_equal(true)));
+
+  CheckTensor<DoubleType>(tensor2, 27, shape2, f_strides_2);
+}
+
+TEST_F(TestTable, ToTensorUnsupportedMixedFloat16) {
+  auto f0 = field("f0", float16());
+  auto f1 = field("f1", float64());
+
+  auto a0 = ChunkedArrayFromJSON(float16(), {"[1, 2, 3]", "[4, 5, 6, 7, 8, 9]"});
+  auto a1 = ChunkedArrayFromJSON(float64(), {"[10, 20]", "[30, 40, 50, 60, 70, 80, 90]"});
+
+  std::vector<std::shared_ptr<Field>> fields = {f0, f1};
+  auto schema = ::arrow::schema(fields);
+  auto table = Table::Make(schema, {a0, a1});
+  // A float16 column mixed with a float64 one is expected to fail with NotImplemented.
+  ASSERT_RAISES_WITH_MESSAGE(
+      NotImplemented, "NotImplemented: Casting from or to halffloat is not supported.",
+      table->ToTensor());
+  // Same columns in the opposite order: the same error is expected.
+  std::vector<std::shared_ptr<Field>> fields1 = {f1, f0};
+  auto schema1 = ::arrow::schema(fields1);
+  auto table1 = Table::Make(schema1, {a1, a0});
+
+  ASSERT_RAISES_WITH_MESSAGE(
+      NotImplemented, "NotImplemented: Casting from or to halffloat is not supported.",
+      table1->ToTensor());
+}
+
+template <typename DataType>
+class TestTableToTensorColumnMajor : public ::testing::Test {};
+// Typed suite: SupportedTypes below runs once for every type instantiated at the end.
+TYPED_TEST_SUITE_P(TestTableToTensorColumnMajor);
+
+TYPED_TEST_P(TestTableToTensorColumnMajor, SupportedTypes) {
+  using DataType = TypeParam;
+  using c_data_type = typename DataType::c_type;
+  const int unit_size = sizeof(c_data_type);
+
+  auto f0 = field("f0", TypeTraits<DataType>::type_singleton());
+  auto f1 = field("f1", TypeTraits<DataType>::type_singleton());
+  auto f2 = field("f2", TypeTraits<DataType>::type_singleton());
+
+  std::vector<std::shared_ptr<Field>> fields = {f0, f1, f2};
+  auto schema = ::arrow::schema(fields);
+  // Three 9-value columns of the same type, each split into two chunks.
+  auto a0 = ChunkedArrayFromJSON(TypeTraits<DataType>::type_singleton(),
+                                 {"[1, 2, 3]", "[4, 5, 6, 7, 8, 9]"});
+  auto a1 = ChunkedArrayFromJSON(TypeTraits<DataType>::type_singleton(),
+                                 {"[10, 20]", "[30, 40, 50, 60, 70, 80, 90]"});
+  auto a2 = ChunkedArrayFromJSON(TypeTraits<DataType>::type_singleton(),
+                                 {"[100, 100, 100, 100, 100, 100]", "[100, 100, 100]"});
+
+  auto table = Table::Make(schema, {a0, a1, a2});
+
+  ASSERT_OK_AND_ASSIGN(auto tensor,
+                       table->ToTensor(/*null_to_nan=*/false, /*row_major=*/false));
+  ASSERT_OK(tensor->Validate());
+  // Column-major expectation: the three columns are stored one after another.
+  std::vector<int64_t> shape = {9, 3};
+  std::vector<int64_t> f_strides = {unit_size, unit_size * shape[0]};
+  std::shared_ptr<Tensor> tensor_expected = TensorFromJSON(
+      TypeTraits<DataType>::type_singleton(),
+      "[1,   2,   3,   4,   5,   6,   7,   8,   9, 10,  20,  30,  40,  50,  60,  70,  "
+      "80,  90, 100, 100, 100, 100, 100, 100, 100, 100, 100]",
+      shape, f_strides);
+
+  EXPECT_TRUE(tensor_expected->Equals(*tensor));
+  CheckTensor<DataType>(tensor, 27, shape, f_strides);
+
+  // Test offsets
+  auto table_slice = table->Slice(1);
+
+  ASSERT_OK_AND_ASSIGN(auto tensor_sliced, table_slice->ToTensor(/*null_to_nan=*/false,
+                                                                 /*row_major=*/false));
+  ASSERT_OK(tensor_sliced->Validate());
+  // The first row of every column is dropped by the slice.
+  std::vector<int64_t> shape_sliced = {8, 3};
+  std::vector<int64_t> f_strides_sliced = {unit_size, unit_size * shape_sliced[0]};
+  std::shared_ptr<Tensor> tensor_expected_sliced =
+      TensorFromJSON(TypeTraits<DataType>::type_singleton(),
+                     "[2,   3,   4,   5,   6,   7,   8,   9, 20,  30,  40,  50,  60,  "
+                     "70,  80,  90, 100, 100, 100, 100, 100, 100, 100, 100]",
+                     shape_sliced, f_strides_sliced);
+
+  EXPECT_TRUE(tensor_expected_sliced->Equals(*tensor_sliced));
+  CheckTensor<DataType>(tensor_sliced, 24, shape_sliced, f_strides_sliced);
+
+  auto table_slice_1 = table->Slice(1, 5);
+
+  ASSERT_OK_AND_ASSIGN(
+      auto tensor_sliced_1,
+      table_slice_1->ToTensor(/*null_to_nan=*/false, /*row_major=*/false));
+  ASSERT_OK(tensor_sliced_1->Validate());
+  // Slice(1, 5): five rows starting at the second row, spanning both chunks.
+  std::vector<int64_t> shape_sliced_1 = {5, 3};
+  std::vector<int64_t> f_strides_sliced_1 = {unit_size, unit_size * shape_sliced_1[0]};
+  std::shared_ptr<Tensor> tensor_expected_sliced_1 =
+      TensorFromJSON(TypeTraits<DataType>::type_singleton(),
+                     "[2, 3, 4, 5, 6, 20, 30, 40, 50, 60, 100, 100, 100, 100, 100]",
+                     shape_sliced_1, f_strides_sliced_1);
+
+  EXPECT_TRUE(tensor_expected_sliced_1->Equals(*tensor_sliced_1));
+  CheckTensor<DataType>(tensor_sliced_1, 15, shape_sliced_1, f_strides_sliced_1);
+}
+
+REGISTER_TYPED_TEST_SUITE_P(TestTableToTensorColumnMajor, SupportedTypes);
+
+INSTANTIATE_TYPED_TEST_SUITE_P(UInt8, TestTableToTensorColumnMajor, UInt8Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(UInt16, TestTableToTensorColumnMajor, UInt16Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(UInt32, TestTableToTensorColumnMajor, UInt32Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(UInt64, TestTableToTensorColumnMajor, UInt64Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(Int8, TestTableToTensorColumnMajor, Int8Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(Int16, TestTableToTensorColumnMajor, Int16Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(Int32, TestTableToTensorColumnMajor, Int32Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(Int64, TestTableToTensorColumnMajor, Int64Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(Float16, TestTableToTensorColumnMajor, HalfFloatType);
+INSTANTIATE_TYPED_TEST_SUITE_P(Float32, TestTableToTensorColumnMajor, FloatType);
+INSTANTIATE_TYPED_TEST_SUITE_P(Float64, TestTableToTensorColumnMajor, DoubleType);
+
+template <typename DataType>
+class TestTableToTensorRowMajor : public ::testing::Test {};
+// Typed suite: SupportedTypes below runs once for every type instantiated at the end.
+TYPED_TEST_SUITE_P(TestTableToTensorRowMajor);
+
+TYPED_TEST_P(TestTableToTensorRowMajor, SupportedTypes) {
+  using DataType = TypeParam;
+  using c_data_type = typename DataType::c_type;
+  const int unit_size = sizeof(c_data_type);
+
+  auto f0 = field("f0", TypeTraits<DataType>::type_singleton());
+  auto f1 = field("f1", TypeTraits<DataType>::type_singleton());
+  auto f2 = field("f2", TypeTraits<DataType>::type_singleton());
+
+  std::vector<std::shared_ptr<Field>> fields = {f0, f1, f2};
+  auto schema = ::arrow::schema(fields);
+  // Three 9-value columns of the same type, each split into two chunks.
+  auto a0 = ChunkedArrayFromJSON(TypeTraits<DataType>::type_singleton(),
+                                 {"[1, 2, 3]", "[4, 5, 6, 7, 8, 9]"});
+  auto a1 = ChunkedArrayFromJSON(TypeTraits<DataType>::type_singleton(),
+                                 {"[10, 20]", "[30, 40, 50, 60, 70, 80, 90]"});
+  auto a2 = ChunkedArrayFromJSON(TypeTraits<DataType>::type_singleton(),
+                                 {"[100, 100, 100, 100, 100, 100]", "[100, 100, 100]"});
+
+  auto table = Table::Make(schema, {a0, a1, a2});
+  // ToTensor() with default arguments is expected to give a row-major tensor.
+  ASSERT_OK_AND_ASSIGN(auto tensor, table->ToTensor());
+  ASSERT_OK(tensor->Validate());
+
+  std::vector<int64_t> shape = {9, 3};
+  std::vector<int64_t> strides = {unit_size * shape[1], unit_size};
+  std::shared_ptr<Tensor> tensor_expected =
+      TensorFromJSON(TypeTraits<DataType>::type_singleton(),
+                     "[1,   10, 100, 2, 20, 100, 3, 30, 100, 4, 40, 100, 5, 50, 100, 6, "
+                     "60, 100, 7, 70, 100, 8, 80, 100, 9, 90, 100]", shape, strides);
+
+  EXPECT_TRUE(tensor_expected->Equals(*tensor));
+  CheckTensorRowMajor<DataType>(tensor, 27, shape, strides);
+
+  // Test offsets
+  auto table_slice = table->Slice(1);
+
+  ASSERT_OK_AND_ASSIGN(auto tensor_sliced, table_slice->ToTensor());
+  ASSERT_OK(tensor_sliced->Validate());
+  // Strides are derived from the sliced shape, like in the other slice checks.
+  std::vector<int64_t> shape_sliced = {8, 3};
+  std::vector<int64_t> strides_sliced = {unit_size * shape_sliced[1], unit_size};
+  std::shared_ptr<Tensor> tensor_expected_sliced =
+      TensorFromJSON(TypeTraits<DataType>::type_singleton(),
+                     "[2, 20, 100, 3, 30, 100, 4, 40, 100, 5, 50, 100, 6, "
+                     "60, 100, 7, 70, 100, 8, 80, 100, 9, 90, 100]",
+                     shape_sliced, strides_sliced);
+
+  EXPECT_TRUE(tensor_expected_sliced->Equals(*tensor_sliced));
+  CheckTensorRowMajor<DataType>(tensor_sliced, 24, shape_sliced, strides_sliced);
+
+  auto table_slice_1 = table->Slice(1, 5);
+
+  ASSERT_OK_AND_ASSIGN(auto tensor_sliced_1, table_slice_1->ToTensor());
+  ASSERT_OK(tensor_sliced_1->Validate());
+  // Slice(1, 5): five rows starting at the second row, spanning both chunks.
+  std::vector<int64_t> shape_sliced_1 = {5, 3};
+  std::vector<int64_t> strides_sliced_1 = {unit_size * shape_sliced_1[1], unit_size};
+  std::shared_ptr<Tensor> tensor_expected_sliced_1 =
+      TensorFromJSON(TypeTraits<DataType>::type_singleton(),
+                     "[2, 20, 100, 3, 30, 100, 4, 40, 100, 5, 50, 100, 6, 60, 100]",
+                     shape_sliced_1, strides_sliced_1);
+
+  EXPECT_TRUE(tensor_expected_sliced_1->Equals(*tensor_sliced_1));
+  CheckTensorRowMajor<DataType>(tensor_sliced_1, 15, shape_sliced_1, strides_sliced_1);
+}
+
+REGISTER_TYPED_TEST_SUITE_P(TestTableToTensorRowMajor, SupportedTypes);
+
+INSTANTIATE_TYPED_TEST_SUITE_P(UInt8, TestTableToTensorRowMajor, UInt8Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(UInt16, TestTableToTensorRowMajor, UInt16Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(UInt32, TestTableToTensorRowMajor, UInt32Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(UInt64, TestTableToTensorRowMajor, UInt64Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(Int8, TestTableToTensorRowMajor, Int8Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(Int16, TestTableToTensorRowMajor, Int16Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(Int32, TestTableToTensorRowMajor, Int32Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(Int64, TestTableToTensorRowMajor, Int64Type);
+INSTANTIATE_TYPED_TEST_SUITE_P(Float16, TestTableToTensorRowMajor, HalfFloatType);
+INSTANTIATE_TYPED_TEST_SUITE_P(Float32, TestTableToTensorRowMajor, FloatType);
+INSTANTIATE_TYPED_TEST_SUITE_P(Float64, TestTableToTensorRowMajor, DoubleType);
+
 std::shared_ptr<Table> MakeTableWithOneNullFilledColumn(
     const std::string& column_name, const std::shared_ptr<DataType>& data_type,
     const int length) {

From 09843d7c4e94db51854173312a19b716514ebcf9 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Wed, 29 May 2024 11:16:18 +0200
Subject: [PATCH 03/23] Add benchmarks

---
 cpp/src/arrow/tensor_benchmark.cc | 38 +++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/cpp/src/arrow/tensor_benchmark.cc b/cpp/src/arrow/tensor_benchmark.cc
index 91a9270ef347..30969995ee2b 100644
--- a/cpp/src/arrow/tensor_benchmark.cc
+++ b/cpp/src/arrow/tensor_benchmark.cc
@@ -18,6 +18,7 @@
 #include "benchmark/benchmark.h"
 
 #include "arrow/record_batch.h"
+#include "arrow/table.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
 #include "arrow/type.h"
@@ -51,6 +52,34 @@ static void BatchToTensorSimple(benchmark::State& state) {
   state.SetBytesProcessed(state.iterations() * ty->byte_width() * num_rows * num_cols);
 }
 
+template <typename ValueType, bool row_major>
+static void TableToTensorSimple(benchmark::State& state) {
+  using CType = typename ValueType::c_type;
+  std::shared_ptr<DataType> ty = TypeTraits<ValueType>::type_singleton();
+  // state.range(0) is used as a byte budget, state.range(1) as the column count.
+  const int64_t num_cols = state.range(1);
+  const int64_t num_rows = state.range(0) / num_cols / sizeof(CType);
+  arrow::random::RandomArrayGenerator gen_{42};
+
+  std::vector<std::shared_ptr<Field>> fields = {};
+  std::vector<std::shared_ptr<ChunkedArray>> columns = {};
+  // Every column is built from two chunks of num_rows / 2 random values.
+  for (int64_t i = 0; i < num_cols; ++i) {
+    fields.push_back(field("f" + std::to_string(i), ty));
+    ArrayVector arrays = {gen_.ArrayOf(ty, num_rows / 2), gen_.ArrayOf(ty, num_rows / 2)};
+    auto chunks = std::make_shared<ChunkedArray>(arrays, ty);
+    columns.push_back(chunks);
+  }
+  auto schema = std::make_shared<Schema>(std::move(fields));
+  auto table = Table::Make(schema, columns);
+  // null_to_nan is ToTensor's first parameter: pass it so row_major lands correctly.
+  for (auto _ : state) {
+    ASSERT_OK_AND_ASSIGN(auto tensor, table->ToTensor(/*null_to_nan=*/false, row_major));
+  }
+  state.SetItemsProcessed(state.iterations() * num_rows * num_cols);
+  state.SetBytesProcessed(state.iterations() * ty->byte_width() * num_rows * num_cols);
+}
+
 void SetArgs(benchmark::internal::Benchmark* bench) {
   for (int64_t size : {kL1Size, kL2Size}) {
     for (int64_t num_columns : {3, 30, 300}) {
@@ -65,4 +94,13 @@ BENCHMARK_TEMPLATE(BatchToTensorSimple, Int16Type)->Apply(SetArgs);
 BENCHMARK_TEMPLATE(BatchToTensorSimple, Int32Type)->Apply(SetArgs);
 BENCHMARK_TEMPLATE(BatchToTensorSimple, Int64Type)->Apply(SetArgs);
 
+#define DECLARE_TABLE_TO_TENSOR_BENCHMARKS(row_major)                            \
+  BENCHMARK_TEMPLATE(TableToTensorSimple, Int8Type, row_major)->Apply(SetArgs);  \
+  BENCHMARK_TEMPLATE(TableToTensorSimple, Int16Type, row_major)->Apply(SetArgs); \
+  BENCHMARK_TEMPLATE(TableToTensorSimple, Int32Type, row_major)->Apply(SetArgs); \
+  BENCHMARK_TEMPLATE(TableToTensorSimple, Int64Type, row_major)->Apply(SetArgs);
+// Register the four integer-type benchmarks once per value of the row_major flag.
+DECLARE_TABLE_TO_TENSOR_BENCHMARKS(false);
+DECLARE_TABLE_TO_TENSOR_BENCHMARKS(true);
+
 }  // namespace arrow

From 9c0c6f6ce315cb36236ce46221009edc6873e585 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Wed, 29 May 2024 11:28:21 +0200
Subject: [PATCH 04/23] Fix linter error

---
 cpp/src/arrow/table_test.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/table_test.cc b/cpp/src/arrow/table_test.cc
index c08a5dca81c2..1d6b20142bd2 100644
--- a/cpp/src/arrow/table_test.cc
+++ b/cpp/src/arrow/table_test.cc
@@ -997,7 +997,8 @@ TYPED_TEST_P(TestTableToTensorRowMajor, SupportedTypes) {
   std::shared_ptr<Tensor> tensor_expected =
       TensorFromJSON(TypeTraits<DataType>::type_singleton(),
                      "[1,   10, 100, 2, 20, 100, 3, 30, 100, 4, 40, 100, 5, 50, 100, 6, "
-                     "60, 100, 7, 70, 100, 8, 80, 100, 9, 90, 100]", shape, strides);
+                     "60, 100, 7, 70, 100, 8, 80, 100, 9, 90, 100]",
+                     shape, strides);
 
   EXPECT_TRUE(tensor_expected->Equals(*tensor));
   CheckTensorRowMajor<DataType>(tensor, 27, shape, strides);

From e1562f41a4475fd5626955db3c2218081c907f91 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Wed, 29 May 2024 11:37:09 +0200
Subject: [PATCH 05/23] Add cmath include

---
 cpp/src/arrow/table.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 42fc9d0ce894..fe2cd12373ea 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -18,6 +18,7 @@
 #include "arrow/table.h"
 
 #include <algorithm>
+#include <cmath>
 #include <cstdlib>
 #include <limits>
 #include <memory>

From d5a6eafd1788fed405735798329be491a04b9dc3 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Wed, 29 May 2024 13:25:19 +0200
Subject: [PATCH 06/23] Change helper function names in C++ tests, fix doctest
 errors

---
 cpp/src/arrow/table_test.cc | 36 ++++++++++++++++++------------------
 python/pyarrow/table.pxi    |  6 ++----
 2 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/cpp/src/arrow/table_test.cc b/cpp/src/arrow/table_test.cc
index 1d6b20142bd2..beb2d48fc113 100644
--- a/cpp/src/arrow/table_test.cc
+++ b/cpp/src/arrow/table_test.cc
@@ -607,7 +607,7 @@ TEST_F(TestTable, ToTensorEmptyTable) {
 }
 
 template <typename DataType>
-void CheckTensor(const std::shared_ptr<Tensor>& tensor, const int size,
+void CheckTableToTensor(const std::shared_ptr<Tensor>& tensor, const int size,
                  const std::vector<int64_t> shape, const std::vector<int64_t> f_strides) {
   EXPECT_EQ(size, tensor->size());
   EXPECT_EQ(TypeTraits<DataType>::type_singleton(), tensor->type());
@@ -619,7 +619,7 @@ void CheckTensor(const std::shared_ptr<Tensor>& tensor, const int size,
 }
 
 template <typename DataType>
-void CheckTensorRowMajor(const std::shared_ptr<Tensor>& tensor, const int size,
+void CheckTableToTensorRowMajor(const std::shared_ptr<Tensor>& tensor, const int size,
                          const std::vector<int64_t> shape,
                          const std::vector<int64_t> strides) {
   EXPECT_EQ(size, tensor->size());
@@ -657,7 +657,7 @@ TEST_F(TestTable, ToTensorSupportedNaN) {
 
   EXPECT_FALSE(tensor_expected->Equals(*tensor));
   EXPECT_TRUE(tensor_expected->Equals(*tensor, EqualOptions().nans_equal(true)));
-  CheckTensor<FloatType>(tensor, 18, shape, f_strides);
+  CheckTableToTensor<FloatType>(tensor, 18, shape, f_strides);
 }
 
 TEST_F(TestTable, ToTensorSupportedNullToNan) {
@@ -688,7 +688,7 @@ TEST_F(TestTable, ToTensorSupportedNullToNan) {
   EXPECT_FALSE(tensor_expected->Equals(*tensor));
   EXPECT_TRUE(tensor_expected->Equals(*tensor, EqualOptions().nans_equal(true)));
 
-  CheckTensor<DoubleType>(tensor, 18, shape, f_strides);
+  CheckTableToTensor<DoubleType>(tensor, 18, shape, f_strides);
 
   ASSERT_OK_AND_ASSIGN(auto tensor_row, table->ToTensor(/*null_to_nan=*/true));
   ASSERT_OK(tensor_row->Validate());
@@ -701,7 +701,7 @@ TEST_F(TestTable, ToTensorSupportedNullToNan) {
   EXPECT_FALSE(tensor_expected_row->Equals(*tensor_row));
   EXPECT_TRUE(tensor_expected_row->Equals(*tensor_row, EqualOptions().nans_equal(true)));
 
-  CheckTensorRowMajor<DoubleType>(tensor_row, 18, shape, strides);
+  CheckTableToTensorRowMajor<DoubleType>(tensor_row, 18, shape, strides);
 
   // int32 -> float64
   auto f2 = field("f2", int32());
@@ -719,7 +719,7 @@ TEST_F(TestTable, ToTensorSupportedNullToNan) {
   EXPECT_FALSE(tensor_expected->Equals(*tensor1));
   EXPECT_TRUE(tensor_expected->Equals(*tensor1, EqualOptions().nans_equal(true)));
 
-  CheckTensor<DoubleType>(tensor1, 18, shape, f_strides);
+  CheckTableToTensor<DoubleType>(tensor1, 18, shape, f_strides);
 
   ASSERT_OK_AND_ASSIGN(auto tensor1_row, table1->ToTensor(/*null_to_nan=*/true));
   ASSERT_OK(tensor1_row->Validate());
@@ -727,7 +727,7 @@ TEST_F(TestTable, ToTensorSupportedNullToNan) {
   EXPECT_FALSE(tensor_expected_row->Equals(*tensor1_row));
   EXPECT_TRUE(tensor_expected_row->Equals(*tensor1_row, EqualOptions().nans_equal(true)));
 
-  CheckTensorRowMajor<DoubleType>(tensor1_row, 18, shape, strides);
+  CheckTableToTensorRowMajor<DoubleType>(tensor1_row, 18, shape, strides);
 
   // int8 -> float32
   auto f3 = field("f3", int8());
@@ -753,7 +753,7 @@ TEST_F(TestTable, ToTensorSupportedNullToNan) {
   EXPECT_FALSE(tensor_expected_2->Equals(*tensor2));
   EXPECT_TRUE(tensor_expected_2->Equals(*tensor2, EqualOptions().nans_equal(true)));
 
-  CheckTensor<FloatType>(tensor2, 18, shape, f_strides_2);
+  CheckTableToTensor<FloatType>(tensor2, 18, shape, f_strides_2);
 
   ASSERT_OK_AND_ASSIGN(auto tensor2_row, table2->ToTensor(/*null_to_nan=*/true));
   ASSERT_OK(tensor2_row->Validate());
@@ -767,7 +767,7 @@ TEST_F(TestTable, ToTensorSupportedNullToNan) {
   EXPECT_TRUE(
       tensor2_expected_row->Equals(*tensor2_row, EqualOptions().nans_equal(true)));
 
-  CheckTensorRowMajor<FloatType>(tensor2_row, 18, shape, strides_2);
+  CheckTableToTensorRowMajor<FloatType>(tensor2_row, 18, shape, strides_2);
 }
 
 TEST_F(TestTable, ToTensorSupportedTypesMixed) {
@@ -796,7 +796,7 @@ TEST_F(TestTable, ToTensorSupportedTypesMixed) {
       TensorFromJSON(uint16(), "[1, 2, 3, 4, 5, 6, 7, 8, 9]", shape, f_strides);
 
   EXPECT_TRUE(tensor_expected->Equals(*tensor));
-  CheckTensor<UInt16Type>(tensor, 9, shape, f_strides);
+  CheckTableToTensor<UInt16Type>(tensor, 9, shape, f_strides);
 
   // uint16 + int16 = int32
   std::vector<std::shared_ptr<Field>> fields1 = {f0, f1};
@@ -816,7 +816,7 @@ TEST_F(TestTable, ToTensorSupportedTypesMixed) {
 
   EXPECT_TRUE(tensor_expected_1->Equals(*tensor1));
 
-  CheckTensor<Int32Type>(tensor1, 18, shape1, f_strides_1);
+  CheckTableToTensor<Int32Type>(tensor1, 18, shape1, f_strides_1);
 
   ASSERT_EQ(tensor1->type()->bit_width(), tensor_expected_1->type()->bit_width());
 
@@ -845,7 +845,7 @@ TEST_F(TestTable, ToTensorSupportedTypesMixed) {
   EXPECT_FALSE(tensor_expected_2->Equals(*tensor2));
   EXPECT_TRUE(tensor_expected_2->Equals(*tensor2, EqualOptions().nans_equal(true)));
 
-  CheckTensor<DoubleType>(tensor2, 27, shape2, f_strides_2);
+  CheckTableToTensor<DoubleType>(tensor2, 27, shape2, f_strides_2);
 }
 
 TEST_F(TestTable, ToTensorUnsupportedMixedFloat16) {
@@ -911,7 +911,7 @@ TYPED_TEST_P(TestTableToTensorColumnMajor, SupportedTypes) {
       shape, f_strides);
 
   EXPECT_TRUE(tensor_expected->Equals(*tensor));
-  CheckTensor<DataType>(tensor, 27, shape, f_strides);
+  CheckTableToTensor<DataType>(tensor, 27, shape, f_strides);
 
   // Test offsets
   auto table_slice = table->Slice(1);
@@ -929,7 +929,7 @@ TYPED_TEST_P(TestTableToTensorColumnMajor, SupportedTypes) {
                      shape_sliced, f_strides_sliced);
 
   EXPECT_TRUE(tensor_expected_sliced->Equals(*tensor_sliced));
-  CheckTensor<DataType>(tensor_expected_sliced, 24, shape_sliced, f_strides_sliced);
+  CheckTableToTensor<DataType>(tensor_expected_sliced, 24, shape_sliced, f_strides_sliced);
 
   auto table_slice_1 = table->Slice(1, 5);
 
@@ -946,7 +946,7 @@ TYPED_TEST_P(TestTableToTensorColumnMajor, SupportedTypes) {
                      shape_sliced_1, f_strides_sliced_1);
 
   EXPECT_TRUE(tensor_expected_sliced_1->Equals(*tensor_sliced_1));
-  CheckTensor<DataType>(tensor_expected_sliced_1, 15, shape_sliced_1, f_strides_sliced_1);
+  CheckTableToTensor<DataType>(tensor_expected_sliced_1, 15, shape_sliced_1, f_strides_sliced_1);
 }
 
 REGISTER_TYPED_TEST_SUITE_P(TestTableToTensorColumnMajor, SupportedTypes);
@@ -1001,7 +1001,7 @@ TYPED_TEST_P(TestTableToTensorRowMajor, SupportedTypes) {
                      shape, strides);
 
   EXPECT_TRUE(tensor_expected->Equals(*tensor));
-  CheckTensorRowMajor<DataType>(tensor, 27, shape, strides);
+  CheckTableToTensorRowMajor<DataType>(tensor, 27, shape, strides);
 
   // Test offsets
   auto table_slice = table->Slice(1);
@@ -1018,7 +1018,7 @@ TYPED_TEST_P(TestTableToTensorRowMajor, SupportedTypes) {
                      shape_sliced, strides_sliced);
 
   EXPECT_TRUE(tensor_expected_sliced->Equals(*tensor_sliced));
-  CheckTensorRowMajor<DataType>(tensor_sliced, 24, shape_sliced, strides_sliced);
+  CheckTableToTensorRowMajor<DataType>(tensor_sliced, 24, shape_sliced, strides_sliced);
 
   auto table_slice_1 = table->Slice(1, 5);
 
@@ -1033,7 +1033,7 @@ TYPED_TEST_P(TestTableToTensorRowMajor, SupportedTypes) {
                      shape_sliced_1, strides_sliced_1);
 
   EXPECT_TRUE(tensor_expected_sliced_1->Equals(*tensor_sliced_1));
-  CheckTensorRowMajor<DataType>(tensor_sliced_1, 15, shape_sliced_1, strides_sliced_1);
+  CheckTableToTensorRowMajor<DataType>(tensor_sliced_1, 15, shape_sliced_1, strides_sliced_1);
 }
 
 REGISTER_TYPED_TEST_SUITE_P(TestTableToTensorRowMajor, SupportedTypes);
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 3299ccae9997..38d920e0d20f 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -2292,8 +2292,7 @@ cdef class _Tabular(_PandasConvertible):
         >>> animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"])
         >>> table = pa.Table.from_arrays([n_legs, animals], names=["n_legs", "animals"])
         >>> table.to_pydict()
-        {'n_legs': [2, 2, 4, 4, 5, 100], 'animals': [
-            'Flamingo', 'Parrot', ..., 'Centipede']}
+        {'n_legs': [2, 2, 4, 4, 5, 100], 'animals': ['Flamingo', 'Parrot', ..., 'Centipede']}
         """
         entries = []
         for i in range(self.num_columns):
@@ -4990,8 +4989,7 @@ cdef class Table(_Tabular):
         animals: string
         ----
         n_legs: [[2,4,5,100],[2,4,5,100]]
-        animals: [["Flamingo","Horse","Brittle stars","Centipede"],
-            ["Flamingo","Horse","Brittle stars","Centipede"]]
+        animals: [["Flamingo",...,"Centipede"],["Flamingo",...,"Centipede"]]
         """
         cdef:
             vector[shared_ptr[CRecordBatch]] c_batches

From 8854a82cf21a5e381c9a22e9ac06e58a1f9ab9f8 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Wed, 29 May 2024 15:41:30 +0200
Subject: [PATCH 07/23] Correct indentations

---
 cpp/src/arrow/table_test.cc | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/cpp/src/arrow/table_test.cc b/cpp/src/arrow/table_test.cc
index beb2d48fc113..548e4be2278c 100644
--- a/cpp/src/arrow/table_test.cc
+++ b/cpp/src/arrow/table_test.cc
@@ -608,7 +608,8 @@ TEST_F(TestTable, ToTensorEmptyTable) {
 
 template <typename DataType>
 void CheckTableToTensor(const std::shared_ptr<Tensor>& tensor, const int size,
-                 const std::vector<int64_t> shape, const std::vector<int64_t> f_strides) {
+                        const std::vector<int64_t> shape,
+                        const std::vector<int64_t> f_strides) {
   EXPECT_EQ(size, tensor->size());
   EXPECT_EQ(TypeTraits<DataType>::type_singleton(), tensor->type());
   EXPECT_EQ(shape, tensor->shape());
@@ -620,8 +621,8 @@ void CheckTableToTensor(const std::shared_ptr<Tensor>& tensor, const int size,
 
 template <typename DataType>
 void CheckTableToTensorRowMajor(const std::shared_ptr<Tensor>& tensor, const int size,
-                         const std::vector<int64_t> shape,
-                         const std::vector<int64_t> strides) {
+                                const std::vector<int64_t> shape,
+                                const std::vector<int64_t> strides) {
   EXPECT_EQ(size, tensor->size());
   EXPECT_EQ(TypeTraits<DataType>::type_singleton(), tensor->type());
   EXPECT_EQ(shape, tensor->shape());
@@ -929,7 +930,8 @@ TYPED_TEST_P(TestTableToTensorColumnMajor, SupportedTypes) {
                      shape_sliced, f_strides_sliced);
 
   EXPECT_TRUE(tensor_expected_sliced->Equals(*tensor_sliced));
-  CheckTableToTensor<DataType>(tensor_expected_sliced, 24, shape_sliced, f_strides_sliced);
+  CheckTableToTensor<DataType>(tensor_expected_sliced, 24, shape_sliced,
+                               f_strides_sliced);
 
   auto table_slice_1 = table->Slice(1, 5);
 
@@ -946,7 +948,8 @@ TYPED_TEST_P(TestTableToTensorColumnMajor, SupportedTypes) {
                      shape_sliced_1, f_strides_sliced_1);
 
   EXPECT_TRUE(tensor_expected_sliced_1->Equals(*tensor_sliced_1));
-  CheckTableToTensor<DataType>(tensor_expected_sliced_1, 15, shape_sliced_1, f_strides_sliced_1);
+  CheckTableToTensor<DataType>(tensor_expected_sliced_1, 15, shape_sliced_1,
+                               f_strides_sliced_1);
 }
 
 REGISTER_TYPED_TEST_SUITE_P(TestTableToTensorColumnMajor, SupportedTypes);
@@ -1033,7 +1036,8 @@ TYPED_TEST_P(TestTableToTensorRowMajor, SupportedTypes) {
                      shape_sliced_1, strides_sliced_1);
 
   EXPECT_TRUE(tensor_expected_sliced_1->Equals(*tensor_sliced_1));
-  CheckTableToTensorRowMajor<DataType>(tensor_sliced_1, 15, shape_sliced_1, strides_sliced_1);
+  CheckTableToTensorRowMajor<DataType>(tensor_sliced_1, 15, shape_sliced_1,
+                                       strides_sliced_1);
 }
 
 REGISTER_TYPED_TEST_SUITE_P(TestTableToTensorRowMajor, SupportedTypes);

From d2be9252a68264313ea5d0c441381d265bba0665 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Wed, 29 May 2024 17:52:07 +0200
Subject: [PATCH 08/23] Mention both Table and RecordBatch in ToTensor error
 messages

---
 cpp/src/arrow/record_batch_test.cc | 18 ++++++++++--------
 cpp/src/arrow/table.cc             |  7 ++++---
 cpp/src/arrow/table_test.cc        | 18 ++++++++++--------
 python/pyarrow/tests/test_table.py |  4 ++--
 4 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/cpp/src/arrow/record_batch_test.cc b/cpp/src/arrow/record_batch_test.cc
index a037d7261efb..0cc7def76796 100644
--- a/cpp/src/arrow/record_batch_test.cc
+++ b/cpp/src/arrow/record_batch_test.cc
@@ -906,10 +906,11 @@ TEST_F(TestRecordBatch, ToTensorUnsupportedMissing) {
 
   auto batch = RecordBatch::Make(schema, length, {a0, a1});
 
-  ASSERT_RAISES_WITH_MESSAGE(TypeError,
-                             "Type error: Can only convert a RecordBatch with no nulls. "
-                             "Set null_to_nan to true to convert nulls to NaN",
-                             batch->ToTensor());
+  ASSERT_RAISES_WITH_MESSAGE(
+      TypeError,
+      "Type error: Can only convert a Table or RecordBatch with no "
+      "nulls. Set null_to_nan to true to convert nulls to NaN",
+      batch->ToTensor());
 }
 
 TEST_F(TestRecordBatch, ToTensorEmptyBatch) {
@@ -940,10 +941,11 @@ TEST_F(TestRecordBatch, ToTensorEmptyBatch) {
   auto batch_no_columns =
       RecordBatch::Make(::arrow::schema({}), 10, std::vector<std::shared_ptr<Array>>{});
 
-  ASSERT_RAISES_WITH_MESSAGE(TypeError,
-                             "Type error: Conversion to Tensor for RecordBatches without "
-                             "columns/schema is not supported.",
-                             batch_no_columns->ToTensor());
+  ASSERT_RAISES_WITH_MESSAGE(
+      TypeError,
+      "Type error: Conversion to Tensor for Tables or RecordBatches "
+      "without columns/schema is not supported.",
+      batch_no_columns->ToTensor());
 }
 
 template <typename DataType>
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index fe2cd12373ea..89c69a778e8c 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -441,15 +441,16 @@ Result<std::shared_ptr<Tensor>> Table::ToTensor(bool null_to_nan, bool row_major
                                                 MemoryPool* pool) const {
   if (num_columns() == 0) {
     return Status::TypeError(
-        "Conversion to Tensor for Tables without columns/schema is not supported.");
+        "Conversion to Tensor for Tables or RecordBatches without columns/schema is "
+        "not supported.");
   }
   // Check for no validity bitmap of each field
   // if null_to_nan conversion is set to false
   for (int i = 0; i < num_columns(); ++i) {
     if (column(i)->null_count() > 0 && !null_to_nan) {
       return Status::TypeError(
-          "Can only convert a Table with no nulls. Set null_to_nan to true to "
-          "convert nulls to NaN");
+          "Can only convert a Table or RecordBatch with no nulls. Set null_to_nan to "
+          "true to convert nulls to NaN");
     }
   }
 
diff --git a/cpp/src/arrow/table_test.cc b/cpp/src/arrow/table_test.cc
index 548e4be2278c..c19541a14925 100644
--- a/cpp/src/arrow/table_test.cc
+++ b/cpp/src/arrow/table_test.cc
@@ -565,10 +565,11 @@ TEST_F(TestTable, ToTensorUnsupportedMissing) {
 
   auto table = Table::Make(schema, {a0, a1});
 
-  ASSERT_RAISES_WITH_MESSAGE(TypeError,
-                             "Type error: Can only convert a Table with no nulls. Set "
-                             "null_to_nan to true to convert nulls to NaN",
-                             table->ToTensor());
+  ASSERT_RAISES_WITH_MESSAGE(
+      TypeError,
+      "Type error: Can only convert a Table or RecordBatch with no "
+      "nulls. Set null_to_nan to true to convert nulls to NaN",
+      table->ToTensor());
 }
 
 TEST_F(TestTable, ToTensorEmptyTable) {
@@ -600,10 +601,11 @@ TEST_F(TestTable, ToTensorEmptyTable) {
   auto table_no_columns =
       Table::Make(::arrow::schema({}), std::vector<std::shared_ptr<Array>>{});
 
-  ASSERT_RAISES_WITH_MESSAGE(TypeError,
-                             "Type error: Conversion to Tensor for Tables without "
-                             "columns/schema is not supported.",
-                             table_no_columns->ToTensor());
+  ASSERT_RAISES_WITH_MESSAGE(
+      TypeError,
+      "Type error: Conversion to Tensor for Tables or RecordBatches "
+      "without columns/schema is not supported.",
+      table_no_columns->ToTensor());
 }
 
 template <typename DataType>
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index d12a61063bef..b82cf507a890 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -1184,7 +1184,7 @@ def test_recordbatch_to_tensor_null():
     )
     with pytest.raises(
         pa.ArrowTypeError,
-        match="Can only convert a RecordBatch with no nulls."
+        match="Can only convert a Table or RecordBatch with no nulls."
     ):
         batch.to_tensor()
 
@@ -1471,7 +1471,7 @@ def test_table_to_tensor_null():
     )
     with pytest.raises(
         pa.ArrowTypeError,
-        match="Can only convert a Table with no nulls."
+        match="Can only convert a Table or RecordBatch with no nulls."
     ):
         table.to_tensor()
 

From 520561c43ae6e25b4efc29b9f93f5a46e3681499 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Wed, 5 Jun 2024 10:33:51 +0200
Subject: [PATCH 09/23] Align RecordBatchToTensor error messages with Table::ToTensor

---
 cpp/src/arrow/record_batch.cc | 1 -
 cpp/src/arrow/tensor.cc       | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc
index 12e0f553b740..bc2612f92add 100644
--- a/cpp/src/arrow/record_batch.cc
+++ b/cpp/src/arrow/record_batch.cc
@@ -18,7 +18,6 @@
 #include "arrow/record_batch.h"
 
 #include <algorithm>
-#include <cmath>
 #include <cstdlib>
 #include <memory>
 #include <mutex>
diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc
index 8cdf7f82d264..80a97c1a44fd 100644
--- a/cpp/src/arrow/tensor.cc
+++ b/cpp/src/arrow/tensor.cc
@@ -307,7 +307,7 @@ Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_
                            MemoryPool* pool, std::shared_ptr<Tensor>* tensor) {
   if (batch.num_columns() == 0) {
     return Status::TypeError(
-        "Conversion to Tensor for RecordBatches without columns/schema is not "
+        "Conversion to Tensor for Tables or RecordBatches without columns/schema is not "
         "supported.");
   }
   // Check for no validity bitmap of each field
@@ -315,8 +315,8 @@ Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_
   for (int i = 0; i < batch.num_columns(); ++i) {
     if (batch.column(i)->null_count() > 0 && !null_to_nan) {
       return Status::TypeError(
-          "Can only convert a RecordBatch with no nulls. Set null_to_nan to true to "
-          "convert nulls to NaN");
+          "Can only convert a Table or RecordBatch with no nulls. Set null_to_nan to "
+          "true to convert nulls to NaN");
     }
   }
 

From 6bd177dad1196825f7494c70a023ff2225ff79eb Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Wed, 5 Jun 2024 10:56:54 +0200
Subject: [PATCH 10/23] Change RecordBatchToTensor to TableToTensor and update
 the code to work for Arrow Tables

---
 cpp/src/arrow/record_batch.cc |  3 +-
 cpp/src/arrow/tensor.cc       | 99 +++++++++++++++++++----------------
 cpp/src/arrow/tensor.h        |  4 +-
 3 files changed, 57 insertions(+), 49 deletions(-)

diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc
index bc2612f92add..3271f26be8c8 100644
--- a/cpp/src/arrow/record_batch.cc
+++ b/cpp/src/arrow/record_batch.cc
@@ -295,9 +295,10 @@ Result<std::shared_ptr<StructArray>> RecordBatch::ToStructArray() const {
 
 Result<std::shared_ptr<Tensor>> RecordBatch::ToTensor(bool null_to_nan, bool row_major,
                                                       MemoryPool* pool) const {
+  std::shared_ptr<Table> table = Table::Make(schema(), columns());
   std::shared_ptr<Tensor> tensor;
   ARROW_RETURN_NOT_OK(
-      internal::RecordBatchToTensor(*this, null_to_nan, row_major, pool, &tensor));
+      internal::TableToTensor(*table, null_to_nan, row_major, pool, &tensor));
   return tensor;
 }
 
diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc
index 80a97c1a44fd..1ecaa384631b 100644
--- a/cpp/src/arrow/tensor.cc
+++ b/cpp/src/arrow/tensor.cc
@@ -28,8 +28,8 @@
 #include <type_traits>
 #include <vector>
 
-#include "arrow/record_batch.h"
 #include "arrow/status.h"
+#include "arrow/table.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
@@ -224,7 +224,7 @@ Status ValidateTensorParameters(const std::shared_ptr<DataType>& type,
 }
 
 template <typename Out>
-struct ConvertColumnsToTensorVisitor {
+struct ConvertArrayToTensorVisitor {
   Out*& out_values;
   const ArrayData& in_data;
 
@@ -256,11 +256,12 @@ struct ConvertColumnsToTensorVisitor {
 };
 
 template <typename Out>
-struct ConvertColumnsToTensorRowMajorVisitor {
+struct ConvertArrayToTensorRowMajorVisitor {
   Out*& out_values;
   const ArrayData& in_data;
   int num_cols;
   int col_idx;
+  int chunk_idx;
 
   template <typename T>
   Status Visit(const T&) {
@@ -269,13 +270,15 @@ struct ConvertColumnsToTensorRowMajorVisitor {
       auto in_values = ArraySpan(in_data).GetSpan<In>(1, in_data.length);
 
       if (in_data.null_count == 0) {
-        for (int64_t i = 0; i < in_data.length; ++i) {
-          out_values[i * num_cols + col_idx] = static_cast<Out>(in_values[i]);
+        for (int64_t data_idx = 0; data_idx < in_data.length; ++data_idx) {
+          out_values[(data_idx + chunk_idx) * num_cols + col_idx] =
+              static_cast<Out>(in_values[data_idx]);
         }
       } else {
-        for (int64_t i = 0; i < in_data.length; ++i) {
-          out_values[i * num_cols + col_idx] =
-              in_data.IsNull(i) ? static_cast<Out>(NAN) : static_cast<Out>(in_values[i]);
+        for (int64_t data_idx = 0; data_idx < in_data.length; ++data_idx) {
+          out_values[(data_idx + chunk_idx) * num_cols + col_idx] =
+              in_data.IsNull(data_idx) ? static_cast<Out>(NAN)
+                                       : static_cast<Out>(in_values[data_idx]);
         }
       }
       return Status::OK();
@@ -285,35 +288,39 @@ struct ConvertColumnsToTensorRowMajorVisitor {
 };
 
 template <typename DataType>
-inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out,
-                                   bool row_major) {
+inline void ConvertColumnsToTensor(const Table& table, uint8_t* out, bool row_major) {
   using CType = typename arrow::TypeTraits<DataType>::CType;
   auto* out_values = reinterpret_cast<CType*>(out);
 
   int i = 0;
-  for (const auto& column : batch.columns()) {
-    if (row_major) {
-      ConvertColumnsToTensorRowMajorVisitor<CType> visitor{out_values, *column->data(),
-                                                           batch.num_columns(), i++};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
-    } else {
-      ConvertColumnsToTensorVisitor<CType> visitor{out_values, *column->data()};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
+  for (const auto& column : table.columns()) {
+    int j = 0;
+    for (const auto& chunk : column->chunks()) {
+      if (row_major) {
+        ConvertArrayToTensorRowMajorVisitor<CType> visitor{out_values, *chunk->data(),
+                                                           table.num_columns(), i, j};
+        DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+        j = j + static_cast<int>(chunk->length());
+      } else {
+        ConvertArrayToTensorVisitor<CType> visitor{out_values, *chunk->data()};
+        DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+      }
     }
+    i++;
   }
 }
 
-Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_major,
-                           MemoryPool* pool, std::shared_ptr<Tensor>* tensor) {
-  if (batch.num_columns() == 0) {
+Status TableToTensor(const Table& table, bool null_to_nan, bool row_major,
+                     MemoryPool* pool, std::shared_ptr<Tensor>* tensor) {
+  if (table.num_columns() == 0) {
     return Status::TypeError(
         "Conversion to Tensor for Tables or RecordBatches without columns/schema is not "
         "supported.");
   }
   // Check for no validity bitmap of each field
   // if null_to_nan conversion is set to false
-  for (int i = 0; i < batch.num_columns(); ++i) {
-    if (batch.column(i)->null_count() > 0 && !null_to_nan) {
+  for (int i = 0; i < table.num_columns(); ++i) {
+    if (table.column(i)->null_count() > 0 && !null_to_nan) {
       return Status::TypeError(
           "Can only convert a Table or RecordBatch with no nulls. Set null_to_nan to "
           "true to convert nulls to NaN");
@@ -322,12 +329,12 @@ Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_
 
   // Check for supported data types and merge fields
   // to get the resulting uniform data type
-  if (!is_integer(batch.column(0)->type()->id()) &&
-      !is_floating(batch.column(0)->type()->id())) {
+  if (!is_integer(table.column(0)->type()->id()) &&
+      !is_floating(table.column(0)->type()->id())) {
     return Status::TypeError("DataType is not supported: ",
-                             batch.column(0)->type()->ToString());
+                             table.column(0)->type()->ToString());
   }
-  std::shared_ptr<Field> result_field = batch.schema()->field(0);
+  std::shared_ptr<Field> result_field = table.schema()->field(0);
   std::shared_ptr<DataType> result_type = result_field->type();
 
   Field::MergeOptions options;
@@ -335,24 +342,24 @@ Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_
   options.promote_integer_sign = true;
   options.promote_numeric_width = true;
 
-  if (batch.num_columns() > 1) {
-    for (int i = 1; i < batch.num_columns(); ++i) {
-      if (!is_numeric(batch.column(i)->type()->id())) {
+  if (table.num_columns() > 1) {
+    for (int i = 1; i < table.num_columns(); ++i) {
+      if (!is_numeric(table.column(i)->type()->id())) {
         return Status::TypeError("DataType is not supported: ",
-                                 batch.column(i)->type()->ToString());
+                                 table.column(i)->type()->ToString());
       }
 
       // Casting of float16 is not supported, throw an error in this case
-      if ((batch.column(i)->type()->id() == Type::HALF_FLOAT ||
+      if ((table.column(i)->type()->id() == Type::HALF_FLOAT ||
            result_field->type()->id() == Type::HALF_FLOAT) &&
-          batch.column(i)->type()->id() != result_field->type()->id()) {
+          table.column(i)->type()->id() != result_field->type()->id()) {
         return Status::NotImplemented("Casting from or to halffloat is not supported.");
       }
 
       ARROW_ASSIGN_OR_RAISE(
           result_field,
           result_field->MergeWith(
-              batch.schema()->field(i)->WithName(result_field->name()), options));
+              table.schema()->field(i)->WithName(result_field->name()), options));
     }
     result_type = result_field->type();
   }
@@ -369,40 +376,40 @@ Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_
   // Allocate memory
   ARROW_ASSIGN_OR_RAISE(
       std::shared_ptr<Buffer> result,
-      AllocateBuffer(result_type->bit_width() * batch.num_columns() * batch.num_rows(),
+      AllocateBuffer(result_type->bit_width() * table.num_columns() * table.num_rows(),
                      pool));
   // Copy data
   switch (result_type->id()) {
     case Type::UINT8:
-      ConvertColumnsToTensor<UInt8Type>(batch, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<UInt8Type>(table, result->mutable_data(), row_major);
       break;
     case Type::UINT16:
     case Type::HALF_FLOAT:
-      ConvertColumnsToTensor<UInt16Type>(batch, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<UInt16Type>(table, result->mutable_data(), row_major);
       break;
     case Type::UINT32:
-      ConvertColumnsToTensor<UInt32Type>(batch, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<UInt32Type>(table, result->mutable_data(), row_major);
       break;
     case Type::UINT64:
-      ConvertColumnsToTensor<UInt64Type>(batch, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<UInt64Type>(table, result->mutable_data(), row_major);
       break;
     case Type::INT8:
-      ConvertColumnsToTensor<Int8Type>(batch, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<Int8Type>(table, result->mutable_data(), row_major);
       break;
     case Type::INT16:
-      ConvertColumnsToTensor<Int16Type>(batch, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<Int16Type>(table, result->mutable_data(), row_major);
       break;
     case Type::INT32:
-      ConvertColumnsToTensor<Int32Type>(batch, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<Int32Type>(table, result->mutable_data(), row_major);
       break;
     case Type::INT64:
-      ConvertColumnsToTensor<Int64Type>(batch, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<Int64Type>(table, result->mutable_data(), row_major);
       break;
     case Type::FLOAT:
-      ConvertColumnsToTensor<FloatType>(batch, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<FloatType>(table, result->mutable_data(), row_major);
       break;
     case Type::DOUBLE:
-      ConvertColumnsToTensor<DoubleType>(batch, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<DoubleType>(table, result->mutable_data(), row_major);
       break;
     default:
       return Status::TypeError("DataType is not supported: ", result_type->ToString());
@@ -411,7 +418,7 @@ Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_
   // Construct Tensor object
   const auto& fixed_width_type =
       internal::checked_cast<const FixedWidthType&>(*result_type);
-  std::vector<int64_t> shape = {batch.num_rows(), batch.num_columns()};
+  std::vector<int64_t> shape = {table.num_rows(), table.num_columns()};
   std::vector<int64_t> strides;
 
   if (row_major) {
diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h
index beb62a11bdce..b1c98bf733c7 100644
--- a/cpp/src/arrow/tensor.h
+++ b/cpp/src/arrow/tensor.h
@@ -78,8 +78,8 @@ Status ValidateTensorParameters(const std::shared_ptr<DataType>& type,
                                 const std::vector<std::string>& dim_names);
 
 ARROW_EXPORT
-Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_major,
-                           MemoryPool* pool, std::shared_ptr<Tensor>* tensor);
+Status TableToTensor(const Table& table, bool null_to_nan, bool row_major,
+                     MemoryPool* pool, std::shared_ptr<Tensor>* tensor);
 
 }  // namespace internal
 

From 8306e730db490bc88e921fabeb186e265f3694e0 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Wed, 5 Jun 2024 14:09:50 +0200
Subject: [PATCH 11/23] Use TableToTensor in Table::ToTensor

---
 cpp/src/arrow/table.cc | 206 +----------------------------------------
 1 file changed, 2 insertions(+), 204 deletions(-)

diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 89c69a778e8c..b2b18bd510bc 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -18,7 +18,6 @@
 #include "arrow/table.h"
 
 #include <algorithm>
-#include <cmath>
 #include <cstdlib>
 #include <limits>
 #include <memory>
@@ -44,7 +43,6 @@
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging_internal.h"
 #include "arrow/util/logging.h"
-#include "arrow/util/unreachable.h"
 #include "arrow/util/vector.h"
 
 namespace arrow {
@@ -350,211 +348,11 @@ Result<std::shared_ptr<Table>> Table::FromChunkedStructArray(
                      array->length());
 }
 
-template <typename Out>
-struct ConvertChunksToTensorVisitor {
-  Out*& out_values;
-  const ArrayData& in_data;
-
-  template <typename T>
-  Status Visit(const T&) {
-    if constexpr (is_numeric(T::type_id)) {
-      using In = typename T::c_type;
-      auto in_values = ArraySpan(in_data).GetSpan<In>(1, in_data.length);
-
-      if (in_data.null_count == 0) {
-        if constexpr (std::is_same_v<In, Out>) {
-          memcpy(out_values, in_values.data(), in_values.size_bytes());
-          out_values += in_values.size();
-        } else {
-          for (In in_value : in_values) {
-            *out_values++ = static_cast<Out>(in_value);
-          }
-        }
-      } else {
-        for (int64_t i = 0; i < in_data.length; ++i) {
-          *out_values++ =
-              in_data.IsNull(i) ? static_cast<Out>(NAN) : static_cast<Out>(in_values[i]);
-        }
-      }
-      return Status::OK();
-    }
-    Unreachable();
-  }
-};
-
-template <typename Out>
-struct ConvertChunksToTensorRowMajorVisitor {
-  Out*& out_values;
-  const ArrayData& in_data;
-  int num_cols;
-  int col_idx;
-  int chunk_idx;
-
-  template <typename T>
-  Status Visit(const T&) {
-    if constexpr (is_numeric(T::type_id)) {
-      using In = typename T::c_type;
-      auto in_values = ArraySpan(in_data).GetSpan<In>(1, in_data.length);
-
-      if (in_data.null_count == 0) {
-        for (int64_t data_idx = 0; data_idx < in_data.length; ++data_idx) {
-          out_values[(data_idx + chunk_idx) * num_cols + col_idx] =
-              static_cast<Out>(in_values[data_idx]);
-        }
-      } else {
-        for (int64_t data_idx = 0; data_idx < in_data.length; ++data_idx) {
-          out_values[(data_idx + chunk_idx) * num_cols + col_idx] =
-              in_data.IsNull(data_idx) ? static_cast<Out>(NAN)
-                                       : static_cast<Out>(in_values[data_idx]);
-        }
-      }
-      return Status::OK();
-    }
-    Unreachable();
-  }
-};
-
-template <typename DataType>
-inline void ConvertColumnsToTensor(const Table& table, uint8_t* out, bool row_major) {
-  using CType = typename arrow::TypeTraits<DataType>::CType;
-  auto* out_values = reinterpret_cast<CType*>(out);
-
-  int i = 0;
-  for (const auto& column : table.columns()) {
-    int j = 0;
-    for (const auto& chunk : column->chunks()) {
-      if (row_major) {
-        ConvertChunksToTensorRowMajorVisitor<CType> visitor{out_values, *chunk->data(),
-                                                            table.num_columns(), i, j};
-        DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
-        j = j + static_cast<int>(chunk->length());
-      } else {
-        ConvertChunksToTensorVisitor<CType> visitor{out_values, *chunk->data()};
-        DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
-      }
-    }
-    i++;
-  }
-}
-
 Result<std::shared_ptr<Tensor>> Table::ToTensor(bool null_to_nan, bool row_major,
                                                 MemoryPool* pool) const {
-  if (num_columns() == 0) {
-    return Status::TypeError(
-        "Conversion to Tensor for Tables or RecordBatches without columns/schema is "
-        "not supported.");
-  }
-  // Check for no validity bitmap of each field
-  // if null_to_nan conversion is set to false
-  for (int i = 0; i < num_columns(); ++i) {
-    if (column(i)->null_count() > 0 && !null_to_nan) {
-      return Status::TypeError(
-          "Can only convert a Table or RecordBatch with no nulls. Set null_to_nan to "
-          "true to convert nulls to NaN");
-    }
-  }
-
-  // Check for supported data types and merge fields
-  // to get the resulting uniform data type
-  if (!is_integer(column(0)->type()->id()) && !is_floating(column(0)->type()->id())) {
-    return Status::TypeError("DataType is not supported: ",
-                             column(0)->type()->ToString());
-  }
-  std::shared_ptr<Field> result_field = schema_->field(0);
-  std::shared_ptr<DataType> result_type = result_field->type();
-
-  Field::MergeOptions options;
-  options.promote_integer_to_float = true;
-  options.promote_integer_sign = true;
-  options.promote_numeric_width = true;
-
-  if (num_columns() > 1) {
-    for (int i = 1; i < num_columns(); ++i) {
-      if (!is_numeric(column(i)->type()->id())) {
-        return Status::TypeError("DataType is not supported: ",
-                                 column(i)->type()->ToString());
-      }
-
-      // Casting of float16 is not supported, throw an error in this case
-      if ((column(i)->type()->id() == Type::HALF_FLOAT ||
-           result_field->type()->id() == Type::HALF_FLOAT) &&
-          column(i)->type()->id() != result_field->type()->id()) {
-        return Status::NotImplemented("Casting from or to halffloat is not supported.");
-      }
-
-      ARROW_ASSIGN_OR_RAISE(
-          result_field, result_field->MergeWith(
-                            schema_->field(i)->WithName(result_field->name()), options));
-    }
-    result_type = result_field->type();
-  }
-
-  // Check if result_type is signed or unsigned integer and null_to_nan is set to true
-  // Then all columns should be promoted to float type
-  if (is_integer(result_type->id()) && null_to_nan) {
-    ARROW_ASSIGN_OR_RAISE(
-        result_field,
-        result_field->MergeWith(arrow::field(result_field->name(), float32()), options));
-    result_type = result_field->type();
-  }
-
-  // Allocate memory
-  ARROW_ASSIGN_OR_RAISE(
-      std::shared_ptr<Buffer> result,
-      AllocateBuffer(result_type->bit_width() * num_columns() * num_rows(), pool));
-  // Copy data
-  switch (result_type->id()) {
-    case Type::UINT8:
-      ConvertColumnsToTensor<UInt8Type>(*this, result->mutable_data(), row_major);
-      break;
-    case Type::UINT16:
-    case Type::HALF_FLOAT:
-      ConvertColumnsToTensor<UInt16Type>(*this, result->mutable_data(), row_major);
-      break;
-    case Type::UINT32:
-      ConvertColumnsToTensor<UInt32Type>(*this, result->mutable_data(), row_major);
-      break;
-    case Type::UINT64:
-      ConvertColumnsToTensor<UInt64Type>(*this, result->mutable_data(), row_major);
-      break;
-    case Type::INT8:
-      ConvertColumnsToTensor<Int8Type>(*this, result->mutable_data(), row_major);
-      break;
-    case Type::INT16:
-      ConvertColumnsToTensor<Int16Type>(*this, result->mutable_data(), row_major);
-      break;
-    case Type::INT32:
-      ConvertColumnsToTensor<Int32Type>(*this, result->mutable_data(), row_major);
-      break;
-    case Type::INT64:
-      ConvertColumnsToTensor<Int64Type>(*this, result->mutable_data(), row_major);
-      break;
-    case Type::FLOAT:
-      ConvertColumnsToTensor<FloatType>(*this, result->mutable_data(), row_major);
-      break;
-    case Type::DOUBLE:
-      ConvertColumnsToTensor<DoubleType>(*this, result->mutable_data(), row_major);
-      break;
-    default:
-      return Status::TypeError("DataType is not supported: ", result_type->ToString());
-  }
-
-  // Construct Tensor object
-  const auto& fixed_width_type =
-      internal::checked_cast<const FixedWidthType&>(*result_type);
-  std::vector<int64_t> shape = {num_rows(), num_columns()};
-  std::vector<int64_t> strides;
   std::shared_ptr<Tensor> tensor;
-
-  if (row_major) {
-    ARROW_RETURN_NOT_OK(
-        internal::ComputeRowMajorStrides(fixed_width_type, shape, &strides));
-  } else {
-    ARROW_RETURN_NOT_OK(
-        internal::ComputeColumnMajorStrides(fixed_width_type, shape, &strides));
-  }
-  ARROW_ASSIGN_OR_RAISE(tensor,
-                        Tensor::Make(result_type, std::move(result), shape, strides));
+  ARROW_RETURN_NOT_OK(
+      internal::TableToTensor(*this, null_to_nan, row_major, pool, &tensor));
   return tensor;
 }
 

From afe3d1e16f50a728b8bcf5bf20c50119b392923b Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Mon, 10 Jun 2024 17:55:53 +0200
Subject: [PATCH 12/23] Fix docstrings and change index names

---
 cpp/src/arrow/record_batch.h |  2 +-
 cpp/src/arrow/table.h        |  2 +-
 cpp/src/arrow/tensor.cc      | 25 ++++++++++++-------------
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h
index 4601b1ba9d6a..a6ef744ac121 100644
--- a/cpp/src/arrow/record_batch.h
+++ b/cpp/src/arrow/record_batch.h
@@ -90,7 +90,7 @@ class ARROW_EXPORT RecordBatch {
   /// in the resulting struct array.
   Result<std::shared_ptr<StructArray>> ToStructArray() const;
 
-  /// \brief Convert record batch with one data type to Tensor
+  /// \brief Convert RecordBatch to Tensor
   ///
   /// Create a Tensor object with shape (number of rows, number of columns) and
   /// strides (type size in bytes, type size in bytes * number of rows).
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index f57e23aaf5dd..3558cb46d8c5 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -102,7 +102,7 @@ class ARROW_EXPORT Table {
   static Result<std::shared_ptr<Table>> FromChunkedStructArray(
       const std::shared_ptr<ChunkedArray>& array);
 
-  /// \brief Convert table with one data type to Tensor
+  /// \brief Convert Table to Tensor
   ///
   /// Create a Tensor object with shape (number of rows, number of columns) and
   /// strides (type size in bytes, type size in bytes * number of rows).
diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc
index 1ecaa384631b..07563a406bee 100644
--- a/cpp/src/arrow/tensor.cc
+++ b/cpp/src/arrow/tensor.cc
@@ -270,15 +270,14 @@ struct ConvertArrayToTensorRowMajorVisitor {
       auto in_values = ArraySpan(in_data).GetSpan<In>(1, in_data.length);
 
       if (in_data.null_count == 0) {
-        for (int64_t data_idx = 0; data_idx < in_data.length; ++data_idx) {
-          out_values[(data_idx + chunk_idx) * num_cols + col_idx] =
-              static_cast<Out>(in_values[data_idx]);
+        for (int64_t i = 0; i < in_data.length; ++i) {
+          out_values[(i + chunk_idx) * num_cols + col_idx] =
+              static_cast<Out>(in_values[i]);
         }
       } else {
-        for (int64_t data_idx = 0; data_idx < in_data.length; ++data_idx) {
-          out_values[(data_idx + chunk_idx) * num_cols + col_idx] =
-              in_data.IsNull(data_idx) ? static_cast<Out>(NAN)
-                                       : static_cast<Out>(in_values[data_idx]);
+        for (int64_t i = 0; i < in_data.length; ++i) {
+          out_values[(i + chunk_idx) * num_cols + col_idx] =
+              in_data.IsNull(i) ? static_cast<Out>(NAN) : static_cast<Out>(in_values[i]);
         }
       }
       return Status::OK();
@@ -292,21 +291,21 @@ inline void ConvertColumnsToTensor(const Table& table, uint8_t* out, bool row_ma
   using CType = typename arrow::TypeTraits<DataType>::CType;
   auto* out_values = reinterpret_cast<CType*>(out);
 
-  int i = 0;
+  int col_idx = 0;
   for (const auto& column : table.columns()) {
-    int j = 0;
+    int chunk_idx = 0;
     for (const auto& chunk : column->chunks()) {
       if (row_major) {
-        ConvertArrayToTensorRowMajorVisitor<CType> visitor{out_values, *chunk->data(),
-                                                           table.num_columns(), i, j};
+        ConvertArrayToTensorRowMajorVisitor<CType> visitor{
+            out_values, *chunk->data(), table.num_columns(), col_idx, chunk_idx};
         DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
-        j = j + static_cast<int>(chunk->length());
+        chunk_idx = chunk_idx + static_cast<int>(chunk->length());
       } else {
         ConvertArrayToTensorVisitor<CType> visitor{out_values, *chunk->data()};
         DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
       }
     }
-    i++;
+    col_idx++;
   }
 }
 

From 2fcc6b6f618404571e1e8465a0ac18a99449b3a0 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Tue, 11 Jun 2024 08:08:49 +0200
Subject: [PATCH 13/23] Remove most of table_to_tensor tests in python and
 parametrize one test for both batch and table

---
 python/pyarrow/tests/test_table.py | 248 ++---------------------------
 1 file changed, 14 insertions(+), 234 deletions(-)

diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index b82cf507a890..7158c303e53d 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -1079,38 +1079,44 @@ def test_recordbatch_to_tensor_uniform_float_16():
     check_tensors(result, expected, pa.float16(), 27)
 
 
-@pytest.mark.numpy
-def test_recordbatch_to_tensor_mixed_type():
+@pytest.mark.parametrize(
+    ('cls'),
+    [
+        (pa.Table),
+        (pa.RecordBatch)
+    ]
+)
+def test_to_tensor_mixed_type(cls):
     # uint16 + int16 = int32
     arr1 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
     arr2 = [10, 20, 30, 40, 50, 60, 70, 80, 90]
     arr3 = [100, 200, 300, np.nan, 500, 600, 700, 800, 900]
-    batch = pa.RecordBatch.from_arrays(
+    tabular = cls.from_arrays(
         [
             pa.array(arr1, type=pa.uint16()),
             pa.array(arr2, type=pa.int16()),
         ], ["a", "b"]
     )
 
-    result = batch.to_tensor(row_major=False)
+    result = tabular.to_tensor(row_major=False)
     x = np.column_stack([arr1, arr2]).astype(np.int32, order="F")
     expected = pa.Tensor.from_numpy(x)
     check_tensors(result, expected, pa.int32(), 18)
 
-    result = batch.to_tensor()
+    result = tabular.to_tensor()
     x = np.column_stack([arr1, arr2]).astype(np.int32, order="C")
     expected = pa.Tensor.from_numpy(x)
     check_tensors(result, expected, pa.int32(), 18)
 
     # uint16 + int16 + float32 = float64
-    batch = pa.RecordBatch.from_arrays(
+    tabular = cls.from_arrays(
         [
             pa.array(arr1, type=pa.uint16()),
             pa.array(arr2, type=pa.int16()),
             pa.array(arr3, type=pa.float32()),
         ], ["a", "b", "c"]
     )
-    result = batch.to_tensor(row_major=False)
+    result = tabular.to_tensor(row_major=False)
     x = np.column_stack([arr1, arr2, arr3]).astype(np.float64, order="F")
     expected = pa.Tensor.from_numpy(x)
 
@@ -1120,7 +1126,7 @@ def test_recordbatch_to_tensor_mixed_type():
     assert result.shape == expected.shape
     assert result.strides == expected.strides
 
-    result = batch.to_tensor()
+    result = tabular.to_tensor()
     x = np.column_stack([arr1, arr2, arr3]).astype(np.float64, order="C")
     expected = pa.Tensor.from_numpy(x)
 
@@ -1332,232 +1338,6 @@ def test_table_to_tensor_uniform_type(typ):
     check_tensors(result, expected, pa.from_numpy_dtype(typ), 15)
 
 
-def test_table_to_tensor_uniform_float_16():
-    arr1 = [np.array([1, 2, 3], dtype=np.float16),
-            np.array([4, 5, 6, 7, 8, 9], dtype=np.float16)]
-    arr2 = [np.array([10, 20], dtype=np.float16),
-            np.array([30, 40, 50, 60, 70, 80, 90], dtype=np.float16)]
-    arr3 = [np.array([100, 100, 100, 100, 100, 100], dtype=np.float16),
-            np.array([100, 100, 100], dtype=np.float16)]
-    table = pa.Table.from_arrays(
-        [
-            pa.chunked_array(arr1, type=pa.float16()),
-            pa.chunked_array(arr2, type=pa.float16()),
-            pa.chunked_array(arr3, type=pa.float16()),
-        ], ["a", "b", "c"]
-    )
-
-    arr1_f = [1, 2, 3, 4, 5, 6, 7, 8, 9]
-    arr2_f = [10, 20, 30, 40, 50, 60, 70, 80, 90]
-    arr3_f = [100, 100, 100, 100, 100, 100, 100, 100, 100]
-
-    result = table.to_tensor(row_major=False)
-    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(np.float16, order="F")
-    expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.float16(), 27)
-
-    result = table.to_tensor()
-    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(np.float16, order="C")
-    expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.float16(), 27)
-
-
-def test_table_to_tensor_mixed_type():
-    # uint16 + int16 = int32
-    arr1 = [[1, 2, 3], [4, 5, 6, 7, 8, 9]]
-    arr2 = [[10, 20], [30, 40, 50, 60, 70, 80, 90]]
-    arr3 = [[100, 200, 300, np.nan, 500, 600], [700, 800, 900]]
-    table = pa.Table.from_arrays(
-        [
-            pa.chunked_array(arr1, type=pa.uint16()),
-            pa.chunked_array(arr2, type=pa.int16()),
-        ], ["a", "b"]
-    )
-
-    arr1_f = [1, 2, 3, 4, 5, 6, 7, 8, 9]
-    arr2_f = [10, 20, 30, 40, 50, 60, 70, 80, 90]
-    arr3_f = [100, 200, 300, np.nan, 500, 600, 700, 800, 900]
-
-    result = table.to_tensor(row_major=False)
-    x = np.column_stack([arr1_f, arr2_f]).astype(np.int32, order="F")
-    expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.int32(), 18)
-
-    result = table.to_tensor()
-    x = np.column_stack([arr1_f, arr2_f]).astype(np.int32, order="C")
-    expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.int32(), 18)
-
-    # uint16 + int16 + float32 = float64
-    table = pa.Table.from_arrays(
-        [
-            pa.chunked_array(arr1, type=pa.uint16()),
-            pa.chunked_array(arr2, type=pa.int16()),
-            pa.chunked_array(arr3, type=pa.float32()),
-        ], ["a", "b", "c"]
-    )
-    result = table.to_tensor(row_major=False)
-    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(np.float64, order="F")
-    expected = pa.Tensor.from_numpy(x)
-
-    np.testing.assert_equal(result.to_numpy(), x)
-    assert result.size == 27
-    assert result.type == pa.float64()
-    assert result.shape == expected.shape
-    assert result.strides == expected.strides
-
-    result = table.to_tensor()
-    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(np.float64, order="C")
-    expected = pa.Tensor.from_numpy(x)
-
-    np.testing.assert_equal(result.to_numpy(), x)
-    assert result.size == 27
-    assert result.type == pa.float64()
-    assert result.shape == expected.shape
-    assert result.strides == expected.strides
-
-
-def test_table_to_tensor_unsupported_mixed_type_with_float16():
-    arr1 = [[1, 2, 3], [4, 5, 6, 7, 8, 9]]
-    arr2 = [10, 20, 30, 40, 50, 60, 70, 80, 90]
-    arr3 = [[100, 200, 300, 400, 500, 600], [700, 800, 900]]
-    table = pa.Table.from_arrays(
-        [
-            pa.chunked_array(arr1, type=pa.uint16()),
-            pa.chunked_array([np.array(arr2, dtype=np.float16)], type=pa.float16()),
-            pa.chunked_array(arr3, type=pa.float32()),
-        ], ["a", "b", "c"]
-    )
-
-    with pytest.raises(
-        NotImplementedError,
-        match="Casting from or to halffloat is not supported."
-    ):
-        table.to_tensor()
-
-
-def test_table_to_tensor_nan():
-    arr1 = [[1, 2, 3], [4, np.nan, 6, 7, 8, 9]]
-    arr2 = [[10, 20], [30, 40, 50, 60, 70, np.nan, 90]]
-    table = pa.Table.from_arrays(
-        [
-            pa.chunked_array(arr1, type=pa.float32()),
-            pa.chunked_array(arr2, type=pa.float32()),
-        ], ["a", "b"]
-    )
-
-    arr1_f = [1, 2, 3, 4, np.nan, 6, 7, 8, 9]
-    arr2_f = [10, 20, 30, 40, 50, 60, 70, np.nan, 90]
-
-    result = table.to_tensor(row_major=False)
-    x = np.column_stack([arr1_f, arr2_f]).astype(np.float32, order="F")
-    expected = pa.Tensor.from_numpy(x)
-
-    np.testing.assert_equal(result.to_numpy(), x)
-    assert result.size == 18
-    assert result.type == pa.float32()
-    assert result.shape == expected.shape
-    assert result.strides == expected.strides
-
-
-def test_table_to_tensor_null():
-    arr1 = [[1, 2, 3], [4, None, 6, 7, 8, 9]]
-    arr2 = [[10, 20], [30, 40, 50, 60, 70, None, 90]]
-    table = pa.Table.from_arrays(
-        [
-            pa.chunked_array(arr1, type=pa.int32()),
-            pa.chunked_array(arr2, type=pa.float32()),
-        ], ["a", "b"]
-    )
-    with pytest.raises(
-        pa.ArrowTypeError,
-        match="Can only convert a Table or RecordBatch with no nulls."
-    ):
-        table.to_tensor()
-
-    arr1_f = [1, 2, 3, 4, np.nan, 6, 7, 8, 9]
-    arr2_f = [10, 20, 30, 40, 50, 60, 70, np.nan, 90]
-
-    result = table.to_tensor(null_to_nan=True, row_major=False)
-    x = np.column_stack([arr1_f, arr2_f]).astype(np.float64, order="F")
-    expected = pa.Tensor.from_numpy(x)
-
-    np.testing.assert_equal(result.to_numpy(), x)
-    assert result.size == 18
-    assert result.type == pa.float64()
-    assert result.shape == expected.shape
-    assert result.strides == expected.strides
-
-    # int32 -> float64
-    table = pa.Table.from_arrays(
-        [
-            pa.chunked_array(arr1, type=pa.int32()),
-            pa.chunked_array(arr2, type=pa.int32()),
-        ], ["a", "b"]
-    )
-
-    result = table.to_tensor(null_to_nan=True, row_major=False)
-
-    np.testing.assert_equal(result.to_numpy(), x)
-    assert result.size == 18
-    assert result.type == pa.float64()
-    assert result.shape == expected.shape
-    assert result.strides == expected.strides
-
-    # int8 -> float32
-    table = pa.Table.from_arrays(
-        [
-            pa.chunked_array(arr1, type=pa.int8()),
-            pa.chunked_array(arr2, type=pa.int8()),
-        ], ["a", "b"]
-    )
-
-    result = table.to_tensor(null_to_nan=True, row_major=False)
-    x = np.column_stack([arr1_f, arr2_f]).astype(np.float32, order="F")
-    expected = pa.Tensor.from_numpy(x)
-
-    np.testing.assert_equal(result.to_numpy(), x)
-    assert result.size == 18
-    assert result.type == pa.float32()
-    assert result.shape == expected.shape
-    assert result.strides == expected.strides
-
-
-def test_table_to_tensor_empty():
-    table = pa.Table.from_arrays(
-        [
-            pa.chunked_array([], type=pa.float32()),
-            pa.chunked_array([], type=pa.float32()),
-        ], ["a", "b"]
-    )
-    result = table.to_tensor()
-
-    x = np.column_stack([[], []]).astype(np.float32, order="F")
-    expected = pa.Tensor.from_numpy(x)
-
-    assert result.size == expected.size
-    assert result.type == pa.float32()
-    assert result.shape == expected.shape
-    assert result.strides == (4, 4)
-
-
-def test_table_to_tensor_unsupported():
-    arr1 = [[1, 2, 3], [4, 5, 6, 7, 8, 9]]
-    # Unsupported data type
-    arr2 = [["a", "b", "c", "a"], ["b", "c", "a", "b", "c"]]
-    table = pa.Table.from_arrays(
-        [
-            pa.chunked_array(arr1, type=pa.int32()),
-            pa.chunked_array(arr2, type=pa.utf8()),
-        ], ["a", "b"]
-    )
-    with pytest.raises(
-        pa.ArrowTypeError,
-        match="DataType is not supported"
-    ):
-        table.to_tensor()
-
-
 def _table_like_slice_tests(factory):
     data = [
         pa.array(range(5)),

From c817081625866f718265290fcd7a31f75b8b9642 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Tue, 11 Jun 2024 08:17:49 +0200
Subject: [PATCH 14/23] Use self.table and self.batch, run linter

---
 python/pyarrow/table.pxi | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 38d920e0d20f..f9dbb52597fb 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -1413,7 +1413,8 @@ cdef class ChunkedArray(_PandasConvertible):
                     chunked = self.cast(target_type, safe=True)
                 except ArrowInvalid as e:
                     raise ValueError(
-                        f"Could not cast {self.type} to requested type {target_type}: {e}"
+                        f"Could not cast {self.type} to requested type {
+                            target_type}: {e}"
                     )
             else:
                 chunked = self
@@ -3659,15 +3660,11 @@ cdef class RecordBatch(_Tabular):
         """
         self._assert_cpu()
         cdef:
-            shared_ptr[CRecordBatch] c_record_batch
             shared_ptr[CTensor] c_tensor
             CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
 
-        c_record_batch = pyarrow_unwrap_batch(self)
         with nogil:
-            c_tensor = GetResultValue(
-                <CResult[shared_ptr[CTensor]]>deref(c_record_batch).ToTensor(null_to_nan,
-                                                                             row_major, pool))
+            c_tensor = GetResultValue(self.batch.ToTensor(null_to_nan, row_major, pool))
         return pyarrow_wrap_tensor(c_tensor)
 
     def copy_to(self, destination):
@@ -3798,7 +3795,8 @@ cdef class RecordBatch(_Tabular):
                     inner_batch = pyarrow_unwrap_batch(casted_batch)
                 except ArrowInvalid as e:
                     raise ValueError(
-                        f"Could not cast {self.schema} to requested schema {target_schema}: {e}"
+                        f"Could not cast {self.schema} to requested schema {
+                            target_schema}: {e}"
                     )
             else:
                 inner_batch = self.sp_batch
@@ -5152,15 +5150,11 @@ cdef class Table(_Tabular):
                [nan, nan]])
         """
         cdef:
-            shared_ptr[CTable] c_table
             shared_ptr[CTensor] c_tensor
             CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
 
-        c_table = pyarrow_unwrap_table(self)
         with nogil:
-            c_tensor = GetResultValue(
-                <CResult[shared_ptr[CTensor]]>deref(c_table).ToTensor(null_to_nan,
-                                                                      row_major, pool))
+            c_tensor = GetResultValue(self.table.ToTensor(null_to_nan, row_major, pool))
         return pyarrow_wrap_tensor(c_tensor)
 
     def to_reader(self, max_chunksize=None):

From 3e213ddd05ce65fcaa1ea073c53da2af4050f024 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Tue, 11 Jun 2024 08:33:02 +0200
Subject: [PATCH 15/23] Revert unrelated linter changes

---
 python/pyarrow/table.pxi | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index f9dbb52597fb..f29b4a130e86 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -1413,8 +1413,7 @@ cdef class ChunkedArray(_PandasConvertible):
                     chunked = self.cast(target_type, safe=True)
                 except ArrowInvalid as e:
                     raise ValueError(
-                        f"Could not cast {self.type} to requested type {
-                            target_type}: {e}"
+                        f"Could not cast {self.type} to requested type {target_type}: {e}"
                     )
             else:
                 chunked = self
@@ -3795,8 +3794,7 @@ cdef class RecordBatch(_Tabular):
                     inner_batch = pyarrow_unwrap_batch(casted_batch)
                 except ArrowInvalid as e:
                     raise ValueError(
-                        f"Could not cast {self.schema} to requested schema {
-                            target_schema}: {e}"
+                        f"Could not cast {self.schema} to requested schema {target_schema}: {e}"
                     )
             else:
                 inner_batch = self.sp_batch

From 4bc7e39e65fd8528912eddc4a6ce6998b8970de1 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Tue, 11 Jun 2024 09:16:48 +0200
Subject: [PATCH 16/23] Remove shape and strides from ToTensor docstrings

---
 cpp/src/arrow/record_batch.h | 3 +--
 cpp/src/arrow/table.h        | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h
index a6ef744ac121..d1e5d541821f 100644
--- a/cpp/src/arrow/record_batch.h
+++ b/cpp/src/arrow/record_batch.h
@@ -92,8 +92,7 @@ class ARROW_EXPORT RecordBatch {
 
   /// \brief Convert RecordBatch to Tensor
   ///
-  /// Create a Tensor object with shape (number of rows, number of columns) and
-  /// strides (type size in bytes, type size in bytes * number of rows).
+  /// Create a Tensor object.
   ///
   /// \param[in] null_to_nan if true, convert nulls to NaN
   /// \param[in] row_major if true, create row-major Tensor else column-major Tensor
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index 3558cb46d8c5..051060a52c4b 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -104,8 +104,7 @@ class ARROW_EXPORT Table {
 
   /// \brief Convert Table to Tensor
   ///
-  /// Create a Tensor object with shape (number of rows, number of columns) and
-  /// strides (type size in bytes, type size in bytes * number of rows).
+  /// Create a Tensor object.
   ///
   /// \param[in] null_to_nan if true, convert nulls to NaN
   /// \param[in] row_major if true, create row-major Tensor else column-major Tensor

From 7f58c559679ed6f557f054504132c4dabb515cd1 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Tue, 11 Jun 2024 09:18:23 +0200
Subject: [PATCH 17/23] Remove s in NaNs

---
 python/pyarrow/table.pxi | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index f29b4a130e86..0c35c915015a 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -3629,7 +3629,7 @@ cdef class RecordBatch(_Tabular):
         b: [10,20,30,40,null]
 
         Convert a RecordBatch to row-major Tensor with null values
-        written as NaN values
+        written as ``NaN``:
 
         >>> batch.to_tensor(null_to_nan=True)
         <pyarrow.Tensor>
@@ -3643,7 +3643,7 @@ cdef class RecordBatch(_Tabular):
                [ 4., 40.],
                [nan, nan]])
 
-        Convert a RecordBatch to column-major Tensor
+        Convert a RecordBatch to column-major Tensor:
 
         >>> batch.to_tensor(null_to_nan=True, row_major=False)
         <pyarrow.Tensor>
@@ -5119,7 +5119,7 @@ cdef class Table(_Tabular):
         a: [[1,2],[3,4,null]]
         b: [[10,20,30],[40,null]]
 
-        Convert a Table to row-major Tensor with null values written as ``NaN``s:
+        Convert a Table to row-major Tensor with null values written as ``NaN``:
 
         >>> table.to_tensor(null_to_nan=True)
         <pyarrow.Tensor>

From 4a879f96b0f9efbdd71c2c28377b122591164386 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Thu, 9 Apr 2026 19:08:57 +0200
Subject: [PATCH 18/23] Pre-calculate index and remove the need to cast

---
 cpp/src/arrow/tensor.cc | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc
index 07563a406bee..7d1ef2160799 100644
--- a/cpp/src/arrow/tensor.cc
+++ b/cpp/src/arrow/tensor.cc
@@ -259,9 +259,9 @@ template <typename Out>
 struct ConvertArrayToTensorRowMajorVisitor {
   Out*& out_values;
   const ArrayData& in_data;
-  int num_cols;
-  int col_idx;
-  int chunk_idx;
+  int64_t num_cols;
+  int64_t col_idx;
+  int64_t chunk_idx;
 
   template <typename T>
   Status Visit(const T&) {
@@ -269,14 +269,15 @@ struct ConvertArrayToTensorRowMajorVisitor {
       using In = typename T::c_type;
       auto in_values = ArraySpan(in_data).GetSpan<In>(1, in_data.length);
 
+      const int64_t base = chunk_idx * num_cols + col_idx;
+
       if (in_data.null_count == 0) {
         for (int64_t i = 0; i < in_data.length; ++i) {
-          out_values[(i + chunk_idx) * num_cols + col_idx] =
-              static_cast<Out>(in_values[i]);
+          out_values[base + i * num_cols] = static_cast<Out>(in_values[i]);
         }
       } else {
         for (int64_t i = 0; i < in_data.length; ++i) {
-          out_values[(i + chunk_idx) * num_cols + col_idx] =
+          out_values[base + i * num_cols] =
               in_data.IsNull(i) ? static_cast<Out>(NAN) : static_cast<Out>(in_values[i]);
         }
       }
@@ -291,7 +292,7 @@ inline void ConvertColumnsToTensor(const Table& table, uint8_t* out, bool row_ma
   using CType = typename arrow::TypeTraits<DataType>::CType;
   auto* out_values = reinterpret_cast<CType*>(out);
 
-  int col_idx = 0;
+  int64_t col_idx = 0;
   for (const auto& column : table.columns()) {
     int chunk_idx = 0;
     for (const auto& chunk : column->chunks()) {
@@ -299,7 +300,7 @@ inline void ConvertColumnsToTensor(const Table& table, uint8_t* out, bool row_ma
         ConvertArrayToTensorRowMajorVisitor<CType> visitor{
             out_values, *chunk->data(), table.num_columns(), col_idx, chunk_idx};
         DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
-        chunk_idx = chunk_idx + static_cast<int>(chunk->length());
+        chunk_idx += chunk->length();
       } else {
         ConvertArrayToTensorVisitor<CType> visitor{out_values, *chunk->data()};
         DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));

From 1f12b9013faf76c663b79e6f62c7c1be8c9fec2f Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Fri, 10 Apr 2026 11:19:07 +0200
Subject: [PATCH 19/23] Split batch and table paths to eliminate
 heap allocations for unnecessary Table creation

---
 cpp/src/arrow/record_batch.cc |   3 +-
 cpp/src/arrow/table.cc        |   2 +-
 cpp/src/arrow/tensor.cc       | 120 +++++++++++++++++++++-------------
 cpp/src/arrow/tensor.h        |   4 ++
 4 files changed, 81 insertions(+), 48 deletions(-)

diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc
index 3271f26be8c8..bc2612f92add 100644
--- a/cpp/src/arrow/record_batch.cc
+++ b/cpp/src/arrow/record_batch.cc
@@ -295,10 +295,9 @@ Result<std::shared_ptr<StructArray>> RecordBatch::ToStructArray() const {
 
 Result<std::shared_ptr<Tensor>> RecordBatch::ToTensor(bool null_to_nan, bool row_major,
                                                       MemoryPool* pool) const {
-  std::shared_ptr<Table> table = Table::Make(schema(), columns());
   std::shared_ptr<Tensor> tensor;
   ARROW_RETURN_NOT_OK(
-      internal::TableToTensor(*table, null_to_nan, row_major, pool, &tensor));
+      internal::RecordBatchToTensor(*this, null_to_nan, row_major, pool, &tensor));
   return tensor;
 }
 
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index b2b18bd510bc..2c9ed5195a6f 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -41,8 +41,8 @@
 #include "arrow/type_fwd.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
-#include "arrow/util/logging_internal.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/logging_internal.h"
 #include "arrow/util/vector.h"
 
 namespace arrow {
diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc
index 7d1ef2160799..7e82ba1c6ab5 100644
--- a/cpp/src/arrow/tensor.cc
+++ b/cpp/src/arrow/tensor.cc
@@ -287,40 +287,60 @@ struct ConvertArrayToTensorRowMajorVisitor {
   }
 };
 
-template <typename DataType>
-inline void ConvertColumnsToTensor(const Table& table, uint8_t* out, bool row_major) {
+template <typename DataType, typename Container>
+inline void ConvertColumnsToTensor(const Container& container, uint8_t* out,
+                                   bool row_major) {
   using CType = typename arrow::TypeTraits<DataType>::CType;
   auto* out_values = reinterpret_cast<CType*>(out);
 
-  int64_t col_idx = 0;
-  for (const auto& column : table.columns()) {
-    int chunk_idx = 0;
-    for (const auto& chunk : column->chunks()) {
+  for (int col_idx = 0; col_idx < container.num_columns(); ++col_idx) {
+    if constexpr (std::is_same_v<Container, Table>) {
+      int chunk_idx = 0;
+
+      for (const auto& chunk : container.column(col_idx)->chunks()) {
+        if (row_major) {
+          ConvertArrayToTensorRowMajorVisitor<CType> visitor{
+              out_values, *chunk->data(), container.num_columns(), col_idx, chunk_idx};
+          DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+          chunk_idx += chunk->length();
+        } else {
+          ConvertArrayToTensorVisitor<CType> visitor{out_values, *chunk->data()};
+          DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+        }
+      }
+    } else if constexpr (std::is_same_v<Container, RecordBatch>) {
+      const auto& array_data = container.column_data(col_idx);
+
       if (row_major) {
         ConvertArrayToTensorRowMajorVisitor<CType> visitor{
-            out_values, *chunk->data(), table.num_columns(), col_idx, chunk_idx};
-        DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
-        chunk_idx += chunk->length();
+            out_values, *array_data, container.num_columns(), col_idx, 0};
+        DCHECK_OK(VisitTypeInline(*array_data->type, &visitor));
       } else {
-        ConvertArrayToTensorVisitor<CType> visitor{out_values, *chunk->data()};
-        DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+        ConvertArrayToTensorVisitor<CType> visitor{out_values, *array_data};
+        DCHECK_OK(VisitTypeInline(*array_data->type, &visitor));
       }
     }
-    col_idx++;
   }
 }
 
-Status TableToTensor(const Table& table, bool null_to_nan, bool row_major,
-                     MemoryPool* pool, std::shared_ptr<Tensor>* tensor) {
-  if (table.num_columns() == 0) {
+template <typename Container>
+Status ToTensorImpl(const Container& container, bool null_to_nan, bool row_major,
+                    MemoryPool* pool, std::shared_ptr<Tensor>* tensor) {
+  if (container.num_columns() == 0) {
     return Status::TypeError(
         "Conversion to Tensor for Tables or RecordBatches without columns/schema is not "
         "supported.");
   }
   // Check for no validity bitmap of each field
   // if null_to_nan conversion is set to false
-  for (int i = 0; i < table.num_columns(); ++i) {
-    if (table.column(i)->null_count() > 0 && !null_to_nan) {
+  for (int i = 0; i < container.num_columns(); ++i) {
+    int64_t null_count;
+    if constexpr (std::is_same_v<Container, Table>) {
+      null_count = container.column(i)->null_count();
+    } else if constexpr (std::is_same_v<Container, RecordBatch>) {
+      null_count = container.column_data(i)->GetNullCount();
+    }
+    if (null_count > 0 && !null_to_nan) {
       return Status::TypeError(
           "Can only convert a Table or RecordBatch with no nulls. Set null_to_nan to "
           "true to convert nulls to NaN");
@@ -329,12 +349,11 @@ Status TableToTensor(const Table& table, bool null_to_nan, bool row_major,
 
   // Check for supported data types and merge fields
   // to get the resulting uniform data type
-  if (!is_integer(table.column(0)->type()->id()) &&
-      !is_floating(table.column(0)->type()->id())) {
-    return Status::TypeError("DataType is not supported: ",
-                             table.column(0)->type()->ToString());
+  const auto& col_0_type = container.schema()->field(0)->type();
+  if (!is_integer(col_0_type->id()) && !is_floating(col_0_type->id())) {
+    return Status::TypeError("DataType is not supported: ", col_0_type->ToString());
   }
-  std::shared_ptr<Field> result_field = table.schema()->field(0);
+  std::shared_ptr<Field> result_field = container.schema()->field(0);
   std::shared_ptr<DataType> result_type = result_field->type();
 
   Field::MergeOptions options;
@@ -342,24 +361,25 @@ Status TableToTensor(const Table& table, bool null_to_nan, bool row_major,
   options.promote_integer_sign = true;
   options.promote_numeric_width = true;
 
-  if (table.num_columns() > 1) {
-    for (int i = 1; i < table.num_columns(); ++i) {
-      if (!is_numeric(table.column(i)->type()->id())) {
-        return Status::TypeError("DataType is not supported: ",
-                                 table.column(i)->type()->ToString());
+  if (container.num_columns() > 1) {
+    for (int i = 1; i < container.num_columns(); ++i) {
+      const auto& col_type = container.schema()->field(i)->type();
+
+      if (!is_numeric(col_type->id())) {
+        return Status::TypeError("DataType is not supported: ", col_type->ToString());
       }
 
       // Casting of float16 is not supported, throw an error in this case
-      if ((table.column(i)->type()->id() == Type::HALF_FLOAT ||
+      if ((col_type->id() == Type::HALF_FLOAT ||
            result_field->type()->id() == Type::HALF_FLOAT) &&
-          table.column(i)->type()->id() != result_field->type()->id()) {
+          col_type->id() != result_field->type()->id()) {
         return Status::NotImplemented("Casting from or to halffloat is not supported.");
       }
 
       ARROW_ASSIGN_OR_RAISE(
           result_field,
           result_field->MergeWith(
-              table.schema()->field(i)->WithName(result_field->name()), options));
+              container.schema()->field(i)->WithName(result_field->name()), options));
     }
     result_type = result_field->type();
   }
@@ -374,42 +394,42 @@ Status TableToTensor(const Table& table, bool null_to_nan, bool row_major,
   }
 
   // Allocate memory
-  ARROW_ASSIGN_OR_RAISE(
-      std::shared_ptr<Buffer> result,
-      AllocateBuffer(result_type->bit_width() * table.num_columns() * table.num_rows(),
-                     pool));
+  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> result,
+                        AllocateBuffer(result_type->bit_width() *
+                                           container.num_columns() * container.num_rows(),
+                                       pool));
   // Copy data
   switch (result_type->id()) {
     case Type::UINT8:
-      ConvertColumnsToTensor<UInt8Type>(table, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<UInt8Type>(container, result->mutable_data(), row_major);
       break;
     case Type::UINT16:
     case Type::HALF_FLOAT:
-      ConvertColumnsToTensor<UInt16Type>(table, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<UInt16Type>(container, result->mutable_data(), row_major);
       break;
     case Type::UINT32:
-      ConvertColumnsToTensor<UInt32Type>(table, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<UInt32Type>(container, result->mutable_data(), row_major);
       break;
     case Type::UINT64:
-      ConvertColumnsToTensor<UInt64Type>(table, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<UInt64Type>(container, result->mutable_data(), row_major);
       break;
     case Type::INT8:
-      ConvertColumnsToTensor<Int8Type>(table, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<Int8Type>(container, result->mutable_data(), row_major);
       break;
     case Type::INT16:
-      ConvertColumnsToTensor<Int16Type>(table, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<Int16Type>(container, result->mutable_data(), row_major);
       break;
     case Type::INT32:
-      ConvertColumnsToTensor<Int32Type>(table, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<Int32Type>(container, result->mutable_data(), row_major);
       break;
     case Type::INT64:
-      ConvertColumnsToTensor<Int64Type>(table, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<Int64Type>(container, result->mutable_data(), row_major);
       break;
     case Type::FLOAT:
-      ConvertColumnsToTensor<FloatType>(table, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<FloatType>(container, result->mutable_data(), row_major);
       break;
     case Type::DOUBLE:
-      ConvertColumnsToTensor<DoubleType>(table, result->mutable_data(), row_major);
+      ConvertColumnsToTensor<DoubleType>(container, result->mutable_data(), row_major);
       break;
     default:
       return Status::TypeError("DataType is not supported: ", result_type->ToString());
@@ -418,7 +438,7 @@ Status TableToTensor(const Table& table, bool null_to_nan, bool row_major,
   // Construct Tensor object
   const auto& fixed_width_type =
       internal::checked_cast<const FixedWidthType&>(*result_type);
-  std::vector<int64_t> shape = {table.num_rows(), table.num_columns()};
+  std::vector<int64_t> shape = {container.num_rows(), container.num_columns()};
   std::vector<int64_t> strides;
 
   if (row_major) {
@@ -433,6 +453,16 @@ Status TableToTensor(const Table& table, bool null_to_nan, bool row_major,
   return Status::OK();
 }
 
+Status TableToTensor(const Table& table, bool null_to_nan, bool row_major,
+                     MemoryPool* pool, std::shared_ptr<Tensor>* tensor) {
+  return ToTensorImpl(table, null_to_nan, row_major, pool, tensor);
+}
+
+Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_major,
+                           MemoryPool* pool, std::shared_ptr<Tensor>* tensor) {
+  return ToTensorImpl(batch, null_to_nan, row_major, pool, tensor);
+}
+
 }  // namespace internal
 
 /// Constructor with strides and dimension names
diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h
index b1c98bf733c7..1300003c2985 100644
--- a/cpp/src/arrow/tensor.h
+++ b/cpp/src/arrow/tensor.h
@@ -81,6 +81,10 @@ ARROW_EXPORT
 Status TableToTensor(const Table& table, bool null_to_nan, bool row_major,
                      MemoryPool* pool, std::shared_ptr<Tensor>* tensor);
 
+ARROW_EXPORT
+Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_major,
+                           MemoryPool* pool, std::shared_ptr<Tensor>* tensor);
+
 }  // namespace internal
 
 class ARROW_EXPORT Tensor {

From 81baf3011bbcc1f8a2438aaff1eec9abd25becc2 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Thu, 16 Apr 2026 16:09:31 +0200
Subject: [PATCH 20/23] Fix missing int type change

---
 cpp/src/arrow/tensor.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc
index 7e82ba1c6ab5..c7bb049a1fd0 100644
--- a/cpp/src/arrow/tensor.cc
+++ b/cpp/src/arrow/tensor.cc
@@ -295,7 +295,7 @@ inline void ConvertColumnsToTensor(const Container& container, uint8_t* out,
 
   for (int col_idx = 0; col_idx < container.num_columns(); ++col_idx) {
     if constexpr (std::is_same_v<Container, Table>) {
-      int chunk_idx = 0;
+      int64_t chunk_idx = 0;
 
       for (const auto& chunk : container.column(col_idx)->chunks()) {
         if (row_major) {

From e1cbc851dc718064837bbbf2473ec825c9d70ed2 Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Thu, 16 Apr 2026 16:23:23 +0200
Subject: [PATCH 21/23] Add numpy test markers

---
 python/pyarrow/tests/test_table.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 7158c303e53d..a3d2c7aeda1f 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -1079,6 +1079,7 @@ def test_recordbatch_to_tensor_uniform_float_16():
     check_tensors(result, expected, pa.float16(), 27)
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize(
     ('cls'),
     [
@@ -1275,6 +1276,7 @@ def test_recordbatch_to_tensor_unsupported():
         batch.to_tensor()
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize('typ', [
     np.uint8, np.uint16, np.uint32, np.uint64,
     np.int8, np.int16, np.int32, np.int64,

From 065185d99852915b1b7f77189b12a05cfca21d3a Mon Sep 17 00:00:00 2001
From: AlenkaF <frim.alenka@gmail.com>
Date: Fri, 17 Apr 2026 06:00:42 +0200
Subject: [PATCH 22/23] Parametrize with string

---
 python/pyarrow/tests/test_table.py | 40 +++++++++++++++---------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index a3d2c7aeda1f..57d6faa677bf 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -1277,20 +1277,20 @@ def test_recordbatch_to_tensor_unsupported():
 
 
 @pytest.mark.numpy
-@pytest.mark.parametrize('typ', [
-    np.uint8, np.uint16, np.uint32, np.uint64,
-    np.int8, np.int16, np.int32, np.int64,
-    np.float32, np.float64,
+@pytest.mark.parametrize('typ_str', [
+    "uint8", "uint16", "uint32", "uint64",
+    "int8", "int16", "int32", "int64",
+    "float32", "float64",
 ])
-def test_table_to_tensor_uniform_type(typ):
+def test_table_to_tensor_uniform_type(typ_str):
     arr1 = [[1, 2, 3], [4, 5, 6, 7, 8, 9]]
     arr2 = [[10, 20], [30, 40, 50, 60, 70, 80, 90]]
     arr3 = [[100, 100, 100, 100, 100, 100], [100, 100, 100]]
     table = pa.Table.from_arrays(
         [
-            pa.chunked_array(arr1, type=pa.from_numpy_dtype(typ)),
-            pa.chunked_array(arr2, type=pa.from_numpy_dtype(typ)),
-            pa.chunked_array(arr3, type=pa.from_numpy_dtype(typ)),
+            pa.chunked_array(arr1, type=pa.from_numpy_dtype(typ_str)),
+            pa.chunked_array(arr2, type=pa.from_numpy_dtype(typ_str)),
+            pa.chunked_array(arr3, type=pa.from_numpy_dtype(typ_str)),
         ], ["a", "b", "c"]
     )
 
@@ -1299,14 +1299,14 @@ def test_table_to_tensor_uniform_type(typ):
     arr3_f = [100, 100, 100, 100, 100, 100, 100, 100, 100]
 
     result = table.to_tensor(row_major=False)
-    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ, order="F")
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ_str, order="F")
     expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.from_numpy_dtype(typ), 27)
+    check_tensors(result, expected, pa.from_numpy_dtype(typ_str), 27)
 
     result = table.to_tensor()
-    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ, order="C")
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ_str, order="C")
     expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.from_numpy_dtype(typ), 27)
+    check_tensors(result, expected, pa.from_numpy_dtype(typ_str), 27)
 
     # Test offset
     table1 = table.slice(1)
@@ -1315,14 +1315,14 @@ def test_table_to_tensor_uniform_type(typ):
     arr3_f = [100, 100, 100, 100, 100, 100, 100, 100]
 
     result = table1.to_tensor(row_major=False)
-    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ, order="F")
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ_str, order="F")
     expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.from_numpy_dtype(typ), 24)
+    check_tensors(result, expected, pa.from_numpy_dtype(typ_str), 24)
 
     result = table1.to_tensor()
-    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ, order="C")
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ_str, order="C")
     expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.from_numpy_dtype(typ), 24)
+    check_tensors(result, expected, pa.from_numpy_dtype(typ_str), 24)
 
     table2 = table.slice(1, 5)
     arr1_f = [2, 3, 4, 5, 6]
@@ -1330,14 +1330,14 @@ def test_table_to_tensor_uniform_type(typ):
     arr3_f = [100, 100, 100, 100, 100]
 
     result = table2.to_tensor(row_major=False)
-    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ, order="F")
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ_str, order="F")
     expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.from_numpy_dtype(typ), 15)
+    check_tensors(result, expected, pa.from_numpy_dtype(typ_str), 15)
 
     result = table2.to_tensor()
-    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ, order="C")
+    x = np.column_stack([arr1_f, arr2_f, arr3_f]).astype(typ_str, order="C")
     expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.from_numpy_dtype(typ), 15)
+    check_tensors(result, expected, pa.from_numpy_dtype(typ_str), 15)
 
 
 def _table_like_slice_tests(factory):

From 5d370894ce5f9a557f1787b35ea2464b8005def7 Mon Sep 17 00:00:00 2001
From: Alenka Frim <AlenkaF@users.noreply.github.com>
Date: Thu, 30 Apr 2026 06:50:38 +0200
Subject: [PATCH 23/23] Apply suggestions from code review

Co-authored-by: tadeja <tadeja@users.noreply.github.com>
---
 cpp/src/arrow/table_test.cc | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/table_test.cc b/cpp/src/arrow/table_test.cc
index c19541a14925..d3522b282df3 100644
--- a/cpp/src/arrow/table_test.cc
+++ b/cpp/src/arrow/table_test.cc
@@ -932,8 +932,7 @@ TYPED_TEST_P(TestTableToTensorColumnMajor, SupportedTypes) {
                      shape_sliced, f_strides_sliced);
 
   EXPECT_TRUE(tensor_expected_sliced->Equals(*tensor_sliced));
-  CheckTableToTensor<DataType>(tensor_expected_sliced, 24, shape_sliced,
-                               f_strides_sliced);
+  CheckTableToTensor<DataType>(tensor_sliced, 24, shape_sliced, f_strides_sliced);
 
   auto table_slice_1 = table->Slice(1, 5);
 
@@ -950,8 +949,7 @@ TYPED_TEST_P(TestTableToTensorColumnMajor, SupportedTypes) {
                      shape_sliced_1, f_strides_sliced_1);
 
   EXPECT_TRUE(tensor_expected_sliced_1->Equals(*tensor_sliced_1));
-  CheckTableToTensor<DataType>(tensor_expected_sliced_1, 15, shape_sliced_1,
-                               f_strides_sliced_1);
+  CheckTableToTensor<DataType>(tensor_sliced_1, 15, shape_sliced_1, f_strides_sliced_1);
 }
 
 REGISTER_TYPED_TEST_SUITE_P(TestTableToTensorColumnMajor, SupportedTypes);