Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
c8691ab
Add Table::ToTensor and bindings to Python with Python tests
AlenkaF May 28, 2024
cd74794
Add C++ tests
AlenkaF May 29, 2024
09843d7
Add benchmarks
AlenkaF May 29, 2024
9c0c6f6
Fix linter error
AlenkaF May 29, 2024
e1562f4
Add cmath include
AlenkaF May 29, 2024
d5a6eaf
Change helper function names in C++ tests, fix doctest errors
AlenkaF May 29, 2024
8854a82
Correct indentations
AlenkaF May 29, 2024
d2be925
Remove code from RecordBatch::ToTensor and use Table implementation
AlenkaF May 29, 2024
520561c
Add RecordBatchToTensor code to tensor.cc
AlenkaF Jun 5, 2024
6bd177d
Change RecordBatchToTensor to TableToTensor and update the code to wo…
AlenkaF Jun 5, 2024
8306e73
Use TableToTensor in Table::ToTensor
AlenkaF Jun 5, 2024
afe3d1e
Fix docstrings and change index names
AlenkaF Jun 10, 2024
2fcc6b6
Remove most of table_to_tensor tests in python and parametrize one te…
AlenkaF Jun 11, 2024
c817081
Use self.table and self.batch, run linter
AlenkaF Jun 11, 2024
3e213dd
Redu unrelated linter changes
AlenkaF Jun 11, 2024
4bc7e39
Remove shape and strides from ToTensor docstrings
AlenkaF Jun 11, 2024
7f58c55
Remove s in NaNs
AlenkaF Jun 11, 2024
4a879f9
Pre-calculate index and remove the need to cast
AlenkaF Apr 9, 2026
1f12b90
Split batch and table path to eliminate heap-allocations for unnecess…
AlenkaF Apr 10, 2026
81baf30
Fix missing int type change
AlenkaF Apr 16, 2026
e1cbc85
Add numpy test markers
AlenkaF Apr 16, 2026
065185d
Parametrize with string
AlenkaF Apr 17, 2026
5d37089
Apply suggestions from code review
AlenkaF Apr 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion cpp/src/arrow/record_batch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include "arrow/record_batch.h"

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <memory>
#include <mutex>
Expand Down
6 changes: 2 additions & 4 deletions cpp/src/arrow/record_batch.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,9 @@ class ARROW_EXPORT RecordBatch {
/// in the resulting struct array.
Result<std::shared_ptr<StructArray>> ToStructArray() const;

/// \brief Convert record batch with one data type to Tensor
/// \brief Convert RecordBatch to Tensor
///
/// Create a Tensor object with shape (number of rows, number of columns) and
/// strides (type size in bytes, type size in bytes * number of rows).
/// Generated Tensor will have column-major layout.
/// Create a Tensor object.
///
/// \param[in] null_to_nan if true, convert nulls to NaN
/// \param[in] row_major if true, create row-major Tensor else column-major Tensor
Expand Down
18 changes: 10 additions & 8 deletions cpp/src/arrow/record_batch_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -906,10 +906,11 @@ TEST_F(TestRecordBatch, ToTensorUnsupportedMissing) {

auto batch = RecordBatch::Make(schema, length, {a0, a1});

ASSERT_RAISES_WITH_MESSAGE(TypeError,
"Type error: Can only convert a RecordBatch with no nulls. "
"Set null_to_nan to true to convert nulls to NaN",
batch->ToTensor());
ASSERT_RAISES_WITH_MESSAGE(
TypeError,
"Type error: Can only convert a Table or RecordBatch with no "
"nulls. Set null_to_nan to true to convert nulls to NaN",
batch->ToTensor());
}

TEST_F(TestRecordBatch, ToTensorEmptyBatch) {
Expand Down Expand Up @@ -940,10 +941,11 @@ TEST_F(TestRecordBatch, ToTensorEmptyBatch) {
auto batch_no_columns =
RecordBatch::Make(::arrow::schema({}), 10, std::vector<std::shared_ptr<Array>>{});

ASSERT_RAISES_WITH_MESSAGE(TypeError,
"Type error: Conversion to Tensor for RecordBatches without "
"columns/schema is not supported.",
batch_no_columns->ToTensor());
ASSERT_RAISES_WITH_MESSAGE(
TypeError,
"Type error: Conversion to Tensor for Tables or RecordBatches "
"without columns/schema is not supported.",
batch_no_columns->ToTensor());
}

template <typename DataType>
Expand Down
10 changes: 10 additions & 0 deletions cpp/src/arrow/table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@
#include "arrow/record_batch.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/tensor.h"
#include "arrow/type.h"
#include "arrow/type_fwd.h"
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/logging.h"
#include "arrow/util/logging_internal.h"
#include "arrow/util/vector.h"

Expand Down Expand Up @@ -346,6 +348,14 @@ Result<std::shared_ptr<Table>> Table::FromChunkedStructArray(
array->length());
}

// Converts this Table into a Tensor by delegating to internal::TableToTensor.
// null_to_nan, row_major and pool are forwarded unchanged; a non-OK status
// reported by the helper is propagated to the caller instead of a Tensor.
Result<std::shared_ptr<Tensor>> Table::ToTensor(bool null_to_nan, bool row_major,
                                                MemoryPool* pool) const {
  // Handed to the helper as its out-parameter and returned on success.
  std::shared_ptr<Tensor> out;
  ARROW_RETURN_NOT_OK(
      internal::TableToTensor(*this, null_to_nan, row_major, pool, &out));
  return out;
}

std::vector<std::string> Table::ColumnNames() const {
std::vector<std::string> names(num_columns());
for (int i = 0; i < num_columns(); ++i) {
Expand Down
12 changes: 12 additions & 0 deletions cpp/src/arrow/table.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,18 @@ class ARROW_EXPORT Table {
static Result<std::shared_ptr<Table>> FromChunkedStructArray(
const std::shared_ptr<ChunkedArray>& array);

/// \brief Convert Table to Tensor
///
/// Create a Tensor object.
///
/// NOTE(review): the error messages shared with RecordBatch::ToTensor suggest
/// a TypeError is returned for a Table without columns/schema, and for a Table
/// containing nulls when null_to_nan is false -- confirm against the
/// TableToTensor implementation.
///
/// \param[in] null_to_nan if true, convert nulls to NaN
/// \param[in] row_major if true, create row-major Tensor else column-major Tensor
/// \param[in] pool the memory pool to allocate the tensor buffer
/// \return the resulting Tensor, or an error Status if the conversion fails
Result<std::shared_ptr<Tensor>> ToTensor(
bool null_to_nan = false, bool row_major = true,
MemoryPool* pool = default_memory_pool()) const;

/// \brief Return the table schema
const std::shared_ptr<Schema>& schema() const { return schema_; }

Expand Down
Loading
Loading