Skip to content

Integration tests list view #438

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/qemu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
uses: jirutka/setup-alpine@v1
id: alpine-target
with:
branch: v3.20
branch: v3.22
arch: ${{ matrix.target.arch }}
packages: >
ccache
Expand Down
2 changes: 1 addition & 1 deletion include/sparrow/layout/decimal_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ namespace sparrow
constexpr std::size_t sizeof_decimal = sizeof(storage_type);
std::stringstream format_str;
format_str << "d:" << precision << "," << scale;
if (sizeof_decimal != 16) // We don't need to specify the size for 128-bit decimals
if constexpr (sizeof_decimal != 16) // We don't need to specify the size for 128-bit decimals
{
format_str << "," << sizeof_decimal * 8;
}
Expand Down
11 changes: 6 additions & 5 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

cmake_minimum_required(VERSION 3.28)

# Make executables export their symbols so that loadable modules can resolve them.
# CMAKE_ENABLE_EXPORTS is the legacy switch; CMAKE_EXECUTABLE_ENABLE_EXPORTS is the
# executable-specific variable available since CMake 3.27 (this file requires 3.28).
set(CMAKE_ENABLE_EXPORTS TRUE)
set(CMAKE_EXECUTABLE_ENABLE_EXPORTS TRUE)

enable_testing()

if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
Expand All @@ -32,11 +35,9 @@ endif()

add_subdirectory(doctest_better_junit)

if (SPARROW_TARGET_32BIT)
set(SPARROW_TESTS_SOURCES
main.cpp
)
else()
if (SPARROW_TARGET_32BIT)
set(SPARROW_TESTS_SOURCES main.cpp)
else()
set(SPARROW_TESTS_SOURCES
arrow_array_schema_creation.hpp
external_array_data_creation.cpp
Expand Down
1 change: 1 addition & 0 deletions test/c_data_integration/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ add_library(c_data_integration SHARED
src/fixedsizelist_parser.cpp
src/json_parser.cpp
src/list_parser.cpp
src/listview_parser.cpp
src/null_parser.cpp
src/primitive_parser.cpp
src/run_end_encoded_parser.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@ namespace sparrow::c_data_integration
// Names of the fields looked up in an array's JSON object.
// NOTE(review): only OFFSET and SIZE are visibly used as JSON keys in this change
// (by utils::get_offsets / utils::get_sizes); the others presumably follow the same
// convention - confirm against their call sites.
inline constexpr std::string_view VALIDITY = "VALIDITY";
inline constexpr std::string_view DATA = "DATA";
// Key of the offsets entry of an array JSON object.
inline constexpr std::string_view OFFSET = "OFFSET";
// Key of the sizes entry of an array JSON object (needed by list-view layouts).
inline constexpr std::string_view SIZE = "SIZE";
inline constexpr std::string_view TYPE_ID = "TYPE_ID";
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright 2024 Man Group Operations Limited
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <nlohmann/json.hpp>

#include <sparrow/array.hpp>

namespace sparrow::c_data_integration
{
/**
 * Builds a list-view array from its JSON description.
 *
 * @param array  JSON object of the array; its validity, offsets, sizes and children are read.
 * @param schema JSON schema of the field; must be of type "listview". Its name and
 *               metadata are attached to the resulting array.
 * @param root   Root JSON document, forwarded when building the child arrays.
 * @return The list-view array wrapped in a sparrow::array.
 */
sparrow::array
list_view_array_from_json(const nlohmann::json& array, const nlohmann::json& schema, const nlohmann::json& root);

/**
 * Builds a large list-view array from its JSON description.
 *
 * @param array  JSON object of the array; its validity, offsets, sizes and children are read.
 * @param schema JSON schema of the field; must be of type "largelistview". Its name and
 *               metadata are attached to the resulting array.
 * @param root   Root JSON document, forwarded when building the child arrays.
 * @return The large list-view array wrapped in a sparrow::array.
 */
sparrow::array large_list_view_array_from_json(
const nlohmann::json& array,
const nlohmann::json& schema,
const nlohmann::json& root
);
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ namespace sparrow::c_data_integration::utils

std::optional<std::vector<sparrow::metadata_pair>> get_metadata(const nlohmann::json& schema);

/**
 * Reads the OFFSET entry of an array JSON object.
 *
 * The entry must be a JSON array whose elements are unsigned integers or strings
 * holding unsigned integers; an empty JSON array yields an empty vector.
 *
 * @param array JSON object of the array.
 * @return The offsets as a vector of size_t.
 * @throws std::runtime_error if the entry is missing, is not a JSON array, or holds
 *         values that are neither unsigned integers nor parsable strings.
 */
std::vector<size_t> get_offsets(const nlohmann::json& array);

/**
 * Reads the SIZE entry of an array JSON object.
 *
 * The entry must be a JSON array whose elements are unsigned integers or strings
 * holding unsigned integers; an empty JSON array yields an empty vector.
 *
 * @param array JSON object of the array.
 * @return The sizes as a vector of size_t.
 * @throws std::runtime_error if the entry is missing, is not a JSON array, or holds
 *         values that are neither unsigned integers nor parsable strings.
 */
std::vector<size_t> get_sizes(const nlohmann::json& array);

template <std::integral I>
auto from_strings_to_Is(const std::vector<std::string>& data_str)
{
Expand Down
12 changes: 9 additions & 3 deletions test/c_data_integration/src/json_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "sparrow/c_data_integration/fixedsizebinary_parser.hpp"
#include "sparrow/c_data_integration/fixedsizelist_parser.hpp"
#include "sparrow/c_data_integration/list_parser.hpp"
#include "sparrow/c_data_integration/listview_parser.hpp"
#include "sparrow/c_data_integration/null_parser.hpp"
#include "sparrow/c_data_integration/primitive_parser.hpp"
#include "sparrow/c_data_integration/run_end_encoded_parser.hpp"
Expand All @@ -41,27 +42,32 @@ namespace sparrow::c_data_integration
// Maps the "name" of a JSON schema type to the function building the matching array.
// Entries are kept in alphabetical order, one per type name: a key listed twice in
// the initializer would be silently ignored by std::unordered_map.
const std::unordered_map<std::string, array_builder_function> array_builders{
    {"binary", binary_array_from_json},
    {"bool", bool_array_from_json},
    {"date", date_array_from_json},
    {"decimal", decimal_from_json},
    {"dictionary", dictionary_encode_array_from_json},
    {"duration", duration_array_from_json},
    {"fixedsizebinary", fixedsizebinary_from_json},
    {"fixedsizelist", fixed_size_list_array_from_json},
    {"floatingpoint", floating_point_from_json},
    {"int", primitive_array_from_json},
    {"interval", interval_array_from_json},
    {"largebinary", large_binary_array_from_json},
    {"largelist", large_list_array_from_json},
    {"largelistview", large_list_view_array_from_json},
    {"largeutf8", big_string_array_from_json},
    {"list", list_array_from_json},
    {"listview", list_view_array_from_json},
    {"null", null_array_from_json},
    {"runendencoded", runendencoded_array_from_json},
    {"struct", struct_array_from_json},
    {"time", time_array_from_json},
    {"timestamp", timestamp_array_from_json},
    {"union", union_array_from_json},
    {"utf8", string_array_from_json},
    {"utf8view", string_view_from_json},
};

std::vector<sparrow::array>
Expand Down
66 changes: 66 additions & 0 deletions test/c_data_integration/src/listview_parser.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright 2024 Man Group Operations Limited
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "sparrow/c_data_integration/listview_parser.hpp"

#include "sparrow/c_data_integration/json_parser.hpp"
#include "sparrow/c_data_integration/utils.hpp"

namespace sparrow::c_data_integration
{
/**
 * Builds a list-view array from its JSON description.
 *
 * @param array  JSON object of the array; its validity, offsets, sizes and children are read.
 * @param schema JSON schema of the field; must be of type "listview". Its name and
 *               metadata are attached to the resulting array.
 * @param root   Root JSON document, forwarded when building the child arrays.
 * @return The list-view array wrapped in a sparrow::array.
 * @throws std::out_of_range if the JSON description yields no child array.
 */
sparrow::array
list_view_array_from_json(const nlohmann::json& array, const nlohmann::json& schema, const nlohmann::json& root)
{
    utils::check_type(schema, "listview");
    const std::string name = schema.at("name").get<std::string>();
    auto validity = utils::get_validity(array);
    auto offsets = utils::get_offsets(array);
    auto sizes = utils::get_sizes(array);
    auto metadata = utils::get_metadata(schema);
    std::vector<sparrow::array> arrays = get_children_arrays(array, schema, root);
    sparrow::list_view_array ar{
        // at() instead of operator[]: malformed input without children must raise an
        // exception rather than read past the end of the vector.
        std::move(arrays.at(0)),
        std::move(offsets),
        std::move(sizes),
        std::move(validity),
        name,
        std::move(metadata)
    };
    return sparrow::array{std::move(ar)};
}

/**
 * Builds a large list-view array from its JSON description.
 *
 * @param array  JSON object of the array; its validity, offsets, sizes and children are read.
 * @param schema JSON schema of the field; must be of type "largelistview". Its name and
 *               metadata are attached to the resulting array.
 * @param root   Root JSON document, forwarded when building the child arrays.
 * @return The large list-view array wrapped in a sparrow::array.
 * @throws std::out_of_range if the JSON description yields no child array.
 */
sparrow::array large_list_view_array_from_json(
    const nlohmann::json& array,
    const nlohmann::json& schema,
    const nlohmann::json& root
)
{
    utils::check_type(schema, "largelistview");
    const std::string name = schema.at("name").get<std::string>();
    auto validity = utils::get_validity(array);
    auto offsets = utils::get_offsets(array);
    auto sizes = utils::get_sizes(array);
    auto metadata = utils::get_metadata(schema);
    std::vector<sparrow::array> arrays = get_children_arrays(array, schema, root);
    sparrow::big_list_view_array ar{
        // at() instead of operator[]: malformed input without children must raise an
        // exception rather than read past the end of the vector.
        std::move(arrays.at(0)),
        std::move(offsets),
        std::move(sizes),
        std::move(validity),
        name,
        std::move(metadata)
    };
    return sparrow::array{std::move(ar)};
}
}
84 changes: 82 additions & 2 deletions test/c_data_integration/src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "sparrow/c_data_integration/utils.hpp"

Check failure on line 15 in test/c_data_integration/src/utils.cpp

View workflow job for this annotation

GitHub Actions / build

test/c_data_integration/src/utils.cpp:15:10 [clang-diagnostic-error]

'sparrow/c_data_integration/utils.hpp' file not found

#include <charconv>
#include <string_view>
Expand Down Expand Up @@ -92,6 +92,87 @@
return std::vector<bool>(validity_range.begin(), validity_range.end());
}

/**
 * Reads the OFFSET entry of an array JSON object.
 *
 * The elements may be encoded as unsigned JSON numbers or as strings holding an
 * unsigned decimal number; the encoding is detected from the first element.
 *
 * @param array JSON object of the array.
 * @return The offsets; empty when the JSON array is empty.
 * @throws std::runtime_error if the entry is missing, is not a JSON array, has elements
 *         that are neither unsigned integers nor strings, or contains a string that is
 *         not entirely an unsigned decimal number.
 * @note Elements that do not match the encoding of the first one are rejected by the
 *       JSON library's own conversion, with that library's exception type.
 */
std::vector<size_t> get_offsets(const nlohmann::json& array)
{
    if (!array.contains(OFFSET))
    {
        throw std::runtime_error("Offset not found in array");
    }
    const nlohmann::json& json_offsets = array.at(OFFSET);
    if (!json_offsets.is_array())
    {
        throw std::runtime_error("Offset is not an array");
    }
    if (json_offsets.empty())
    {
        return std::vector<size_t>{};
    }
    // The first element decides how the whole array is decoded.
    const nlohmann::json& first = json_offsets.front();
    if (first.is_number_unsigned())
    {
        return json_offsets.get<std::vector<size_t>>();
    }
    if (first.is_string())
    {
        const auto strings = json_offsets.get<std::vector<std::string>>();
        std::vector<size_t> offsets;
        offsets.reserve(strings.size());
        for (const std::string& str : strings)
        {
            size_t value = 0;
            const char* const last = str.data() + str.size();
            const auto [ptr, ec] = std::from_chars(str.data(), last, value);
            // from_chars stops at the first non-digit without reporting an error, so
            // the whole string must have been consumed for the value to be valid.
            if (ec != std::errc{} || ptr != last)
            {
                throw std::runtime_error("Invalid offset value: " + str);
            }
            offsets.push_back(value);
        }
        return offsets;
    }
    throw std::runtime_error("Offset is not an array of unsigned integers or strings");
}

/**
 * Reads the SIZE entry of an array JSON object.
 *
 * The elements may be encoded as unsigned JSON numbers or as strings holding an
 * unsigned decimal number; the encoding is detected from the first element.
 *
 * @param array JSON object of the array.
 * @return The sizes; empty when the JSON array is empty.
 * @throws std::runtime_error if the entry is missing, is not a JSON array, has elements
 *         that are neither unsigned integers nor strings, or contains a string that is
 *         not entirely an unsigned decimal number.
 * @note Elements that do not match the encoding of the first one are rejected by the
 *       JSON library's own conversion, with that library's exception type.
 */
std::vector<size_t> get_sizes(const nlohmann::json& array)
{
    if (!array.contains(SIZE))
    {
        throw std::runtime_error("Size not found in array");
    }
    const nlohmann::json& json_sizes = array.at(SIZE);
    if (!json_sizes.is_array())
    {
        throw std::runtime_error("Size is not an array");
    }
    if (json_sizes.empty())
    {
        return std::vector<size_t>{};
    }
    // The first element decides how the whole array is decoded.
    const nlohmann::json& first = json_sizes.front();
    if (first.is_number_unsigned())
    {
        return json_sizes.get<std::vector<size_t>>();
    }
    if (first.is_string())
    {
        const auto strings = json_sizes.get<std::vector<std::string>>();
        std::vector<size_t> sizes;
        sizes.reserve(strings.size());
        for (const std::string& str : strings)
        {
            size_t value = 0;
            const char* const last = str.data() + str.size();
            const auto [ptr, ec] = std::from_chars(str.data(), last, value);
            // from_chars stops at the first non-digit without reporting an error, so
            // the whole string must have been consumed for the value to be valid.
            if (ec != std::errc{} || ptr != last)
            {
                throw std::runtime_error("Invalid size value: " + str);
            }
            sizes.push_back(value);
        }
        return sizes;
    }

    throw std::runtime_error("Size is not an array of unsigned integers or strings");
}

void check_type(const nlohmann::json& schema, const std::string& type)
{
const std::string schema_type = schema.at("type").at("name").get<std::string>();
Expand All @@ -118,5 +199,4 @@
return metadata;
}


} // namespace sparrow::c_data_integration::utils
} // namespace sparrow::c_data_integration::utils
Loading
Loading