From 7f1c67f2e9fc93460ad7615ae29541e27c37ec38 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 29 Jan 2026 16:32:30 +0900 Subject: [PATCH] GH-49053: [Ruby] Add support for writing timestamp array --- c_glib/arrow-glib/basic-data-type.cpp | 28 +++- .../red-arrow-format/lib/arrow-format/type.rb | 13 +- ruby/red-arrow-format/test/test-reader.rb | 38 ++--- ruby/red-arrow-format/test/test-writer.rb | 155 ++++++++++++++++-- 4 files changed, 197 insertions(+), 37 deletions(-) diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp index 9b77e87422d5..87c5eed530f3 100644 --- a/c_glib/arrow-glib/basic-data-type.cpp +++ b/c_glib/arrow-glib/basic-data-type.cpp @@ -1165,13 +1165,13 @@ GArrowTimestampDataType * garrow_timestamp_data_type_new(GArrowTimeUnit unit, GTimeZone *time_zone) { auto arrow_unit = garrow_time_unit_to_raw(unit); - std::string arrow_timezone; + std::string arrow_time_zone; #if GLIB_CHECK_VERSION(2, 58, 0) if (time_zone) { - arrow_timezone = g_time_zone_get_identifier(time_zone); + arrow_time_zone = g_time_zone_get_identifier(time_zone); } #endif - auto arrow_data_type = arrow::timestamp(arrow_unit, arrow_timezone); + auto arrow_data_type = arrow::timestamp(arrow_unit, arrow_time_zone); auto data_type = GARROW_TIMESTAMP_DATA_TYPE(g_object_new(GARROW_TYPE_TIMESTAMP_DATA_TYPE, "data-type", @@ -2645,6 +2645,28 @@ garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type) break; case arrow::Type::type::TIMESTAMP: type = GARROW_TYPE_TIMESTAMP_DATA_TYPE; + { + auto arrow_timestamp_data_type = + std::static_pointer_cast<arrow::TimestampType>(*arrow_data_type); + const auto &arrow_time_zone = arrow_timestamp_data_type->timezone(); + if (!arrow_time_zone.empty()) { +#if GLIB_CHECK_VERSION(2, 68, 0) + auto time_zone = g_time_zone_new_identifier(arrow_time_zone.c_str()); +#else + auto time_zone = g_time_zone_new(arrow_time_zone.c_str()); +#endif + data_type = GARROW_DATA_TYPE(g_object_new(type, + "data-type", + arrow_data_type, + "time-zone", + 
time_zone, + nullptr)); + if (time_zone) { + g_time_zone_unref(time_zone); + } + return data_type; + } + } break; case arrow::Type::type::TIME32: type = GARROW_TYPE_TIME32_DATA_TYPE; diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb index 813278b86154..0cabc6b501ab 100644 --- a/ruby/red-arrow-format/lib/arrow-format/type.rb +++ b/ruby/red-arrow-format/lib/arrow-format/type.rb @@ -434,11 +434,11 @@ def build_array(size, validity_buffer, values_buffer) class TimestampType < TemporalType attr_reader :unit - attr_reader :timezone - def initialize(unit, timezone) + attr_reader :time_zone + def initialize(unit, time_zone) super() @unit = unit - @timezone = timezone + @time_zone = time_zone end def name @@ -448,6 +448,13 @@ def name def build_array(size, validity_buffer, values_buffer) TimestampArray.new(self, size, validity_buffer, values_buffer) end + + def to_flatbuffers + fb_type = FB::Timestamp::Data.new + fb_type.unit = FB::TimeUnit.try_convert(@unit.to_s.upcase) + fb_type.timezone = @time_zone + fb_type + end end class IntervalType < TemporalType diff --git a/ruby/red-arrow-format/test/test-reader.rb b/ruby/red-arrow-format/test/test-reader.rb index a5919c3fb9fa..e2e27d3dbcf3 100644 --- a/ruby/red-arrow-format/test/test-reader.rb +++ b/ruby/red-arrow-format/test/test-reader.rb @@ -351,7 +351,7 @@ def test_type sub_test_case("Timestamp(:second)") do def setup(&block) - @timestamp_2019_11_18_00_09_11 = 1574003351 + @timestamp_2019_11_17_15_09_11 = 1574003351 @timestamp_2025_12_16_05_33_58 = 1765863238 super(&block) end @@ -359,7 +359,7 @@ def setup(&block) def build_array Arrow::TimestampArray.new(:second, [ - @timestamp_2019_11_18_00_09_11, + @timestamp_2019_11_17_15_09_11, nil, @timestamp_2025_12_16_05_33_58, ]) @@ -369,7 +369,7 @@ def test_read assert_equal([ { "value" => [ - @timestamp_2019_11_18_00_09_11, + @timestamp_2019_11_17_15_09_11, nil, @timestamp_2025_12_16_05_33_58, ], @@ -381,7 +381,7 @@ def 
test_read sub_test_case("Timestamp(:millisecond)") do def setup(&block) - @timestamp_2019_11_18_00_09_11 = 1574003351 * 1_000 + @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000 @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000 super(&block) end @@ -389,7 +389,7 @@ def setup(&block) def build_array Arrow::TimestampArray.new(:milli, [ - @timestamp_2019_11_18_00_09_11, + @timestamp_2019_11_17_15_09_11, nil, @timestamp_2025_12_16_05_33_58, ]) @@ -399,7 +399,7 @@ def test_read assert_equal([ { "value" => [ - @timestamp_2019_11_18_00_09_11, + @timestamp_2019_11_17_15_09_11, nil, @timestamp_2025_12_16_05_33_58, ], @@ -411,7 +411,7 @@ def test_read sub_test_case("Timestamp(:microsecond)") do def setup(&block) - @timestamp_2019_11_18_00_09_11 = 1574003351 * 1_000_000 + @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000 @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000 super(&block) end @@ -419,7 +419,7 @@ def setup(&block) def build_array Arrow::TimestampArray.new(:micro, [ - @timestamp_2019_11_18_00_09_11, + @timestamp_2019_11_17_15_09_11, nil, @timestamp_2025_12_16_05_33_58, ]) @@ -429,7 +429,7 @@ def test_read assert_equal([ { "value" => [ - @timestamp_2019_11_18_00_09_11, + @timestamp_2019_11_17_15_09_11, nil, @timestamp_2025_12_16_05_33_58, ], @@ -441,7 +441,7 @@ def test_read sub_test_case("Timestamp(:nanosecond)") do def setup(&block) - @timestamp_2019_11_18_00_09_11 = 1574003351 * 1_000_000_000 + @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000_000 @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000_000 super(&block) end @@ -449,7 +449,7 @@ def setup(&block) def build_array Arrow::TimestampArray.new(:nano, [ - @timestamp_2019_11_18_00_09_11, + @timestamp_2019_11_17_15_09_11, nil, @timestamp_2025_12_16_05_33_58, ]) @@ -459,7 +459,7 @@ def test_read assert_equal([ { "value" => [ - @timestamp_2019_11_18_00_09_11, + @timestamp_2019_11_17_15_09_11, nil, @timestamp_2025_12_16_05_33_58, ], @@ -469,27 +469,27 @@ def test_read end end - 
sub_test_case("Timestamp(timezone)") do + sub_test_case("Timestamp(time_zone)") do def setup(&block) - @timezone = "UTC" - @timestamp_2019_11_18_00_09_11 = 1574003351 + @time_zone = "UTC" + @timestamp_2019_11_17_15_09_11 = 1574003351 @timestamp_2025_12_16_05_33_58 = 1765863238 super(&block) end def build_array - data_type = Arrow::TimestampDataType.new(:second, @timezone) + data_type = Arrow::TimestampDataType.new(:second, @time_zone) Arrow::TimestampArray.new(data_type, [ - @timestamp_2019_11_18_00_09_11, + @timestamp_2019_11_17_15_09_11, nil, @timestamp_2025_12_16_05_33_58, ]) end def test_type - assert_equal([:second, @timezone], - [type.unit, type.timezone]) + assert_equal([:second, @time_zone], + [type.unit, type.time_zone]) end end diff --git a/ruby/red-arrow-format/test/test-writer.rb b/ruby/red-arrow-format/test/test-writer.rb index ccc09b3f6317..4b6a5cf93366 100644 --- a/ruby/red-arrow-format/test/test-writer.rb +++ b/ruby/red-arrow-format/test/test-writer.rb @@ -16,6 +16,14 @@ # under the License. 
module WriterTests + def convert_time_unit(red_arrow_time_unit) + if red_arrow_time_unit.nick == "second" + red_arrow_time_unit.nick.to_sym + else + :"#{red_arrow_time_unit.nick}second" + end + end + def convert_type(red_arrow_type) case red_arrow_type when Arrow::NullDataType @@ -47,19 +55,12 @@ def convert_type(red_arrow_type) when Arrow::Date64DataType ArrowFormat::Date64Type.singleton when Arrow::Time32DataType - case red_arrow_type.unit.nick - when "second" - ArrowFormat::Time32Type.new(:second) - when "milli" - ArrowFormat::Time32Type.new(:millisecond) - end + ArrowFormat::Time32Type.new(convert_time_unit(red_arrow_type.unit)) when Arrow::Time64DataType - case red_arrow_type.unit.nick - when "micro" - ArrowFormat::Time64Type.new(:microsecond) - when "nano" - ArrowFormat::Time64Type.new(:nanosecond) - end + ArrowFormat::Time64Type.new(convert_time_unit(red_arrow_type.unit)) + when Arrow::TimestampDataType + ArrowFormat::TimestampType.new(convert_time_unit(red_arrow_type.unit), + red_arrow_type.time_zone&.identifier) when Arrow::BinaryDataType ArrowFormat::BinaryType.singleton when Arrow::LargeBinaryDataType @@ -388,6 +389,134 @@ def test_write end end + sub_test_case("Timestamp(:second)") do + def setup(&block) + @timestamp_2019_11_17_15_09_11 = 1574003351 + @timestamp_2025_12_16_05_33_58 = 1765863238 + super(&block) + end + + def build_array + Arrow::TimestampArray.new(:second, + [ + @timestamp_2019_11_17_15_09_11, + nil, + @timestamp_2025_12_16_05_33_58, + ]) + end + + def test_write + assert_equal([ + Time.at(@timestamp_2019_11_17_15_09_11), + nil, + Time.at(@timestamp_2025_12_16_05_33_58), + ], + @values) + end + end + + sub_test_case("Timestamp(:millisecond)") do + def setup(&block) + @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000 + @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000 + super(&block) + end + + def build_array + Arrow::TimestampArray.new(:milli, + [ + @timestamp_2019_11_17_15_09_11, + nil, + @timestamp_2025_12_16_05_33_58, + ]) + end + 
+ def test_write + assert_equal([ + Time.at(@timestamp_2019_11_17_15_09_11 / 1_000), + nil, + Time.at(@timestamp_2025_12_16_05_33_58 / 1_000), + ], + @values) + end + end + + sub_test_case("Timestamp(:microsecond)") do + def setup(&block) + @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000 + @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000 + super(&block) + end + + def build_array + Arrow::TimestampArray.new(:micro, + [ + @timestamp_2019_11_17_15_09_11, + nil, + @timestamp_2025_12_16_05_33_58, + ]) + end + + def test_write + assert_equal([ + Time.at(@timestamp_2019_11_17_15_09_11 / 1_000_000), + nil, + Time.at(@timestamp_2025_12_16_05_33_58 / 1_000_000), + ], + @values) + end + end + + sub_test_case("Timestamp(:nanosecond)") do + def setup(&block) + @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000_000 + @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000_000 + super(&block) + end + + def build_array + Arrow::TimestampArray.new(:nano, + [ + @timestamp_2019_11_17_15_09_11, + nil, + @timestamp_2025_12_16_05_33_58, + ]) + end + + def test_write + assert_equal([ + Time.at(@timestamp_2019_11_17_15_09_11 / 1_000_000_000), + nil, + Time.at(@timestamp_2025_12_16_05_33_58 / 1_000_000_000), + ], + @values) + end + end + + sub_test_case("Timestamp(time_zone)") do + def setup(&block) + @time_zone = "UTC" + @timestamp_2019_11_17_15_09_11 = 1574003351 + @timestamp_2025_12_16_05_33_58 = 1765863238 + super(&block) + end + + def build_array + data_type = Arrow::TimestampDataType.new(:second, @time_zone) + Arrow::TimestampArray.new(data_type, + [ + @timestamp_2019_11_17_15_09_11, + nil, + @timestamp_2025_12_16_05_33_58, + ]) + end + + def test_type + assert_equal([Arrow::TimeUnit::SECOND, @time_zone], + [@type.unit, @type.time_zone&.identifier]) + end + end + sub_test_case("Binary") do def build_array Arrow::BinaryArray.new(["Hello".b, nil, "World".b]) @@ -473,6 +602,7 @@ def setup end data = File.open(path, "rb", &:read).freeze table = 
Arrow::Table.load(Arrow::Buffer.new(data), format: :arrow) + @type = table.value.data_type @values = table.value.values end end @@ -502,6 +632,7 @@ def setup end data = File.open(path, "rb", &:read).freeze table = Arrow::Table.load(Arrow::Buffer.new(data), format: :arrows) + @type = table.value.data_type @values = table.value.values end end