diff --git a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb index 03514a3cc2e0..05399180be1a 100644 --- a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb +++ b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb @@ -34,6 +34,7 @@ class FileReader FOOTER_SIZE_FORMAT = :s32 FOOTER_SIZE_SIZE = IO::Buffer.size_of(FOOTER_SIZE_FORMAT) + attr_reader :schema def initialize(input) case input when IO diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb index bc2b3132857f..3c2d5f3ac9bd 100644 --- a/ruby/red-arrow-format/lib/arrow-format/type.rb +++ b/ruby/red-arrow-format/lib/arrow-format/type.rb @@ -16,6 +16,9 @@ module ArrowFormat class Type + def to_s + name + end end class NullType < Type @@ -412,6 +415,10 @@ def initialize(bit_width, unit) @unit = unit end + def to_s + "#{super}(#{unit})" + end + def to_flatbuffers fb_type = FB::Time::Data.new fb_type.bit_width = @bit_width @@ -477,6 +484,12 @@ def build_array(size, validity_buffer, values_buffer) TimestampArray.new(self, size, validity_buffer, values_buffer) end + def to_s + options = [@unit] + options << @time_zone if @time_zone + "#{super}(#{options.join(", ")})" + end + def to_flatbuffers fb_type = FB::Timestamp::Data.new fb_type.unit = FB::TimeUnit.try_convert(@unit.to_s.upcase) @@ -581,6 +594,10 @@ def build_array(size, validity_buffer, values_buffer) DurationArray.new(self, size, validity_buffer, values_buffer) end + def to_s + "#{super}(#{@unit})" + end + def to_flatbuffers fb_type = FB::Duration::Data.new fb_type.unit = FB::TimeUnit.try_convert(@unit.to_s.upcase) @@ -730,6 +747,10 @@ def build_array(size, validity_buffer, values_buffer) FixedSizeBinaryArray.new(self, size, validity_buffer, values_buffer) end + def to_s + "#{super}(#{@byte_width})" + end + def to_flatbuffers fb_type = FB::FixedSizeBinary::Data.new fb_type.byte_width = @byte_width @@ -746,6 +767,10 @@ def 
initialize(byte_width, precision, scale) @scale = scale end + def to_s + "#{name}(#{@precision}, #{@scale})" + end + def to_flatbuffers fb_type = FB::Decimal::Data.new fb_type.bit_width = @byte_width * 8 @@ -789,6 +814,10 @@ def initialize(child) super() @child = child end + + def to_s + "#{super}<#{child.name}: #{child.type}>" + end end class ListType < VariableSizeListType @@ -842,6 +871,13 @@ def build_array(size, validity_buffer, children) StructArray.new(self, size, validity_buffer, children) end + def to_s + fields = children.collect do |child| + "#{child.name}: #{child.type}" + end + "#{super}<#{fields.join(", ")}>" + end + def to_flatbuffers FB::Struct::Data.new end @@ -880,6 +916,11 @@ def build_array(size, validity_buffer, offsets_buffer, child) MapArray.new(self, size, validity_buffer, offsets_buffer, child) end + def to_s + key, value, = child.type.children + "#{name}<#{key.type}, #{value.type}>" + end + def to_flatbuffers FB::Map::Data.new end @@ -900,6 +941,13 @@ def resolve_type_index(type) @type_indexes[type] ||= @type_ids.index(type) end + def to_s + children = @children.collect.with_index do |child, i| + "#{child.name}: #{child.type}=#{@type_ids[i]}" + end + "#{super}<#{children.join(", ")}>" + end + def to_flatbuffers fb_type = FB::Union::Data.new fb_type.mode = FB::UnionMode.try_convert(@mode.to_s.capitalize) @@ -975,5 +1023,10 @@ def build_fb_field(fb_field, field) fb_field.type = @value_type.to_flatbuffers fb_field.dictionary = fb_dictionary_encoding end + + def to_s + "#{super}" + end end end diff --git a/ruby/red-arrow-format/test/test-reader.rb b/ruby/red-arrow-format/test/test-reader.rb index 10a2597f4a05..763e8737b5cd 100644 --- a/ruby/red-arrow-format/test/test-reader.rb +++ b/ruby/red-arrow-format/test/test-reader.rb @@ -16,534 +16,327 @@ # under the License. 
module ReaderTests - def read - @reader.collect do |record_batch| - record_batch.to_h.tap do |hash| - hash.each do |key, value| - hash[key] = value.to_a - end + def roundtrip(array) + Dir.mktmpdir do |tmp_dir| + table = Arrow::Table.new(value: array) + path = File.join(tmp_dir, "data.#{file_extension}") + table.save(path) + File.open(path, "rb") do |input| + reader = reader_class.new(input) + values = [] + reader.each do |record_batch| + values.concat(record_batch.columns[0].to_a) + end + [reader.schema.fields[0].type, values] end + ensure + GC.start end end - def type - @type ||= @reader.first.schema.fields[0].type + def test_null + type, values = roundtrip(Arrow::NullArray.new(3)) + assert_equal(["Null", [nil, nil, nil]], + [type.to_s, values]) end - class << self - def included(base) - base.class_eval do - sub_test_case("Null") do - def build_array - Arrow::NullArray.new(3) - end - - def test_read - assert_equal([{"value" => [nil, nil, nil]}], - read) - end - end - - sub_test_case("Boolean") do - def build_array - Arrow::BooleanArray.new([true, nil, false]) - end - - def test_read - assert_equal([{"value" => [true, nil, false]}], - read) - end - end - - sub_test_case("Int8") do - def build_array - Arrow::Int8Array.new([-128, nil, 127]) - end - - def test_read - assert_equal([{"value" => [-128, nil, 127]}], - read) - end - end - - sub_test_case("UInt8") do - def build_array - Arrow::UInt8Array.new([0, nil, 255]) - end - - def test_read - assert_equal([{"value" => [0, nil, 255]}], - read) - end - end - - sub_test_case("Int16") do - def build_array - Arrow::Int16Array.new([-32768, nil, 32767]) - end - - def test_read - assert_equal([{"value" => [-32768, nil, 32767]}], - read) - end - end + def test_boolean + type, values = roundtrip(Arrow::BooleanArray.new([true, nil, false])) + assert_equal(["Boolean", [true, nil, false]], + [type.to_s, values]) + end - sub_test_case("UInt16") do - def build_array - Arrow::UInt16Array.new([0, nil, 65535]) - end + def test_int8 + 
type, values = roundtrip(Arrow::Int8Array.new([-128, nil, 127])) + assert_equal(["Int8", [-128, nil, 127]], + [type.to_s, values]) + end - def test_read - assert_equal([{"value" => [0, nil, 65535]}], - read) - end - end + def test_uint8 + type, values = roundtrip(Arrow::UInt8Array.new([0, nil, 255])) + assert_equal(["UInt8", [0, nil, 255]], + [type.to_s, values]) + end - sub_test_case("Int32") do - def build_array - Arrow::Int32Array.new([-2147483648, nil, 2147483647]) - end + def test_int16 + type, values = roundtrip(Arrow::Int16Array.new([-32768, nil, 32767])) + assert_equal(["Int16", [-32768, nil, 32767]], + [type.to_s, values]) + end - def test_read - assert_equal([{"value" => [-2147483648, nil, 2147483647]}], - read) - end - end + def test_uint16 + type, values = roundtrip(Arrow::UInt16Array.new([0, nil, 65535])) + assert_equal(["UInt16", [0, nil, 65535]], + [type.to_s, values]) + end - sub_test_case("UInt32") do - def build_array - Arrow::UInt32Array.new([0, nil, 4294967295]) - end + def test_int32 + array = Arrow::Int32Array.new([-2147483648, nil, 2147483647]) + type, values = roundtrip(array) + assert_equal(["Int32", [-2147483648, nil, 2147483647]], + [type.to_s, values]) + end - def test_read - assert_equal([{"value" => [0, nil, 4294967295]}], - read) - end - end + def test_uint32 + array = Arrow::UInt32Array.new([0, nil, 4294967295]) + type, values = roundtrip(array) + assert_equal(["UInt32", [0, nil, 4294967295]], + [type.to_s, values]) + end - sub_test_case("Int64") do - def build_array - Arrow::Int64Array.new([ + def test_int64 + array = Arrow::Int64Array.new([ -9223372036854775808, nil, 9223372036854775807 ]) - end - - def test_read - assert_equal([ - { - "value" => [ - -9223372036854775808, - nil, - 9223372036854775807 - ] - } - ], - read) - end - end - - sub_test_case("UInt64") do - def build_array - Arrow::UInt64Array.new([0, nil, 18446744073709551615]) - end - - def test_read - assert_equal([{"value" => [0, nil, 18446744073709551615]}], - read) - 
end - end - - sub_test_case("Float32") do - def build_array - Arrow::FloatArray.new([-0.5, nil, 0.5]) - end - - def test_read - assert_equal([{"value" => [-0.5, nil, 0.5]}], - read) - end - end + type, values = roundtrip(array) + assert_equal(["Int64", [-9223372036854775808, nil, 9223372036854775807]], + [type.to_s, values]) + end - sub_test_case("Float64") do - def build_array - Arrow::DoubleArray.new([-0.5, nil, 0.5]) - end + def test_uint64 + array = Arrow::UInt64Array.new([0, nil, 18446744073709551615]) + type, values = roundtrip(array) + assert_equal(["UInt64", [0, nil, 18446744073709551615]], + [type.to_s, values]) + end - def test_read - assert_equal([{"value" => [-0.5, nil, 0.5]}], - read) - end - end + def test_float32 + type, values = roundtrip(Arrow::FloatArray.new([-0.5, nil, 0.5])) + assert_equal(["Float32", [-0.5, nil, 0.5]], + [type.to_s, values]) + end - sub_test_case("Date32") do - def setup(&block) - @date_2017_08_28 = 17406 - @date_2025_12_09 = 20431 - super(&block) - end - - def build_array - Arrow::Date32Array.new([@date_2017_08_28, nil, @date_2025_12_09]) - end - - def test_read - assert_equal([ - { - "value" => [ - @date_2017_08_28, - nil, - @date_2025_12_09, - ], - }, - ], - read) - end - end + def test_float64 + type, values = roundtrip(Arrow::DoubleArray.new([-0.5, nil, 0.5])) + assert_equal(["Float64", [-0.5, nil, 0.5]], + [type.to_s, values]) + end - sub_test_case("Date64") do - def setup(&block) - @date_2017_08_28_00_00_00 = 1503878400000 - @date_2025_12_10_00_00_00 = 1765324800000 - super(&block) - end + def test_date32 + date_2017_08_28 = 17406 + date_2025_12_09 = 20431 + array = Arrow::Date32Array.new([date_2017_08_28, nil, date_2025_12_09]) + type, values = roundtrip(array) + assert_equal(["Date32", [date_2017_08_28, nil, date_2025_12_09]], + [type.to_s, values]) + end - def build_array - Arrow::Date64Array.new([ - @date_2017_08_28_00_00_00, + def test_date64 + date_2017_08_28_00_00_00 = 1503878400000 + date_2025_12_10_00_00_00 = 
1765324800000 + array = Arrow::Date64Array.new([ + date_2017_08_28_00_00_00, nil, - @date_2025_12_10_00_00_00, + date_2025_12_10_00_00_00, ]) - end - - def test_read - assert_equal([ - { - "value" => [ - @date_2017_08_28_00_00_00, - nil, - @date_2025_12_10_00_00_00, - ], - }, - ], - read) - end - end - - sub_test_case("Time32(:second)") do - def setup(&block) - @time_00_00_10 = 10 - @time_00_01_10 = 60 + 10 - super(&block) - end - - def build_array - Arrow::Time32Array.new(:second, - [@time_00_00_10, nil, @time_00_01_10]) - end - - def test_read - assert_equal([ - { - "value" => [ - @time_00_00_10, - nil, - @time_00_01_10, - ], - }, - ], - read) - end - - def test_type - assert_equal(:second, type.unit) - end - end + type, values = roundtrip(array) + assert_equal([ + "Date64", + [date_2017_08_28_00_00_00, nil, date_2025_12_10_00_00_00], + ], + [type.to_s, values]) + end - sub_test_case("Time32(:millisecond)") do - def setup(&block) - @time_00_00_10_000 = 10 * 1000 - @time_00_01_10_000 = (60 + 10) * 1000 - super(&block) - end + def test_time32_second + time_00_00_10 = 10 + time_00_01_10 = 60 + 10 + array = Arrow::Time32Array.new(:second, + [time_00_00_10, nil, time_00_01_10]) + type, values = roundtrip(array) + assert_equal(["Time32(second)", [time_00_00_10, nil, time_00_01_10]], + [type.to_s, values]) + end - def build_array - Arrow::Time32Array.new(:milli, + def test_time32_millisecond + time_00_00_10_000 = 10 * 1000 + time_00_01_10_000 = (60 + 10) * 1000 + array = Arrow::Time32Array.new(:milli, [ - @time_00_00_10_000, + time_00_00_10_000, nil, - @time_00_01_10_000, + time_00_01_10_000, ]) - end - - def test_read - assert_equal([ - { - "value" => [ - @time_00_00_10_000, - nil, - @time_00_01_10_000, - ], - }, - ], - read) - end - - def test_type - assert_equal(:millisecond, type.unit) - end - end - - sub_test_case("Time64(:microsecond)") do - def setup(&block) - @time_00_00_10_000_000 = 10 * 1_000_000 - @time_00_01_10_000_000 = (60 + 10) * 1_000_000 - super(&block) 
- end + type, values = roundtrip(array) + assert_equal([ + "Time32(millisecond)", + [time_00_00_10_000, nil, time_00_01_10_000], + ], + [type.to_s, values]) + end - def build_array - Arrow::Time64Array.new(:micro, + def test_time64_microsecond + time_00_00_10_000_000 = 10 * 1_000_000 + time_00_01_10_000_000 = (60 + 10) * 1_000_000 + array = Arrow::Time64Array.new(:micro, [ - @time_00_00_10_000_000, + time_00_00_10_000_000, nil, - @time_00_01_10_000_000, + time_00_01_10_000_000, ]) - end - - def test_read - assert_equal([ - { - "value" => [ - @time_00_00_10_000_000, - nil, - @time_00_01_10_000_000, - ], - }, - ], - read) - end - - def test_type - assert_equal(:microsecond, type.unit) - end - end - - sub_test_case("Time64(:nanosecond)") do - def setup(&block) - @time_00_00_10_000_000_000 = 10 * 1_000_000_000 - @time_00_01_10_000_000_000 = (60 + 10) * 1_000_000_000 - super(&block) - end + type, values = roundtrip(array) + assert_equal([ + "Time64(microsecond)", + [time_00_00_10_000_000, nil, time_00_01_10_000_000], + ], + [type.to_s, values]) + end - def build_array - Arrow::Time64Array.new(:nano, + def test_time64_nanosecond + time_00_00_10_000_000_000 = 10 * 1_000_000_000 + time_00_01_10_000_000_000 = (60 + 10) * 1_000_000_000 + array = Arrow::Time64Array.new(:nano, [ - @time_00_00_10_000_000_000, + time_00_00_10_000_000_000, nil, - @time_00_01_10_000_000_000, + time_00_01_10_000_000_000, ]) - end - - def test_read - assert_equal([ - { - "value" => [ - @time_00_00_10_000_000_000, - nil, - @time_00_01_10_000_000_000, - ], - }, - ], - read) - end - - def test_type - assert_equal(:nanosecond, type.unit) - end - end - - sub_test_case("Timestamp(:second)") do - def setup(&block) - @timestamp_2019_11_17_15_09_11 = 1574003351 - @timestamp_2025_12_16_05_33_58 = 1765863238 - super(&block) - end + type, values = roundtrip(array) + assert_equal([ + "Time64(nanosecond)", + [ + time_00_00_10_000_000_000, + nil, + time_00_01_10_000_000_000, + ], + ], + [type.to_s, values]) + end 
- def build_array - Arrow::TimestampArray.new(:second, + def test_timestamp_second + timestamp_2019_11_17_15_09_11 = 1574003351 + timestamp_2025_12_16_05_33_58 = 1765863238 + array = Arrow::TimestampArray.new(:second, [ - @timestamp_2019_11_17_15_09_11, + timestamp_2019_11_17_15_09_11, nil, - @timestamp_2025_12_16_05_33_58, + timestamp_2025_12_16_05_33_58, ]) - end - - def test_read - assert_equal([ - { - "value" => [ - @timestamp_2019_11_17_15_09_11, - nil, - @timestamp_2025_12_16_05_33_58, - ], - }, - ], - read) - end - end - - sub_test_case("Timestamp(:millisecond)") do - def setup(&block) - @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000 - @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000 - super(&block) - end + type, values = roundtrip(array) + assert_equal([ + "Timestamp(second)", + [ + timestamp_2019_11_17_15_09_11, + nil, + timestamp_2025_12_16_05_33_58, + ], + ], + [type.to_s, values]) + end - def build_array - Arrow::TimestampArray.new(:milli, + def test_timestamp_millisecond + timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000 + timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000 + array = Arrow::TimestampArray.new(:milli, [ - @timestamp_2019_11_17_15_09_11, + timestamp_2019_11_17_15_09_11, nil, - @timestamp_2025_12_16_05_33_58, + timestamp_2025_12_16_05_33_58, ]) - end - - def test_read - assert_equal([ - { - "value" => [ - @timestamp_2019_11_17_15_09_11, - nil, - @timestamp_2025_12_16_05_33_58, - ], - }, - ], - read) - end - end - - sub_test_case("Timestamp(:microsecond)") do - def setup(&block) - @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000 - @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000 - super(&block) - end + type, values = roundtrip(array) + assert_equal([ + "Timestamp(millisecond)", + [ + timestamp_2019_11_17_15_09_11, + nil, + timestamp_2025_12_16_05_33_58, + ], + ], + [type.to_s, values]) + end - def build_array - Arrow::TimestampArray.new(:micro, + def test_timestamp_microsecond + timestamp_2019_11_17_15_09_11 = 
1574003351 * 1_000_000 + timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000 + array = Arrow::TimestampArray.new(:micro, [ - @timestamp_2019_11_17_15_09_11, + timestamp_2019_11_17_15_09_11, nil, - @timestamp_2025_12_16_05_33_58, + timestamp_2025_12_16_05_33_58, ]) - end - - def test_read - assert_equal([ - { - "value" => [ - @timestamp_2019_11_17_15_09_11, - nil, - @timestamp_2025_12_16_05_33_58, - ], - }, - ], - read) - end - end - - sub_test_case("Timestamp(:nanosecond)") do - def setup(&block) - @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000_000 - @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000_000 - super(&block) - end + type, values = roundtrip(array) + assert_equal([ + "Timestamp(microsecond)", + [ + timestamp_2019_11_17_15_09_11, + nil, + timestamp_2025_12_16_05_33_58, + ], + ], + [type.to_s, values]) + end - def build_array - Arrow::TimestampArray.new(:nano, + def test_timestamp_nanosecond + timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000_000 + timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000_000 + array = Arrow::TimestampArray.new(:nano, [ - @timestamp_2019_11_17_15_09_11, + timestamp_2019_11_17_15_09_11, nil, - @timestamp_2025_12_16_05_33_58, + timestamp_2025_12_16_05_33_58, ]) - end - - def test_read - assert_equal([ - { - "value" => [ - @timestamp_2019_11_17_15_09_11, - nil, - @timestamp_2025_12_16_05_33_58, - ], - }, - ], - read) - end - end + type, values = roundtrip(array) + assert_equal([ + "Timestamp(nanosecond)", + [ + timestamp_2019_11_17_15_09_11, + nil, + timestamp_2025_12_16_05_33_58, + ], + ], + [type.to_s, values]) + end - sub_test_case("Timestamp(time_zone)") do - def setup(&block) - @time_zone = "UTC" - @timestamp_2019_11_17_15_09_11 = 1574003351 - @timestamp_2025_12_16_05_33_58 = 1765863238 - super(&block) - end - - def build_array - data_type = Arrow::TimestampDataType.new(:second, @time_zone) - Arrow::TimestampArray.new(data_type, + def test_timestamp_time_zone + time_zone = "UTC" + 
timestamp_2019_11_17_15_09_11 = 1574003351 + timestamp_2025_12_16_05_33_58 = 1765863238 + data_type = Arrow::TimestampDataType.new(:second, time_zone) + array = Arrow::TimestampArray.new(data_type, [ - @timestamp_2019_11_17_15_09_11, + timestamp_2019_11_17_15_09_11, nil, - @timestamp_2025_12_16_05_33_58, + timestamp_2025_12_16_05_33_58, ]) - end - - def test_type - assert_equal([:second, @time_zone], - [type.unit, type.time_zone]) - end - end - - sub_test_case("YearMonthInterval") do - def build_array - Arrow::MonthIntervalArray.new([0, nil, 100]) - end + type, values = roundtrip(array) + assert_equal([ + "Timestamp(second, #{time_zone})", + [ + timestamp_2019_11_17_15_09_11, + nil, + timestamp_2025_12_16_05_33_58, + ], + ], + [type.to_s, values]) + end - def test_read - assert_equal([{"value" => [0, nil, 100]}], - read) - end - end + def test_year_month_interval + type, values = roundtrip(Arrow::MonthIntervalArray.new([0, nil, 100])) + assert_equal(["YearMonthInterval", [0, nil, 100]], + [type.to_s, values]) + end - sub_test_case("DayTimeInterval") do - def build_array - Arrow::DayTimeIntervalArray.new([ + def test_day_time_interval + array = Arrow::DayTimeIntervalArray.new([ {day: 1, millisecond: 100}, nil, {day: 3, millisecond: 300}, ]) - end - - def test_read - assert_equal([ - { - "value" => [ - [1, 100], - nil, - [3, 300], - ], - }, - ], - read) - end - end + type, values = roundtrip(array) + assert_equal([ + "DayTimeInterval", + [ + [1, 100], + nil, + [3, 300], + ], + ], + [type.to_s, values]) + end - sub_test_case("MonthDayNanoInterval") do - def build_array - Arrow::MonthDayNanoIntervalArray.new([ + def test_month_day_nano_interval + array = Arrow::MonthDayNanoIntervalArray.new([ { month: 1, day: 1, @@ -556,381 +349,277 @@ def build_array nanosecond: 300, }, ]) - end - - def test_read - assert_equal([ - { - "value" => [ - [1, 1, 100], - nil, - [3, 3, 300], - ], - }, - ], - read) - end - end - - sub_test_case("Duration(:second)") do - def build_array - 
Arrow::DurationArray.new(:second, [0, nil, 100]) - end - - def test_read - assert_equal([{"value" => [0, nil, 100]}], - read) - end - - def test_type - assert_equal(:second, type.unit) - end - end - - sub_test_case("Duration(:millisecond)") do - def build_array - Arrow::DurationArray.new(:milli, [0, nil, 100_000]) - end - - def test_read - assert_equal([{"value" => [0, nil, 100_000]}], - read) - end - - def test_type - assert_equal(:millisecond, type.unit) - end - end - - sub_test_case("Duration(:microsecond)") do - def build_array - Arrow::DurationArray.new(:micro, [0, nil, 100_000_000]) - end - - def test_read - assert_equal([{"value" => [0, nil, 100_000_000]}], - read) - end - - def test_type - assert_equal(:microsecond, type.unit) - end - end - - sub_test_case("Duration(:nanosecond)") do - def build_array - Arrow::DurationArray.new(:nano, [0, nil, 100_000_000_000]) - end - - def test_read - assert_equal([{"value" => [0, nil, 100_000_000_000]}], - read) - end - - def test_type - assert_equal(:nanosecond, type.unit) - end - end + type, values = roundtrip(array) + assert_equal([ + "MonthDayNanoInterval", + [ + [1, 1, 100], + nil, + [3, 3, 300], + ], + ], + [type.to_s, values]) + end - sub_test_case("Binary") do - def build_array - Arrow::BinaryArray.new(["Hello".b, nil, "World".b]) - end + def test_duration_second + type, values = roundtrip(Arrow::DurationArray.new(:second, [0, nil, 100])) + assert_equal(["Duration(second)", [0, nil, 100]], + [type.to_s, values]) + end - def test_read - assert_equal([{"value" => ["Hello".b, nil, "World".b]}], - read) - end - end + def test_duration_millisecond + array = Arrow::DurationArray.new(:milli, [0, nil, 100_000]) + type, values = roundtrip(array) + assert_equal(["Duration(millisecond)", [0, nil, 100_000]], + [type.to_s, values]) + end - sub_test_case("LargeBinary") do - def build_array - Arrow::LargeBinaryArray.new(["Hello".b, nil, "World".b]) - end + def test_duration_microsecond + array = Arrow::DurationArray.new(:micro, 
[0, nil, 100_000_000]) + type, values = roundtrip(array) + assert_equal(["Duration(microsecond)", [0, nil, 100_000_000]], + [type.to_s, values]) + end - def test_read - assert_equal([{"value" => ["Hello".b, nil, "World".b]}], - read) - end - end + def test_duration_nanosecond + array = Arrow::DurationArray.new(:nano, [0, nil, 100_000_000_000]) + type, values = roundtrip(array) + assert_equal(["Duration(nanosecond)", [0, nil, 100_000_000_000]], + [type.to_s, values]) + end - sub_test_case("UTF8") do - def build_array - Arrow::StringArray.new(["Hello", nil, "World"]) - end + def test_binary + array = Arrow::BinaryArray.new(["Hello".b, nil, "World".b]) + type, values = roundtrip(array) + assert_equal(["Binary", ["Hello".b, nil, "World".b]], + [type.to_s, values]) + end - def test_read - assert_equal([{"value" => ["Hello", nil, "World"]}], - read) - end - end + def test_large_binary + array = Arrow::LargeBinaryArray.new(["Hello".b, nil, "World".b]) + type, values = roundtrip(array) + assert_equal(["LargeBinary", ["Hello".b, nil, "World".b]], + [type.to_s, values]) + end - sub_test_case("LargeUTF8") do - def build_array - Arrow::LargeStringArray.new(["Hello", nil, "World"]) - end + def test_utf8 + array = Arrow::StringArray.new(["Hello", nil, "World"]) + type, values = roundtrip(array) + assert_equal(["UTF8", ["Hello", nil, "World"]], + [type.to_s, values]) + end - def test_read - assert_equal([{"value" => ["Hello", nil, "World"]}], - read) - end - end + def test_large_utf8 + array = Arrow::LargeStringArray.new(["Hello", nil, "World"]) + type, values = roundtrip(array) + assert_equal(["LargeUTF8", ["Hello", nil, "World"]], + [type.to_s, values]) + end - sub_test_case("FixedSizeBinary") do - def build_array - data_type = Arrow::FixedSizeBinaryDataType.new(4) - Arrow::FixedSizeBinaryArray.new(data_type, + def test_fixed_size_binary + data_type = Arrow::FixedSizeBinaryDataType.new(4) + array = Arrow::FixedSizeBinaryArray.new(data_type, ["0124".b, nil, "abcd".b]) - end - - 
def test_read - assert_equal([{"value" => ["0124".b, nil, "abcd".b]}], - read) - end - end + type, values = roundtrip(array) + assert_equal(["FixedSizeBinary(4)", ["0124".b, nil, "abcd".b]], + [type.to_s, values]) + end - sub_test_case("Decimal128") do - def build_array - @positive_small = "1.200" - @positive_large = ("1234567890" * 3) + "12345.678" - @negative_small = "-1.200" - @negative_large = "-" + ("1234567890" * 3) + "12345.678" - Arrow::Decimal128Array.new({precision: 38, scale: 3}, + def test_decimal128 + positive_small = "1.200" + positive_large = ("1234567890" * 3) + "12345.678" + negative_small = "-1.200" + negative_large = "-" + ("1234567890" * 3) + "12345.678" + array = Arrow::Decimal128Array.new({precision: 38, scale: 3}, [ - @positive_large, - @positive_small, + positive_large, + positive_small, nil, - @negative_small, - @negative_large, + negative_small, + negative_large, ]) - end - - def test_read - assert_equal([ - { - "value" => [ - BigDecimal(@positive_large), - BigDecimal(@positive_small), - nil, - BigDecimal(@negative_small), - BigDecimal(@negative_large), - ], - }, - ], - read) - end - end + type, values = roundtrip(array) + assert_equal([ + "Decimal128(38, 3)", + [ + BigDecimal(positive_large), + BigDecimal(positive_small), + nil, + BigDecimal(negative_small), + BigDecimal(negative_large), + ], + ], + [type.to_s, values]) + end - sub_test_case("Decimal256") do - def build_array - @positive_small = "1.200" - @positive_large = ("1234567890" * 7) + "123.456" - @negative_small = "-1.200" - @negative_large = "-" + ("1234567890" * 7) + "123.456" - Arrow::Decimal256Array.new({precision: 76, scale: 3}, + def test_decimal256 + positive_small = "1.200" + positive_large = ("1234567890" * 7) + "123.456" + negative_small = "-1.200" + negative_large = "-" + ("1234567890" * 7) + "123.456" + array = Arrow::Decimal256Array.new({precision: 76, scale: 3}, [ - @positive_large, - @positive_small, + positive_large, + positive_small, nil, - @negative_small, - 
@negative_large, + negative_small, + negative_large, ]) - end - - def test_read - assert_equal([ - { - "value" => [ - BigDecimal(@positive_large), - BigDecimal(@positive_small), - nil, - BigDecimal(@negative_small), - BigDecimal(@negative_large), - ], - }, - ], - read) - end - end - - sub_test_case("List") do - def build_array - data_type = Arrow::ListDataType.new(name: "count", type: :int8) - Arrow::ListArray.new(data_type, [[-128, 127], nil, [-1, 0, 1]]) - end + type, values = roundtrip(array) + assert_equal([ + "Decimal256(76, 3)", + [ + BigDecimal(positive_large), + BigDecimal(positive_small), + nil, + BigDecimal(negative_small), + BigDecimal(negative_large), + ], + ], + [type.to_s, values]) + end - def test_read - assert_equal([{"value" => [[-128, 127], nil, [-1, 0, 1]]}], - read) - end - end + def test_list + data_type = Arrow::ListDataType.new(name: "count", type: :int8) + array = Arrow::ListArray.new(data_type, [[-128, 127], nil, [-1, 0, 1]]) + type, values = roundtrip(array) + assert_equal(["List<count: Int8>", [[-128, 127], nil, [-1, 0, 1]]], + [type.to_s, values]) + end - sub_test_case("LargeList") do - def build_array - data_type = Arrow::LargeListDataType.new(name: "count", - type: :int8) - Arrow::LargeListArray.new(data_type, + def test_large_list + data_type = Arrow::LargeListDataType.new(name: "count", + type: :int8) + array = Arrow::LargeListArray.new(data_type, [[-128, 127], nil, [-1, 0, 1]]) - end - - def test_read - assert_equal([ - { - "value" => [ - [-128, 127], - nil, - [-1, 0, 1], - ], - }, - ], - read) - end - end + type, values = roundtrip(array) + assert_equal([ + "LargeList<count: Int8>", + [ + [-128, 127], + nil, + [-1, 0, 1], + ], + ], + [type.to_s, values]) + end - sub_test_case("Struct") do - def build_array - data_type = Arrow::StructDataType.new(count: :int8, - visible: :boolean) - Arrow::StructArray.new(data_type, + def test_struct + data_type = Arrow::StructDataType.new(count: :int8, + visible: :boolean) + array = Arrow::StructArray.new(data_type, [[-128,
nil], nil, [nil, true]]) - end - - def test_read - assert_equal([ - { - "value" => [ - [-128, nil], - nil, - [nil, true], - ], - }, - ], - read) - end - end + type, values = roundtrip(array) + assert_equal([ + "Struct<count: Int8, visible: Boolean>", + [ + [-128, nil], + nil, + [nil, true], + ], + ], + [type.to_s, values]) + end - sub_test_case("DenseUnion") do - def build_array - fields = [ - Arrow::Field.new("number", :int8), - Arrow::Field.new("text", :string), - ] - type_ids = [11, 13] - data_type = Arrow::DenseUnionDataType.new(fields, type_ids) - types = Arrow::Int8Array.new([11, 13, 11, 13, 13]) - value_offsets = Arrow::Int32Array.new([0, 0, 1, 1, 2]) - children = [ - Arrow::Int8Array.new([1, nil]), - Arrow::StringArray.new(["a", "b", "c"]) - ] - Arrow::DenseUnionArray.new(data_type, + def test_dense_union + fields = [ + Arrow::Field.new("number", :int8), + Arrow::Field.new("text", :string), + ] + type_ids = [11, 13] + data_type = Arrow::DenseUnionDataType.new(fields, type_ids) + types = Arrow::Int8Array.new([11, 13, 11, 13, 13]) + value_offsets = Arrow::Int32Array.new([0, 0, 1, 1, 2]) + children = [ + Arrow::Int8Array.new([1, nil]), + Arrow::StringArray.new(["a", "b", "c"]) + ] + array = Arrow::DenseUnionArray.new(data_type, types, value_offsets, children) - end - - def test_read - assert_equal([{"value" => [1, "a", nil, "b", "c"]}], - read) - end - end + type, values = roundtrip(array) + assert_equal([ + "DenseUnion<number: Int8=11, text: UTF8=13>", + [1, "a", nil, "b", "c"], + ], + [type.to_s, values]) + end - sub_test_case("SparseUnion") do - def build_array - fields = [ - Arrow::Field.new("number", :int8), - Arrow::Field.new("text", :string), - ] - type_ids = [11, 13] - data_type = Arrow::SparseUnionDataType.new(fields, type_ids) - types = Arrow::Int8Array.new([11, 13, 11, 13, 11]) - children = [ - Arrow::Int8Array.new([1, nil, nil, nil, 5]), - Arrow::StringArray.new([nil, "b", nil, "d", nil]) - ] - Arrow::SparseUnionArray.new(data_type, types, children) - end - - def test_read - assert_equal([{"value" => [1, "b",
nil, "d", 5]}], - read) - end - end + def test_sparse_union + fields = [ + Arrow::Field.new("number", :int8), + Arrow::Field.new("text", :string), + ] + type_ids = [11, 13] + data_type = Arrow::SparseUnionDataType.new(fields, type_ids) + types = Arrow::Int8Array.new([11, 13, 11, 13, 11]) + children = [ + Arrow::Int8Array.new([1, nil, nil, nil, 5]), + Arrow::StringArray.new([nil, "b", nil, "d", nil]) + ] + array = Arrow::SparseUnionArray.new(data_type, types, children) + type, values = roundtrip(array) + assert_equal([ + "SparseUnion<number: Int8=11, text: UTF8=13>", + [1, "b", nil, "d", 5], + ], + [type.to_s, values]) + end - sub_test_case("Map") do - def build_array - data_type = Arrow::MapDataType.new(:string, :int8) - Arrow::MapArray.new(data_type, + def test_map + data_type = Arrow::MapDataType.new(:string, :int8) + array = Arrow::MapArray.new(data_type, [ {"a" => -128, "b" => 127}, nil, {"c" => nil}, ]) - end - - def test_read - assert_equal([ - { - "value" => [ - {"a" => -128, "b" => 127}, - nil, - {"c" => nil}, - ], - }, - ], - read) - end - end + type, values = roundtrip(array) + assert_equal([ + "Map<UTF8, Int8>", + [ + {"a" => -128, "b" => 127}, + nil, + {"c" => nil}, + ], + ], + [type.to_s, values]) + end - sub_test_case("Dictionary") do - def build_array - values = ["a", "b", "c", nil, "a"] - string_array = Arrow::StringArray.new(values) - string_array.dictionary_encode - end - - def test_read - assert_equal([{"value" => ["a", "b", "c", nil, "a"]}], - read) - end - end - end - end + def test_dictionary + values = ["a", "b", "c", nil, "a"] + string_array = Arrow::StringArray.new(values) + array = string_array.dictionary_encode + type, values = roundtrip(array) + assert_equal([ + "Dictionary", + ["a", "b", "c", nil, "a"], + ], + [type.to_s, values]) end end class TestFileReader < Test::Unit::TestCase include ReaderTests - def setup - Dir.mktmpdir do |tmp_dir| - table = Arrow::Table.new(value: build_array) - @path = File.join(tmp_dir, "data.arrow") - table.save(@path) - File.open(@path, "rb") do
|input| - @reader = ArrowFormat::FileReader.new(input) - yield - @reader = nil - end - GC.start - end + def file_extension + "arrow" + end + + def reader_class + ArrowFormat::FileReader end end class TestStreamingReader < Test::Unit::TestCase include ReaderTests - def setup - Dir.mktmpdir do |tmp_dir| - table = Arrow::Table.new(value: build_array) - @path = File.join(tmp_dir, "data.arrows") - table.save(@path) - File.open(@path, "rb") do |input| - @reader = ArrowFormat::StreamingReader.new(input) - yield - @reader = nil - end - GC.start - end + def file_extension + "arrows" + end + + def reader_class + ArrowFormat::StreamingReader end end