diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb index c648e5b6313..5f03012802f 100644 --- a/ruby/red-arrow-format/lib/arrow-format/type.rb +++ b/ruby/red-arrow-format/lib/arrow-format/type.rb @@ -555,6 +555,10 @@ def build_array(size, validity_buffer, offsets_buffer, values_buffer) offsets_buffer, values_buffer) end + + def to_flatbuffers + FB::LargeUtf8::Data.new + end end class FixedSizeBinaryType < Type diff --git a/ruby/red-arrow-format/test/test-writer.rb b/ruby/red-arrow-format/test/test-writer.rb index 24a49b3777f..2b5bcef13d7 100644 --- a/ruby/red-arrow-format/test/test-writer.rb +++ b/ruby/red-arrow-format/test/test-writer.rb @@ -48,6 +48,8 @@ def convert_type(red_arrow_type) ArrowFormat::LargeBinaryType.singleton when Arrow::StringDataType ArrowFormat::UTF8Type.singleton + when Arrow::LargeStringDataType + ArrowFormat::LargeUTF8Type.singleton else raise "Unsupported type: #{red_arrow_type.inspect}" end @@ -252,6 +254,17 @@ def test_write @values) end end + + sub_test_case("LargeString") do + def build_array + Arrow::LargeStringArray.new(["Hello", nil, "World"]) + end + + def test_write + assert_equal(["Hello", nil, "World"], + @values) + end + end end end end diff --git a/ruby/red-arrow/ext/arrow/converters.hpp b/ruby/red-arrow/ext/arrow/converters.hpp index 9525700eba9..6a1ceb20b84 100644 --- a/ruby/red-arrow/ext/arrow/converters.hpp +++ b/ruby/red-arrow/ext/arrow/converters.hpp @@ -175,6 +175,14 @@ namespace red_arrow { length); } + inline VALUE convert(const arrow::LargeStringArray& array, + const int64_t i) { + int64_t length; + const auto value = array.GetValue(i, &length); + return rb_utf8_str_new(reinterpret_cast(value), + length); + } + inline VALUE convert(const arrow::FixedSizeBinaryArray& array, const int64_t i) { return rb_enc_str_new(reinterpret_cast(array.Value(i)), diff --git a/ruby/red-arrow/ext/arrow/raw-records.cpp b/ruby/red-arrow/ext/arrow/raw-records.cpp index 25a95379efc..67f1dab13ed 100644 --- a/ruby/red-arrow/ext/arrow/raw-records.cpp +++ b/ruby/red-arrow/ext/arrow/raw-records.cpp @@ -90,6 +90,7 @@ namespace red_arrow { VISIT(Binary) VISIT(LargeBinary) VISIT(String) + VISIT(LargeString) VISIT(FixedSizeBinary) VISIT(Date32) VISIT(Date64) @@ -227,6 +228,7 @@ namespace red_arrow { VISIT(Binary) VISIT(LargeBinary) VISIT(String) + VISIT(LargeString) VISIT(FixedSizeBinary) VISIT(Date32) VISIT(Date64) diff --git a/ruby/red-arrow/ext/arrow/values.cpp b/ruby/red-arrow/ext/arrow/values.cpp index 783cdb3d7d3..9a26baf1d59 100644 --- a/ruby/red-arrow/ext/arrow/values.cpp +++ b/ruby/red-arrow/ext/arrow/values.cpp @@ -71,6 +71,7 @@ namespace red_arrow { VISIT(Binary) VISIT(LargeBinary) VISIT(String) + VISIT(LargeString) VISIT(FixedSizeBinary) VISIT(Date32) VISIT(Date64) diff --git a/ruby/red-arrow/test/raw-records/test-basic-arrays.rb b/ruby/red-arrow/test/raw-records/test-basic-arrays.rb index f09b2e8b714..1c21a493c55 100644 --- a/ruby/red-arrow/test/raw-records/test-basic-arrays.rb +++ b/ruby/red-arrow/test/raw-records/test-basic-arrays.rb @@ -177,6 +177,16 @@ def test_string assert_equal(records, actual_records(target)) end + def test_large_string + records = [ + ["Ruby"], + [nil], + ["\u3042"], # U+3042 HIRAGANA LETTER A + ] + target = build({column: :large_string}, records) + assert_equal(records, actual_records(target)) + end + def test_date32 records = [ [Date.new(1960, 1, 1)], diff --git a/ruby/red-arrow/test/values/test-basic-arrays.rb b/ruby/red-arrow/test/values/test-basic-arrays.rb index ed96a61bd07..ddaaa3db64f 100644 --- a/ruby/red-arrow/test/values/test-basic-arrays.rb +++ b/ruby/red-arrow/test/values/test-basic-arrays.rb @@ -167,6 +167,16 @@ def test_string assert_equal(values, target.values) end + def test_large_string + values = [ + "Ruby", + nil, + "\u3042", # U+3042 HIRAGANA LETTER A + ] + target = build(Arrow::LargeStringArray.new(values)) + assert_equal(values, target.values) + end + def test_date32 values = [ Date.new(1960, 1, 1),