From e023f341ed55713c3d0459c7c69547ae48286ebc Mon Sep 17 00:00:00 2001
From: Renata Amutio Herrero
Date: Thu, 25 Apr 2024 21:21:26 +0200
Subject: [PATCH] cleanup

---
 src/gleam/bit_array.gleam       | 19 +++++++++++++++++
 src/gleam_stdlib.erl            | 11 +++++++++-
 src/gleam_stdlib.mjs            | 37 +++++++++++++++++++++++++++++++++
 test/gleam/bit_array_test.gleam | 36 ++++++++++++++++++++++++++++++++
 4 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/src/gleam/bit_array.gleam b/src/gleam/bit_array.gleam
index c112eb38..29f6e098 100644
--- a/src/gleam/bit_array.gleam
+++ b/src/gleam/bit_array.gleam
@@ -45,6 +45,25 @@ pub fn slice(
   take length: Int,
 ) -> Result(BitArray, Nil)
 
+/// Splits a bit array into the segments found between occurrences of the
+/// given subpattern. The subpattern itself is not part of any segment.
+///
+/// Returns `Error(Nil)` when the subpattern is empty and the bit array is not.
+///
+/// ## Examples
+///
+/// ```gleam
+/// split(<<"ABaABabABc":utf8>>, on: <<"AB":utf8>>)
+/// // -> Ok([<<>>, <<"a":utf8>>, <<"ab":utf8>>, <<"c":utf8>>])
+/// ```
+///
+@external(erlang, "gleam_stdlib", "bit_array_split")
+@external(javascript, "../gleam_stdlib.mjs", "bit_array_split")
+pub fn split(
+  bits: BitArray,
+  on subpattern: BitArray,
+) -> Result(List(BitArray), Nil)
+
 /// Tests to see whether a bit array is valid UTF-8.
 ///
 pub fn is_utf8(bits: BitArray) -> Bool {
diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl
index b7d32ffa..36a521cb 100644
--- a/src/gleam_stdlib.erl
+++ b/src/gleam_stdlib.erl
@@ -13,7 +13,7 @@
     decode_tuple5/1, decode_tuple6/1, tuple_get/2, classify_dynamic/1, print/1,
     println/1, print_error/1, println_error/1, inspect/1, float_to_string/1,
     int_from_base_string/2, utf_codepoint_list_to_string/1, contains_string/2,
-    crop_string/2, base16_decode/1, string_replace/3
+    crop_string/2, base16_decode/1, string_replace/3, bit_array_split/2
 ]).
 
 %% Taken from OTP's uri_string module
@@ -206,6 +206,15 @@ bit_array_slice(Bin, Pos, Len) ->
     catch error:badarg -> {error, nil}
     end.
 
+%% Splits Bin on every occurrence of Sub. A badarg raised by binary:split/3
+%% is reported to the caller as {error, nil}.
+bit_array_split(Bin, Sub) ->
+    try binary:split(Bin, Sub, [global]) of
+        Parts -> {ok, Parts}
+    catch
+        error:badarg -> {error, nil}
+    end.
+
 bit_array_int_to_u32(I) when 0 =< I, I < 4294967296 ->
     {ok, <<I:32>>};
 bit_array_int_to_u32(_) ->
diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs
index d921c63e..bf20479a 100644
--- a/src/gleam_stdlib.mjs
+++ b/src/gleam_stdlib.mjs
@@ -366,6 +366,56 @@ export function bit_array_slice(bits, position, length) {
   return new Ok(new BitArray(buffer));
 }
 
+/**
+ * Splits `bits` on every non-overlapping occurrence of `subpattern`, scanning
+ * from left to right.
+ *
+ * Returns `Ok` with the list of segments between matches (empty segments are
+ * kept), or `Error(Nil)` when `subpattern` is empty and `bits` is not. Two
+ * empty arguments yield a single empty segment.
+ */
+export function bit_array_split(bits, subpattern) {
+  const haystack = bits.buffer;
+  const needle = subpattern.buffer;
+
+  if (needle.length === 0) {
+    if (haystack.length === 0) {
+      return new Ok(List.fromArray([new BitArray(new Uint8Array(0))]));
+    }
+    return new Error(Nil);
+  }
+
+  const slices = [];
+  let segmentStart = 0;
+  let i = 0;
+  while (i + needle.length <= haystack.length) {
+    // Compare the whole subpattern at this offset. Carrying a partial-match
+    // counter across mismatches would accept scattered bytes ("AxB" for "AB")
+    // and miss matches that begin inside a failed attempt ("AAB" for "AB").
+    let matched = true;
+    for (let j = 0; j < needle.length; j++) {
+      if (haystack[i + j] !== needle[j]) {
+        matched = false;
+        break;
+      }
+    }
+
+    if (matched) {
+      // `subarray` is relative to the view, so it stays correct when `bits`
+      // is itself a window at a non-zero offset into a larger ArrayBuffer.
+      slices.push(new BitArray(haystack.subarray(segmentStart, i)));
+      i += needle.length;
+      segmentStart = i;
+    } else {
+      i++;
+    }
+  }
+  // Everything after the last match; empty when the input ends with a match.
+  slices.push(new BitArray(haystack.subarray(segmentStart)));
+
+  return new Ok(List.fromArray(slices));
+}
+
 export function codepoint(int) {
   return new UtfCodepoint(int);
 }
diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam
index bf38aae3..980203ef 100644
--- a/test/gleam/bit_array_test.gleam
+++ b/test/gleam/bit_array_test.gleam
@@ -302,3 +302,58 @@ pub fn inspect_partial_bytes_test() {
   bit_array.inspect(<<5:3, 11:4, 1:2>>)
   |> should.equal("<<182, 1:size(1)>>")
 }
+
+pub fn split_test() {
+  bit_array.split(<<"ABaABabABc":utf8>>, <<"AB":utf8>>)
+  |> should.be_ok
+  |> should.equal([<<"":utf8>>, <<"a":utf8>>, <<"ab":utf8>>, <<"c":utf8>>])
+}
+
+pub fn split_ends_in_pattern_test() {
+  bit_array.split(<<"ABaABabAB":utf8>>, <<"AB":utf8>>)
+  |> should.be_ok
+  |> should.equal([<<"":utf8>>, <<"a":utf8>>, <<"ab":utf8>>, <<"":utf8>>])
+}
+
+pub fn split_empty_subpattern_test() {
+  bit_array.split(<<"ABC":utf8>>, <<>>)
+  |> should.be_error
+  |> should.equal(Nil)
+}
+
+pub fn split_same_pattern_test() {
+  bit_array.split(<<"ABC":utf8>>, <<"ABC":utf8>>)
+  |> should.be_ok
+  |> should.equal([<<"":utf8>>, <<"":utf8>>])
+}
+
+pub fn split_empty_bit_array_with_empty_subpattern_test() {
+  bit_array.split(<<>>, <<>>)
+  |> should.be_ok
+  |> should.equal([<<>>])
+}
+
+pub fn split_empty_bit_array_with_pattern_test() {
+  bit_array.split(<<>>, <<"ABC":utf8>>)
+  |> should.be_ok
+  |> should.equal([<<"":utf8>>])
+}
+
+pub fn split_partial_match_test() {
+  bit_array.split(<<"AxB":utf8>>, <<"AB":utf8>>)
+  |> should.be_ok
+  |> should.equal([<<"AxB":utf8>>])
+}
+
+pub fn split_overlapping_prefix_test() {
+  bit_array.split(<<"AAB":utf8>>, <<"AB":utf8>>)
+  |> should.be_ok
+  |> should.equal([<<"A":utf8>>, <<"":utf8>>])
+}
+
+pub fn split_sliced_bit_array_test() {
+  let assert Ok(sliced) = bit_array.slice(<<"xxA,B":utf8>>, 2, 3)
+  bit_array.split(sliced, <<",":utf8>>)
+  |> should.be_ok
+  |> should.equal([<<"A":utf8>>, <<"B":utf8>>])
+}