From dd3ab8fea8eede12696874c3944ccad71452dc30 Mon Sep 17 00:00:00 2001 From: Shane Poppleton Date: Mon, 10 Jun 2024 07:56:06 +1000 Subject: [PATCH 1/4] Added split_once and index_of to bitarray --- src/gleam/bit_array.gleam | 33 +++++++++++++++++++++++ src/gleam_stdlib.erl | 32 ++++++++++++++++++++++- src/gleam_stdlib.mjs | 37 +++++++++++++++++++++++++- test/gleam/bit_array_test.gleam | 46 +++++++++++++++++++++++++++++++++ 4 files changed, 146 insertions(+), 2 deletions(-) diff --git a/src/gleam/bit_array.gleam b/src/gleam/bit_array.gleam index c112eb38..e5db5bbe 100644 --- a/src/gleam/bit_array.gleam +++ b/src/gleam/bit_array.gleam @@ -206,3 +206,36 @@ fn do_inspect(input: BitArray, accumulator: String) -> String { _ -> accumulator } } + +/// Finds the byte index of the first occurrence of a pattern within a bit array, or `-1` if the pattern is not found. +/// +/// ## Examples +/// +/// ```gleam +/// index_of(<<0, 1, 2, 3, 4, 5, 6, 7>>, <<3, 4, 5>>) +/// // -> 3 +/// ``` +/// +@external(erlang, "gleam_stdlib", "bit_array_index_of") +@external(javascript, "../gleam_stdlib.mjs", "bit_array_index_of") +pub fn index_of(haystack: BitArray, needle: BitArray) -> Int + +// error is returned if not found. 
+/// Splits a bit array into left and right parts at the first occurrence of the bit pattern provided; `Error(Nil)` is returned if the pattern is not found. +/// +/// ## Examples +/// +/// ```gleam +/// split_once(<<0, 1, 2, 3, 4, 5, 6, 7>>, <<3, 4, 5>>) +/// // -> Ok(#(<<0, 1, 2>>, <<6, 7>>)) +/// +/// split_once(<<0, 1, 2, 3, 4, 5, 6, 7>>, <<5, 4, 3>>) +/// // -> Error(Nil) +/// ``` +/// +@external(erlang, "gleam_stdlib", "bit_array_split_once") +@external(javascript, "../gleam_stdlib.mjs", "bit_array_split_once") +pub fn split_once( + haystack: BitArray, + needle: BitArray, +) -> Result(#(BitArray, BitArray), Nil) diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index b95d9f82..6163a1a1 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -13,7 +13,8 @@ decode_tuple5/1, decode_tuple6/1, tuple_get/2, classify_dynamic/1, print/1, println/1, print_error/1, println_error/1, inspect/1, float_to_string/1, int_from_base_string/2, utf_codepoint_list_to_string/1, contains_string/2, - crop_string/2, base16_decode/1, string_replace/3 + crop_string/2, base16_decode/1, string_replace/3, bit_array_index_of/2, + bit_array_split_once/2 ]). %% Taken from OTP's uri_string module @@ -536,3 +537,32 @@ base16_decode(String) -> string_replace(String, Pattern, Replacement) -> string:replace(String, Pattern, Replacement, all). + +bit_array_index_of(Haystack, Needle) -> + HaystackSize = byte_size(Haystack), + NeedleSize = byte_size(Needle), + bit_array_find_needle(Haystack, Needle, 0, HaystackSize, NeedleSize). + +bit_array_find_needle(_, _, Pos, HaystackSize, NeedleSize) when HaystackSize < NeedleSize + Pos -> + -1; + +bit_array_find_needle(Haystack, Needle, Pos, HaystackSize, NeedleSize) -> + case binary_part(Haystack, Pos, NeedleSize) of + Needle -> Pos; + _ -> bit_array_find_needle(Haystack, Needle, Pos + 1, HaystackSize, NeedleSize) + end. 
+ +bit_array_split_once(Haystack, Needle) -> + Index = bit_array_index_of(Haystack, Needle), + if + Index =:= -1 -> {error, nil}; + true -> + NeedleSize = byte_size(Needle), + {Left, Right} = bit_array_split_at(Haystack, Index, NeedleSize), + {ok, {Left, Right}} + end. + +bit_array_split_at(Binary, Index, NeedleSize) -> + <> = Binary, + {Left, Right}. + diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index a837e1dc..56b2448a 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -425,7 +425,7 @@ export function compile_regex(pattern, options) { export function regex_split(regex, string) { return List.fromArray( string.split(regex).map((item) => (item === undefined ? "" : item)), - ); + ); } export function regex_scan(regex, string) { @@ -864,3 +864,38 @@ export function base16_decode(string) { export function bit_array_inspect(bits) { return `<<${[...bits.buffer].join(", ")}>>`; } + +export function bit_array_index_of(haystack, needle) { + if (needle.buffer.length > haystack.buffer.length) { + return -1; + } + + for (let i = 0; i <= haystack.buffer.length - needle.buffer.length; i++) { + let found = true; + for (let j = 0; j < needle.buffer.length; j++) { + if (haystack.buffer[i + j] !== needle.buffer[j]) { + found = false; + break; + } + } + if (found) { + return i; + } + } + + return -1; +} + +export function bit_array_split_once(haystack, needle) { + const index = bit_array_index_of(haystack, needle); + + if (index === -1) { + return Error(Nil); + } + + const before = new BitArray(haystack.buffer.slice(0, index)); + const after = new BitArray(haystack.buffer.slice(index + needle.buffer.length)); + + return new Ok([before, after]); +} + diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam index 7c4a5f4d..04477d0b 100644 --- a/test/gleam/bit_array_test.gleam +++ b/test/gleam/bit_array_test.gleam @@ -308,3 +308,49 @@ pub fn inspect_partial_bytes_test() { bit_array.inspect(<<5:3, 11:4, 1:2>>) |> should.equal("<<182, 
1:size(1)>>") } + +pub fn index_of_found_test() { + <<"Hello, World":utf8>> + |> bit_array.index_of(<<", ":utf8>>) + |> should.equal(5) +} + +pub fn index_of_not_found_test() { + <<"Hello, World":utf8>> + |> bit_array.index_of(<<"Joe":utf8>>) + |> should.equal(-1) +} + +pub fn split_once_found_test() { + <<"Hello, World":utf8>> + |> bit_array.split_once(<<", ":utf8>>) + |> should.be_ok + |> should.equal(#(<<"Hello":utf8>>, <<"World":utf8>>)) +} + +pub fn split_once_empty_needle_test() { + <<"Hello, World":utf8>> + |> bit_array.split_once(<<>>) + |> should.be_ok + |> should.equal(#(<<>>, <<"Hello, World":utf8>>)) +} + +pub fn split_once_not_found_test() { + <<"Hello, World":utf8>> + |> bit_array.split_once(<<"Joe":utf8>>) + |> should.be_error +} +// @target(erlang) +// pub fn split_once_not_byte_aligned_found_test() { +// <<"Hello":utf8, 0:4, "World":utf8>> +// |> bit_array.split_once(<<0:4>>) +// |> should.be_ok +// |> should.equal(#(<<"Hello":utf8>>, <<"World":utf8>>)) +// } +// +// @target(erlang) +// pub fn split_once_not_byte_aligned_not_found_test() { +// <<"Hello":utf8, 0:4, "World":utf8>> +// |> bit_array.split_once(<<"Joe":utf8>>) +// |> should.be_error +// } From 47aee0e6f936f28ce19df207e81f9dc42772cb99 Mon Sep 17 00:00:00 2001 From: Shane Poppleton Date: Tue, 11 Jun 2024 06:50:22 +1000 Subject: [PATCH 2/4] Added erlang tests for non-byte aligned bitarrays --- src/gleam_stdlib.erl | 14 ++++++++----- src/gleam_stdlib.mjs | 7 ++++--- test/gleam/bit_array_test.gleam | 35 +++++++++++++++++++-------------- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index 6163a1a1..25b06a92 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -538,16 +538,20 @@ base16_decode(String) -> string_replace(String, Pattern, Replacement) -> string:replace(String, Pattern, Replacement, all). +extract_bits(Haystack, Pos, Len) -> + <<_:Pos, Needle:Len/bits, _/bits>> = Haystack, + Needle. 
+ bit_array_index_of(Haystack, Needle) -> - HaystackSize = byte_size(Haystack), - NeedleSize = byte_size(Needle), + HaystackSize = bit_size(Haystack), + NeedleSize = bit_size(Needle), bit_array_find_needle(Haystack, Needle, 0, HaystackSize, NeedleSize). bit_array_find_needle(_, _, Pos, HaystackSize, NeedleSize) when HaystackSize < NeedleSize + Pos -> -1; bit_array_find_needle(Haystack, Needle, Pos, HaystackSize, NeedleSize) -> - case binary_part(Haystack, Pos, NeedleSize) of + case extract_bits(Haystack, Pos, NeedleSize) of Needle -> Pos; _ -> bit_array_find_needle(Haystack, Needle, Pos + 1, HaystackSize, NeedleSize) end. @@ -557,12 +561,12 @@ bit_array_split_once(Haystack, Needle) -> if Index =:= -1 -> {error, nil}; true -> - NeedleSize = byte_size(Needle), + NeedleSize = bit_size(Needle), {Left, Right} = bit_array_split_at(Haystack, Index, NeedleSize), {ok, {Left, Right}} end. bit_array_split_at(Binary, Index, NeedleSize) -> - <> = Binary, + <> = Binary, {Left, Right}. diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index 56b2448a..abb728ec 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -879,7 +879,7 @@ export function bit_array_index_of(haystack, needle) { } } if (found) { - return i; + return i * 8; } } @@ -887,11 +887,12 @@ export function bit_array_index_of(haystack, needle) { } export function bit_array_split_once(haystack, needle) { - const index = bit_array_index_of(haystack, needle); + let index = bit_array_index_of(haystack, needle); if (index === -1) { - return Error(Nil); + return new Error(Nil); } + index = index / 8; const before = new BitArray(haystack.buffer.slice(0, index)); const after = new BitArray(haystack.buffer.slice(index + needle.buffer.length)); diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam index 04477d0b..4c9d9c88 100644 --- a/test/gleam/bit_array_test.gleam +++ b/test/gleam/bit_array_test.gleam @@ -312,7 +312,7 @@ pub fn inspect_partial_bytes_test() { pub fn 
index_of_found_test() { <<"Hello, World":utf8>> |> bit_array.index_of(<<", ":utf8>>) - |> should.equal(5) + |> should.equal(40) } pub fn index_of_not_found_test() { @@ -340,17 +340,22 @@ pub fn split_once_not_found_test() { |> bit_array.split_once(<<"Joe":utf8>>) |> should.be_error } -// @target(erlang) -// pub fn split_once_not_byte_aligned_found_test() { -// <<"Hello":utf8, 0:4, "World":utf8>> -// |> bit_array.split_once(<<0:4>>) -// |> should.be_ok -// |> should.equal(#(<<"Hello":utf8>>, <<"World":utf8>>)) -// } -// -// @target(erlang) -// pub fn split_once_not_byte_aligned_not_found_test() { -// <<"Hello":utf8, 0:4, "World":utf8>> -// |> bit_array.split_once(<<"Joe":utf8>>) -// |> should.be_error -// } + +@target(erlang) +pub fn split_once_non_byte_aligned_found_test() { + // haystack: 00000001 00000010 00000011 00000100 00000101 + // needle: 11 + // before: 00000001 00000010 000000 + // after: 00000100 00000101 + <<1, 2, 3, 4, 5>> + |> bit_array.split_once(<<3:2>>) + |> should.be_ok + |> should.equal(#(<<1, 2, 0:6>>, <<4, 5>>)) +} + +@target(erlang) +pub fn split_once_non_byte_aligned_not_found_test() { + <<1, 2, 3, 4, 5>> + |> bit_array.split_once(<<7:3>>) + |> should.be_error +} From a39a18b7a0929ee75c5734661369d0e60fba5ba0 Mon Sep 17 00:00:00 2001 From: Shane Poppleton Date: Sun, 16 Jun 2024 08:29:49 +1000 Subject: [PATCH 3/4] Remove non-byte aligned stuff, use erlangs binary:split --- src/gleam_stdlib.erl | 39 +++++++++++---------------------- src/gleam_stdlib.mjs | 3 +-- test/gleam/bit_array_test.gleam | 21 +----------------- 3 files changed, 15 insertions(+), 48 deletions(-) diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index 25b06a92..dd3dd990 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -538,35 +538,22 @@ base16_decode(String) -> string_replace(String, Pattern, Replacement) -> string:replace(String, Pattern, Replacement, all). -extract_bits(Haystack, Pos, Len) -> - <<_:Pos, Needle:Len/bits, _/bits>> = Haystack, - Needle. 
- bit_array_index_of(Haystack, Needle) -> - HaystackSize = bit_size(Haystack), - NeedleSize = bit_size(Needle), - bit_array_find_needle(Haystack, Needle, 0, HaystackSize, NeedleSize). - -bit_array_find_needle(_, _, Pos, HaystackSize, NeedleSize) when HaystackSize < NeedleSize + Pos -> - -1; - -bit_array_find_needle(Haystack, Needle, Pos, HaystackSize, NeedleSize) -> - case extract_bits(Haystack, Pos, NeedleSize) of - Needle -> Pos; - _ -> bit_array_find_needle(Haystack, Needle, Pos + 1, HaystackSize, NeedleSize) + case binary:match(Haystack, Needle) of + {Pos, _Len} -> Pos; + _ -> -1 end. bit_array_split_once(Haystack, Needle) -> - Index = bit_array_index_of(Haystack, Needle), - if - Index =:= -1 -> {error, nil}; - true -> - NeedleSize = bit_size(Needle), - {Left, Right} = bit_array_split_at(Haystack, Index, NeedleSize), - {ok, {Left, Right}} + try + case Needle of + <<>> -> {ok, {<<>>, Haystack}}; + _ -> case binary:split(Haystack, Needle) of + [Part1, Part2] -> {ok, {Part1, Part2}}; + _ -> {error, nil} + end + end + catch + error:badarg -> {error, nil} end. -bit_array_split_at(Binary, Index, NeedleSize) -> - <> = Binary, - {Left, Right}. 
- diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index abb728ec..507c0dec 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -879,7 +879,7 @@ export function bit_array_index_of(haystack, needle) { } } if (found) { - return i * 8; + return i; } } @@ -892,7 +892,6 @@ export function bit_array_split_once(haystack, needle) { if (index === -1) { return new Error(Nil); } - index = index / 8; const before = new BitArray(haystack.buffer.slice(0, index)); const after = new BitArray(haystack.buffer.slice(index + needle.buffer.length)); diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam index 4c9d9c88..f191e399 100644 --- a/test/gleam/bit_array_test.gleam +++ b/test/gleam/bit_array_test.gleam @@ -312,7 +312,7 @@ pub fn inspect_partial_bytes_test() { pub fn index_of_found_test() { <<"Hello, World":utf8>> |> bit_array.index_of(<<", ":utf8>>) - |> should.equal(40) + |> should.equal(5) } pub fn index_of_not_found_test() { @@ -340,22 +340,3 @@ pub fn split_once_not_found_test() { |> bit_array.split_once(<<"Joe":utf8>>) |> should.be_error } - -@target(erlang) -pub fn split_once_non_byte_aligned_found_test() { - // haystack: 00000001 00000010 00000011 00000100 00000101 - // needle: 11 - // before: 00000001 00000010 000000 - // after: 00000100 00000101 - <<1, 2, 3, 4, 5>> - |> bit_array.split_once(<<3:2>>) - |> should.be_ok - |> should.equal(#(<<1, 2, 0:6>>, <<4, 5>>)) -} - -@target(erlang) -pub fn split_once_non_byte_aligned_not_found_test() { - <<1, 2, 3, 4, 5>> - |> bit_array.split_once(<<7:3>>) - |> should.be_error -} From 6770f239113a06902faf7b4e03b092815a766331 Mon Sep 17 00:00:00 2001 From: Shane Poppleton Date: Mon, 24 Jun 2024 07:39:39 +1000 Subject: [PATCH 4/4] Update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11627a1d..224a4046 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## v0.39.0 - Unreleased +- Added `bit_array.split_once` to split a 
bit array at the first occurrence of a pattern. - Fixed `list.window` entering an endless recursive loop for `n` = 0. - The `min` and `max` functions of the `order` module have been deprecated. - The `dict` and `set` modules gain the `is_empty` function.