Skip to content

Commit 14b8688

Browse files
committed
Add bit_array.split, rework tests for split_once
1 parent 8536d3c commit 14b8688

File tree

5 files changed

+143
-12
lines changed

5 files changed

+143
-12
lines changed

Diff for: CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## Unreleased
44

5+
- The `bit_array` module gains the `split` and `split_once` functions.
56
- The deprecated `drop_left`, `drop_right`, `pad_left`, `pad_right`,
67
`trim_left`, and `trim_right` functions have been removed.
78
- Fixed a bug that would result in `list.unique` having quadratic runtime.

Diff for: src/gleam/bit_array.gleam

+28
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,16 @@ pub fn slice(
6464
/// The result will not include the pattern, and returns an error if the
6565
/// pattern is not found.
6666
///
67+
/// This function runs in linear time.
68+
///
6769
/// ## Examples
6870
///
6971
/// ```gleam
7072
/// split_once(from: <<1, 2, 3>>, on: <<2>>)
7173
/// // -> Ok(#(<<1>>, <<3>>))
74+
///
75+
/// split_once(from: <<0>>, on: <<1>>)
76+
/// // -> Error(Nil)
7277
/// ```
7378
@external(erlang, "gleam_stdlib", "bit_array_split_once")
7479
@external(javascript, "../gleam_stdlib.mjs", "bit_array_split_once")
@@ -77,6 +82,29 @@ pub fn split_once(
7782
on pattern: BitArray,
7883
) -> Result(#(BitArray, BitArray), Nil)
7984

85+
/// Splits a bit array into parts at the locations of the pattern.
86+
///
87+
/// The result will not include the pattern or any empty parts. If the
88+
/// pattern is not found the result contains only the original bit array.
/// An error is returned if the pattern is empty or if the bit array is not
/// a whole number of bytes.
89+
///
90+
/// This function runs in linear time.
91+
///
92+
/// ## Examples
93+
///
94+
/// ```gleam
95+
/// split(from: <<0, 1, 0, 2, 0, 3>>, on: <<0>>)
96+
/// // -> Ok([<<1>>, <<2>>, <<3>>])
97+
///
98+
/// split(from: <<0>>, on: <<1>>)
99+
/// // -> Ok([<<0>>])
100+
/// ```
101+
@external(erlang, "gleam_stdlib", "bit_array_split")
102+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_split")
103+
pub fn split(
104+
from bits: BitArray,
105+
on pattern: BitArray,
106+
) -> Result(List(BitArray), Nil)
107+
80108
/// Tests to see whether a bit array is valid UTF-8.
81109
///
82110
pub fn is_utf8(bits: BitArray) -> Bool {

Diff for: src/gleam_stdlib.erl

+8-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
inspect/1, float_to_string/1, int_from_base_string/2,
1515
utf_codepoint_list_to_string/1, contains_string/2, crop_string/2,
1616
base16_encode/1, base16_decode/1, string_replace/3, slice/3,
17-
bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1, bit_array_split_once/2
17+
bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1, bit_array_split_once/2,
18+
bit_array_split/2
1819
]).
1920

2021
%% Taken from OTP's uri_string module
@@ -235,12 +236,17 @@ bit_array_split_once(Bin, Sub) ->
235236
try
236237
case binary:split(Bin, [Sub]) of
237238
[<<>>, <<>>] -> {error, nil};
238-
[Part1, Part2] -> {ok, {Part1, Part2}};
239+
[A, B] -> {ok, {A, B}};
239240
_ -> {error, nil}
240241
end
241242
catch error:badarg -> {error, nil}
242243
end.
243244

245+
%% Splits Bin at every occurrence of Sub and returns {ok, Parts}.
%% The `global` option splits on all occurrences rather than only the first,
%% and `trim_all` drops every empty part from the result.
%% A badarg raised by binary:split/3 is reported as {error, nil}
%% (presumably for an empty pattern or a non byte-aligned bitstring; the
%% Gleam tests expect Error(Nil) for both).
bit_array_split(Bin, Sub) ->
246+
try {ok, binary:split(Bin, [Sub], [global, trim_all])}
247+
catch error:badarg -> {error, nil}
248+
end.
249+
244250
base_decode64(S) ->
245251
try {ok, base64:decode(S)}
246252
catch error:_ -> {error, nil}

Diff for: src/gleam_stdlib.mjs

+50-5
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import { DecodeError } from "./gleam/dynamic.mjs";
1414
import { Some, None } from "./gleam/option.mjs";
1515
import { Eq, Gt, Lt } from "./gleam/order.mjs";
1616
import Dict from "./dict.mjs";
17+
import { Buffer } from 'node:buffer';
1718

1819
const Nil = undefined;
1920
const NOT_FOUND = {};
@@ -427,16 +428,19 @@ export function bit_array_slice(bits, position, length) {
427428

428429
export function bit_array_split_once(bits, pattern) {
429430
try {
430-
if (!(bits instanceof BitArray) || !(pattern instanceof BitArray) || pattern.buffer.length < 1 || pattern.buffer.length >= bits.buffer.length) {
431+
if (!(bits instanceof BitArray)
432+
|| !(pattern instanceof BitArray)
433+
|| pattern.buffer.length < 1
434+
|| pattern.buffer.length >= bits.buffer.length) {
431435
return new Error(Nil);
432436
}
433437

434-
let i = 0;
435438
const n = bits.buffer.length - pattern.buffer.length + 1;
436-
437-
find: for (; i < n; i++) {
439+
find: for (let i = 0; i < n; i++) {
438440
for (let j = 0; j < pattern.buffer.length; j++) {
439-
if (bits.buffer[i + j] !== pattern.buffer[j]) continue find;
441+
if (bits.buffer[i + j] !== pattern.buffer[j]) {
442+
continue find;
443+
}
440444
}
441445
const before = bits.buffer.slice(0, i);
442446
const after = bits.buffer.slice(i + pattern.buffer.length);
@@ -449,6 +453,47 @@ export function bit_array_split_once(bits, pattern) {
449453
}
450454
}
451455

456+
/**
 * Splits `bits` at every occurrence of `pattern`.
 *
 * The pattern itself and all empty parts (leading, trailing, or between
 * adjacent matches) are left out of the result, mirroring the Erlang
 * implementation which calls `binary:split/3` with `[global, trim_all]`.
 *
 * @param bits    The `BitArray` to split.
 * @param pattern The `BitArray` to split on; must not be empty.
 * @returns `Ok(List(BitArray))` with the non-empty parts in order, or
 *          `Error(Nil)` when either argument is not a `BitArray` or the
 *          pattern is empty.
 */
export function bit_array_split(bits, pattern) {
  try {
    // Check the types before touching `.buffer`, so that invalid arguments
    // are rejected explicitly rather than through the catch-all below.
    if (!(bits instanceof BitArray) || !(pattern instanceof BitArray)) {
      return new Error(Nil);
    }

    const haystack = bits.buffer;
    const needle = pattern.buffer;
    if (needle.length < 1) {
      return new Error(Nil);
    }

    const parts = [];
    let partStart = 0;
    // Last index at which the whole pattern still fits; negative when the
    // pattern is longer than the input, in which case the scan is skipped.
    const lastCandidate = haystack.length - needle.length;

    find: for (let i = 0; i <= lastCandidate; i++) {
      for (let j = 0; j < needle.length; j++) {
        if (haystack[i + j] !== needle[j]) {
          continue find;
        }
      }
      // Only keep the bytes before this match when there are any, so that
      // leading and adjacent matches never produce empty parts.
      if (i > partStart) {
        parts.push(new BitArray(haystack.slice(partStart, i)));
      }
      partStart = i + needle.length;
      // Resume right after the match (the loop's i++ lands on partStart).
      i = partStart - 1;
    }

    // Whatever follows the final match; this is the whole input when the
    // pattern never matched, and nothing when the input ends with a match.
    if (partStart < haystack.length) {
      parts.push(new BitArray(haystack.slice(partStart)));
    }

    // No fallback to `[bits]` here: an input made up solely of pattern
    // occurrences (e.g. <<0, 0>> split on <<0>>) must yield an empty list.
    return new Ok(List.fromArray(parts));
  } catch (e) {
    return new Error(Nil);
  }
}
496+
452497
// Wraps the given integer in a new `UtfCodepoint` instance.
export function codepoint(int) {
453498
return new UtfCodepoint(int);
454499
}

Diff for: test/gleam/bit_array_test.gleam

+56-5
Original file line numberDiff line numberDiff line change
@@ -215,16 +215,24 @@ pub fn split_once_test() {
215215
|> bit_array.split_once(<<"h":utf8>>)
216216
|> should.equal(Ok(#(<<>>, <<"ello":utf8>>)))
217217

218-
<<"hello":utf8>>
218+
<<0, 1, 0, 2, 0, 3>>
219+
|> bit_array.split_once(<<0, 2>>)
220+
|> should.equal(Ok(#(<<0, 1>>, <<0, 3>>)))
221+
222+
<<0, 1, 2, 0, 3, 4, 5>>
223+
|> bit_array.split_once(<<>>)
224+
|> should.equal(Error(Nil))
225+
226+
<<>>
219227
|> bit_array.split_once(<<1>>)
220228
|> should.equal(Error(Nil))
221229

222-
<<"hello":utf8>>
223-
|> bit_array.split_once(<<"":utf8>>)
230+
<<1>>
231+
|> bit_array.split_once(<<1>>)
224232
|> should.equal(Error(Nil))
225233

226-
<<"hello":utf8>>
227-
|> bit_array.split_once(<<"hello":utf8>>)
234+
<<0>>
235+
|> bit_array.split_once(<<1>>)
228236
|> should.equal(Error(Nil))
229237
}
230238

@@ -237,6 +245,49 @@ pub fn split_once_erlang_only_test() {
237245
|> should.equal(Error(Nil))
238246
}
239247

248+
pub fn split_test() {
249+
// Adjacent occurrences of the pattern ("ll") do not produce an empty part.
<<"hello":utf8>>
250+
|> bit_array.split(<<"l":utf8>>)
251+
|> should.equal(Ok([<<"he":utf8>>, <<"o":utf8>>]))
252+
253+
// A match at the very start does not produce an empty leading part.
<<0, 1, 0, 2, 0, 3>>
254+
|> bit_array.split(<<0>>)
255+
|> should.equal(Ok([<<1>>, <<2>>, <<3>>]))
256+
257+
// A match at the very end does not produce an empty trailing part.
<<1, 0>>
258+
|> bit_array.split(<<0>>)
259+
|> should.equal(Ok([<<1>>]))
260+
261+
// Multi-byte pattern.
<<0, 1, 0, 2, 0, 3>>
262+
|> bit_array.split(<<0, 2>>)
263+
|> should.equal(Ok([<<0, 1>>, <<0, 3>>]))
264+
265+
// Pattern not found: the whole input comes back as the only part.
<<1>>
266+
|> bit_array.split(<<0>>)
267+
|> should.equal(Ok([<<1>>]))
268+
269+
// Input equal to the pattern: nothing is left, so the list is empty.
<<1>>
270+
|> bit_array.split(<<1>>)
271+
|> should.equal(Ok([]))
272+
273+
// Empty input: the list is empty.
<<>>
274+
|> bit_array.split(<<1>>)
275+
|> should.equal(Ok([]))
276+
277+
// Empty pattern: rejected with an error.
<<0, 1, 2, 0, 3, 4, 5>>
278+
|> bit_array.split(<<>>)
279+
|> should.equal(Error(Nil))
280+
}
281+
282+
// This test is target specific since it's using non byte-aligned BitArrays
283+
// and those are not supported on the JavaScript target.
284+
@target(erlang)
285+
pub fn split_erlang_only_test() {
286+
// The trailing `2:7` segment makes the subject 23 bits long (8 + 8 + 7),
// so it is not a whole number of bytes and the split is expected to fail.
<<0, 1, 2:7>>
287+
|> bit_array.split(<<1>>)
288+
|> should.equal(Error(Nil))
289+
}
290+
240291
pub fn to_string_test() {
241292
<<>>
242293
|> bit_array.to_string

0 commit comments

Comments
 (0)