Skip to content

Commit

Permalink
add (til) PEG special
Browse files Browse the repository at this point in the history
(til sep subpattern) is a specialized (sub) that behaves like
(sub (to sep) subpattern), but advances over the input like (thru sep).
  • Loading branch information
ianthehenry committed Dec 5, 2024
1 parent 5d1bd8a commit 9529062
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 0 deletions.
46 changes: 46 additions & 0 deletions src/core/peg.c
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,42 @@ static const uint8_t *peg_rule(
return window_end;
}

/* (til terminus subpattern): locate the first position at or after `text`
 * where the terminus rule matches, match the subpattern against only the
 * input that precedes that position, and on success return the end of the
 * terminus match (so the caller resumes after the terminus). */
case RULE_TIL: {
/* rule[1] and rule[2] are offsets into the bytecode for the two sub-rules. */
const uint32_t *rule_terminus = s->bytecode + rule[1];
const uint32_t *rule_subpattern = s->bytecode + rule[2];

/* Phase 1: scan forward one byte at a time for the first terminus match. */
const uint8_t *terminus_start = text;
const uint8_t *terminus_end = NULL;
/* down1/up1 bracket the recursive peg_rule calls; they are defined
 * elsewhere (presumably recursion-depth accounting -- confirm). */
down1(s);
/* `<=` rather than `<`: the terminus is also attempted at text_end itself. */
while (terminus_start <= s->text_end) {
/* The capture state is reloaded after every attempt, successful or not,
 * so the terminus rule never contributes captures to the result. */
CapState cs2 = cap_save(s);
terminus_end = peg_rule(s, rule_terminus, terminus_start);
cap_load(s, cs2);
if (terminus_end) {
break;
}
terminus_start++;
}
up1(s);

/* The terminus did not match anywhere in the remaining input. */
if (!terminus_end) {
return NULL;
}

/* Phase 2: temporarily move text_end to where the terminus begins so the
 * subpattern cannot see the terminus or anything after it, then restore
 * the real end regardless of the outcome. */
const uint8_t *saved_end = s->text_end;
s->text_end = terminus_start;
down1(s);
const uint8_t *matched = peg_rule(s, rule_subpattern, text);
up1(s);
s->text_end = saved_end;

if (!matched) {
return NULL;
}

/* How far the subpattern consumed is ignored; the overall match always
 * ends at the end of the terminus. */
return terminus_end;
}

case RULE_SPLIT: {
const uint8_t *saved_end = s->text_end;
const uint32_t *rule_separator = s->bytecode + rule[1];
Expand Down Expand Up @@ -1227,6 +1263,14 @@ static void spec_sub(Builder *b, int32_t argc, const Janet *argv) {
emit_2(r, RULE_SUB, subrule1, subrule2);
}

/* Compile the (til terminus subpattern) special form.
 *
 * Requires exactly two arguments: argv[0] is the terminus pattern and
 * argv[1] is the subpattern. Three words are reserved for the rule
 * (presumably the opcode plus the two rule operands, matching the
 * [rule, rule] layout noted for RULE_TIL), both arguments are compiled,
 * and the RULE_TIL instruction is emitted into the reserved space. */
static void spec_til(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 2);

    /* The reservation is made before the operands are compiled, exactly as
     * in the original statement order. */
    Reserve slot = reserve(b, 3);

    const uint32_t terminus_rule = peg_compile1(b, argv[0]);
    const uint32_t subpattern_rule = peg_compile1(b, argv[1]);

    emit_2(slot, RULE_TIL, terminus_rule, subpattern_rule);
}

static void spec_split(Builder *b, int32_t argc, const Janet *argv) {
peg_fixarity(b, argc, 2);
Reserve r = reserve(b, 3);
Expand Down Expand Up @@ -1323,6 +1367,7 @@ static const SpecialPair peg_specials[] = {
{"split", spec_split},
{"sub", spec_sub},
{"thru", spec_thru},
{"til", spec_til},
{"to", spec_to},
{"uint", spec_uint_le},
{"uint-be", spec_uint_be},
Expand Down Expand Up @@ -1657,6 +1702,7 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
i += 4;
break;
case RULE_SUB:
case RULE_TIL:
case RULE_SPLIT:
/* [rule, rule] */
if (rule[1] >= blen) goto bad;
Expand Down
1 change: 1 addition & 0 deletions src/include/janet.h
Original file line number Diff line number Diff line change
Expand Up @@ -2180,6 +2180,7 @@ typedef enum {
RULE_UNREF, /* [rule, tag] */
RULE_CAPTURE_NUM, /* [rule, tag] */
RULE_SUB, /* [rule, rule] */
RULE_TIL, /* [rule, rule] */
RULE_SPLIT, /* [rule, rule] */
RULE_NTH, /* [nth, rule, tag] */
RULE_ONLY_TAGS, /* [rule] */
Expand Down
35 changes: 35 additions & 0 deletions test/suite-peg.janet
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,41 @@
"abcdef"
@[])

# Tests for the (til sep subpattern) PEG special. The first argument is the
# terminus that is searched for; the second is matched against only the text
# that precedes the terminus; on success the match ends after the terminus.

# "abc" is matched against the text before the first "d"; nothing is captured.
(test "til: basic matching"
~(til "d" "abc")
"abcdef"
@[])

# -1 (end of input) succeeds right after "abc" because the subpattern's view of
# the input stops where "d" begins.
(test "til: second pattern can't see past the first occurrence of first pattern"
~(til "d" (* "abc" -1))
"abcdef"
@[])

# There is no "x" anywhere in the input, so no terminus is found.
(test "til: fails if first pattern fails"
~(til "x" "abc")
"abcdef"
nil)

# The terminus "abc" matches at the very start, leaving an empty window in
# which "x" cannot match.
(test "til: fails if second pattern fails"
~(til "abc" "x")
"abcdef"
nil)

# Only the subpattern's capture ("abc") survives; the quoted terminus '"d" does
# not contribute a capture.
(test "til: discards captures from initial pattern"
~(til '"d" '"abc")
"abcdef"
@["abc"])

# The terminus 0 matches immediately, so the subpattern runs on an empty window
# located after "one\ntw"; the expected captures show the reported position,
# line and column (6, 2, 3) are measured against the whole input, not the window.
(test "til: positions inside second match are still relative to the entire input"
~(* "one\ntw" (til 0 (* ($) (line) (column))))
"one\ntwo\nthree\n"
@[6 2 3])

# The subpattern only consumes "ab", yet the following "e" matches: til resumes
# after the terminus "d", not where the subpattern stopped.
(test "til: advances to the end of the first pattern's first occurrence"
~(* (til "d" "ab") "e")
"abcdef"
@[])

(test "split: basic functionality"
~(split "," '1)
"a,b,c"
Expand Down

0 comments on commit 9529062

Please sign in to comment.