Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ jobs:
path: dist/plugins
retention-days: "7"
build-plugins-fern:
name: "Plugins (fern): agda, commonlisp, elixir, elm, erlang, gleam, haskell, idris, lean, ocaml, scheme"
name: "Plugins (fern): agda, commonlisp, elixir, elm, erlang, gleam, haskell, idris, lean, ocaml, regex, scheme"
runs-on: depot-ubuntu-24.04-32
container: "ghcr.io/bearcove/arborium-plugin-builder:latest"
needs:
Expand All @@ -415,10 +415,10 @@ jobs:
set -e
tar -xf generate-output.tar && rm generate-output.tar
shell: bash
- name: Build agda, commonlisp, elixir, elm, erlang, gleam, haskell, idris, lean, ocaml, scheme
- name: Build agda, commonlisp, elixir, elm, erlang, gleam, haskell, idris, lean, ocaml, regex, scheme
run: |-
set -e
./xtask/target/release/xtask build agda commonlisp elixir elm erlang gleam haskell idris lean ocaml scheme -o dist/plugins
./xtask/target/release/xtask build agda commonlisp elixir elm erlang gleam haskell idris lean ocaml regex scheme -o dist/plugins
shell: bash
- name: Upload plugins artifact
uses: actions/upload-artifact@v4
Expand Down
20 changes: 20 additions & 0 deletions langs/group-fern/regex/def/arborium.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Language definition for the regex grammar.
# Source repository, pinned commit and license recorded for this grammar.
repo: https://github.com/tree-sitter/tree-sitter-regex
commit: b2ac15e27fce703d2f37a79ccd94a5c0cbe9720b
license: MIT

# Grammars provided by this definition (a single one, with id "regex").
grammars:
- id: regex
name: Regular Expression
# NOTE(review): the meaning of `tag` and `tier` is defined by the consuming tooling — confirm there.
tag: code
tier: 1
# NOTE(review): presumably false because the grammar ships no external scanner — confirm.
has_scanner: false

# Descriptive metadata about the language.
inventor: Stephen Cole Kleene
year: 1951
description: A regular expression is a sequence of characters that specifies a match pattern in text.
link: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions

# Sample files, each with the origin link and license recorded for its content.
samples:
- path: samples/example.regex
link: https://en.wikipedia.org/wiki/Regular_expression
license: CC-BY-SA-4.0
261 changes: 261 additions & 0 deletions langs/group-fern/regex/def/grammar/grammar.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
/**
* @file Regex grammar for tree-sitter
* @author Max Brunsfeld <maxbrunsfeld@gmail.com>
* @author Amaan Qureshi <amaanq12@gmail.com>
* @license MIT
*/

/// <reference types="tree-sitter-cli/dsl" />
// @ts-check

/**
 * Wraps a quantifier so that it may be followed by a lazy marker.
 *
 * The resulting rule is the quantifier produced by `prefix`, followed by an
 * optional `?` token that is aliased to the `lazy` node.
 *
 * @param {RuleBuilder<string>} prefix - Builds the quantifier itself; it is
 *   called with the grammar's rule set.
 *
 * @returns {RuleBuilder<string>} Builds the quantifier plus its optional
 *   lazy suffix.
 */
const quantifierRule = prefix => $ => {
  const quantifier = prefix($);
  const lazySuffix = optional(alias('?', $.lazy));
  return seq(quantifier, lazySuffix);
};

// Characters listed here are excluded from `pattern_character` in the grammar.
const SYNTAX_CHARS = Array.from('^$\\.*+?()[]|');

// The same characters, each preceded by a backslash and concatenated, ready
// to be embedded inside a RegExp character class.
const SYNTAX_CHARS_ESCAPED = SYNTAX_CHARS.reduce(
  (escaped, char) => `${escaped}\\${char}`,
  '',
);

// Tree-sitter grammar definition for regular-expression patterns.
module.exports = grammar({
name: 'regex',

// Line breaks (LF or CRLF) are extras, so they may appear between tokens.
extras: _ => [/\r?\n/],

// Rules listed here are replaced by their definitions wherever they are used,
// so they never appear as nodes of their own.
inline: $ => [
$._character_escape,
$._class_atom,
],

// Declares the class_range / character_class ambiguity as an expected conflict.
conflicts: $ => [[$.class_range, $.character_class]],

rules: {
// Entry rule (listed first): either an alternation or a single term.
pattern: $ => choice(
$.alternation,
$.term,
),

// One or more `|` separators; the term on either side of each `|` is
// optional, so empty alternatives are accepted.
alternation: $ => seq(
optional($.term),
repeat1(seq('|', optional($.term))),
),

// A non-empty sequence of atoms, each optionally followed by one quantifier.
// Assertions are part of the atom choice, so the grammar also accepts a
// quantifier after them.
term: $ => repeat1(seq(
choice(
$.start_assertion,
$.end_assertion,
$.boundary_assertion,
$.non_boundary_assertion,
$.lookaround_assertion,
$.pattern_character,
$.character_class,
$.posix_character_class,
$.any_character,
$.decimal_escape,
$.character_class_escape,
$._character_escape,
$.backreference_escape,
$.named_group_backreference,
$.anonymous_capturing_group,
$.named_capturing_group,
$.non_capturing_group,
$.inline_flags_group,
),
optional(choice(
$.zero_or_more,
$.one_or_more,
$.optional,
$.count_quantifier,
)),
)),

// The `.` wildcard.
any_character: _ => '.',

// Zero-width assertions: `^`, `$`, `\b` and `\B`.
start_assertion: _ => '^',
end_assertion: _ => '$',
boundary_assertion: _ => '\\b',
non_boundary_assertion: _ => '\\B',
// Lookahead or lookbehind; both hidden variants surface as this one node.
lookaround_assertion: $ => choice(
$._lookahead_assertion,
$._lookbehind_assertion,
),
// `(?=pattern)` or `(?!pattern)`.
_lookahead_assertion: $ => seq(
'(?',
choice('=', '!'),
$.pattern,
')',
),
// `(?<=pattern)` or `(?<!pattern)`.
_lookbehind_assertion: $ => seq(
'(?<',
choice('=', '!'),
$.pattern,
')',
),

// Any single character that is not one of SYNTAX_CHARS, CR or LF.
// Inside the character class the `?` of `\r?\n` is a literal `?`, which
// SYNTAX_CHARS_ESCAPED already excludes, so it adds nothing further.
pattern_character: _ => new RegExp(`[^${SYNTAX_CHARS_ESCAPED}\\r?\\n]`),

// Bracket expression: `[`, optional negating `^`, optional leading literal
// `-`, any number of class atoms, optional trailing literal `-`, `]`.
// The literal `-` tokens are aliased to class_character nodes.
character_class: $ => seq(
'[',
optional('^'),
optional(alias('-', $.class_character)),
repeat($._class_atom),
optional(alias('-', $.class_character)),
']',
),

// POSIX-style class of the form `[:name:]`.
posix_character_class: $ => seq(
'[:',
$.posix_class_name,
':]',
),

// One or more ASCII letters.
posix_class_name: _ => /[a-zA-Z]+/,

// Range `start-end` inside a character class, right-associative. Either
// endpoint may be a plain class character, a character-class escape, a
// control escape, or a literal `-` (aliased to class_character).
class_range: $ => prec.right(seq(
choice(
$.class_character,
$.character_class_escape,
$.control_escape,
alias('-', $.class_character),
),
'-',
choice(
$.class_character,
$.character_class_escape,
$.control_escape,
alias('-', $.class_character),
),
)),

// Everything that may appear as one item inside a character class.
// The escaped dash `\-` is aliased to an identity_escape node.
_class_atom: $ => choice(
$.class_character,
alias('\\-', $.identity_escape),
$.character_class_escape,
$._character_escape,
$.posix_character_class,
$.class_range,
),

// A single literal character inside a character class.
class_character: _ => // NOT: \ ] or -
/[^\\\]\-]/,

// `(pattern)`.
anonymous_capturing_group: $ => seq('(', $.pattern, ')'),

// `(?<name>pattern)` or `(?P<name>pattern)`.
named_capturing_group: $ => seq(choice('(?<', '(?P<'), $.group_name, '>', $.pattern, ')'),

// `(?:pattern)`.
non_capturing_group: $ => seq('(?:', $.pattern, ')'),

// `(?flags)`, `(?flags-flags)` or `(?-flags)`, optionally followed by
// `:pattern` before the closing parenthesis.
inline_flags_group: $ => seq(
'(?',
choice(
$.flags,
seq($.flags, '-', $.flags),
seq('-', $.flags),
),
optional(seq(':', $.pattern)),
')',
),

// One or more ASCII letters; the individual letters are not validated here.
flags: _ => /[a-zA-Z]+/,

// Quantifiers. Each is built with quantifierRule, which appends an optional
// `?` aliased to a `lazy` node.
zero_or_more: quantifierRule(_ => '*'),
one_or_more: quantifierRule(_ => '+'),
optional: quantifierRule(_ => '?'),
// Counted repetition: `{n}`, `{n,}`, `{n,m}` or `{,m}`.
count_quantifier: quantifierRule($ => seq(
'{',
choice(
seq(
$.decimal_digits,
optional(seq(',', optional($.decimal_digits))),
),
seq(',', $.decimal_digits),
),
'}',
)),

// `\k<name>`.
backreference_escape: $ => seq('\\k', '<', $.group_name, '>'),

// `(?P=name)`.
named_group_backreference: $ => seq('(?P=', $.group_name, ')'),

// Backslash followed by a decimal number that does not start with 0.
decimal_escape: _ => /\\[1-9][0-9]*/,

// `\d \D \s \S \w \W`, a Unicode property escape `\p{...}` / `\P{...}`,
// or a Unicode character escape.
character_class_escape: $ => choice(
/\\[dDsSwW]/,
seq(/\\[pP]/, '{', $.unicode_property_value_expression, '}'),
$.unicode_character_escape,
),

// `\uHHHH` (exactly four hex digits) or `\u{H...}` (one to six hex digits).
unicode_character_escape: _ => choice(
/\\u[0-9a-fA-F]{4}/,
// NOTE: The following is a valid syntax only if the "u" flag is set.
// However, this is unlikely that "u" would be encountered
// The braced form is accepted here unconditionally.
/\\u\{[0-9a-fA-F]{1,6}\}/,
),

// Either `value` or `name=value`. Both parts are lexed by the same
// unicode_property token and exposed under different aliases.
unicode_property_value_expression: $ => seq(
optional(seq(alias($.unicode_property, $.unicode_property_name), '=')),
alias($.unicode_property, $.unicode_property_value),
),

// One or more ASCII letters, digits or underscores.
unicode_property: _ => /[a-zA-Z_0-9]+/,

// Inlined group of the escapes that denote a single character.
_character_escape: $ => choice(
$.control_escape,
$.control_letter_escape,
$.identity_escape,
),

// TODO: We should technically not accept \0 unless the
// lookahead is not also a digit.
// I think this has little bearing on the highlighting of
// correct regexes.
// Matches `\b \f \n \r \t \v \0` and `\xHH` (exactly two hex digits).
// NOTE(review): `\b` is also the boundary_assertion token; which of the two
// is produced in a given context is decided by tree-sitter's lexer — confirm.
control_escape: _ => choice(
/\\[bfnrtv0]/,
/\\x[0-9a-fA-F]{2}/,
),

// `\c` followed by one ASCII letter.
control_letter_escape: _ => /\\c[a-zA-Z]/,

// Backslash followed by any single character other than the letters and
// digits excluded in the class below; emitted as a single token.
identity_escape: _ => token(seq('\\', /[^kdDsSpPwWbfnrtv0-9]/)),

// TODO: This is an approximation of RegExpIdentifierName in the
// formal grammar, which allows for Unicode names through
// the following mechanism:
//
// RegExpIdentifierName[U]::
// RegExpIdentifierStart[?U]
// RegExpIdentifierName[?U]RegExpIdentifierPart[?U]
//
// RegExpIdentifierStart[U]::
// UnicodeIDStart
// $
// _
// \RegExpUnicodeEscapeSequence[?U]
//
// RegExpIdentifierPart[U]::
// UnicodeIDContinue
// $
// \RegExpUnicodeEscapeSequence[?U]
// <ZWNJ> <ZWJ>
// RegExpUnicodeEscapeSequence[U]::
// [+U]uLeadSurrogate\uTrailSurrogate
// [+U]uLeadSurrogate
// [+U]uTrailSurrogate
// [+U]uNonSurrogate
// [~U]uHex4Digits
// [+U]u{CodePoint}
// As written: an ASCII letter or underscore, followed by any number of
// ASCII letters, digits or underscores.
group_name: _ => /[A-Za-z_][A-Za-z0-9_]*/,

// One or more digits.
decimal_digits: _ => /\d+/,
},
});
63 changes: 63 additions & 0 deletions langs/group-fern/regex/def/queries/highlights.scm
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
; Highlight queries for the regex grammar.

; Group, class, quantifier and POSIX-class delimiters.
[
"("
")"
"(?"
"(?:"
"(?<"
"(?P<"
"(?P="
">"
"["
"]"
"{"
"}"
"[:"
":]"
] @punctuation.bracket

; Names of capturing groups and of the groups referenced by backreferences.
(group_name) @property

; Escape sequences and zero-width assertions share one capture.
[
(identity_escape)
(control_letter_escape)
(character_class_escape)
(control_escape)
(start_assertion)
(end_assertion)
(boundary_assertion)
(non_boundary_assertion)
] @escape

; Quantifier symbols, the alternation bar, and the "=" / "!" tokens.
[
"*"
"+"
"?"
"|"
"="
"!"
] @operator

; Inside a counted quantifier: the numbers and the separating comma.
(count_quantifier
[
(decimal_digits) @number
"," @punctuation.delimiter
])

; Inside an inline flags group: the optional "-" and the optional ":".
(inline_flags_group
"-"? @operator
":"? @punctuation.delimiter)

(flags) @character.special

; Inside a character class: the "^" token and the "-" of a range.
(character_class
[
"^" @operator
(class_range "-" @operator)
])

; Literal characters inside classes and POSIX class names.
[
(class_character)
(posix_class_name)
] @constant.character

; Literal characters outside classes.
(pattern_character) @string
27 changes: 27 additions & 0 deletions langs/group-fern/regex/def/samples/example.regex
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/([A-Z])\w+/g
/ab+c/
/d(b+)d/g
/\w+\s/g
/\p{L}*/u
/\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/
/\A[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\z/
/[^aeiou]y[^aeiou]/
/r[aeiou]+/g
/seriali[sz]e/
/^[ \t]+|[ \t]+$/
/[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/
/gray|grey/
/gr(a|e)y/
/ab*(c|ε)/
/(0|(1(01*0)*1))*/
/[a-z]/
/[abcx-z]/
/[]abc]/
/[^a-z]/
/[xyz]*/
/[hc]at$/
/[^b]at/
/s.*/
/[hc]?at/
/cat|dog/
/\p{Alpha}/