NOTE(review): the whitespace of this patch was reconstructed from a copy in which
newlines and indentation had been collapsed. Indentation (in particular the context
lines of the ci.yml hunks and the nesting in arborium.yaml) is a best-effort guess;
verify it against the real files before applying. The grammar.js hunk intentionally
differs from the original patch: oneof_field now requires its terminating ';'
(hunk length 572 -> 575, so the blob id on that file's index line is stale).

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 22e3e09d..af822047 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -517,7 +517,7 @@ jobs:
           path: dist/plugins
           retention-days: "7"
   build-plugins-pine:
-    name: "Plugins (pine): capnp, dart, devicetree, rescript, solidity, starlark, swift, textproto, thrift, uiua, wit, yuri"
+    name: "Plugins (pine): capnp, dart, devicetree, proto, rescript, solidity, starlark, swift, textproto, thrift, uiua, wit, yuri"
     runs-on: depot-ubuntu-24.04-32
     container: "ghcr.io/bearcove/arborium-plugin-builder:latest"
     needs:
@@ -535,10 +535,10 @@ jobs:
           set -e
           tar -xf generate-output.tar && rm generate-output.tar
         shell: bash
-      - name: Build capnp, dart, devicetree, rescript, solidity, starlark, swift, textproto, thrift, uiua, wit, yuri
+      - name: Build capnp, dart, devicetree, proto, rescript, solidity, starlark, swift, textproto, thrift, uiua, wit, yuri
         run: |-
           set -e
-          ./xtask/target/release/xtask build capnp dart devicetree rescript solidity starlark swift textproto thrift uiua wit yuri -o dist/plugins
+          ./xtask/target/release/xtask build capnp dart devicetree proto rescript solidity starlark swift textproto thrift uiua wit yuri -o dist/plugins
         shell: bash
       - name: Upload plugins artifact
         uses: actions/upload-artifact@v4
diff --git a/demo/samples/proto.proto b/demo/samples/proto.proto
new file mode 100644
index 00000000..9407b6d6
--- /dev/null
+++ b/demo/samples/proto.proto
@@ -0,0 +1,33 @@
+syntax = "proto3";
+package tutorial;
+
+import "google/protobuf/timestamp.proto";
+
+option go_package = "github.com/protocolbuffers/protobuf/examples/go/tutorialpb";
+
+message Person {
+  string name = 1;
+  int32 id = 2;  // Unique ID number for this person.
+  string email = 3;
+
+  message PhoneNumber {
+    string number = 1;
+    PhoneType type = 2;
+  }
+
+  repeated PhoneNumber phones = 4;
+
+  google.protobuf.Timestamp last_updated = 5;
+}
+
+enum PhoneType {
+  PHONE_TYPE_UNSPECIFIED = 0;
+  PHONE_TYPE_MOBILE = 1;
+  PHONE_TYPE_HOME = 2;
+  PHONE_TYPE_WORK = 3;
+}
+
+// Our address book file is just one of these.
+message AddressBook {
+  repeated Person people = 1;
+}
diff --git a/langs/group-pine/proto/def/arborium.yaml b/langs/group-pine/proto/def/arborium.yaml
new file mode 100644
index 00000000..3127bff0
--- /dev/null
+++ b/langs/group-pine/proto/def/arborium.yaml
@@ -0,0 +1,24 @@
+repo: https://github.com/coder3101/tree-sitter-proto
+commit: d65a18ce7c2242801f702770114ad08056c7f8c9
+license: MIT
+
+grammars:
+  - id: proto
+    name: Protocol Buffers
+    tag: data
+    tier: 3
+    icon: vscode-icons:file-type-protobuf
+    aliases:
+      - protobuf
+
+    inventor: Google
+    year: 2008
+    description: Protocol Buffers are a language-neutral, platform-neutral extensible mechanism for serializing structured data.
+    link: https://protobuf.dev
+    trivia: The first release of Protocol Buffers was "Proto2"; "Proto1" only existed internally at Google.
+
+    samples:
+      - path: samples/addressbook.proto
+        description: Address book example from the Go Protocol Buffer tutorial.
+        link: https://protobuf.dev/getting-started/gotutorial
+        license: BSD-3-Clause
diff --git a/langs/group-pine/proto/def/grammar/grammar.js b/langs/group-pine/proto/def/grammar/grammar.js
new file mode 100644
index 00000000..f4ce15f9
--- /dev/null
+++ b/langs/group-pine/proto/def/grammar/grammar.js
@@ -0,0 +1,575 @@
+/**
+ * @file Parser for proto2 and proto3 files
+ * @author Mohammad Ashar Khan
+ * @license MIT
+ */
+
+///
+// @ts-check
+
+const
+  letter = /[a-zA-Z]/;
+const decimal_digit = /[0-9]/;
+const octal_digit = /[0-7]/;
+const hex_digit = /[0-9A-Fa-f]/;
+
+/**
+ *
+ * @param {any} content
+ */
+function array_of(content) {
+  return seq(
+    '[',
+    optional(seq(content, repeat(seq(',', content)))),
+    ']',
+  );
+}
+
+module.exports = grammar({
+  name: 'proto',
+
+  extras: $ => [$.comment, /\s/],
+
+  rules: {
+    // proto = syntax { import | package | option | topLevelDef | emptyStatement }
+    // topLevelDef = message | enum | service
+    source_file: $ => seq(
+      optional(choice($.syntax, $.edition)),
+      optional(repeat(choice(
+        $.import,
+        $.package,
+        $.option,
+        $.enum,
+        $.message,
+        $.extend,
+        $.service,
+        $.empty_statement,
+      ))),
+    ),
+
+    empty_statement: _ => ';',
+
+    // edition = "edition" "=" quote numeric quote ";"
+    edition: $ => seq('edition', '=', field('year', $.string), ';'),
+    // syntax = "syntax" "=" quote "proto3" quote ";"
+    syntax: $ => seq('syntax', '=', choice('"proto3"', '"proto2"'), ';'),
+
+    // import = "import" [ "weak" | "public" | "option" ] strLit ";"
+    import: $ => seq(
+      'import',
+      optional(choice('weak', 'public', 'option')),
+      field('path', $.string),
+      ';',
+    ),
+
+    // package = "package" fullIdent ";"
+    package: $ => seq(
+      'package',
+      $.full_ident,
+      ';',
+    ),
+
+    // option = "option" optionName "=" constant ";"
+    // optionName = ( ident | "(" fullIdent ")" ) { "." ident }
+    option: $ => seq(
+      'option',
+      $._option_name,
+      '=',
+      $.constant,
+      ';',
+    ),
+
+    _option_name: $ => seq(
+      choice(
+        $.identifier,
+        seq('(', $.full_ident, ')'),
+      ),
+      repeat(seq(
+        '.',
+        $.identifier,
+      )),
+    ),
+
+    // enum = "enum" enumName enumBody
+    // enumBody = "{" { option | enumField | emptyStatement } "}"
+    // enumField = ident "=" [ "-" ] intLit [ "[" enumValueOption { "," enumValueOption } "]" ]";"
+    // enumValueOption = optionName "=" constant
+    // edition 2024: optional "export" | "local" visibility modifier
+    enum: $ => seq(
+      optional(choice('export', 'local')),
+      'enum',
+      $.enum_name,
+      $.enum_body,
+    ),
+
+    enum_name: $ => $.identifier,
+
+    enum_body: $ => seq(
+      '{',
+      repeat(choice(
+        $.option,
+        $.enum_field,
+        $.empty_statement,
+        $.reserved,
+      )),
+      '}',
+    ),
+
+    enum_field: $ => seq(
+      $.identifier,
+      '=',
+      optional('-'),
+      $.int_lit,
+      optional(seq(
+        '[',
+        $.enum_value_option,
+        repeat(seq(',', $.enum_value_option)),
+        ']',
+      )),
+      ';',
+    ),
+
+    enum_value_option: $ => seq(
+      $._option_name,
+      '=',
+      $.constant,
+    ),
+
+    // message = "message" messageName messageBody
+    // messageBody = "{" { field | enum | message | option | oneof | mapField | reserved | emptyStatement } "}"
+    // edition 2024: optional "export" | "local" visibility modifier
+    message: $ => seq(
+      optional(choice('export', 'local')),
+      'message',
+      $.message_name,
+      $.message_body,
+    ),
+
+    message_body: $ => seq(
+      '{',
+      repeat(choice(
+        $.field,
+        $.enum,
+        $.message,
+        $.option,
+        $.oneof,
+        $.map_field,
+        $.reserved,
+        $.extensions,
+        $.extend,
+        $.group,
+        $.empty_statement,
+      )),
+      '}',
+    ),
+
+    message_name: $ => $.identifier,
+
+    extend: $ => seq(
+      'extend',
+      $.full_ident,
+      $.message_body,
+    ),
+
+    // group = label "group" groupName "=" fieldNumber messageBody
+    // label = "required" | "optional" | "repeated"
+    // Proto2 only; deprecated but still valid.
+    group: $ => seq(
+      optional(choice('optional', 'required', 'repeated')),
+      'group',
+      $.message_name,
+      '=',
+      $.field_number,
+      $.message_body,
+    ),
+
+    // field = [ "repeated" ] type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
+    // fieldOptions = fieldOption { "," fieldOption }
+    // fieldOption = optionName "=" constant
+    field: $ => seq(
+      // This isn't allowed according to the spec and yet the proto3 compiler
+      // accepts it so we put it here for parsing.
+      optional(choice('optional', 'required')),
+
+      optional('repeated'),
+      $.type,
+      $.identifier,
+      '=',
+      $.field_number,
+      optional(seq('[', $.field_options, ']')),
+      ';',
+    ),
+
+    field_options: $ => seq(
+      $.field_option,
+      repeat(seq(',', $.field_option)),
+    ),
+
+    field_option: $ => seq(
+      $._option_name,
+      '=',
+      $.constant,
+    ),
+
+    // oneof = "oneof" oneofName "{" { option | oneofField | emptyStatement } "}"
+    // oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
+    oneof: $ => seq(
+      'oneof',
+      $.identifier,
+      '{',
+      repeat(choice(
+        $.option,
+        $.oneof_field,
+        $.empty_statement,
+      )),
+      '}',
+    ),
+
+    oneof_field: $ => seq(
+      $.type,
+      $.identifier,
+      '=',
+      $.field_number,
+      optional(seq('[', $.field_options, ']')),
+      // The terminating ';' is part of the field (see the oneofField production
+      // above); without it the ';' would be parsed as a separate empty_statement.
+      ';',
+    ),
+
+    // mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
+    // keyType = "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" |
+    //           "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string"
+    map_field: $ => seq(
+      'map',
+      '<',
+      $.key_type,
+      ',',
+      $.type,
+      '>',
+      $.identifier,
+      '=',
+      $.field_number,
+      optional(seq('[', $.field_options, ']')),
+      ';',
+    ),
+
+    key_type: $ => choice(
+      'int32',
+      'int64',
+      'uint32',
+      'uint64',
+      'sint32',
+      'sint64',
+      'fixed32',
+      'fixed64',
+      'sfixed32',
+      'sfixed64',
+      'bool',
+      'string',
+    ),
+
+    // type = "double" | "float" | "int32" | "int64" | "uint32" | "uint64"
+    //      | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64"
+    //      | "bool" | "string" | "bytes" | messageType | enumType
+    type: $ => choice(
+      'double',
+      'float',
+      'int32',
+      'int64',
+      'uint32',
+      'uint64',
+      'sint32',
+      'sint64',
+      'fixed32',
+      'fixed64',
+      'sfixed32',
+      'sfixed64',
+      'bool',
+      'string',
+      'bytes',
+      $.message_or_enum_type,
+    ),
+
+    // reserved = "reserved" ( ranges | fieldNames ) ";"
+    // ranges = range { "," range }
+    // range = intLit [ "to" ( intLit | "max" ) ]
+    // fieldNames = fieldName { "," fieldName }
+    reserved: $ => seq(
+      'reserved',
+      choice($.ranges, $.reserved_field_names),
+      ';',
+    ),
+
+    extensions: $ => seq(
+      'extensions',
+      $.ranges,
+      ';',
+    ),
+
+    ranges: $ => seq($.range, repeat(seq(',', $.range))),
+
+    range: $ => seq(
+      $.int_lit,
+      optional(seq(
+        'to',
+        choice($.int_lit, 'max'),
+      )),
+    ),
+
+    field_names: $ => seq(
+      $._identifier_or_string,
+      repeat(seq(',', $._identifier_or_string)),
+    ),
+
+    reserved_field_names: $ => seq(
+      $.reserved_identifier,
+      repeat(seq(',', $.reserved_identifier)),
+    ),
+
+    // messageType = [ "." ] { ident "." } messageName
+    message_or_enum_type: $ => seq(
+      optional('.'),
+      repeat(seq(
+        $.identifier,
+        '.',
+      )),
+      $.identifier,
+    ),
+
+    // fieldNumber = intLit;
+    field_number: $ => $.int_lit,
+
+    // service = "service" serviceName "{" { option | rpc | emptyStatement } "}"
+    // rpc = "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ]
+    // messageType ")" (( "{" {option | emptyStatement } "}" ) | ";")
+    service: $ => seq(
+      'service',
+      $.service_name,
+      '{',
+      repeat(choice(
+        $.option,
+        $.rpc,
+        $.empty_statement,
+      )),
+      '}',
+    ),
+
+    service_name: $ => $.identifier,
+
+    rpc: $ => seq(
+      'rpc',
+      $.rpc_name,
+      '(',
+      optional('stream'),
+      $.message_or_enum_type,
+      ')',
+      'returns',
+      '(',
+      optional('stream'),
+      $.message_or_enum_type,
+      ')',
+      choice(
+        seq(
+          '{',
+          repeat(choice(
+            $.option,
+            $.empty_statement,
+          )),
+          '}',
+        ),
+        ';',
+      ),
+    ),
+
+    rpc_name: $ => $.identifier,
+
+    // constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) | strLit | boolLit
+    constant: $ => choice(
+      $.full_ident,
+      seq(
+        optional(choice('-', '+')),
+        $.int_lit,
+      ),
+      seq(
+        optional(choice('-', '+')),
+        $.float_lit,
+      ),
+      $.string,
+      $.bool,
+
+      // block_lit is not specified but is used in the real world
+      // (i.e. grpc-gateway) so we define it
+      $.block_lit,
+    ),
+
+    // block_lit is completely unspecified. I determined what is allowed
+    // based on the "a bit of everything" grpc-gateway example which has
+    // wildly inconsistent syntax and yet it actually parses and compiles
+    // with protoc.
+    block_lit: $ => seq(
+      '{',
+      repeat(seq(
+        choice(
+          $.identifier,
+          seq('[', $.full_ident, ']'),
+        ),
+        optional(':'),
+        choice(
+          $.constant,
+          array_of($.constant),
+        ),
+        optional(choice(',', ';')),
+      )),
+      '}',
+    ),
+
+    // identifier = letter { letter | decimalDigit | "_" }
+    identifier: $ => token(seq(
+      choice(letter, '_'),
+      optional(repeat(choice(
+        letter,
+        decimal_digit,
+        '_',
+      ))),
+    )),
+
+    // reserved_identifier = \" | ' letter { letter | decimalDigit | "_" } ' | \"
+    reserved_identifier: $ => token(
+      choice(
+        seq(
+          '"',
+          letter,
+          optional(repeat(choice(letter, decimal_digit, '_'))),
+          '"',
+        ),
+        seq(
+          '\'',
+          letter,
+          optional(repeat(choice(letter, decimal_digit, '_'))),
+          '\'',
+        ),
+        seq(
+          letter,
+          optional(repeat(choice(letter, decimal_digit, '_'))),
+        ),
+      ),
+    ),
+    _identifier_or_string: $ => choice($.identifier, $.string),
+
+    // fullIdent = ident { "." ident }
+    full_ident: $ => seq(
+      $.identifier,
+      optional(repeat(seq('.', $.identifier))),
+    ),
+
+    // boolLit = "true" | "false"
+    bool: $ => choice($.true, $.false),
+    true: $ => 'true',
+    false: $ => 'false',
+
+    // intLit = decimalLit | octalLit | hexLit
+    int_lit: $ => choice(
+      $.decimal_lit,
+      $.octal_lit,
+      $.hex_lit,
+    ),
+
+    // decimalLit = ( "1" … "9" ) { decimalDigit }
+    decimal_lit: $ => token(seq(
+      /[1-9]/,
+      repeat(decimal_digit),
+    )),
+
+    // octalLit = "0" { octalDigit }
+    octal_lit: $ => token(seq(
+      '0',
+      repeat(octal_digit),
+    )),
+
+    // hexLit = "0" ( "x" | "X" ) hexDigit { hexDigit }
+    hex_lit: $ => token(seq(
+      '0',
+      choice('x', 'X'),
+      hex_digit,
+      repeat(hex_digit),
+    )),
+
+    // floatLit = ( decimals "." [ decimals ] [ exponent ] | decimals exponent | "."decimals [ exponent ] ) | "inf" | "nan"
+    // decimals = decimalDigit { decimalDigit }
+    // exponent = ( "e" | "E" ) [ "+" | "-" ] decimals
+    float_lit: $ => {
+      const decimals = seq(
+        decimal_digit,
+        repeat(decimal_digit),
+      );
+
+      const exponent = seq(
+        choice('e', 'E'),
+        optional(choice('+', '-')),
+        decimals,
+      );
+
+      return token(choice(
+        seq(
+          decimals,
+          '.',
+          optional(decimals),
+          optional(exponent),
+        ),
+        seq(
+          decimals,
+          exponent,
+        ),
+        seq(
+          '.',
+          decimals,
+          optional(exponent),
+        ),
+        'inf',
+        'nan',
+      ));
+    },
+
+    string: $ => repeat1(
+      choice(
+        seq(
+          '"',
+          repeat(choice(
+            token.immediate(prec(1, /[^"\\]+/)),
+            $.escape_sequence,
+          )),
+          '"',
+        ),
+
+        seq(
+          '\'',
+          repeat(choice(
+            token.immediate(prec(1, /[^'\\]+/)),
+            $.escape_sequence,
+          )),
+          '\'',
+        ),
+      ),
+    ),
+
+    escape_sequence: $ => token.immediate(seq(
+      '\\',
+      choice(
+        /[^xuU]/,
+        /\d{2,3}/,
+        /x[0-9a-fA-F]{2,}/,
+        /u[0-9a-fA-F]{4}/,
+        /U[0-9a-fA-F]{8}/,
+      ),
+    )),
+
+    comment: $ => token(choice(
+      seq('//', /.*/),
+      seq(
+        '/*',
+        /[^*]*\*+([^/*][^*]*\*+)*/,
+        '/',
+      ),
+    )),
+  },
+});
diff --git a/langs/group-pine/proto/def/queries/highlights.scm b/langs/group-pine/proto/def/queries/highlights.scm
new file mode 100644
index 00000000..98b4db4a
--- /dev/null
+++ b/langs/group-pine/proto/def/queries/highlights.scm
@@ -0,0 +1,151 @@
+(package
+  (full_ident
+    (identifier) @module))
+
+(extend
+  (full_ident
+    (identifier) @type))
+
+(constant
+  (full_ident
+    (identifier) @constant))
+
+(field
+  (identifier) @property)
+
+(map_field
+  (identifier) @property)
+
+(oneof
+  (identifier) @type)
+
+(oneof_field
+  (identifier) @property)
+
+(field_option
+  (identifier) @property)
+
+(enum_value_option
+  (identifier) @property)
+
+(block_lit
+  (identifier) @property)
+
+; Extension option names, e.g. option (foo.bar) = ...
+(option
+  (full_ident
+    (identifier) @variable))
+
+(option
+  (full_ident
+    (identifier)
+    (identifier) @variable.member))
+
+[
+  "option"
+  "syntax"
+  "edition"
+] @keyword.directive
+
+[
+  "reserved"
+  "to"
+  "max"
+] @keyword
+
+[
+  "enum"
+  "extend"
+  "extensions"
+  "group"
+  "message"
+  "map"
+  "oneof"
+  "service"
+] @keyword.type
+
+"rpc" @keyword.function
+
+"returns" @keyword.return
+
+[
+  "export"
+  "local"
+  "optional"
+  "repeated"
+  "required"
+  "stream"
+  "weak"
+  "public"
+] @keyword.modifier
+
+[
+  "package"
+  "import"
+] @keyword.import
+
+[
+  (key_type)
+  (type)
+] @type.builtin
+
+[
+  (message_name)
+  (enum_name)
+  (service_name)
+  (message_or_enum_type)
+] @type
+
+(rpc_name) @function.method
+
+(enum_field
+  (identifier) @constant)
+
+(string) @string
+
+(import
+  path: (string) @string.special.path)
+
+[
+  "\"proto3\""
+  "\"proto2\""
+] @string.special.symbol
+
+(escape_sequence) @string.escape
+
+(int_lit) @number
+
+(float_lit) @number.float
+
+[
+  (true)
+  (false)
+] @boolean
+
+(comment) @spell
+
+(comment) @comment
+
+[
+  "("
+  ")"
+  "["
+  "]"
+  "{"
+  "}"
+  "<"
+  ">"
+] @punctuation.bracket
+
+[
+  ";"
+  ","
+  "."
+  ":"
+] @punctuation.delimiter
+
+[
+  "="
+  "-"
+  "+"
+] @operator
diff --git a/langs/group-pine/proto/def/queries/injections.scm b/langs/group-pine/proto/def/queries/injections.scm
new file mode 100644
index 00000000..2f0e58eb
--- /dev/null
+++ b/langs/group-pine/proto/def/queries/injections.scm
@@ -0,0 +1,2 @@
+((comment) @injection.content
+  (#set! injection.language "comment"))
diff --git a/langs/group-pine/proto/def/samples/addressbook.proto b/langs/group-pine/proto/def/samples/addressbook.proto
new file mode 100644
index 00000000..9407b6d6
--- /dev/null
+++ b/langs/group-pine/proto/def/samples/addressbook.proto
@@ -0,0 +1,33 @@
+syntax = "proto3";
+package tutorial;
+
+import "google/protobuf/timestamp.proto";
+
+option go_package = "github.com/protocolbuffers/protobuf/examples/go/tutorialpb";
+
+message Person {
+  string name = 1;
+  int32 id = 2;  // Unique ID number for this person.
+  string email = 3;
+
+  message PhoneNumber {
+    string number = 1;
+    PhoneType type = 2;
+  }
+
+  repeated PhoneNumber phones = 4;
+
+  google.protobuf.Timestamp last_updated = 5;
+}
+
+enum PhoneType {
+  PHONE_TYPE_UNSPECIFIED = 0;
+  PHONE_TYPE_MOBILE = 1;
+  PHONE_TYPE_HOME = 2;
+  PHONE_TYPE_WORK = 3;
+}
+
+// Our address book file is just one of these.
+message AddressBook {
+  repeated Person people = 1;
+}