Skip to content

Commit 00b3da5

Browse files
Merge pull request #29 from PanAeon/feature/unicode_identifiers
broken ids with unicode characters.
2 parents e706a1e + 86f3239 commit 00b3da5

File tree

5 files changed

+233
-24
lines changed

5 files changed

+233
-24
lines changed

src/typescript/Scala.tmLanguage.ts

+22-16
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
11
"use strict";
22
import { TmLanguage } from "./TMLanguageModel";
33

4-
const letter = "[_a-zA-Z\\$\\p{Lo}\\p{Lt}\\p{Nl}\\p{Ll}\\p{Lu}]"
4+
// Character class matching one "upper" identifier letter: ASCII A-Z plus the
// Unicode titlecase (Lt) and uppercase (Lu) letter categories.
const upperLetter = ["[", "A-Z", "\\p{Lt}", "\\p{Lu}", "]"].join("")
5+
// Character class matching one "lower" identifier letter: underscore, ASCII
// a-z, a literal dollar sign, and the Unicode categories Lo (other letter),
// Nl (letter number) and Ll (lowercase letter).
const lowerLetter = ["[", "_a-z", "\\$", "\\p{Lo}", "\\p{Nl}", "\\p{Ll}", "]"].join("")
6+
// Any identifier letter: the union of the upper and lower classes, expressed
// as a character class that nests both of them.
const letter = "[" + upperLetter + lowerLetter + "]"
57
// Character class matching a single ASCII decimal digit.
const digit = "[" + "0-9" + "]"
68
// Bare alternation "letter|digit". It carries no grouping of its own, so the
// patterns below embed it inside a (?:...) group before quantifying it.
const letterOrDigit = [letter, digit].join("|")
79
// One or more identifier letters (no digits, no operator characters).
const alphaId = letter + "+"
810
// A letter followed by any number of letters or digits.
// See SIP-11: https://docs.scala-lang.org/sips/string-interpolation.html
const simpleInterpolatedVariable = letter + "(?:" + letterOrDigit + ")*"
911
// Character class matching one operator character: the listed ASCII symbols
// (with '-' and '/' escaped) plus a nested class for the Unicode math-symbol
// (Sm) and other-symbol (So) categories.
const opchar = "[" + "!#%&*+\\-\\/:<>=?@^|~" + "[\\p{Sm}\\p{So}]" + "]"
10-
const idrest = `${letter}(?:${letterOrDigit})*(?:((?<=_))${opchar}+)?`
12+
// Alphanumeric identifier: a letter, then letters/digits, then an optional
// run of operator characters. The (?<=_) lookbehind only admits that operator
// suffix directly after an underscore, and adds no capture group.
const idrest = letter + "(?:" + letterOrDigit + ")*" + "(?:(?<=_)" + opchar + "+)?"
13+
// Same shape as idrest, but the first character must come from upperLetter.
const idUpper = upperLetter + "(?:" + letterOrDigit + ")*" + "(?:(?<=_)" + opchar + "+)?"
14+
// Same shape as idrest, but the first character must come from lowerLetter
// (which also covers '_' and '$').
const idLower = lowerLetter + "(?:" + letterOrDigit + ")*" + "(?:(?<=_)" + opchar + "+)?"
1115
// Plain (non-backquoted) identifier: either an alphanumeric identifier as
// described by idrest, or a run of one or more operator characters.
const plainid = "(?:" + idrest + "|(?:" + opchar + ")+)"
16+
// Back-quoted identifier: one or more non-backtick characters enclosed in a
// pair of backticks.
const backQuotedId = '`[^`]+`'
17+
1218

1319
export const scalaTmLanguage: TmLanguage = {
1420
fileTypes: [
@@ -41,7 +47,7 @@ export const scalaTmLanguage: TmLanguage = {
4147
include: '#comments'
4248
},
4349
{
44-
match: '(`[^`]+`|[_$a-zA-Z][_$a-zA-Z0-9]*)',
50+
match: `(${backQuotedId}|${plainid})`,
4551
name: 'entity.name.import.scala'
4652
},
4753
{
@@ -58,7 +64,7 @@ export const scalaTmLanguage: TmLanguage = {
5864
},
5965
patterns: [
6066
{
61-
match: '(?x) \\s*\n\t\t\t\t (`[^`]+`|[_$a-zA-Z][_$a-zA-Z0-9]*) \\s*\n\t\t\t\t (=>) \\s*\n\t\t\t\t (`[^`]+`|[_$a-zA-Z][_$a-zA-Z0-9]*) \\s*\n\t\t\t ',
67+
match: `(?x)\\s*(${backQuotedId}|${plainid})\\s*(=>)\\s*(${backQuotedId}|${plainid})\\s*`,
6268
captures: {
6369
'1': {
6470
name: 'entity.name.import.renamed-from.scala'
@@ -118,7 +124,7 @@ export const scalaTmLanguage: TmLanguage = {
118124
include: '#block-comments'
119125
},
120126
{
121-
match: '(?x)\n\t\t\t(?! /\\*)\n\t\t\t(?! \\*/)\n\t\t '
127+
match: '(?x)(?! /\\*)(?! \\*/)'
122128
}
123129
],
124130
name: 'comment.block.scala'
@@ -201,7 +207,7 @@ export const scalaTmLanguage: TmLanguage = {
201207
},
202208
'special-identifier': {
203209
match: '\\b[_$a-zA-Z][_$a-zA-Z0-9]*(?:_[^\\t .,;()\\[\\]{}\'"`\\w])',
204-
comment: '\n\t\t Match special scala style identifiers that can end with and underscore and\n\t\t a a not letter such as blank_?. This way the symbol will not be colored\n\t\t differently.\n\t\t '
210+
comment: 'Match special scala style identifiers that can end with and underscore and a a not letter such as blank_?. This way the symbol will not be colored differently.'
205211
},
206212
strings: {
207213
patterns: [
@@ -449,7 +455,7 @@ export const scalaTmLanguage: TmLanguage = {
449455
declarations: {
450456
patterns: [
451457
{
452-
match: '(?x)\n\t\t\t\t\t\t\\b(def)\\s+\n\t\t\t\t\t\t(`[^`]+`|[_$a-zA-Z][_$a-zA-Z0-9]*(?:_[^\\t .,;()\\[\\]{}\'"`\\w])(?=[(\\t ])|[_$a-zA-Z][_$a-zA-Z0-9]*|[-?~><^+*%:!#|/@\\\\]+)',
458+
match: `(?x)\\b(def)\\s+(${backQuotedId}|${plainid})`,
453459
captures: {
454460
'1': {
455461
name: 'keyword.declaration.scala'
@@ -485,7 +491,7 @@ export const scalaTmLanguage: TmLanguage = {
485491
}
486492
},
487493
{
488-
match: '\\b(type)\\s+(`[^`]+`|[_$a-zA-Z][_$a-zA-Z0-9]*(?:_[^\\s])(?=[\\t ])|[_$a-zA-Z][_$a-zA-Z0-9]*|[-?~><^+*%:!#|/@\\\\]+)',
494+
match: `\\b(type)\\s+(${backQuotedId}|${plainid})`,
489495
captures: {
490496
'1': {
491497
name: 'keyword.declaration.scala'
@@ -496,18 +502,19 @@ export const scalaTmLanguage: TmLanguage = {
496502
}
497503
},
498504
{
499-
match: '\\b(val)\\s+(?:([A-Z][_a-zA-Z0-9]*))\\b',
505+
match: `\\b(val)\\s+(${idUpper})\\b`,
500506
captures: {
501507
'1': {
502508
name: 'keyword.declaration.stable.scala'
503-
},
509+
}
510+
,
504511
'2': {
505512
name: 'constant.other.declaration.scala'
506513
}
507514
}
508515
},
509516
{
510-
match: '\\b(?:(val)|(var))\\s+(?:(`[^`]+`|[_$a-zA-Z][_$a-zA-Z0-9]*(?:_[^\\t .,;()\\[\\]{}\'"`\\w])(?=[\\t ])|[_$a-zA-Z][_$a-zA-Z0-9]*|[-?~><^+*%:!#|/@\\\\]+)|(?=\\())',
517+
match: `\\b(?:(val)|(var))\\s+(?:(${backQuotedId}|${plainid})|(?=\\())`,
511518
captures: {
512519
'1': {
513520
name: 'keyword.declaration.stable.scala'
@@ -547,7 +554,7 @@ export const scalaTmLanguage: TmLanguage = {
547554
include: '#comments'
548555
},
549556
{
550-
match: '(`[^`]+`|[_$a-zA-Z][_$a-zA-Z0-9]*)',
557+
match: `(${backQuotedId}|${plainid})`,
551558
name: 'entity.name.package.scala'
552559
},
553560
{
@@ -812,16 +819,15 @@ export const scalaTmLanguage: TmLanguage = {
812819
'parameter-list': {
813820
patterns: [
814821
{
815-
match: '(?<=[^\\._$a-zA-Z0-9])(`[^`]+`|[_$a-z][_$a-zA-Z0-9]*(?:_[^\\s])(?=[\\t ])|[_$a-z][_$a-zA-Z0-9]*|[-?~><^+*%:!#|/@\\\\]+)\\s*(:)\\s+',
822+
match: `(?<=[^\\._$a-zA-Z0-9])(${backQuotedId}|${idLower})\\s*(:)\\s+`,
816823
captures: {
817824
'1': {
818825
name: 'variable.parameter.scala'
819826
},
820827
'2': {
821828
name: 'meta.colon.scala'
822829
}
823-
},
824-
comment: 'We do not match param names that start with a Capitol letter'
830+
}
825831
}
826832
]
827833
},
@@ -852,7 +858,7 @@ export const scalaTmLanguage: TmLanguage = {
852858
include: '#xml-embedded-content'
853859
}
854860
],
855-
comment: 'We do not allow a tag name to start with a - since this would\n\t\t\t\t likely conflict with the <- operator. This is not very common\n\t\t\t\t for tag names anyway. Also code such as -- if (val <val2 || val> val3)\n\t\t\t\t will falsly be recognized as an xml tag. The solution is to put a\n\t\t\t\t space on either side of the comparison operator',
861+
comment: 'We do not allow a tag name to start with a - since this would likely conflict with the <- operator. This is not very common for tag names anyway. Also code such as -- if (val <val2 || val> val3) will falsly be recognized as an xml tag. The solution is to put a space on either side of the comparison operator',
856862
endCaptures: {
857863
'1': {
858864
name: 'punctuation.definition.tag.xml'

0 commit comments

Comments
 (0)