@@ -399,7 +399,7 @@ private RegExpTree parseCharset() {
399399 CharRanges ieExplicits = CharRanges .EMPTY ;
400400 while (pos < limit && pattern .charAt (pos ) != ']' ) {
401401 char ch = pattern .charAt (pos );
402- char start ;
402+ int start ;
403403 if (ch == '\\' ) {
404404 ++pos ;
405405 char possibleGroupName = pattern .charAt (pos );
@@ -414,7 +414,7 @@ private RegExpTree parseCharset() {
414414 start = ch ;
415415 ++pos ;
416416 }
417- char end = start ;
417+ int end = start ;
418418 if (pos + 1 < limit && pattern .charAt (pos ) == '-'
419419 && pattern .charAt (pos + 1 ) != ']' ) {
420420 ++pos ;
@@ -464,15 +464,20 @@ private RegExpTree parseCharset() {
464464 * contexts, so contexts must filter those instead.
465465 * E.g. '\b' means a different thing inside a charset than without.
466466 */
467- private char parseEscapeChar () {
467+ private int parseEscapeChar () {
468468 char ch = pattern .charAt (pos ++);
469469 switch (ch ) {
470470 case 'b' : return '\b' ;
471471 case 'f' : return '\f' ;
472472 case 'n' : return '\n' ;
473473 case 'r' : return '\r' ;
474474 case 't' : return '\t' ;
475- case 'u' : return parseHex (4 );
475+ case 'u' :
476+ if (flags .contains ("u" ) && pos < limit && pattern .charAt (pos ) == '{' ) {
477+ return parseUnicodeEscape ();
478+ } else {
479+ return parseHex (4 );
480+ }
476481 case 'v' : return '\u000b' ;
477482 case 'x' : return parseHex (2 );
478483 default :
@@ -599,7 +604,7 @@ private RegExpTree parseEscape() {
599604 ++pos ;
600605 return new Charset (charGroup , CharRanges .EMPTY );
601606 }
602- return new Text ("" + parseEscapeChar ());
607+ return new Text (new String ( Character . toChars ( parseEscapeChar ()) ));
603608 }
604609 }
605610
@@ -630,6 +635,42 @@ private char parseHex(int n) {
630635 return (char ) result ;
631636 }
632637
638+ private int parseUnicodeEscape () {
639+ checkState (pattern .charAt (pos ) == '{' );
640+ int start = pos ++;
641+ int result = 0 ;
642+ char ch = pattern .charAt (pos );
643+ if (ch == '}' ) {
644+ throw new IllegalArgumentException ("Invalid unicode escape: "
645+ + pattern .substring (start , ++pos ));
646+ }
647+ while (pos < limit ) {
648+ int digit ;
649+ ch = pattern .charAt (pos ++);
650+ if ('0' <= ch && ch <= '9' ) {
651+ digit = ch - '0' ;
652+ } else if ('a' <= ch && ch <= 'f' ) {
653+ digit = ch + (10 - 'a' );
654+ } else if ('A' <= ch && ch <= 'F' ) {
655+ digit = ch + (10 - 'A' );
656+ } else if (ch == '}' ) {
657+ break ;
658+ } else {
659+ throw new IllegalArgumentException ("Invalid character in unicode escape: " + ch );
660+ }
661+ result = (result << 4 ) | digit ;
662+ }
663+ if (ch != '}' ) {
664+ throw new IllegalArgumentException ("Malformed unicode escape: expected '}' after "
665+ + pattern .substring (start , pos ));
666+ }
667+ if (result > 0x10FFFF ) {
668+ throw new IllegalArgumentException ("Unicode must not be greater than 0x10FFFF: "
669+ + pattern .substring (start , pos ));
670+ }
671+ return result ;
672+ }
673+
633674 private boolean isRepetitionStart (char ch ) {
634675 switch (ch ) {
635676 case '?' :
0 commit comments