# encoding: utf-8
require 'ebnf/ll1/lexer'

module RDF::Turtle
  ##
  # Definitions of token regular expressions used for lexical analysis.
  #
  # Every terminal is a frozen Regexp constant. Larger terminals are built by
  # interpolating smaller ones, so the order of definition matters: a constant
  # must be defined before any pattern that interpolates it.
  #
  # The short comment above each constant (e.g. `# 163s`) is carried over from
  # the original file; it appears to be the production number of the terminal
  # in the Turtle grammar -- confirm against the grammar revision in use.
  #
  # All literal patterns carry the `u` flag and the `Regexp.compile` calls pass
  # `Regexp::FIXEDENCODING`, so every terminal is pinned to a fixed encoding
  # rather than depending on the encoding of whatever it is combined with.
  module Terminals
    ##
    # Unicode regular expressions for Ruby 1.9+ with the Oniguruma engine.
    #
    # The heredoc is folded onto one line by stripping all whitespace before
    # compilation, so the indentation and line breaks below are cosmetic.
    #
    # NOTE(review): the first two rows of the heredoc were not visible in the
    # reviewed change (elided by the diff hunk boundary) and are restored from
    # the non-ASCII ranges of the Turtle PN_CHARS_BASE production -- confirm
    # against the repository copy.
    U_CHARS1 = Regexp.compile(<<-EOS.gsub(/\s+/, '')).freeze
                 [\\u00C0-\\u00D6]|[\\u00D8-\\u00F6]|[\\u00F8-\\u02FF]|
                 [\\u0370-\\u037D]|[\\u037F-\\u1FFF]|[\\u200C-\\u200D]|
                 [\\u2070-\\u218F]|[\\u2C00-\\u2FEF]|[\\u3001-\\uD7FF]|
                 [\\uF900-\\uFDCF]|[\\uFDF0-\\uFFFD]|[\\u{10000}-\\u{EFFFF}]
               EOS
    # Additional code points allowed in PN_CHARS on top of PN_CHARS_U.
    U_CHARS2 = Regexp.compile("\\u00B7|[\\u0300-\\u036F]|[\\u203F-\\u2040]", Regexp::FIXEDENCODING).freeze
    # One character permitted inside <...>: the intersection of "not one of
    # < > \" { } | ^ ` \\" and "not in the range U+0000..U+0020".
    IRI_RANGE = Regexp.compile("[[^<>\"{}|^`\\\\]&&[^\\x00-\\x20]]", Regexp::FIXEDENCODING).freeze

    # 26
    # Reused from the EBNF lexer rather than redefined here.
    UCHAR = EBNF::LL1::Lexer::UCHAR
    # 170s
    PERCENT = /%[0-9A-Fa-f]{2}/u.freeze
    # 172s
    PN_LOCAL_ESC = /\\[_~\.\-\!$\&'\(\)\*\+,;=\/\?\#@%]/u.freeze
    # 169s
    PLX = /#{PERCENT}|#{PN_LOCAL_ESC}/u.freeze
    # 163s
    PN_CHARS_BASE = /[A-Z]|[a-z]|#{U_CHARS1}/u.freeze
    # 164s
    PN_CHARS_U = /_|#{PN_CHARS_BASE}/u.freeze
    # 166s
    PN_CHARS = /-|[0-9]|#{PN_CHARS_U}|#{U_CHARS2}/u.freeze
    # Optional tail of a local name: may contain '.' but must not end with one.
    PN_LOCAL_BODY = /(?:(?:\.|:|#{PN_CHARS}|#{PLX})*(?:#{PN_CHARS}|:|#{PLX}))?/u.freeze
    # Optional tail of a prefix: may contain '.' but must not end with one.
    PN_CHARS_BODY = /(?:(?:\.|#{PN_CHARS})*#{PN_CHARS})?/u.freeze
    # 167s
    PN_PREFIX = /#{PN_CHARS_BASE}#{PN_CHARS_BODY}/u.freeze
    # 168s
    PN_LOCAL = /(?:[0-9]|:|#{PN_CHARS_U}|#{PLX})#{PN_LOCAL_BODY}/u.freeze
    # 154s
    EXPONENT = /[eE][+-]?[0-9]+/u.freeze
    # 159s
    ECHAR = /\\[tbnrf\\"']/u.freeze
    # 18
    IRIREF = /<(?:#{IRI_RANGE}|#{UCHAR})*>/u.freeze
    # 139s
    PNAME_NS = /#{PN_PREFIX}?:/u.freeze
    # 140s
    PNAME_LN = /#{PNAME_NS}#{PN_LOCAL}/u.freeze
    # 141s
    BLANK_NODE_LABEL = /_:(?:[0-9]|#{PN_CHARS_U})(?:(?:#{PN_CHARS}|\.)*#{PN_CHARS})?/u.freeze
    # 144s
    LANGTAG = /@[a-zA-Z]+(?:-[a-zA-Z0-9]+)*/u.freeze
    # 19
    INTEGER = /[+-]?[0-9]+/u.freeze
    # 20
    DECIMAL = /[+-]?(?:[0-9]*\.[0-9]+)/u.freeze
    # 21
    DOUBLE = /[+-]?(?:[0-9]+\.[0-9]*#{EXPONENT}|\.?[0-9]+#{EXPONENT})/u.freeze
    # 22
    STRING_LITERAL_SINGLE_QUOTE = /'(?:[^\'\\\n\r]|#{ECHAR}|#{UCHAR})*'/u.freeze
    # 23
    STRING_LITERAL_QUOTE = /"(?:[^\"\\\n\r]|#{ECHAR}|#{UCHAR})*"/u.freeze
    # 24
    # Multiline: up to two consecutive quotes may appear before each body character.
    STRING_LITERAL_LONG_SINGLE_QUOTE = /'''(?:(?:'|'')?(?:[^'\\]|#{ECHAR}|#{UCHAR}))*'''/um.freeze
    # 25
    # Multiline: up to two consecutive quotes may appear before each body character.
    STRING_LITERAL_LONG_QUOTE = /"""(?:(?:"|"")?(?:[^"\\]|#{ECHAR}|#{UCHAR}))*"""/um.freeze

    # 161s
    # A run of whitespace characters and/or '#' comments running to end of line.
    WS = /(?:\s|(?:#[^\n\r]*))+/um.freeze
    # 162s
    ANON = /\[#{WS}*\]/um.freeze
    # 28t
    # Case-insensitive, with an optional leading '@'.
    PREFIX = /@?prefix/ui.freeze
    # 29t
    # Case-insensitive, with an optional leading '@'.
    BASE = /@?base/ui.freeze

  end
end