#
# This example demonstrates using Lark with a custom lexer.
#
# You can use a custom lexer to tokenize text when the lexers offered by Lark
# are too slow, or not flexible enough.
#
# You can also use it (as shown in this example) to tokenize streams of objects.
#
from lark import Lark, Transformer, v_args
from lark.lexer import Lexer, Token
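

# As used here, a custom lexer is a subclass of lark.lexer.Lexer: Lark
# instantiates it with the lexer configuration and calls its lex() method
# on the parse input, which must yield Token objects.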
class TypeLexer(Lexer):
    """'Tokenize' a stream of Python objects according to their type."""

    def __init__(self, lexer_conf):
        pass

    def lex(self, data):
        for obj in data:
            if isinstance(obj, int):
                yield Token('INT', obj)
            elif isinstance(obj, str):
                yield Token('STR', obj)
            else:
                raise TypeError(obj)
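

# A quick sanity check of the lexer on its own (illustrative; None stands in
# for the lexer configuration, which this lexer ignores):
#
#     >>> [t.type for t in TypeLexer(None).lex(['a', 1])]
#     ['STR', 'INT']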


# %declare announces terminal names without defining patterns for them,
# since it is the custom lexer, not Lark, that produces these tokens.
parser = Lark("""
        start: data_item+
        data_item: STR INT*

        %declare STR INT
        """, parser='lalr', lexer=TypeLexer)


class ParseToDict(Transformer):
    @v_args(inline=True)
    def data_item(self, name, *numbers):
        # v_args(inline=True) passes the rule's children as positional
        # arguments, so each data_item becomes a (name, numbers) pair.
        return name.value, [n.value for n in numbers]

    start = dict    # Collect the (name, numbers) pairs into a dict


def test():
    data = ['alice', 1, 27, 3, 'bob', 4, 'carrie', 'dan', 8, 6]
    print(data)

    tree = parser.parse(data)
    res = ParseToDict().transform(tree)

    print('-->')
    print(res)    # prints {'alice': [1, 27, 3], 'bob': [4], 'carrie': [], 'dan': [8, 6]}


if __name__ == '__main__':
    test()