-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathScanner.hs
240 lines (214 loc) · 5.86 KB
/
Scanner.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Scanner(LoxTok(..), LoxTokInfo(..), scanner) where
import Import hiding (many, (<|>), try)
import Data.Text as T
import Data.Char
import Text.Parsec.String as PS
import Text.Parsec.Char as PC
import Text.Parsec
import RIO.Partial (read)
data LoxObject = JString | JDouble
deriving (Show, Eq)
data LoxTok
= -- Single-character tokens.
LEFT_PAREN
| RIGHT_PAREN
| LEFT_BRACE
| RIGHT_BRACE
| COMMA
| DOT
| MINUS
| PLUS
| SEMICOLON
| SLASH
| STAR
| -- One or two character tokens.
BANG
| BANG_EQUAL
| EQUAL
| EQUAL_EQUAL
| GREATER
| GREATER_EQUAL
| LESS
| LESS_EQUAL
| -- Literals.
IDENTIFIER String
| STRING String
| NUMBER Double
| COMMENT Text
-- Keywords.
| AND
| CLASS
| ELSE
| FALSE
| FUN
| FOR
| IF
| NIL
| OR
| PRINT
| RETURN
| SUPER
| THIS
| TRUE
| VAR
| WHILE
| WHITESPACE
| EOF
deriving (Show, Eq)
data LoxTokInfo = LoxTokInfo
{ tokinfo_type :: LoxTok,
tokinfo_lexeme :: Maybe T.Text,
tokinfo_literal :: Maybe LoxObject,
tok_position :: SourcePos
}
deriving (Show, Eq)
tokenShow :: LoxTokInfo -> String
tokenShow t = "LoxTok=" ++ show (tokinfo_type t)
type LoxScannerResult = Either ParseError [LoxTokInfo]
-- type LoxScanner = Parsec String () [LoxTok]
whitespace :: Parser ()
whitespace = void $ many $ oneOf " \n\t"
-- whitespaceToken1 :: Parser LoxTokInfo
-- whitespaceToken1 = do
-- source_pos <- getPosition
-- return $ LoxTokInfo WHITESPACE Nothing Nothing source_pos
whitespaceToken :: Parser LoxTokInfo
charMapping :: [(LoxTok, Char)]
charMapping =
[ (LEFT_PAREN, '('),
(RIGHT_PAREN, ')'),
(LEFT_BRACE, '{'),
(RIGHT_BRACE, '}'),
(COMMA, ','),
(DOT, '.'),
(MINUS, '-'),
(PLUS, '+'),
(SEMICOLON, ';'),
(SLASH, '/'),
(STAR, '*'),
(BANG, '!'),
(EQUAL, '='),
(GREATER, '>'),
(LESS, '<')
]
scanSingleCharToken :: Parser LoxTokInfo
scanSingleCharToken = do
source_pos <- getPosition
sel <- choice $ build <$> charMapping
return $ LoxTokInfo sel Nothing Nothing source_pos
where
build :: (LoxTok, Char) -> Parser LoxTok
build (x, y) = x <$ char y <* whitespace
doubleCharMapping :: [(LoxTok, String)]
doubleCharMapping =
[ (BANG_EQUAL, "!="),
(EQUAL_EQUAL, "=="),
(GREATER_EQUAL, ">="),
(LESS_EQUAL, "<=")
]
scanDoubleToken :: Parser LoxTokInfo
scanDoubleToken = do
source_pos <- getPosition
sel <- choice $ build <$> doubleCharMapping
return $ LoxTokInfo sel Nothing Nothing source_pos
where
build :: (LoxTok, String) -> Parser LoxTok
build (x, y) = x <$ string y <* whitespace
keywordMapping :: [(LoxTok, String)]
keywordMapping =
[ (AND, "and"),
(CLASS, "class"),
(ELSE, "else"),
(FALSE, "false"),
(FUN, "fun"),
(FOR, "for"),
(IF, "if"),
(NIL, "nil"),
(OR, "or"),
(PRINT, "print"),
(RETURN, "return"),
(SUPER, "super"),
(THIS, "this"),
(TRUE, "true"),
(VAR, "var"),
(WHILE, "while")
]
scanKeywordToken :: Parser LoxTokInfo
scanKeywordToken = do
source_pos <- getPosition
sel <- choice $ build <$> keywordMapping
return $ LoxTokInfo sel Nothing Nothing source_pos
where
build :: (LoxTok, String) -> Parser LoxTok
build (x, y) = x <$ string y <* whitespace
whitespaceToken = do
source_pos <- getPosition
_ <- many1 $ char ' '
return $ LoxTokInfo WHITESPACE Nothing Nothing source_pos
scanDouble :: Parser LoxTokInfo
scanDouble = do
source_pos <- getPosition
let la = lookAhead (whitespaceToken <|> scanSingleCharToken)
sel <- do
firstPart <- Text.Parsec.many1 digit
try (secondCharacter firstPart <* la <* whitespace) <|> NUMBER (read firstPart) <$ la <* whitespace
return $ LoxTokInfo sel Nothing Nothing source_pos
where
secondCharacter :: String -> Parser LoxTok
secondCharacter firstPart = do
void $ char '.'
secondPart <- Text.Parsec.many1 digit <* whitespace
return $ NUMBER $ read $ Import.concat [firstPart, ".", secondPart]
-- -- https://stackoverflow.com/questions/24106314/parser-for-quoted-string-using-parsec
escape :: Parser String
escape = do
d <- char '\\'
c <- oneOf "\\\"0nrvtbf" -- all the characters which can be escaped
return [d, c]
nonEscape :: Parser Char
nonEscape = noneOf "\\\"\0\n\r\v\t\b\f"
character :: Parser String
character = fmap return nonEscape <|> escape
scanQuotedString :: Parser LoxTokInfo
scanQuotedString = do
source_pos <- getPosition
qstring <- char '"' *> many character <* char '"' <* whitespace
return $ LoxTokInfo (STRING $ Import.concat qstring) Nothing Nothing source_pos
-- -- http://jakewheat.github.io/intro_to_parsing/#_var
var :: Parser String
var = do
fc <- firstChar
rest <- many nonFirstChar
return (fc : rest)
where
firstChar = satisfy (\a -> isLetter a || a == '_')
nonFirstChar = satisfy (\a -> isDigit a || isLetter a || a == '_')
checkIfIdentifier :: Parser LoxTokInfo
checkIfIdentifier = do
source_pos <- getPosition
s <- var <* whitespace
result ([(x, y) | (x, y) <- keywordMapping, y == s]) s source_pos
where
result xs s source_pos = do
case xs of
[] -> return $ LoxTokInfo (IDENTIFIER s) Nothing Nothing source_pos
(x, _) : _ -> return $ LoxTokInfo x Nothing Nothing source_pos
scanComment :: Parser LoxTokInfo
scanComment = do
source_pos <- getPosition
_ <- string "//"
-- TODO: Find a better way to do this, scanning this more than once is not desirable
comment <- try (manyTill anyToken (try (oneOf "\n"))) <|> manyTill anyToken eof
return $ LoxTokInfo (COMMENT (T.pack comment)) Nothing Nothing source_pos
scanToken :: Parser LoxTokInfo
scanToken =
try scanComment
<|> try scanDoubleToken
<|> try scanSingleCharToken
<|> try scanQuotedString
<|> scanDouble
<|> checkIfIdentifier
scanner :: String -> LoxScannerResult
scanner = parse (many scanToken <* eof) ""