forked from haskell/alex
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMain.hs
529 lines (461 loc) · 18.9 KB
/
Main.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
{-# LANGUAGE CPP #-}
-- -----------------------------------------------------------------------------
--
-- Main.hs, part of Alex
--
-- (c) Chris Dornan 1995-2000, Simon Marlow 2003
--
-- ----------------------------------------------------------------------------}
module Main (main) where
import AbsSyn
import CharSet
import DFA
import DFAMin
import NFA
import Info
import Map ( Map )
import qualified Map hiding ( Map )
import Output
import ParseMonad ( runP )
import Parser
import Scan
import Util ( hline )
import Paths_alex ( version, getDataDir )
#if __GLASGOW_HASKELL__ < 610
import Control.Exception as Exception ( block, unblock, catch, throw )
#endif
#if __GLASGOW_HASKELL__ >= 610
import Control.Exception ( bracketOnError )
#endif
import Control.Monad ( when, liftM )
import Data.Char ( chr )
import Data.List ( isSuffixOf, nub )
import qualified Data.List as List
import Data.Maybe ( isJust, fromJust )
import Data.Version ( showVersion )
import System.Console.GetOpt ( getOpt, usageInfo, ArgOrder(..), OptDescr(..), ArgDescr(..) )
import System.Directory ( removeFile )
import System.Environment ( getProgName, getArgs )
import System.Exit ( ExitCode(..), exitWith )
import System.IO ( stderr, Handle, IOMode(..), openFile, hClose, hPutStr, hPutStrLn )
#if __GLASGOW_HASKELL__ >= 612
import System.IO ( hGetContents, hSetEncoding, utf8 )
#endif
-- We need to force every file we open to be read in
-- as UTF8
alexReadFile :: FilePath -> IO String
#if __GLASGOW_HASKELL__ >= 612
alexReadFile file = do
h <- alexOpenFile file ReadMode
hGetContents h
#else
alexReadFile = readFile
#endif
-- We need to force every file we write to be written
-- to as UTF8
alexOpenFile :: FilePath -> IOMode -> IO Handle
#if __GLASGOW_HASKELL__ >= 612
alexOpenFile file mode = do
h <- openFile file mode
hSetEncoding h utf8
return h
#else
alexOpenFile = openFile
#endif
-- `main' decodes the command line arguments and calls `alex'.
main:: IO ()
main = do
args <- getArgs
case getOpt Permute argInfo args of
(cli,_,[]) | DumpHelp `elem` cli -> do
prog <- getProgramName
bye (usageInfo (usageHeader prog) argInfo)
(cli,_,[]) | DumpVersion `elem` cli ->
bye copyright
(cli,[file],[]) ->
runAlex cli file
(_,_,errors) -> do
prog <- getProgramName
die (concat errors ++ usageInfo (usageHeader prog) argInfo)
projectVersion :: String
projectVersion = showVersion version
copyright :: String
copyright = "Alex version " ++ projectVersion ++ ", (c) 2003 Chris Dornan and Simon Marlow\n"
usageHeader :: String -> String
usageHeader prog = "Usage: " ++ prog ++ " [OPTION...] file\n"
runAlex :: [CLIFlags] -> FilePath -> IO ()
runAlex cli file = do
basename <- case (reverse file) of
'x':'.':r -> return (reverse r)
_ -> die (file ++ ": filename must end in \'.x\'\n")
prg <- alexReadFile file
script <- parseScript file prg
alex cli file basename script
parseScript :: FilePath -> String
-> IO (Maybe (AlexPosn,Code), [Directive], Scanner, Maybe (AlexPosn,Code))
parseScript file prg =
case runP prg initialParserEnv parse of
Left (Just (AlexPn _ line col),err) ->
die (file ++ ":" ++ show line ++ ":" ++ show col
++ ": " ++ err ++ "\n")
Left (Nothing, err) ->
die (file ++ ": " ++ err ++ "\n")
Right script -> return script
alex :: [CLIFlags]
-> FilePath
-> FilePath
-> (Maybe (AlexPosn, Code), [Directive], Scanner, Maybe (AlexPosn, Code))
-> IO ()
alex cli file basename script = do
(put_info, finish_info) <-
case [ f | OptInfoFile f <- cli ] of
[] -> return (\_ -> return (), return ())
[Nothing] -> infoStart file (basename ++ ".info")
[Just f] -> infoStart file f
_ -> dieAlex "multiple -i/--info options"
o_file <- case [ f | OptOutputFile f <- cli ] of
[] -> return (basename ++ ".hs")
[f] -> return f
_ -> dieAlex "multiple -o/--outfile options"
tab_size <- case [ s | OptTabSize s <- cli ] of
[] -> return (8 :: Int)
[s] -> case reads s of
[(n,"")] -> return n
_ -> dieAlex "-s/--tab-size option is not a valid integer"
_ -> dieAlex "multiple -s/--tab-size options"
let target
| OptGhcTarget `elem` cli = GhcTarget
| otherwise = HaskellTarget
let encodingsCli
| OptLatin1 `elem` cli = [Latin1]
| otherwise = []
template_dir <- templateDir getDataDir cli
let maybe_header, maybe_footer :: Maybe (AlexPosn, Code)
directives :: [Directive]
scanner1 :: Scanner
(maybe_header, directives, scanner1, maybe_footer) = script
scheme <- getScheme directives
-- open the output file; remove it if we encounter an error
bracketOnError
(alexOpenFile o_file WriteMode)
(\h -> do hClose h; removeFile o_file)
$ \out_h -> do
let wrapper_name :: Maybe FilePath
scanner2, scanner_final :: Scanner
scs :: [StartCode]
sc_hdr, actions :: ShowS
encodingsScript :: [Encoding]
wrapper_name = wrapperFile template_dir scheme
(scanner2, scs, sc_hdr) = encodeStartCodes scanner1
(scanner_final, actions) = extractActions scheme scanner2
encodingsScript = [ e | EncodingDirective e <- directives ]
encoding <- case nub (encodingsCli ++ encodingsScript) of
[] -> return UTF8 -- default
[e] -> return e
_ | null encodingsCli -> dieAlex "conflicting %encoding directives"
| otherwise -> dieAlex "--latin1 flag conflicts with %encoding directive"
hPutStr out_h (optsToInject target cli)
injectCode maybe_header file out_h
hPutStr out_h (importsToInject target cli)
-- add the wrapper, if necessary
when (isJust wrapper_name) $
do str <- alexReadFile (fromJust wrapper_name)
hPutStr out_h str
-- Inject the tab size
hPutStrLn out_h $ "alex_tab_size :: Int"
hPutStrLn out_h $ "alex_tab_size = " ++ show (tab_size :: Int)
let dfa = scanner2dfa encoding scanner_final scs
min_dfa = minimizeDFA dfa
nm = scannerName scanner_final
usespreds = usesPreds min_dfa
put_info "\nStart codes\n"
put_info (show $ scs)
put_info "\nScanner\n"
put_info (show $ scanner_final)
put_info "\nNFA\n"
put_info (show $ scanner2nfa encoding scanner_final scs)
put_info "\nDFA"
put_info (infoDFA 1 nm dfa "")
put_info "\nMinimized DFA"
put_info (infoDFA 1 nm min_dfa "")
hPutStr out_h (outputDFA target 1 nm scheme min_dfa "")
injectCode maybe_footer file out_h
hPutStr out_h (sc_hdr "")
hPutStr out_h (actions "")
-- add the template
let template_name = templateFile template_dir target encoding usespreds cli
tmplt <- alexReadFile template_name
hPutStr out_h tmplt
hClose out_h
finish_info
getScheme :: [Directive] -> IO Scheme
getScheme directives =
do
token <- case [ ty | TokenType ty <- directives ] of
[] -> return Nothing
[res] -> return (Just res)
_ -> dieAlex "multiple %token directives"
action <- case [ ty | ActionType ty <- directives ] of
[] -> return Nothing
[res] -> return (Just res)
_ -> dieAlex "multiple %action directives"
typeclass <- case [ tyclass | TypeClass tyclass <- directives ] of
[] -> return Nothing
[res] -> return (Just res)
_ -> dieAlex "multiple %typeclass directives"
case [ f | WrapperDirective f <- directives ] of
[] ->
case (typeclass, token, action) of
(Nothing, Nothing, Nothing) ->
return Default { defaultTypeInfo = Nothing }
(Nothing, Nothing, Just actionty) ->
return Default { defaultTypeInfo = Just (Nothing, actionty) }
(Just _, Nothing, Just actionty) ->
return Default { defaultTypeInfo = Just (typeclass, actionty) }
(_, Just _, _) ->
dieAlex "%token directive only allowed with a wrapper"
(Just _, Nothing, Nothing) ->
dieAlex "%typeclass directive without %token directive"
[single]
| single == "gscan" ->
case (typeclass, token, action) of
(Nothing, Nothing, Nothing) ->
return GScan { gscanTypeInfo = Nothing }
(Nothing, Just tokenty, Nothing) ->
return GScan { gscanTypeInfo = Just (Nothing, tokenty) }
(Just _, Just tokenty, Nothing) ->
return GScan { gscanTypeInfo = Just (typeclass, tokenty) }
(_, _, Just _) ->
dieAlex "%action directive not allowed with a wrapper"
(Just _, Nothing, Nothing) ->
dieAlex "%typeclass directive without %token directive"
| single == "basic" || single == "basic-bytestring" ||
single == "strict-bytestring" ->
let
strty = case single of
"basic" -> Str
"basic-bytestring" -> Lazy
"strict-bytestring" -> Strict
_ -> error "Impossible case"
in case (typeclass, token, action) of
(Nothing, Nothing, Nothing) ->
return Basic { basicStrType = strty,
basicTypeInfo = Nothing }
(Nothing, Just tokenty, Nothing) ->
return Basic { basicStrType = strty,
basicTypeInfo = Just (Nothing, tokenty) }
(Just _, Just tokenty, Nothing) ->
return Basic { basicStrType = strty,
basicTypeInfo = Just (typeclass, tokenty) }
(_, _, Just _) ->
dieAlex "%action directive not allowed with a wrapper"
(Just _, Nothing, Nothing) ->
dieAlex "%typeclass directive without %token directive"
| single == "posn" || single == "posn-bytestring" ->
let
isByteString = single == "posn-bytestring"
in case (typeclass, token, action) of
(Nothing, Nothing, Nothing) ->
return Posn { posnByteString = isByteString,
posnTypeInfo = Nothing }
(Nothing, Just tokenty, Nothing) ->
return Posn { posnByteString = isByteString,
posnTypeInfo = Just (Nothing, tokenty) }
(Just _, Just tokenty, Nothing) ->
return Posn { posnByteString = isByteString,
posnTypeInfo = Just (typeclass, tokenty) }
(_, _, Just _) ->
dieAlex "%action directive not allowed with a wrapper"
(Just _, Nothing, Nothing) ->
dieAlex "%typeclass directive without %token directive"
| single == "monad" || single == "monad-bytestring" ||
single == "monadUserState" ||
single == "monadUserState-bytestring" ->
let
isByteString = single == "monad-bytestring" ||
single == "monadUserState-bytestring"
userState = single == "monadUserState" ||
single == "monadUserState-bytestring"
in case (typeclass, token, action) of
(Nothing, Nothing, Nothing) ->
return Monad { monadByteString = isByteString,
monadUserState = userState,
monadTypeInfo = Nothing }
(Nothing, Just tokenty, Nothing) ->
return Monad { monadByteString = isByteString,
monadUserState = userState,
monadTypeInfo = Just (Nothing, tokenty) }
(Just _, Just tokenty, Nothing) ->
return Monad { monadByteString = isByteString,
monadUserState = userState,
monadTypeInfo = Just (typeclass, tokenty) }
(_, _, Just _) ->
dieAlex "%action directive not allowed with a wrapper"
(Just _, Nothing, Nothing) ->
dieAlex "%typeclass directive without %token directive"
| otherwise -> dieAlex ("unknown wrapper type " ++ single)
_many -> dieAlex "multiple %wrapper directives"
-- inject some code, and add a {-# LINE #-} pragma at the top
injectCode :: Maybe (AlexPosn,Code) -> FilePath -> Handle -> IO ()
injectCode Nothing _ _ = return ()
injectCode (Just (AlexPn _ ln _,code)) filename hdl = do
hPutStrLn hdl ("{-# LINE " ++ show ln ++ " \"" ++ filename ++ "\" #-}")
hPutStrLn hdl code
optsToInject :: Target -> [CLIFlags] -> String
optsToInject GhcTarget _ = optNoWarnings ++ "{-# LANGUAGE CPP,MagicHash #-}\n"
optsToInject _ _ = optNoWarnings ++ "{-# LANGUAGE CPP #-}\n"
optNoWarnings :: String
optNoWarnings = "{-# OPTIONS_GHC -fno-warn-unused-binds -fno-warn-missing-signatures #-}\n"
importsToInject :: Target -> [CLIFlags] -> String
importsToInject _ cli = always_imports ++ debug_imports ++ glaexts_import
where
glaexts_import | OptGhcTarget `elem` cli = import_glaexts
| otherwise = ""
debug_imports | OptDebugParser `elem` cli = import_debug
| otherwise = ""
-- CPP is turned on for -fglasogw-exts, so we can use conditional
-- compilation. We need to #include "config.h" to get hold of
-- WORDS_BIGENDIAN (see GenericTemplate.hs).
always_imports :: String
always_imports = "#if __GLASGOW_HASKELL__ >= 603\n" ++
"#include \"ghcconfig.h\"\n" ++
"#elif defined(__GLASGOW_HASKELL__)\n" ++
"#include \"config.h\"\n" ++
"#endif\n" ++
"#if __GLASGOW_HASKELL__ >= 503\n" ++
"import Data.Array\n" ++
"#else\n" ++
"import Array\n" ++
"#endif\n"
import_glaexts :: String
import_glaexts = "#if __GLASGOW_HASKELL__ >= 503\n" ++
"import Data.Array.Base (unsafeAt)\n" ++
"import GHC.Exts\n" ++
"#else\n" ++
"import GlaExts\n" ++
"#endif\n"
import_debug :: String
import_debug = "#if __GLASGOW_HASKELL__ >= 503\n" ++
"import System.IO\n" ++
"import System.IO.Unsafe\n" ++
"import Debug.Trace\n" ++
"#else\n" ++
"import IO\n" ++
"import IOExts\n" ++
"#endif\n"
templateDir :: IO FilePath -> [CLIFlags] -> IO FilePath
templateDir def cli
= case [ d | OptTemplateDir d <- cli ] of
[] -> def
ds -> return (last ds)
-- Keep this function in sync with its twin in gen-alex-sdist/Main.hs.
templateFileName :: Bool -> Bool -> Bool -> Bool -> FilePath
templateFileName ghc latin1 nopred debug =
List.intercalate "-" $ concat
[ [ "AlexTemplate" ]
, [ "ghc" | ghc ]
, [ "latin1" | latin1 ]
, [ "nopred" | nopred ]
, [ "debug" | debug ]
]
templateFile :: FilePath -> Target -> Encoding -> UsesPreds -> [CLIFlags] -> FilePath
templateFile dir target encoding usespreds cli = concat
[ dir
, "/"
, templateFileName
(target == GhcTarget)
(encoding == Latin1)
(usespreds == DoesntUsePreds)
(OptDebugParser `elem` cli)
]
wrapperFile :: FilePath -> Scheme -> Maybe FilePath
wrapperFile dir scheme =
do
f <- wrapperName scheme
return (dir ++ "/AlexWrapper-" ++ f)
infoStart :: FilePath -> FilePath -> IO (String -> IO (), IO ())
infoStart x_file info_file = do
bracketOnError
(alexOpenFile info_file WriteMode)
(\h -> do hClose h; removeFile info_file)
(\h -> do infoHeader h x_file
return (hPutStr h, hClose h)
)
infoHeader :: Handle -> FilePath -> IO ()
infoHeader h file = do
-- hSetBuffering h NoBuffering
hPutStrLn h ("Info file produced by Alex version " ++ projectVersion ++
", from " ++ file)
hPutStrLn h hline
hPutStr h "\n"
initialParserEnv :: (Map String CharSet, Map String RExp)
initialParserEnv = (initSetEnv, initREEnv)
initSetEnv :: Map String CharSet
initSetEnv = Map.fromList [("white", charSet " \t\n\v\f\r"),
("printable", charSetRange (chr 32) (chr 0x10FFFF)), -- FIXME: Look it up the unicode standard
(".", charSetComplement emptyCharSet
`charSetMinus` charSetSingleton '\n')]
initREEnv :: Map String RExp
initREEnv = Map.empty
-- -----------------------------------------------------------------------------
-- Command-line flags
data CLIFlags
= OptDebugParser
| OptGhcTarget
| OptOutputFile FilePath
| OptInfoFile (Maybe FilePath)
| OptTabSize String
| OptTemplateDir FilePath
| OptLatin1
| DumpHelp
| DumpVersion
deriving Eq
argInfo :: [OptDescr CLIFlags]
argInfo = [
Option ['o'] ["outfile"] (ReqArg OptOutputFile "FILE")
"write the output to FILE (default: file.hs)",
Option ['i'] ["info"] (OptArg OptInfoFile "FILE")
"put detailed state-machine info in FILE (or file.info)",
Option ['t'] ["template"] (ReqArg OptTemplateDir "DIR")
"look in DIR for template files",
Option ['g'] ["ghc"] (NoArg OptGhcTarget)
"use GHC extensions",
Option ['l'] ["latin1"] (NoArg OptLatin1)
"generated lexer will use the Latin-1 encoding instead of UTF-8",
Option ['s'] ["tab-size"] (ReqArg OptTabSize "NUMBER")
"set tab size to be used in the generated lexer (default: 8)",
Option ['d'] ["debug"] (NoArg OptDebugParser)
"produce a debugging scanner",
Option ['?'] ["help"] (NoArg DumpHelp)
"display this help and exit",
Option ['V','v'] ["version"] (NoArg DumpVersion) -- ToDo: -v is deprecated!
"output version information and exit"
]
-- -----------------------------------------------------------------------------
-- Utils
getProgramName :: IO String
getProgramName = liftM (`withoutSuffix` ".bin") getProgName
where str `withoutSuffix` suff
| suff `isSuffixOf` str = take (length str - length suff) str
| otherwise = str
bye :: String -> IO a
bye s = putStr s >> exitWith ExitSuccess
die :: String -> IO a
die s = hPutStr stderr s >> exitWith (ExitFailure 1)
dieAlex :: String -> IO a
dieAlex s = getProgramName >>= \prog -> die (prog ++ ": " ++ s)
#if __GLASGOW_HASKELL__ < 610
bracketOnError
:: IO a -- ^ computation to run first (\"acquire resource\")
-> (a -> IO b) -- ^ computation to run last (\"release resource\")
-> (a -> IO c) -- ^ computation to run in-between
-> IO c -- returns the value from the in-between computation
bracketOnError before after thing =
block (do
a <- before
r <- Exception.catch
(unblock (thing a))
(\e -> do { after a; throw e })
return r
)
#endif