1
1
-- -----------------------------------------------------------------------------
2
- --
2
+ --
3
3
-- ParseMonad.hs, part of Alex
4
4
--
5
5
-- (c) Simon Marlow 2003
9
9
module ParseMonad (
10
10
AlexInput , alexInputPrevChar , alexGetChar , alexGetByte ,
11
11
AlexPosn (.. ), alexStartPos ,
12
-
12
+ Warning ( .. ), warnIfNullable ,
13
13
P , runP , StartCode , failP , lookupSMac , lookupRMac , newSMac , newRMac ,
14
14
setStartCode , getStartCode , getInput , setInput ,
15
15
) where
@@ -23,7 +23,7 @@ import UTF8
23
23
#if __GLASGOW_HASKELL__ < 710
24
24
import Control.Applicative ( Applicative (.. ) )
25
25
#endif
26
- import Control.Monad ( liftM , ap )
26
+ import Control.Monad ( liftM , ap , when )
27
27
import Data.Word (Word8 )
28
28
-- -----------------------------------------------------------------------------
29
29
-- The input type
@@ -49,15 +49,15 @@ alexGetChar (_, _ ,_ : _, _) = undefined -- hide compiler warning
49
49
alexGetByte :: AlexInput -> Maybe (Byte ,AlexInput )
50
50
alexGetByte (p,c,(b: bs),s) = Just (b,(p,c,bs,s))
51
51
alexGetByte (_,_,[] ,[] ) = Nothing
52
- alexGetByte (p,_,[] ,(c: s)) = let p' = alexMove p c
52
+ alexGetByte (p,_,[] ,(c: s)) = let p' = alexMove p c
53
53
(b: bs) = UTF8. encode c
54
54
in p' `seq` Just (b, (p', c, bs, s))
55
55
56
56
-- -----------------------------------------------------------------------------
57
57
-- Token positions
58
58
59
59
-- `Posn' records the location of a token in the input text. It has three
60
- -- fields: the address (number of chacaters preceding the token), line number
60
+ -- fields: the address (number of charaters preceding the token), line number
61
61
-- and column of a token within the file. `start_pos' gives the position of the
62
62
-- start of the file and `eof_pos' a standard encoding for the end of file.
63
63
-- `move_pos' calculates the new position after traversing a given character,
@@ -77,15 +77,22 @@ alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1)
77
77
-- -----------------------------------------------------------------------------
78
78
-- Alex lexing/parsing monad
79
79
80
+ data Warning
81
+ = WarnNullableRExp
82
+ { _warnPos :: AlexPosn -- ^ The position of the code following the regex.
83
+ , _warnText :: String -- ^ Warning text.
84
+ }
85
+
80
86
type ParseError = (Maybe AlexPosn , String )
81
87
type StartCode = Int
82
88
83
- data PState = PState {
84
- smac_env :: Map String CharSet ,
85
- rmac_env :: Map String RExp ,
86
- startcode :: Int ,
87
- input :: AlexInput
88
- }
89
+ data PState = PState
90
+ { warnings :: [Warning ] -- ^ Stack of warnings, top = last warning.
91
+ , smac_env :: Map String CharSet
92
+ , rmac_env :: Map String RExp
93
+ , startcode :: Int
94
+ , input :: AlexInput
95
+ }
89
96
90
97
newtype P a = P { unP :: PState -> Either ParseError (PState ,a ) }
91
98
@@ -102,15 +109,27 @@ instance Monad P where
102
109
Right (env',ok) -> unP (k ok) env'
103
110
return = pure
104
111
105
- runP :: String -> (Map String CharSet , Map String RExp )
106
- -> P a -> Either ParseError a
107
- runP str (senv,renv) (P p)
112
+ -- | Run the parser on given input.
113
+ runP :: String
114
+ -- ^ Input string.
115
+ -> (Map String CharSet , Map String RExp )
116
+ -- ^ Character set and regex definitions.
117
+ -> P a
118
+ -- ^ Parsing computation.
119
+ -> Either ParseError ([Warning ], a )
120
+ -- ^ List of warnings in first-to-last order, result.
121
+ runP str (senv,renv) (P p)
108
122
= case p initial_state of
109
123
Left err -> Left err
110
- Right (_,a) -> Right a
111
- where initial_state =
112
- PState { smac_env= senv, rmac_env= renv,
113
- startcode = 0 , input= (alexStartPos,' \n ' ,[] ,str) }
124
+ Right (s, a) -> Right (reverse (warnings s), a)
125
+ where
126
+ initial_state = PState
127
+ { warnings = []
128
+ , smac_env = senv
129
+ , rmac_env = renv
130
+ , startcode = 0
131
+ , input = (alexStartPos, ' \n ' , [] , str)
132
+ }
114
133
115
134
failP :: String -> P a
116
135
failP str = P $ \ PState { input = (p,_,_,_) } -> Left (Just p,str)
@@ -121,24 +140,24 @@ failP str = P $ \PState{ input = (p,_,_,_) } -> Left (Just p,str)
121
140
122
141
lookupSMac :: (AlexPosn ,String ) -> P CharSet
123
142
lookupSMac (posn,smac)
124
- = P $ \ s@ PState { smac_env = senv } ->
143
+ = P $ \ s@ PState { smac_env = senv } ->
125
144
case Map. lookup smac senv of
126
145
Just ok -> Right (s,ok)
127
146
Nothing -> Left (Just posn, " unknown set macro: $" ++ smac)
128
147
129
148
lookupRMac :: String -> P RExp
130
- lookupRMac rmac
131
- = P $ \ s@ PState { rmac_env = renv } ->
149
+ lookupRMac rmac
150
+ = P $ \ s@ PState { rmac_env = renv } ->
132
151
case Map. lookup rmac renv of
133
152
Just ok -> Right (s,ok)
134
153
Nothing -> Left (Nothing , " unknown regex macro: %" ++ rmac)
135
154
136
155
newSMac :: String -> CharSet -> P ()
137
- newSMac smac set
156
+ newSMac smac set
138
157
= P $ \ s -> Right (s{smac_env = Map. insert smac set (smac_env s)}, () )
139
158
140
159
newRMac :: String -> RExp -> P ()
141
- newRMac rmac rexp
160
+ newRMac rmac rexp
142
161
= P $ \ s -> Right (s{rmac_env = Map. insert rmac rexp (rmac_env s)}, () )
143
162
144
163
setStartCode :: StartCode -> P ()
@@ -152,3 +171,21 @@ getInput = P $ \s -> Right (s, input s)
152
171
153
172
setInput :: AlexInput -> P ()
154
173
setInput inp = P $ \ s -> Right (s{ input = inp }, () )
174
+
175
+ -- | Add a warning if given regular expression is nullable
176
+ -- unless the user wrote the regex 'Eps'.
177
+ warnIfNullable
178
+ :: RExp -- ^ Regular expression.
179
+ -> AlexPosn -- ^ Position associated to regular expression.
180
+ -> P ()
181
+ -- If the user wrote @()@, they wanted to match the empty sequence!
182
+ -- Thus, skip the warning then.
183
+ warnIfNullable Eps _ = return ()
184
+ warnIfNullable r pos = when (nullable r) $ P $ \ s ->
185
+ Right (s{ warnings = WarnNullableRExp pos w : warnings s}, () )
186
+ where
187
+ w = unwords
188
+ [ " Regular expression"
189
+ , show r
190
+ , " matches the empty string."
191
+ ]
0 commit comments