@@ -660,20 +660,20 @@ lexStdToken = do
660660
661661 ' 0' : c: d: _ | toLower c == ' o' && isOctDigit d -> do
662662 discard 2
663- (n, str) <- lexOctal
663+ (n, str) <- lexOctal $ numUnderscoresEnabled exts
664664 con <- intHash
665665 return (con (n, ' 0' : c: str))
666666 | toLower c == ' b' && isBinDigit d && BinaryLiterals `elem` exts -> do
667667 discard 2
668- (n, str) <- lexBinary
668+ (n, str) <- lexBinary $ numUnderscoresEnabled exts
669669 con <- intHash
670670 return (con (n, ' 0' : c: str))
671671 | toLower c == ' x' && isHexDigit d && HexFloatLiterals `elem` exts -> do
672672 discard 2
673- lexHexadecimalFloat c
673+ lexHexadecimalFloat (numUnderscoresEnabled exts) c
674674 | toLower c == ' x' && isHexDigit d -> do
675675 discard 2
676- (n, str) <- lexHexadecimal
676+ (n, str) <- lexHexadecimal $ numUnderscoresEnabled exts
677677 con <- intHash
678678 return (con (n, ' 0' : c: str))
679679
@@ -806,7 +806,7 @@ lexStdToken = do
806806 return $ LabelVarId ident
807807
808808
809- c: _ | isDigit c -> lexDecimalOrFloat
809+ c: _ | isDigit c -> lexDecimalOrFloat $ numUnderscoresEnabled exts
810810
811811 | isUpper c -> lexConIdOrQual " "
812812
@@ -1012,77 +1012,76 @@ lexRawPragma = lexRawPragmaAux
10121012 rpr' <- lexRawPragma
10131013 return $ rpr ++ ' #' : rpr'
10141014
1015- lexDecimalOrFloat :: Lex a Token
1016- lexDecimalOrFloat = do
1017- ds <- lexWhile isDigit
1015+ lexDecimalOrFloat :: NumericUnderscoresAllowed -> Lex a Token
1016+ lexDecimalOrFloat underAllowed = do
1017+ (n, raw) <- lexHandleUnderAllowed underAllowed isDigit
10181018 rest <- getInput
10191019 exts <- getExtensionsL
10201020 case rest of
10211021 (' .' : d: _) | isDigit d -> do
10221022 discard 1
1023- frac <- lexWhile isDigit
1024- let num = parseInteger 10 (ds ++ frac)
1023+ ( frac, fracRaw) <- lexHandleUnderAllowed underAllowed isDigit
1024+ let num = parseInteger 10 (n ++ frac)
10251025 decimals = toInteger (length frac)
10261026 (exponent , estr) <- do
10271027 rest2 <- getInput
10281028 case rest2 of
1029- ' e' : _ -> lexExponent
1030- ' E' : _ -> lexExponent
1029+ ' e' : _ -> lexExponent underAllowed
1030+ ' E' : _ -> lexExponent underAllowed
10311031 _ -> return (0 ," " )
10321032 con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash )
1033- return $ con ((num% 1 ) * 10 ^^ (exponent - decimals), ds ++ ' .' : frac ++ estr)
1033+ return $ con ((num% 1 ) * 10 ^^ (exponent - decimals), raw ++ ' .' : fracRaw ++ estr)
10341034 e: _ | toLower e == ' e' -> do
1035- (exponent , estr) <- lexExponent
1035+ (exponent , estr) <- lexExponent underAllowed
10361036 con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash )
1037- return $ con ((parseInteger 10 ds % 1 ) * 10 ^^ exponent , ds ++ estr)
1038- ' #' : ' #' : _ | MagicHash `elem` exts -> discard 2 >> return (WordTokHash (parseInteger 10 ds, ds ))
1039- ' #' : _ | MagicHash `elem` exts -> discard 1 >> return (IntTokHash (parseInteger 10 ds, ds ))
1040- _ -> return (IntTok (parseInteger 10 ds, ds ))
1037+ return $ con ((parseInteger 10 n % 1 ) * 10 ^^ exponent , raw ++ estr)
1038+ ' #' : ' #' : _ | MagicHash `elem` exts -> discard 2 >> return (WordTokHash (parseInteger 10 n, raw ))
1039+ ' #' : _ | MagicHash `elem` exts -> discard 1 >> return (IntTokHash (parseInteger 10 n, raw ))
1040+ _ -> return (IntTok (parseInteger 10 n, raw ))
10411041
1042- lexExponent :: Lex a (Integer , String )
1043- lexExponent = do
1042+ lexExponent :: NumericUnderscoresAllowed -> Lex a (Integer , String )
1043+ lexExponent underAllowed = do
10441044 (e: r) <- getInput
10451045 discard 1 -- discard ex notation
10461046 case r of
10471047 ' +' : d: _ | isDigit d -> do
10481048 discard 1
1049- (n, str) <- lexDecimal
1049+ (n, str) <- lexDecimal underAllowed
10501050 return (n, e: ' +' : str)
10511051 ' -' : d: _ | isDigit d -> do
10521052 discard 1
1053- (n, str) <- lexDecimal
1053+ (n, str) <- lexDecimal underAllowed
10541054 return (negate n, e: ' -' : str)
1055- d: _ | isDigit d -> lexDecimal >>= \ (n,str) -> return (n, e: str)
1055+ d: _ | isDigit d -> lexDecimal underAllowed >>= \ (n,str) -> return (n, e: str)
10561056 _ -> fail " Float with missing exponent"
10571057
1058- lexHexadecimalFloat :: Char -> Lex a Token
1059- lexHexadecimalFloat c = do
1060- ds <- lexWhile isHexDigit
1058+ lexHexadecimalFloat :: NumericUnderscoresAllowed -> Char -> Lex a Token
1059+ lexHexadecimalFloat underAllowed c = do
1060+ (n, raw) <- lexHandleUnderAllowed underAllowed isHexDigit
10611061 rest <- getInput
1062- exts <- getExtensionsL
10631062 case rest of
10641063 (' .' : d: _) | isHexDigit d -> do
10651064 discard 1
1066- frac <- lexWhile isHexDigit
1067- let num = parseInteger 16 ds
1065+ ( frac, fracRaw) <- lexHandleUnderAllowed underAllowed isHexDigit
1066+ let num = parseInteger 16 n
10681067 numFrac = parseFrac frac
10691068 (exponent , estr) <- do
10701069 rest2 <- getInput
10711070 case rest2 of
1072- ' p' : _ -> lexExponent
1073- ' P' : _ -> lexExponent
1071+ ' p' : _ -> lexExponent underAllowed
1072+ ' P' : _ -> lexExponent underAllowed
10741073 _ -> return (0 ," " )
10751074 con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash )
1076- return $ con (((num% 1 ) + numFrac) * 2 ^^ (exponent ), ' 0' : c: ds ++ ' .' : frac ++ estr)
1075+ return $ con (((num% 1 ) + numFrac) * 2 ^^ (exponent ), ' 0' : c: raw ++ ' .' : fracRaw ++ estr)
10771076 e: _ | toLower e == ' p' -> do
1078- (exponent , estr) <- lexExponent
1077+ (exponent , estr) <- lexExponent underAllowed
10791078 con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash )
1080- return $ con (((parseInteger 16 ds )% 1 ) * 2 ^^ exponent , ' 0' : c: ds ++ estr)
1081- _ -> return (IntTok (parseInteger 16 ds , ' 0' : c: ds ))
1079+ return $ con (((parseInteger 16 n )% 1 ) * 2 ^^ exponent , ' 0' : c: raw ++ estr)
1080+ _ -> return (IntTok (parseInteger 16 n , ' 0' : c: raw ))
10821081 where
10831082 parseFrac :: String -> Rational
10841083 parseFrac ds =
1085- foldl (\ n (dp, d) -> n + (d / (16 ^^ dp))) (0 % 1 ) $ zip [1 .. ] (map ((% 1 ) . toInteger . digitToInt) ds)
1084+ foldl (\ n (dp, d) -> n + (d / (16 ^^ dp))) (0 % 1 ) $ zip [( 1 :: Integer ) .. ] (map ((% 1 ) . toInteger . digitToInt) ds)
10861085
10871086lexHash :: (b -> Token ) -> (b -> Token ) -> Either String (b -> Token ) -> Lex a (b -> Token )
10881087lexHash a b c = do
@@ -1281,16 +1280,16 @@ lexEscape = do
12811280
12821281 ' o' : c: _ | isOctDigit c -> do
12831282 discard 1
1284- (n, raw) <- lexOctal
1283+ (n, raw) <- lexOctal NoUnderscoresAllowedInNumeric
12851284 n' <- checkChar n
12861285 return (n', ' o' : raw)
12871286 ' x' : c: _ | isHexDigit c -> do
12881287 discard 1
1289- (n, raw) <- lexHexadecimal
1288+ (n, raw) <- lexHexadecimal NoUnderscoresAllowedInNumeric
12901289 n' <- checkChar n
12911290 return (n', ' x' : raw)
12921291 c: _ | isDigit c -> do
1293- (n, raw) <- lexDecimal
1292+ (n, raw) <- lexDecimal NoUnderscoresAllowedInNumeric
12941293 n' <- checkChar n
12951294 return (n', raw)
12961295
@@ -1307,28 +1306,28 @@ lexEscape = do
13071306 cntrl _ = fail " Illegal control character"
13081307
13091308-- assumes at least one octal digit
1310- lexOctal :: Lex a (Integer , String )
1311- lexOctal = do
1312- ds <- lexWhile isOctDigit
1313- return (parseInteger 8 ds, ds )
1309+ lexOctal :: NumericUnderscoresAllowed -> Lex a (Integer , String )
1310+ lexOctal underAllowed = do
1311+ (n, raw) <- lexHandleUnderAllowed underAllowed isOctDigit
1312+ return (parseInteger 8 n, raw )
13141313
13151314-- assumes at least one binary digit
1316- lexBinary :: Lex a (Integer , String )
1317- lexBinary = do
1318- ds <- lexWhile isBinDigit
1319- return (parseInteger 2 ds, ds )
1315+ lexBinary :: NumericUnderscoresAllowed -> Lex a (Integer , String )
1316+ lexBinary underAllowed = do
1317+ (n, raw) <- lexHandleUnderAllowed underAllowed isBinDigit
1318+ return (parseInteger 2 n, raw )
13201319
13211320-- assumes at least one hexadecimal digit
1322- lexHexadecimal :: Lex a (Integer , String )
1323- lexHexadecimal = do
1324- ds <- lexWhile isHexDigit
1325- return (parseInteger 16 ds, ds )
1321+ lexHexadecimal :: NumericUnderscoresAllowed -> Lex a (Integer , String )
1322+ lexHexadecimal underAllowed = do
1323+ (n, raw) <- lexHandleUnderAllowed underAllowed isHexDigit
1324+ return (parseInteger 16 n, raw )
13261325
13271326-- assumes at least one decimal digit
1328- lexDecimal :: Lex a (Integer , String )
1329- lexDecimal = do
1330- ds <- lexWhile isDigit
1331- return (parseInteger 10 ds, ds )
1327+ lexDecimal :: NumericUnderscoresAllowed -> Lex a (Integer , String )
1328+ lexDecimal underAllowed = do
1329+ (n, raw) <- lexHandleUnderAllowed underAllowed isDigit
1330+ return (parseInteger 10 n, raw )
13321331
13331332-- Stolen from Hugs's Prelude
13341333parseInteger :: Integer -> String -> Integer
@@ -1341,6 +1340,30 @@ flagKW t =
13411340 exts <- getExtensionsL
13421341 when (NondecreasingIndentation `elem` exts) flagDo
13431342
1343+ data NumericUnderscoresAllowed = UnderscoresAllowedInNumeric | NoUnderscoresAllowedInNumeric
1344+ deriving Show
1345+
1346+ numUnderscoresEnabled :: [KnownExtension ] -> NumericUnderscoresAllowed
1347+ numUnderscoresEnabled exts = if (NumericUnderscores `elem` exts)
1348+ then UnderscoresAllowedInNumeric
1349+ else NoUnderscoresAllowedInNumeric
1350+
1351+ lexHandleUnderAllowed :: NumericUnderscoresAllowed -> (Char -> Bool ) -> Lex a (String , String )
1352+ lexHandleUnderAllowed NoUnderscoresAllowedInNumeric p = do
1353+ ds <- lexWhile p
1354+ pure (ds, ds)
1355+ lexHandleUnderAllowed UnderscoresAllowedInNumeric p = do
1356+ s <- getInput
1357+ case s of
1358+ c: _ | p c -> do
1359+ raw <- lexWhile (\ ic -> p ic || ic == ' _' )
1360+ if (not $ null raw) && last raw == ' _'
1361+ then fail $ " lexHandleUnderAllowed: numeric must not end with _: " ++ show raw
1362+ else pure (filter (/= ' _' ) raw, raw)
1363+ c: _ -> fail $ " lexHandleUnderAllowed: numeric must start with proper digit: " ++ show c
1364+ _ -> fail $ " lexHandleUnderAllowed: token stream exhausted"
1365+
1366+
13441367-- | Selects ASCII binary digits, i.e. @\'0\'@..@\'1\'@.
13451368isBinDigit :: Char -> Bool
13461369isBinDigit c = c >= ' 0' && c <= ' 1'
0 commit comments