Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 751febf

Browse files
author
Leonidas Loucas
committedJun 29, 2020
Add handling for NumericUnderscores extension in numeric literals #455
1 parent d7414ac commit 751febf

File tree

2 files changed

+81
-55
lines changed

2 files changed

+81
-55
lines changed
 

‎src/Language/Haskell/Exts/Extension.hs

+3
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,9 @@ data KnownExtension =
563563
-- | HexFloatLiterals syntax ex 0xFF.FFp-12
564564
| HexFloatLiterals
565565

566+
-- | NumericUnderscores num literal syntax ex 1_000_000 or 0xF_F.F_Fp-12 or 0b11_11_11 or 1_000e+23
567+
| NumericUnderscores
568+
566569
deriving (Show, Read, Eq, Ord, Enum, Bounded, Data, Typeable)
567570

568571
-- | Certain extensions imply other extensions, and this function

‎src/Language/Haskell/Exts/InternalLexer.hs

+78-55
Original file line numberDiff line numberDiff line change
@@ -660,20 +660,20 @@ lexStdToken = do
660660

661661
'0':c:d:_ | toLower c == 'o' && isOctDigit d -> do
662662
discard 2
663-
(n, str) <- lexOctal
663+
(n, str) <- lexOctal $ numUnderscoresEnabled exts
664664
con <- intHash
665665
return (con (n, '0':c:str))
666666
| toLower c == 'b' && isBinDigit d && BinaryLiterals `elem` exts -> do
667667
discard 2
668-
(n, str) <- lexBinary
668+
(n, str) <- lexBinary $ numUnderscoresEnabled exts
669669
con <- intHash
670670
return (con (n, '0':c:str))
671671
| toLower c == 'x' && isHexDigit d && HexFloatLiterals `elem` exts -> do
672672
discard 2
673-
lexHexadecimalFloat c
673+
lexHexadecimalFloat (numUnderscoresEnabled exts) c
674674
| toLower c == 'x' && isHexDigit d -> do
675675
discard 2
676-
(n, str) <- lexHexadecimal
676+
(n, str) <- lexHexadecimal $ numUnderscoresEnabled exts
677677
con <- intHash
678678
return (con (n, '0':c:str))
679679

@@ -806,7 +806,7 @@ lexStdToken = do
806806
return $ LabelVarId ident
807807

808808

809-
c:_ | isDigit c -> lexDecimalOrFloat
809+
c:_ | isDigit c -> lexDecimalOrFloat $ numUnderscoresEnabled exts
810810

811811
| isUpper c -> lexConIdOrQual ""
812812

@@ -1012,77 +1012,76 @@ lexRawPragma = lexRawPragmaAux
10121012
rpr' <- lexRawPragma
10131013
return $ rpr ++ '#':rpr'
10141014

1015-
lexDecimalOrFloat :: Lex a Token
1016-
lexDecimalOrFloat = do
1017-
ds <- lexWhile isDigit
1015+
lexDecimalOrFloat :: NumericUnderscoresAllowed -> Lex a Token
1016+
lexDecimalOrFloat underAllowed = do
1017+
(n, raw) <- lexHandleUnderAllowed underAllowed isDigit
10181018
rest <- getInput
10191019
exts <- getExtensionsL
10201020
case rest of
10211021
('.':d:_) | isDigit d -> do
10221022
discard 1
1023-
frac <- lexWhile isDigit
1024-
let num = parseInteger 10 (ds ++ frac)
1023+
(frac, fracRaw) <- lexHandleUnderAllowed underAllowed isDigit
1024+
let num = parseInteger 10 (n ++ frac)
10251025
decimals = toInteger (length frac)
10261026
(exponent, estr) <- do
10271027
rest2 <- getInput
10281028
case rest2 of
1029-
'e':_ -> lexExponent
1030-
'E':_ -> lexExponent
1029+
'e':_ -> lexExponent underAllowed
1030+
'E':_ -> lexExponent underAllowed
10311031
_ -> return (0,"")
10321032
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
1033-
return $ con ((num%1) * 10^^(exponent - decimals), ds ++ '.':frac ++ estr)
1033+
return $ con ((num%1) * 10^^(exponent - decimals), raw ++ '.':fracRaw ++ estr)
10341034
e:_ | toLower e == 'e' -> do
1035-
(exponent, estr) <- lexExponent
1035+
(exponent, estr) <- lexExponent underAllowed
10361036
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
1037-
return $ con ((parseInteger 10 ds%1) * 10^^exponent, ds ++ estr)
1038-
'#':'#':_ | MagicHash `elem` exts -> discard 2 >> return (WordTokHash (parseInteger 10 ds, ds))
1039-
'#':_ | MagicHash `elem` exts -> discard 1 >> return (IntTokHash (parseInteger 10 ds, ds))
1040-
_ -> return (IntTok (parseInteger 10 ds, ds))
1037+
return $ con ((parseInteger 10 n%1) * 10^^exponent, raw ++ estr)
1038+
'#':'#':_ | MagicHash `elem` exts -> discard 2 >> return (WordTokHash (parseInteger 10 n, raw))
1039+
'#':_ | MagicHash `elem` exts -> discard 1 >> return (IntTokHash (parseInteger 10 n, raw))
1040+
_ -> return (IntTok (parseInteger 10 n, raw))
10411041

1042-
lexExponent :: Lex a (Integer, String)
1043-
lexExponent = do
1042+
lexExponent :: NumericUnderscoresAllowed -> Lex a (Integer, String)
1043+
lexExponent underAllowed = do
10441044
(e:r) <- getInput
10451045
discard 1 -- discard ex notation
10461046
case r of
10471047
'+':d:_ | isDigit d -> do
10481048
discard 1
1049-
(n, str) <- lexDecimal
1049+
(n, str) <- lexDecimal underAllowed
10501050
return (n, e:'+':str)
10511051
'-':d:_ | isDigit d -> do
10521052
discard 1
1053-
(n, str) <- lexDecimal
1053+
(n, str) <- lexDecimal underAllowed
10541054
return (negate n, e:'-':str)
1055-
d:_ | isDigit d -> lexDecimal >>= \(n,str) -> return (n, e:str)
1055+
d:_ | isDigit d -> lexDecimal underAllowed >>= \(n,str) -> return (n, e:str)
10561056
_ -> fail "Float with missing exponent"
10571057

1058-
lexHexadecimalFloat :: Char -> Lex a Token
1059-
lexHexadecimalFloat c = do
1060-
ds <- lexWhile isHexDigit
1058+
lexHexadecimalFloat :: NumericUnderscoresAllowed -> Char -> Lex a Token
1059+
lexHexadecimalFloat underAllowed c = do
1060+
(n, raw) <- lexHandleUnderAllowed underAllowed isHexDigit
10611061
rest <- getInput
1062-
exts <- getExtensionsL
10631062
case rest of
10641063
('.':d:_) | isHexDigit d -> do
10651064
discard 1
1066-
frac <- lexWhile isHexDigit
1067-
let num = parseInteger 16 ds
1065+
(frac, fracRaw) <- lexHandleUnderAllowed underAllowed isHexDigit
1066+
let num = parseInteger 16 n
10681067
numFrac = parseFrac frac
10691068
(exponent, estr) <- do
10701069
rest2 <- getInput
10711070
case rest2 of
1072-
'p':_ -> lexExponent
1073-
'P':_ -> lexExponent
1071+
'p':_ -> lexExponent underAllowed
1072+
'P':_ -> lexExponent underAllowed
10741073
_ -> return (0,"")
10751074
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
1076-
return $ con (((num%1) + numFrac) * 2^^(exponent), '0':c:ds ++ '.':frac ++ estr)
1075+
return $ con (((num%1) + numFrac) * 2^^(exponent), '0':c:raw ++ '.':fracRaw ++ estr)
10771076
e:_ | toLower e == 'p' -> do
1078-
(exponent, estr) <- lexExponent
1077+
(exponent, estr) <- lexExponent underAllowed
10791078
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
1080-
return $ con (((parseInteger 16 ds)%1) * 2^^exponent, '0':c:ds ++ estr)
1081-
_ -> return (IntTok (parseInteger 16 ds, '0':c:ds))
1079+
return $ con (((parseInteger 16 n)%1) * 2^^exponent, '0':c:raw ++ estr)
1080+
_ -> return (IntTok (parseInteger 16 n, '0':c:raw))
10821081
where
10831082
parseFrac :: String -> Rational
10841083
parseFrac ds =
1085-
foldl (\n (dp, d) -> n + (d / (16 ^^ dp))) (0 % 1) $ zip [1..] (map ((% 1) . toInteger . digitToInt) ds)
1084+
foldl (\n (dp, d) -> n + (d / (16 ^^ dp))) (0 % 1) $ zip [(1 :: Integer) ..] (map ((% 1) . toInteger . digitToInt) ds)
10861085

10871086
lexHash :: (b -> Token) -> (b -> Token) -> Either String (b -> Token) -> Lex a (b -> Token)
10881087
lexHash a b c = do
@@ -1281,16 +1280,16 @@ lexEscape = do
12811280

12821281
'o':c:_ | isOctDigit c -> do
12831282
discard 1
1284-
(n, raw) <- lexOctal
1283+
(n, raw) <- lexOctal NoUnderscoresAllowedInNumeric
12851284
n' <- checkChar n
12861285
return (n', 'o':raw)
12871286
'x':c:_ | isHexDigit c -> do
12881287
discard 1
1289-
(n, raw) <- lexHexadecimal
1288+
(n, raw) <- lexHexadecimal NoUnderscoresAllowedInNumeric
12901289
n' <- checkChar n
12911290
return (n', 'x':raw)
12921291
c:_ | isDigit c -> do
1293-
(n, raw) <- lexDecimal
1292+
(n, raw) <- lexDecimal NoUnderscoresAllowedInNumeric
12941293
n' <- checkChar n
12951294
return (n', raw)
12961295

@@ -1307,28 +1306,28 @@ lexEscape = do
13071306
cntrl _ = fail "Illegal control character"
13081307

13091308
-- assumes at least one octal digit
1310-
lexOctal :: Lex a (Integer, String)
1311-
lexOctal = do
1312-
ds <- lexWhile isOctDigit
1313-
return (parseInteger 8 ds, ds)
1309+
lexOctal :: NumericUnderscoresAllowed -> Lex a (Integer, String)
1310+
lexOctal underAllowed = do
1311+
(n, raw) <- lexHandleUnderAllowed underAllowed isOctDigit
1312+
return (parseInteger 8 n, raw)
13141313

13151314
-- assumes at least one binary digit
1316-
lexBinary :: Lex a (Integer, String)
1317-
lexBinary = do
1318-
ds <- lexWhile isBinDigit
1319-
return (parseInteger 2 ds, ds)
1315+
lexBinary :: NumericUnderscoresAllowed -> Lex a (Integer, String)
1316+
lexBinary underAllowed = do
1317+
(n, raw) <- lexHandleUnderAllowed underAllowed isBinDigit
1318+
return (parseInteger 2 n, raw)
13201319

13211320
-- assumes at least one hexadecimal digit
1322-
lexHexadecimal :: Lex a (Integer, String)
1323-
lexHexadecimal = do
1324-
ds <- lexWhile isHexDigit
1325-
return (parseInteger 16 ds, ds)
1321+
lexHexadecimal :: NumericUnderscoresAllowed -> Lex a (Integer, String)
1322+
lexHexadecimal underAllowed = do
1323+
(n, raw) <- lexHandleUnderAllowed underAllowed isHexDigit
1324+
return (parseInteger 16 n, raw)
13261325

13271326
-- assumes at least one decimal digit
1328-
lexDecimal :: Lex a (Integer, String)
1329-
lexDecimal = do
1330-
ds <- lexWhile isDigit
1331-
return (parseInteger 10 ds, ds)
1327+
lexDecimal :: NumericUnderscoresAllowed -> Lex a (Integer, String)
1328+
lexDecimal underAllowed = do
1329+
(n, raw) <- lexHandleUnderAllowed underAllowed isDigit
1330+
return (parseInteger 10 n, raw)
13321331

13331332
-- Stolen from Hugs's Prelude
13341333
parseInteger :: Integer -> String -> Integer
@@ -1341,6 +1340,30 @@ flagKW t =
13411340
exts <- getExtensionsL
13421341
when (NondecreasingIndentation `elem` exts) flagDo
13431342

1343+
data NumericUnderscoresAllowed = UnderscoresAllowedInNumeric | NoUnderscoresAllowedInNumeric
1344+
deriving Show
1345+
1346+
numUnderscoresEnabled :: [KnownExtension] -> NumericUnderscoresAllowed
1347+
numUnderscoresEnabled exts = if (NumericUnderscores `elem` exts)
1348+
then UnderscoresAllowedInNumeric
1349+
else NoUnderscoresAllowedInNumeric
1350+
1351+
lexHandleUnderAllowed :: NumericUnderscoresAllowed -> (Char -> Bool) -> Lex a (String, String)
1352+
lexHandleUnderAllowed NoUnderscoresAllowedInNumeric p = do
1353+
ds <- lexWhile p
1354+
return (ds, ds)
1355+
lexHandleUnderAllowed UnderscoresAllowedInNumeric p = do
1356+
s <- getInput
1357+
case s of
1358+
c:_ | p c -> do
1359+
raw <- lexWhile (\ic -> p ic || ic == '_')
1360+
if (not $ null raw) && last raw == '_'
1361+
then fail $ "lexHandleUnderAllowed: numeric must not end with _: " ++ show raw
1362+
else return (filter (/= '_') raw, raw)
1363+
c:_ -> fail $ "lexHandleUnderAllowed: numeric must start with proper digit: " ++ show c
1364+
_ -> fail $ "lexHandleUnderAllowed: token stream exhausted"
1365+
1366+
13441367
-- | Selects ASCII binary digits, i.e. @\'0\'@..@\'1\'@.
13451368
isBinDigit :: Char -> Bool
13461369
isBinDigit c = c >= '0' && c <= '1'

0 commit comments

Comments
 (0)
Please sign in to comment.