Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 46 additions & 4 deletions src/Text/Email/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import Control.Monad (guard, void, when)
import Data.Attoparsec.ByteString.Char8
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as BS
import Data.Char (ord)
import Data.Data (Data, Typeable)
import GHC.Generics (Generic)
import qualified Text.Read as Read
Expand Down Expand Up @@ -106,7 +107,7 @@ dottedAtoms = BS.intercalate (BS.singleton '.') <$>
(atom <|> quotedString) `sepBy1` char '.'

atom :: Parser ByteString
atom = takeWhile1 isAtomText
atom = BS.concat <$> many1 (takeWhile1 isAtomText <|> nonAsciiUtf8)

isAtomText :: Char -> Bool
isAtomText x = isAlphaNum x || inClass "!#$%&'*+/=?^_`{|}~-" x
Expand All @@ -127,13 +128,15 @@ quotedString =
(many (optional fws >> quotedContent) <* optional fws)

quotedContent :: Parser ByteString
quotedContent = takeWhile1 isQuotedText <|> quotedPair
quotedContent = takeWhile1 isQuotedText <|> quotedPair <|> nonAsciiUtf8

isQuotedText :: Char -> Bool
isQuotedText x = inClass "\33\35-\91\93-\126" x || isObsNoWsCtl x

quotedPair :: Parser ByteString
quotedPair = (BS.cons '\\' . BS.singleton) <$> (char '\\' *> (vchar <|> wsp <|> lf <|> cr <|> obsNoWsCtl <|> nullChar))
quotedPair = char '\\' *> (BS.cons '\\' <$>
((BS.singleton <$> (vchar <|> wsp <|> lf <|> cr <|> obsNoWsCtl <|> nullChar))
<|> nonAsciiUtf8))

cfws :: Parser ()
cfws = skipMany (comment <|> fws)
Expand All @@ -151,7 +154,7 @@ comment :: Parser ()
comment = between (char '(') (char ')') $ skipMany (void commentContent <|> fws)

commentContent :: Parser ()
commentContent = skipWhile1 isCommentText <|> void quotedPair <|> comment
commentContent = skipWhile1 isCommentText <|> void quotedPair <|> void nonAsciiUtf8 <|> comment

isCommentText :: Char -> Bool
isCommentText x = inClass "\33-\39\42-\91\93-\126" x || isObsNoWsCtl x
Expand Down Expand Up @@ -189,6 +192,45 @@ isVchar = inClass "\x21-\x7e"
vchar :: Parser Char
vchar = satisfy isVchar

nonAsciiUtf8 :: Parser ByteString
nonAsciiUtf8 = utf8Pair <|> utf8Triple <|> utf8Quad

utf8Pair :: Parser ByteString
utf8Pair = convert <$> satisfy byteOne <*> satisfy byteTwo
where
convert :: Char -> Char -> ByteString
convert high low = BS.pack [high, low]
byteOne x =
let ox = ord x
in ox >= 0xc0 && ox <= 0xdf
byteTwo x =
let ox = ord x
in ox >= 0x80 && ox <= 0xbf

utf8Triple :: Parser ByteString
utf8Triple = convert <$> satisfy byteOne <*> satisfy byteLater <*> satisfy byteLater
where
convert :: Char -> Char -> Char -> ByteString
convert high middle low = BS.pack [high, middle, low]
byteOne x =
let ox = ord x
in ox >= 0xe0 && ox <= 0xef
byteLater x =
let ox = ord x
in ox >= 0x80 && ox <= 0xbf

utf8Quad :: Parser ByteString
utf8Quad = convert <$> satisfy byteOne <*> satisfy byteLater <*> satisfy byteLater <*> satisfy byteLater
where
convert :: Char -> Char -> Char -> Char -> ByteString
convert high middleHigh middleLow low = BS.pack [high, middleHigh, middleLow, low]
byteOne x =
let ox = ord x
in ox >= 0xf0 && ox <= 0xf7
byteLater x =
let ox = ord x
in ox >= 0x80 && ox <= 0xbf

isObsNoWsCtl :: Char -> Bool
isObsNoWsCtl = inClass "\1-\8\11-\12\14-\31\127"

Expand Down
12 changes: 12 additions & 0 deletions tests/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -397,4 +397,16 @@ examples =
--, invalid "first.last@com" `why` "Mail host must be second- or lower level"
, invalid "[email protected]" `why` "Label can\'t begin with a hyphen"
, invalid "[email protected]" `why` "Label can\'t end with a hyphen"
, valid "\195\188s\195\[email protected]" `why` "UTF8 pairs are valid atom text"
, valid "\"\195\188s\195\169r\"@example.com" `why` "UTF8 pairs are valid quoted text"
, valid "\"\\\195\188s\\\195\169r\"@example.com" `why` "UTF8 pairs are valid quoted pairs"
, valid "(comm\196\155nt)[email protected]" `why` "UTF8 pairs are valid comment text"
, valid "us\226\136\138\226\132\[email protected]" `why` "UTF8 triples are valid atom text"
, valid "\"us\226\136\138\226\132\157\"@example.com" `why` "UTF8 triples are valid quoted text"
, valid "\"us\\\226\136\138\\\226\132\157\"@example.com" `why` "UTF8 triples are valid quoted pairs"
, valid "(comm\226\147\148nt)[email protected]" `why` "UTF8 triples are valid comment text"
, valid "\240\159\150\[email protected]" `why` "UTF8 quads are valid atom text"
, valid "\"\240\159\150\133\"@example.com" `why` "UTF8 quads are valid quoted text"
, valid "\"\\\240\159\150\133\"@example.com" `why` "UTF8 quads are valid quoted pairs"
, valid "(\240\159\150\133)[email protected]" `why` "UTF8 quads are valid comment text"
]