Skip to content

Commit 740e494

Browse files
committed
Remove Maybe indirection in PatternSet (+ LambdaCase)
Instead of `Maybe . Set` just have `Set`, collapsing the two null values `Nothing` and `Just mempty`. The distinction of these wasn't used anywhere. We drop GHC 7.4 in favor of using LambdaCase.
1 parent 9495435 commit 740e494

File tree

4 files changed

+98
-50
lines changed

4 files changed

+98
-50
lines changed

.github/workflows/haskell-ci.yml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,6 @@ jobs:
9292
compilerVersion: 7.6.3
9393
setup-method: hvr-ppa
9494
allow-failure: false
95-
- compiler: ghc-7.4.2
96-
compilerKind: ghc
97-
compilerVersion: 7.4.2
98-
setup-method: hvr-ppa
99-
allow-failure: false
10095
fail-fast: false
10196
steps:
10297
- name: apt

lib/Text/Regex/TDFA/Pattern.hs

Lines changed: 67 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,32 @@
55
-- the parsed form of a regular expression.
66

77
module Text.Regex.TDFA.Pattern
8-
(Pattern(..)
9-
,PatternSet(..)
10-
,PatternSetCharacterClass(..)
11-
,PatternSetCollatingElement(..)
12-
,PatternSetEquivalenceClass(..)
13-
,GroupIndex
14-
,DoPa(..)
15-
,decodeCharacterClass, decodePatternSet
16-
,showPattern
17-
-- ** Internal use
18-
,starTrans
19-
-- ** Internal use, operations to support debugging under @ghci@
20-
,starTrans',simplify',dfsPattern
8+
( Pattern(..)
9+
, PatternSet(..)
10+
, patternSetChars
11+
, patternSetCharacterClasses
12+
, patternSetCollatingElements
13+
, patternSetEquivalenceClasses
14+
, PatternSetCharacterClass(..)
15+
, PatternSetCollatingElement(..)
16+
, PatternSetEquivalenceClass(..)
17+
, GroupIndex
18+
, DoPa(..)
19+
, decodeCharacterClass, decodePatternSet
20+
, showPattern
21+
-- ** Internal use
22+
, starTrans
23+
-- ** Internal use, operations to support debugging under @ghci@
24+
, starTrans', simplify', dfsPattern
2125
) where
2226

2327
{- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}
2428

2529
import Data.List(intersperse,partition)
2630
import qualified Data.Set as Set
2731
import Data.Set (Set)
32+
33+
import Utils
2834
import Text.Regex.TDFA.Common(DoPa(..),GroupIndex,common_error)
2935

3036
err :: String -> a
@@ -128,20 +134,54 @@ showPattern pIn =
128134
-- collating elements (e.g. @[.ch.]@, unused), and
129135
-- equivalence classes (e.g. @[=a=]@, treated as characters).
130136
--
131-
data PatternSet = PatternSet (Maybe (Set Char))
132-
(Maybe (Set PatternSetCharacterClass))
133-
(Maybe (Set PatternSetCollatingElement))
134-
(Maybe (Set PatternSetEquivalenceClass))
135-
deriving (Eq)
137+
data PatternSet = PatternSet
138+
{ _patternSetChars :: Set Char
139+
-- ^ Characters included in the pattern.
140+
, _patternSetCharacterClasses :: Set PatternSetCharacterClass
141+
-- ^ POSIX character classes included in the pattern.
142+
, _patternSetCollatingElements :: Set PatternSetCollatingElement
143+
-- ^ Collating elements included in the pattern.
144+
, _patternSetEquivalenceClasses :: Set PatternSetEquivalenceClass
145+
-- ^ Equivalence classes included in the pattern.
146+
}
147+
deriving (Eq)
148+
149+
instance Semigroup PatternSet where
150+
PatternSet a b c d <> PatternSet a' b' c' d' =
151+
PatternSet (a <> a') (b <> b') (c <> c') (d <> d')
152+
153+
instance Monoid PatternSet where
154+
mempty = PatternSet mempty mempty mempty mempty
155+
mappend = (<>)
156+
157+
-- | Lens for '_patternSetChars'.
158+
patternSetChars :: Lens' PatternSet (Set Char)
159+
patternSetChars f ps =
160+
f (_patternSetChars ps) <&> \ i -> ps{ _patternSetChars = i }
161+
162+
-- | Lens for '_patternSetCharacterClasses'.
163+
patternSetCharacterClasses :: Lens' PatternSet (Set PatternSetCharacterClass)
164+
patternSetCharacterClasses f ps =
165+
f (_patternSetCharacterClasses ps) <&> \ i -> ps{ _patternSetCharacterClasses = i }
166+
167+
-- | Lens for '_patternSetCollatingElements'.
168+
patternSetCollatingElements :: Lens' PatternSet (Set PatternSetCollatingElement)
169+
patternSetCollatingElements f ps =
170+
f (_patternSetCollatingElements ps) <&> \ i -> ps{ _patternSetCollatingElements = i }
171+
172+
-- | Lens for '_patternSetEquivalenceClasses'.
173+
patternSetEquivalenceClasses :: Lens' PatternSet (Set PatternSetEquivalenceClass)
174+
patternSetEquivalenceClasses f ps =
175+
f (_patternSetEquivalenceClasses ps) <&> \ i -> ps{ _patternSetEquivalenceClasses = i }
136176

137177
-- | Hand-rolled implementation, giving textual rather than Haskell representation.
138178
instance Show PatternSet where
139179
showsPrec i (PatternSet s scc sce sec) =
140-
let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s
180+
let (special,normal) = partition (`elem` "]-") $ Set.toAscList s
141181
charSpec = (if ']' `elem` special then (']':) else id) (byRange normal)
142-
scc' = maybe "" ((concatMap show) . Set.toList) scc
143-
sce' = maybe "" ((concatMap show) . Set.toList) sce
144-
sec' = maybe "" ((concatMap show) . Set.toList) sec
182+
scc' = concatMap show $ Set.toList scc
183+
sce' = concatMap show $ Set.toList sce
184+
sec' = concatMap show $ Set.toList sec
145185
in shows charSpec
146186
. showsPrec i scc' . showsPrec i sce' . showsPrec i sec'
147187
. if '-' `elem` special then showChar '-' else id
@@ -183,11 +223,11 @@ instance Show PatternSetEquivalenceClass where
183223
--
184224
-- @since 1.3.2
185225
decodePatternSet :: PatternSet -> Set Char
186-
decodePatternSet (PatternSet msc mscc _ msec) =
187-
let baseMSC = maybe Set.empty id msc
188-
withMSCC = foldl (flip Set.insert) baseMSC (maybe [] (concatMap decodeCharacterClass . Set.toAscList) mscc)
189-
withMSEC = foldl (flip Set.insert) withMSCC (maybe [] (concatMap unSEC . Set.toAscList) msec)
190-
in withMSEC
226+
decodePatternSet (PatternSet chars ccs _ eqcs) = Set.unions
227+
[ chars
228+
, foldMap (Set.fromList . decodeCharacterClass) ccs
229+
, foldMap (Set.fromList . unSEC) eqcs
230+
]
191231

192232
-- | This returns the strictly ascending list of characters
193233
-- represented by @[: :]@ POSIX character classes.

lib/Text/Regex/TDFA/ReadRegex.hs

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@ import Text.ParserCombinators.Parsec((<|>), (<?>),
1414
try, runParser, many, getState, setState, CharParser, ParseError,
1515
sepBy1, option, notFollowedBy, many1, lookAhead, eof, between,
1616
string, noneOf, digit, char, anyChar)
17+
import Utils
1718

1819
import Control.Monad (liftM, guard)
1920

20-
import Data.Foldable (asum)
21-
import qualified Data.Set as Set(fromList)
21+
import Data.Foldable (asum, foldl')
22+
import qualified Data.Set as Set
2223

2324
-- | An element inside @[...]@, denoting a character class.
2425
data BracketElement
@@ -132,20 +133,31 @@ p_bracket :: P Pattern
132133
p_bracket = (char '[') >> ( (char '^' >> p_set True) <|> (p_set False) )
133134

134135
p_set :: Bool -> P Pattern
135-
p_set invert = do initial <- option "" (char ']' >> return "]")
136-
values <- if null initial then many1 p_set_elem else many p_set_elem
137-
_ <- char ']'
138-
ci <- char_index
139-
let chars = maybe'set $ concat $
140-
initial :
141-
[ c | BEChar c <- values ] :
142-
[ [start..end] | BERange start end <- values ]
143-
colls = maybe'set [PatternSetCollatingElement coll | BEColl coll <- values ]
144-
equivs = maybe'set [PatternSetEquivalenceClass equiv | BEEquiv equiv <- values]
145-
class's = maybe'set [PatternSetCharacterClass a'class | BEClass a'class <- values]
146-
maybe'set x = if null x then Nothing else Just (Set.fromList x)
147-
sets = PatternSet chars class's colls equivs
148-
sets `seq` return $ if invert then PAnyNot ci sets else PAny ci sets
136+
p_set invert = do
137+
-- A ] as first character after the opening [ is treated as alternative ']'
138+
-- rather than the closing bracket.
139+
initial <- option mempty $ Set.singleton <$> char ']'
140+
-- Parse remaining content of bracket expression.
141+
values <- if Set.null initial then many1 p_set_elem else many p_set_elem
142+
_ <- char ']'
143+
ci <- char_index
144+
-- Process the content of bracket expression into a PatternSet.
145+
let !sets = foldl' (flip addBracketElement) (mempty{ _patternSetChars = initial }) values
146+
return $ if invert then PAnyNot ci sets else PAny ci sets
147+
148+
addBracketElement :: BracketElement -> PatternSet -> PatternSet
149+
addBracketElement = \case
150+
BEChar c ->
151+
over patternSetChars $ Set.insert c
152+
BERange start end ->
153+
over patternSetChars $ (`Set.union` Set.fromDistinctAscList [start..end])
154+
-- Set.union is left-biased, [start..end] is considered the smaller set
155+
BEClass s ->
156+
over patternSetCharacterClasses $ Set.insert $ PatternSetCharacterClass s
157+
BEColl s ->
158+
over patternSetCollatingElements $ Set.insert $ PatternSetCollatingElement s
159+
BEEquiv s ->
160+
over patternSetEquivalenceClasses $ Set.insert $ PatternSetEquivalenceClass s
149161

150162
-- From here down the code is the parser and functions for pattern [ ] set things
151163

regex-tdfa.cabal

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ tested-with:
3737
GHC == 7.10.3
3838
GHC == 7.8.4
3939
GHC == 7.6.3
40-
GHC == 7.4.2
4140

4241
source-repository head
4342
type: git
@@ -99,7 +98,8 @@ library
9998
build-depends: fail == 4.9.*
10099
, semigroups == 0.18.* || == 0.19.*
101100
build-depends: array >= 0.4 && < 0.6
102-
, base >= 4.5 && < 5
101+
, base >= 4.6 && < 5
102+
-- GHC 7.6 required for LambdaCase
103103
, bytestring >= 0.9.2 && < 0.12
104104
, containers >= 0.4.2 && < 0.7
105105
, mtl >= 2.1.3 && < 2.4
@@ -114,6 +114,7 @@ library
114114
FlexibleInstances
115115
ForeignFunctionInterface
116116
FunctionalDependencies
117+
LambdaCase
117118
MagicHash
118119
MultiParamTypeClasses
119120
NondecreasingIndentation

0 commit comments

Comments
 (0)