|
5 | 5 | -- the parsed form of a regular expression.
|
6 | 6 |
|
7 | 7 | module Text.Regex.TDFA.Pattern
|
8 |
| - (Pattern(..) |
9 |
| - ,PatternSet(..) |
10 |
| - ,PatternSetCharacterClass(..) |
11 |
| - ,PatternSetCollatingElement(..) |
12 |
| - ,PatternSetEquivalenceClass(..) |
13 |
| - ,GroupIndex |
14 |
| - ,DoPa(..) |
15 |
| - ,decodeCharacterClass, decodePatternSet |
16 |
| - ,showPattern |
17 |
| --- ** Internal use |
18 |
| - ,starTrans |
19 |
| --- ** Internal use, operations to support debugging under @ghci@ |
20 |
| - ,starTrans',simplify',dfsPattern |
| 8 | + ( Pattern(..) |
| 9 | + , PatternSet(..) |
| 10 | + , patternSetChars |
| 11 | + , patternSetCharacterClasses |
| 12 | + , patternSetCollatingElements |
| 13 | + , patternSetEquivalenceClasses |
| 14 | + , PatternSetCharacterClass(..) |
| 15 | + , PatternSetCollatingElement(..) |
| 16 | + , PatternSetEquivalenceClass(..) |
| 17 | + , GroupIndex |
| 18 | + , DoPa(..) |
| 19 | + , decodeCharacterClass, decodePatternSet |
| 20 | + , showPattern |
| 21 | + -- ** Internal use |
| 22 | + , starTrans |
| 23 | + -- ** Internal use, operations to support debugging under @ghci@ |
| 24 | + , starTrans', simplify', dfsPattern |
21 | 25 | ) where
|
22 | 26 |
|
23 | 27 | {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}
|
24 | 28 |
|
25 | 29 | import Data.List(intersperse,partition)
|
26 | 30 | import qualified Data.Set as Set
|
27 | 31 | import Data.Set (Set)
|
| 32 | + |
| 33 | +import Utils |
28 | 34 | import Text.Regex.TDFA.Common(DoPa(..),GroupIndex,common_error)
|
29 | 35 |
|
30 | 36 | err :: String -> a
|
@@ -128,20 +134,54 @@ showPattern pIn =
|
128 | 134 | -- collating elements (e.g. @[.ch.]@, unused), and
|
129 | 135 | -- equivalence classes (e.g. @[=a=]@, treated as characters).
|
130 | 136 | --
|
131 |
| -data PatternSet = PatternSet (Maybe (Set Char)) |
132 |
| - (Maybe (Set PatternSetCharacterClass)) |
133 |
| - (Maybe (Set PatternSetCollatingElement)) |
134 |
| - (Maybe (Set PatternSetEquivalenceClass)) |
135 |
| - deriving (Eq) |
| 137 | +data PatternSet = PatternSet |
| 138 | + { _patternSetChars :: Set Char |
| 139 | + -- ^ Characters included in the pattern. |
| 140 | + , _patternSetCharacterClasses :: Set PatternSetCharacterClass |
| 141 | + -- ^ POSIX character classes included in the pattern. |
| 142 | + , _patternSetCollatingElements :: Set PatternSetCollatingElement |
| 143 | + -- ^ Collating elements included in the pattern. |
| 144 | + , _patternSetEquivalenceClasses :: Set PatternSetEquivalenceClass |
| 145 | + -- ^ Equivalence classes included in the pattern. |
| 146 | + } |
| 147 | + deriving (Eq) |
| 148 | + |
| 149 | +instance Semigroup PatternSet where |
| 150 | + PatternSet a b c d <> PatternSet a' b' c' d' = |
| 151 | + PatternSet (a <> a') (b <> b') (c <> c') (d <> d') |
| 152 | + |
| 153 | +instance Monoid PatternSet where |
| 154 | + mempty = PatternSet mempty mempty mempty mempty |
| 155 | + mappend = (<>) |
| 156 | + |
| 157 | +-- | Lens for '_patternSetChars'. |
| 158 | +patternSetChars :: Lens' PatternSet (Set Char) |
| 159 | +patternSetChars f ps = |
| 160 | + f (_patternSetChars ps) <&> \ i -> ps{ _patternSetChars = i } |
| 161 | + |
| 162 | +-- | Lens for '_patternSetCharacterClasses'. |
| 163 | +patternSetCharacterClasses :: Lens' PatternSet (Set PatternSetCharacterClass) |
| 164 | +patternSetCharacterClasses f ps = |
| 165 | + f (_patternSetCharacterClasses ps) <&> \ i -> ps{ _patternSetCharacterClasses = i } |
| 166 | + |
| 167 | +-- | Lens for '_patternSetCollatingElements'. |
| 168 | +patternSetCollatingElements :: Lens' PatternSet (Set PatternSetCollatingElement) |
| 169 | +patternSetCollatingElements f ps = |
| 170 | + f (_patternSetCollatingElements ps) <&> \ i -> ps{ _patternSetCollatingElements = i } |
| 171 | + |
| 172 | +-- | Lens for '_patternSetEquivalenceClasses'. |
| 173 | +patternSetEquivalenceClasses :: Lens' PatternSet (Set PatternSetEquivalenceClass) |
| 174 | +patternSetEquivalenceClasses f ps = |
| 175 | + f (_patternSetEquivalenceClasses ps) <&> \ i -> ps{ _patternSetEquivalenceClasses = i } |
136 | 176 |
|
137 | 177 | -- | Hand-rolled implementation, giving textual rather than Haskell representation.
|
138 | 178 | instance Show PatternSet where
|
139 | 179 | showsPrec i (PatternSet s scc sce sec) =
|
140 |
| - let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s |
| 180 | + let (special,normal) = partition (`elem` "]-") $ Set.toAscList s |
141 | 181 | charSpec = (if ']' `elem` special then (']':) else id) (byRange normal)
|
142 |
| - scc' = maybe "" ((concatMap show) . Set.toList) scc |
143 |
| - sce' = maybe "" ((concatMap show) . Set.toList) sce |
144 |
| - sec' = maybe "" ((concatMap show) . Set.toList) sec |
| 182 | + scc' = concatMap show $ Set.toList scc |
| 183 | + sce' = concatMap show $ Set.toList sce |
| 184 | + sec' = concatMap show $ Set.toList sec |
145 | 185 | in shows charSpec
|
146 | 186 | . showsPrec i scc' . showsPrec i sce' . showsPrec i sec'
|
147 | 187 | . if '-' `elem` special then showChar '-' else id
|
@@ -183,11 +223,11 @@ instance Show PatternSetEquivalenceClass where
|
183 | 223 | --
|
184 | 224 | -- @since 1.3.2
|
185 | 225 | decodePatternSet :: PatternSet -> Set Char
|
186 |
| -decodePatternSet (PatternSet msc mscc _ msec) = |
187 |
| - let baseMSC = maybe Set.empty id msc |
188 |
| - withMSCC = foldl (flip Set.insert) baseMSC (maybe [] (concatMap decodeCharacterClass . Set.toAscList) mscc) |
189 |
| - withMSEC = foldl (flip Set.insert) withMSCC (maybe [] (concatMap unSEC . Set.toAscList) msec) |
190 |
| - in withMSEC |
| 226 | +decodePatternSet (PatternSet chars ccs _ eqcs) = Set.unions |
| 227 | + [ chars |
| 228 | + , foldMap (Set.fromList . decodeCharacterClass) ccs |
| 229 | + , foldMap (Set.fromList . unSEC) eqcs |
| 230 | + ] |
191 | 231 |
|
192 | 232 | -- | This returns the strictly ascending list of characters
|
193 | 233 | -- represented by @[: :]@ POSIX character classes.
|
|
0 commit comments