diff --git a/src/Data/Text.hs b/src/Data/Text.hs index b1f3a189..3b5fdd7e 100644 --- a/src/Data/Text.hs +++ b/src/Data/Text.hs @@ -167,6 +167,7 @@ module Data.Text , isPrefixOf , isSuffixOf , isInfixOf + , isSubsequenceOf -- ** View patterns , stripPrefix @@ -253,6 +254,7 @@ import qualified Language.Haskell.TH.Lib as TH import qualified Language.Haskell.TH.Syntax as TH import Text.Printf (PrintfArg, formatArg, formatString) import System.Posix.Types (CSsize(..)) +import Data.Function (on) -- $setup -- >>> import Data.Text @@ -1881,6 +1883,43 @@ isInfixOf needle haystack | otherwise = not . L.null . indices needle $ haystack {-# INLINE [1] isInfixOf #-} +-- 2021-09-29: NOTE: +-- * after the implementation - determine & mention the big O +-- | The 'isSubsequenceOf' function takes the main text and the subsequnce +-- to find and returns 'True' iff the second argument is a subsequence +-- of the first. +-- +-- "Subsequence" used in the meaning of: characters of the second argument +-- appear in same sequential order in the main data, to say second argument can +-- be derived by deleting some (any) or no elements from the first. +-- +-- Examples: +-- +-- >>> isSubsequenceOf "1234567" "1356" +-- True +-- +-- >>> isSubsequenceOf "1234567" "21" +-- False +-- +-- `isSubsequenceOf` is the base case & implementation of fuzzy search. +isSubsequenceOf :: Text -> Text -> Bool +isSubsequenceOf tf sf + | length sf > length tf = False + | otherwise = subseqOf tf sf + where + subseqOf :: Text -> Text -> Bool + subseqOf t s = + on f uncons t s + where + f :: Maybe (Char, Text) -> Maybe (Char, Text) -> Bool + f _ Nothing = True + f Nothing _ = False + f (Just (tc,ts)) (Just (sc,ss)) = + subseqOf ts $ + if tc == sc + then s + else ss + ------------------------------------------------------------------------------- -- * View patterns diff --git a/tests/Tests/Properties/Substrings.hs b/tests/Tests/Properties/Substrings.hs index 46fa1dca..f3b5b8f4 100644 --- a/tests/Tests/Properties/Substrings.hs +++ b/tests/Tests/Properties/Substrings.hs @@ -20,6 +20,8 @@ import qualified Data.Text.Internal.Lazy as TL (Text(..)) import qualified Data.Text.Internal.Lazy.Fusion as SL import qualified Data.Text.Lazy as TL import qualified Tests.SlowFunctions as Slow +import Control.Monad (replicateM) +import Data.List (nub, sort) s_take n = L.take n `eqP` (unpackS . S.take n) s_take_s (Small n) = L.take n `eqP` (unpackS . S.unstream . S.take n) @@ -231,6 +233,47 @@ tl_isSuffixOf s = L.isSuffixOf s`eqP` TL.isSuffixOf (packS s) t_isInfixOf s = L.isInfixOf s `eqP` T.isInfixOf (packS s) tl_isInfixOf s = L.isInfixOf s `eqP` TL.isInfixOf (packS s) +-- | Generator for substrings that keeps the element order. +-- Aka: "1234567890" -> "245680" +genOrdSubseq :: T.Text -> Gen T.Text +genOrdSubseq txt = + T.pack . transform <$> genTransformMap + where + + pickN :: Gen Int + pickN = + choose (0, T.length txt) + + pickNs :: Gen [Int] + pickNs = + fmap (sort . nub) $ (`replicateM` pickN) =<< pickN + + growInst :: [Bool] -> Int -> [Bool] + growInst ls n = + ls + <> take (length ls - pred n) [True ..] + <> [False] + + mkTransformInst :: [Bool] -> [Int] -> [Bool] + mkTransformInst bls [] = + bls + <> take (T.length txt - length bls) [True ..] + mkTransformInst bls (i:is) = + mkTransformInst + (growInst bls i) + is + + mkTransformMap :: [a] -> [Int] -> [(a, Bool)] + mkTransformMap ls ixs = + zip ls (mkTransformInst mempty ixs) + + genTransformMap :: (Gen [(Char, Bool)]) + genTransformMap = fmap (mkTransformMap $ T.unpack txt) pickNs + + transform :: [(Char, Bool)] -> [Char] + transform = + foldr (\ (c, b) as -> as <> if b then [c] else mempty) mempty + t_stripPrefix s = (fmap packS . L.stripPrefix s) `eqP` T.stripPrefix (packS s) tl_stripPrefix s = (fmap packS . L.stripPrefix s) `eqP` TL.stripPrefix (packS s)