|
1 | 1 | module FuzzySearch (tests) where |
2 | 2 |
|
3 | | -import Data.Char (toLower) |
4 | | -import Data.Maybe (catMaybes) |
5 | | -import qualified Data.Monoid.Textual as T |
6 | | -import Data.Text (Text, inits, pack) |
7 | | -import qualified Data.Text as Text |
8 | | -import Prelude hiding (filter) |
9 | | -import System.Directory (doesFileExist) |
10 | | -import System.IO.Unsafe (unsafePerformIO) |
11 | | -import Test.QuickCheck |
| 3 | +import Data.Maybe (isJust, mapMaybe) |
| 4 | +import Data.Text (Text) |
| 5 | +import qualified Data.Text as Text |
| 6 | +import Prelude hiding (filter) |
12 | 7 | import Test.Tasty |
13 | | -import Test.Tasty.ExpectedFailure |
14 | | -import Test.Tasty.QuickCheck (testProperty) |
15 | | -import qualified Text.Fuzzy as Fuzzy |
16 | | -import Text.Fuzzy (Fuzzy (..)) |
| 8 | +import Test.Tasty.HUnit |
| 9 | +import Test.Tasty.QuickCheck |
17 | 10 | import Text.Fuzzy.Parallel |
18 | 11 |
|
19 | 12 | tests :: TestTree |
20 | 13 | tests = |
21 | 14 | testGroup |
22 | 15 | "Fuzzy search" |
23 | | - [ needDictionary $ |
24 | | - testGroup |
25 | | - "match works as expected on the english dictionary" |
26 | | - [ testProperty "for legit words" propLegit, |
27 | | - testProperty "for prefixes" propPrefix, |
28 | | - testProperty "for typos" propTypo |
29 | | - ] |
| 16 | + [ testGroup "match" |
| 17 | + [ testCase "empty" $ |
| 18 | + match "" "" @?= Just 0 |
| 19 | + , testCase "camel case" $ |
| 20 | + match "myImportantField" "myImportantField" @?= Just 262124 |
| 21 | + , testCase "a" $ |
| 22 | + mapMaybe (matchInput "a") ["", "a", "aa", "aaa", "A", "AA", "aA", "Aa"] |
| 23 | + @?= [("a",3),("aa",3),("aaa",3),("aA",3),("Aa",1)] |
| 24 | + , testCase "lowercase words" $ |
| 25 | + mapMaybe (matchInput "abc") ["abc", "abcd", "axbc", "axbxc", "def"] |
| 26 | + @?= [("abc", 25), ("abcd", 25), ("axbc", 7), ("axbxc", 5)] |
| 27 | + , testCase "lower upper mix" $ |
| 28 | + mapMaybe (matchInput "abc") ["abc", "aBc", "axbC", "axBxC", "def"] |
| 29 | + @?= [("abc", 25), ("aBc", 25), ("axbC", 7), ("axBxC", 5)] |
| 30 | + , testCase "prefixes" $ |
| 31 | + mapMaybe (matchInput "alpha") (Text.inits "alphabet") |
| 32 | + @?= [("alpha", 119), ("alphab", 119), ("alphabe", 119), ("alphabet", 119)] |
| 33 | + , testProperty "x `isSubsequenceOf` y => match x y returns Just" |
| 34 | + prop_matchIfSubsequence |
| 35 | + ] |
30 | 36 | ] |
31 | | - |
32 | | -test :: Text -> Bool |
33 | | -test candidate = do |
34 | | - let previous = |
35 | | - catMaybes |
36 | | - [ (d,) . Fuzzy.score |
37 | | - <$> referenceImplementation candidate d "" "" id |
38 | | - | d <- dictionary |
39 | | - ] |
40 | | - new = catMaybes [(d,) <$> match candidate d | d <- dictionary] |
41 | | - previous == new |
42 | | - |
43 | | -propLegit :: Property |
44 | | -propLegit = forAll (elements dictionary) test |
45 | | - |
46 | | -propPrefix :: Property |
47 | | -propPrefix = forAll (elements dictionary >>= elements . inits) test |
48 | | - |
49 | | -propTypo :: Property |
50 | | -propTypo = forAll typoGen test |
51 | | - |
52 | | -typoGen :: Gen Text |
53 | | -typoGen = do |
54 | | - w <- elements dictionary |
55 | | - l <- elements [0 .. Text.length w -1] |
56 | | - let wl = Text.index w l |
57 | | - c <- elements [ c | c <- ['a' .. 'z'], c /= wl] |
58 | | - return $ replaceAt w l c |
59 | | - |
60 | | -replaceAt :: Text -> Int -> Char -> Text |
61 | | -replaceAt t i c = |
62 | | - let (l, r) = Text.splitAt i t |
63 | | - in l <> Text.singleton c <> r |
64 | | - |
65 | | -dictionaryPath :: FilePath |
66 | | -dictionaryPath = "/usr/share/dict/words" |
67 | | - |
68 | | -{-# ANN dictionary ("HLint: ignore Avoid restricted function" :: String) #-} |
69 | | -{-# NOINLINE dictionary #-} |
70 | | -dictionary :: [Text] |
71 | | -dictionary = unsafePerformIO $ do |
72 | | - existsDictionary <- doesFileExist dictionaryPath |
73 | | - if existsDictionary |
74 | | - then map pack . words <$> readFile dictionaryPath |
75 | | - else pure [] |
76 | | - |
77 | | -referenceImplementation :: forall s t. |
78 | | - (T.TextualMonoid s) => |
79 | | - -- | Pattern in lowercase except for first character |
80 | | - s -> |
81 | | - -- | The value containing the text to search in. |
82 | | - t -> |
83 | | - -- | The text to add before each match. |
84 | | - s -> |
85 | | - -- | The text to add after each match. |
86 | | - s -> |
87 | | - -- | The function to extract the text from the container. |
88 | | - (t -> s) -> |
89 | | - -- | The original value, rendered string and score. |
90 | | - Maybe (Fuzzy t s) |
91 | | -referenceImplementation pat' t pre post extract = |
92 | | - if null pat then Just (Fuzzy t result totalScore) else Nothing |
93 | 37 | where |
94 | | - null :: (T.TextualMonoid s) => s -> Bool |
95 | | - null = not . T.any (const True) |
96 | | - |
97 | | - s = extract t |
98 | | - (totalScore, _currScore, result, pat, _) = |
99 | | - T.foldl' |
100 | | - undefined |
101 | | - ( \(tot, cur, res, pat, isFirst) c -> |
102 | | - case T.splitCharacterPrefix pat of |
103 | | - Nothing -> (tot, 0, res <> T.singleton c, pat, isFirst) |
104 | | - Just (x, xs) -> |
105 | | - -- the case of the first character has to match |
106 | | - -- otherwise use lower case since the pattern is assumed lower |
107 | | - let !c' = if isFirst then c else toLower c |
108 | | - in if x == c' |
109 | | - then |
110 | | - let cur' = cur * 2 + 1 |
111 | | - in ( tot + cur', |
112 | | - cur', |
113 | | - res <> pre <> T.singleton c <> post, |
114 | | - xs, |
115 | | - False |
116 | | - ) |
117 | | - else (tot, 0, res <> T.singleton c, pat, isFirst) |
118 | | - ) |
119 | | - ( 0, |
120 | | - 1, -- matching at the start gives a bonus (cur = 1) |
121 | | - mempty, |
122 | | - pat', |
123 | | - True |
124 | | - ) |
125 | | - s |
| 38 | + matchInput :: Text -> Text -> Maybe (Text, Int) |
| 39 | + matchInput needle candidate = (candidate,) <$> match needle candidate |
| 40 | + |
| 41 | +prop_matchIfSubsequence :: Property |
| 42 | +prop_matchIfSubsequence = |
| 43 | + forAll genNonEmptyText $ \haystack -> |
| 44 | + forAll (genSubsequence haystack) $ \needle -> |
| 45 | + isJust (match needle haystack) |
| 46 | + where |
| 47 | + genNonEmptyText = |
| 48 | + Text.pack <$> listOf1 (elements $ ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9']) |
126 | 49 |
|
127 | | -needDictionary :: TestTree -> TestTree |
128 | | -needDictionary |
129 | | - | null dictionary = ignoreTestBecause ("not found: " <> dictionaryPath) |
130 | | - | otherwise = id |
| 50 | + genSubsequence :: Text -> Gen Text |
| 51 | + genSubsequence = |
| 52 | + fmap Text.pack . sublistOf . Text.unpack |
0 commit comments