|
1 | 1 | module FuzzySearch (tests) where
|
2 | 2 |
|
3 |
| -import Data.Char (toLower) |
4 |
| -import Data.Maybe (catMaybes) |
5 |
| -import qualified Data.Monoid.Textual as T |
6 |
| -import Data.Text (Text, inits, pack) |
7 |
| -import qualified Data.Text as Text |
8 |
| -import Prelude hiding (filter) |
9 |
| -import System.Directory (doesFileExist) |
10 |
| -import System.IO.Unsafe (unsafePerformIO) |
11 |
| -import Test.QuickCheck |
| 3 | +import Data.Maybe (isJust, mapMaybe) |
| 4 | +import Data.Text (Text) |
| 5 | +import qualified Data.Text as Text |
| 6 | +import Prelude hiding (filter) |
12 | 7 | import Test.Tasty
|
13 |
| -import Test.Tasty.ExpectedFailure |
14 |
| -import Test.Tasty.QuickCheck (testProperty) |
15 |
| -import qualified Text.Fuzzy as Fuzzy |
16 |
| -import Text.Fuzzy (Fuzzy (..)) |
| 8 | +import Test.Tasty.HUnit |
| 9 | +import Test.Tasty.QuickCheck |
17 | 10 | import Text.Fuzzy.Parallel
|
18 | 11 |
|
19 | 12 | tests :: TestTree
|
20 | 13 | tests =
|
21 | 14 | testGroup
|
22 | 15 | "Fuzzy search"
|
23 |
| - [ needDictionary $ |
24 |
| - testGroup |
25 |
| - "match works as expected on the english dictionary" |
26 |
| - [ testProperty "for legit words" propLegit, |
27 |
| - testProperty "for prefixes" propPrefix, |
28 |
| - testProperty "for typos" propTypo |
29 |
| - ] |
| 16 | + [ testGroup "match" |
| 17 | + [ testCase "empty" $ |
| 18 | + match "" "" @?= Just 0 |
| 19 | + , testCase "camel case" $ |
| 20 | + match "myImportantField" "myImportantField" @?= Just 262124 |
| 21 | + , testCase "a" $ |
| 22 | + mapMaybe (matchInput "a") ["", "a", "aa", "aaa", "A", "AA", "aA", "Aa"] |
| 23 | + @?= [("a",3),("aa",3),("aaa",3),("aA",3),("Aa",1)] |
| 24 | + , testCase "lowercase words" $ |
| 25 | + mapMaybe (matchInput "abc") ["abc", "abcd", "axbc", "axbxc", "def"] |
| 26 | + @?= [("abc", 25), ("abcd", 25), ("axbc", 7), ("axbxc", 5)] |
| 27 | + , testCase "lower upper mix" $ |
| 28 | + mapMaybe (matchInput "abc") ["abc", "aBc", "axbC", "axBxC", "def"] |
| 29 | + @?= [("abc", 25), ("aBc", 25), ("axbC", 7), ("axBxC", 5)] |
| 30 | + , testCase "prefixes" $ |
| 31 | + mapMaybe (matchInput "alpha") (Text.inits "alphabet") |
| 32 | + @?= [("alpha", 119), ("alphab", 119), ("alphabe", 119), ("alphabet", 119)] |
| 33 | + , testProperty "x `isSubsequenceOf` y => match x y returns Just" |
| 34 | + prop_matchIfSubsequence |
| 35 | + ] |
30 | 36 | ]
|
31 |
| - |
32 |
| -test :: Text -> Bool |
33 |
| -test candidate = do |
34 |
| - let previous = |
35 |
| - catMaybes |
36 |
| - [ (d,) . Fuzzy.score |
37 |
| - <$> referenceImplementation candidate d "" "" id |
38 |
| - | d <- dictionary |
39 |
| - ] |
40 |
| - new = catMaybes [(d,) <$> match candidate d | d <- dictionary] |
41 |
| - previous == new |
42 |
| - |
43 |
| -propLegit :: Property |
44 |
| -propLegit = forAll (elements dictionary) test |
45 |
| - |
46 |
| -propPrefix :: Property |
47 |
| -propPrefix = forAll (elements dictionary >>= elements . inits) test |
48 |
| - |
49 |
| -propTypo :: Property |
50 |
| -propTypo = forAll typoGen test |
51 |
| - |
52 |
| -typoGen :: Gen Text |
53 |
| -typoGen = do |
54 |
| - w <- elements dictionary |
55 |
| - l <- elements [0 .. Text.length w -1] |
56 |
| - let wl = Text.index w l |
57 |
| - c <- elements [ c | c <- ['a' .. 'z'], c /= wl] |
58 |
| - return $ replaceAt w l c |
59 |
| - |
60 |
| -replaceAt :: Text -> Int -> Char -> Text |
61 |
| -replaceAt t i c = |
62 |
| - let (l, r) = Text.splitAt i t |
63 |
| - in l <> Text.singleton c <> r |
64 |
| - |
65 |
| -dictionaryPath :: FilePath |
66 |
| -dictionaryPath = "/usr/share/dict/words" |
67 |
| - |
68 |
| -{-# ANN dictionary ("HLint: ignore Avoid restricted function" :: String) #-} |
69 |
| -{-# NOINLINE dictionary #-} |
70 |
| -dictionary :: [Text] |
71 |
| -dictionary = unsafePerformIO $ do |
72 |
| - existsDictionary <- doesFileExist dictionaryPath |
73 |
| - if existsDictionary |
74 |
| - then map pack . words <$> readFile dictionaryPath |
75 |
| - else pure [] |
76 |
| - |
77 |
| -referenceImplementation :: forall s t. |
78 |
| - (T.TextualMonoid s) => |
79 |
| - -- | Pattern in lowercase except for first character |
80 |
| - s -> |
81 |
| - -- | The value containing the text to search in. |
82 |
| - t -> |
83 |
| - -- | The text to add before each match. |
84 |
| - s -> |
85 |
| - -- | The text to add after each match. |
86 |
| - s -> |
87 |
| - -- | The function to extract the text from the container. |
88 |
| - (t -> s) -> |
89 |
| - -- | The original value, rendered string and score. |
90 |
| - Maybe (Fuzzy t s) |
91 |
| -referenceImplementation pat' t pre post extract = |
92 |
| - if null pat then Just (Fuzzy t result totalScore) else Nothing |
93 | 37 | where
|
94 |
| - null :: (T.TextualMonoid s) => s -> Bool |
95 |
| - null = not . T.any (const True) |
96 |
| - |
97 |
| - s = extract t |
98 |
| - (totalScore, _currScore, result, pat, _) = |
99 |
| - T.foldl' |
100 |
| - undefined |
101 |
| - ( \(tot, cur, res, pat, isFirst) c -> |
102 |
| - case T.splitCharacterPrefix pat of |
103 |
| - Nothing -> (tot, 0, res <> T.singleton c, pat, isFirst) |
104 |
| - Just (x, xs) -> |
105 |
| - -- the case of the first character has to match |
106 |
| - -- otherwise use lower case since the pattern is assumed lower |
107 |
| - let !c' = if isFirst then c else toLower c |
108 |
| - in if x == c' |
109 |
| - then |
110 |
| - let cur' = cur * 2 + 1 |
111 |
| - in ( tot + cur', |
112 |
| - cur', |
113 |
| - res <> pre <> T.singleton c <> post, |
114 |
| - xs, |
115 |
| - False |
116 |
| - ) |
117 |
| - else (tot, 0, res <> T.singleton c, pat, isFirst) |
118 |
| - ) |
119 |
| - ( 0, |
120 |
| - 1, -- matching at the start gives a bonus (cur = 1) |
121 |
| - mempty, |
122 |
| - pat', |
123 |
| - True |
124 |
| - ) |
125 |
| - s |
| 38 | + matchInput :: Text -> Text -> Maybe (Text, Int) |
| 39 | + matchInput needle candidate = (candidate,) <$> match needle candidate |
| 40 | + |
| 41 | +prop_matchIfSubsequence :: Property |
| 42 | +prop_matchIfSubsequence = |
| 43 | + forAll genNonEmptyText $ \haystack -> |
| 44 | + forAll (genSubsequence haystack) $ \needle -> |
| 45 | + isJust (match needle haystack) |
| 46 | + where |
| 47 | + genNonEmptyText = |
| 48 | + Text.pack <$> listOf1 (elements $ ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9']) |
126 | 49 |
|
127 |
| -needDictionary :: TestTree -> TestTree |
128 |
| -needDictionary |
129 |
| - | null dictionary = ignoreTestBecause ("not found: " <> dictionaryPath) |
130 |
| - | otherwise = id |
| 50 | + genSubsequence :: Text -> Gen Text |
| 51 | + genSubsequence = |
| 52 | + fmap Text.pack . sublistOf . Text.unpack |
0 commit comments