From 8984fefca9b75086ed87231a4381c9aed67626bc Mon Sep 17 00:00:00 2001
From: Evan Rutledge Borden <eborden@frontrowed.com>
Date: Wed, 24 Oct 2018 13:05:46 -0500
Subject: [PATCH 1/2] Add high order function for checking equality after
 transformation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is very common to transform two `Text` values and then compare them
for equality.

```hs
toCaseFold x == toCaseFold y
```

This operation ends up being costly because it must materialize each
newly transformed `Text` in order to check their equality. Instead we
can provide a helper for such patterns `transformEq`, which relies on
the underlying `ustream` to check equality and avoids materializing the
transformed `Text` in the average case.

Benchmarks results for this operation show a worst case, which is close
to the naive comparison and a best case which is much more agreeable.

```
benchmarking transformEq/Text ==: Eq
time                 629.6 ns   (625.7 ns .. 634.5 ns)
                     1.000 R²   (0.999 R² .. 1.000 R²)
mean                 630.0 ns   (626.6 ns .. 634.2 ns)
std dev              12.93 ns   (9.856 ns .. 16.21 ns)
variance introduced by outliers: 25% (moderately inflated)

benchmarking transformEq/Text transformEq: Eq
time                 578.4 ns   (575.6 ns .. 581.2 ns)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 577.9 ns   (575.1 ns .. 581.3 ns)
std dev              10.35 ns   (8.092 ns .. 13.02 ns)
variance introduced by outliers: 21% (moderately inflated)

benchmarking transformEq/Text ==: Not Eq
time                 663.1 ns   (659.3 ns .. 666.8 ns)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 660.4 ns   (657.9 ns .. 664.4 ns)
std dev              10.49 ns   (7.178 ns .. 16.44 ns)
variance introduced by outliers: 17% (moderately inflated)

benchmarking transformEq/Text transformEq: Not Eq
time                 126.9 ns   (126.1 ns .. 127.8 ns)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 126.7 ns   (126.2 ns .. 127.4 ns)
std dev              2.122 ns   (1.663 ns .. 2.636 ns)
variance introduced by outliers: 21% (moderately inflated)

benchmarking transformEq/Text ==: Not Length
time                 499.0 ns   (496.3 ns .. 502.1 ns)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 499.8 ns   (497.2 ns .. 502.9 ns)
std dev              9.655 ns   (7.545 ns .. 11.84 ns)
variance introduced by outliers: 23% (moderately inflated)

benchmarking transformEq/Text transformEq: Not Length
time                 17.53 ns   (17.45 ns .. 17.62 ns)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 17.65 ns   (17.55 ns .. 17.78 ns)
std dev              359.4 ps   (283.1 ps .. 500.4 ps)
variance introduced by outliers: 31% (moderately inflated)
```
---
 Data/Text.hs                                 | 10 +++++++
 benchmarks/haskell/Benchmarks.hs             |  2 ++
 benchmarks/haskell/Benchmarks/TransformEq.hs | 30 ++++++++++++++++++++
 benchmarks/text-benchmarks.cabal             |  1 +
 4 files changed, 43 insertions(+)
 create mode 100644 benchmarks/haskell/Benchmarks/TransformEq.hs

diff --git a/Data/Text.hs b/Data/Text.hs
index df032059..3a90040b 100644
--- a/Data/Text.hs
+++ b/Data/Text.hs
@@ -167,6 +167,7 @@ module Data.Text
     , isPrefixOf
     , isSuffixOf
     , isInfixOf
+    , transformEq
 
     -- ** View patterns
     , stripPrefix
@@ -1753,6 +1754,15 @@ isInfixOf needle haystack
     isInfixOf (singleton n) h = S.elem n (S.stream h)
   #-}
 
+-- | The 'transformEq' function takes a `Text -> Text` function
+-- and two 'Text's if the 'Text's are equal after transformation. This
+-- function relies on fusion to avoid materialising the transformed 'Text'
+transformEq :: (Text -> Text) -> Text -> Text -> Bool
+transformEq f x y
+    | length x == length y = P.all (P.uncurry (==)) $ zip (f x) (f y)
+    | otherwise            = False
+{-# INLINE [1] transformEq #-}
+
 -------------------------------------------------------------------------------
 -- * View patterns
 
diff --git a/benchmarks/haskell/Benchmarks.hs b/benchmarks/haskell/Benchmarks.hs
index fdecba79..04387c08 100644
--- a/benchmarks/haskell/Benchmarks.hs
+++ b/benchmarks/haskell/Benchmarks.hs
@@ -14,6 +14,7 @@ import qualified Benchmarks.Concat as Concat
 import qualified Benchmarks.DecodeUtf8 as DecodeUtf8
 import qualified Benchmarks.EncodeUtf8 as EncodeUtf8
 import qualified Benchmarks.Equality as Equality
+import qualified Benchmarks.TransformEq as TransformEq
 import qualified Benchmarks.FileRead as FileRead
 import qualified Benchmarks.FoldLines as FoldLines
 import qualified Benchmarks.Mul as Mul
@@ -63,6 +64,7 @@ benchmarks = do
         , Search.benchmark (tf "russian.txt") "принимая"
         , Stream.benchmark (tf "russian.txt")
         , WordFrequencies.benchmark (tf "russian.txt")
+        , TransformEq.benchmark
         ]
 
     -- Program-like benchmarks
diff --git a/benchmarks/haskell/Benchmarks/TransformEq.hs b/benchmarks/haskell/Benchmarks/TransformEq.hs
new file mode 100644
index 00000000..0063a306
--- /dev/null
+++ b/benchmarks/haskell/Benchmarks/TransformEq.hs
@@ -0,0 +1,30 @@
+-- | Compare a string after a transformation
+--
+-- Tested in this benchmark:
+--
+-- * Comparison of transformed strings via transformEq
+--
+module Benchmarks.TransformEq
+    ( benchmark
+    ) where
+
+import Criterion (Benchmark, bgroup, bench, whnf)
+import Data.Function (on)
+import Data.Text (transformEq, toCaseFold, pack)
+
+benchmark :: IO Benchmark
+benchmark = do
+    let
+      equiv = (pack "Fooooooo", pack "fOOOOOOO")
+      notEq = (pack "fooooooo", pack "barrrrrr")
+      lengthNotEq = (pack "foo", pack "foooooooo")
+      eq = uncurry ((==) `on` toCaseFold)
+      transEq = uncurry $ transformEq toCaseFold
+    return $ bgroup "transformEq"
+        [ bench "Text ==: Eq" $ whnf eq equiv
+        , bench "Text transformEq: Eq" $ whnf transEq equiv
+        , bench "Text ==: Not Eq" $ whnf eq notEq
+        , bench "Text transformEq: Not Eq" $ whnf transEq notEq
+        , bench "Text ==: Not Length" $ whnf eq lengthNotEq
+        , bench "Text transformEq: Not Length" $ whnf transEq lengthNotEq
+        ]
diff --git a/benchmarks/text-benchmarks.cabal b/benchmarks/text-benchmarks.cabal
index 073dfeee..1c9490a2 100644
--- a/benchmarks/text-benchmarks.cabal
+++ b/benchmarks/text-benchmarks.cabal
@@ -76,6 +76,7 @@ executable text-benchmarks
     Benchmarks.Replace
     Benchmarks.Search
     Benchmarks.Stream
+    Benchmarks.TransformEq
     Benchmarks.WordFrequencies
 
   -- Source code for IUT (implementation under test)

From 2f713091379128ba3d3285df1e6893373d8c354d Mon Sep 17 00:00:00 2001
From: Evan Rutledge Borden <eborden@frontrowed.com>
Date: Sat, 29 Dec 2018 13:02:53 -0600
Subject: [PATCH 2/2] Use stream equality in transformEq

This version is ever so slightly more efficient and utilizes the
underlying stream's equality instead of using `zip` and `all`.
---
 Data/Text.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Data/Text.hs b/Data/Text.hs
index 3a90040b..dad9bdae 100644
--- a/Data/Text.hs
+++ b/Data/Text.hs
@@ -1759,7 +1759,7 @@ isInfixOf needle haystack
 -- function relies on fusion to avoid materialising the transformed 'Text'
 transformEq :: (Text -> Text) -> Text -> Text -> Bool
 transformEq f x y
-    | length x == length y = P.all (P.uncurry (==)) $ zip (f x) (f y)
+    | length x == length y = S.stream (f x) == S.stream (f y)
     | otherwise            = False
 {-# INLINE [1] transformEq #-}