diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index a0d7f87..207a337 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -7,3 +7,9 @@ jobs:
       - uses: nixbuild/nix-quick-install-action@v28
       - uses: actions/checkout@v4
       - run: nix build
+  paper:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: nixbuild/nix-quick-install-action@v28
+      - uses: actions/checkout@v4
+      - run: nix build .#paper
diff --git a/flake.lock b/flake.lock
index 4219465..301478a 100644
--- a/flake.lock
+++ b/flake.lock
@@ -16,6 +16,22 @@
         "type": "github"
       }
     },
+    "inara": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1694971498,
+        "narHash": "sha256-i2b//MXoVz13WNoIg8rBnQbCpMIAMAjSi09w/7B5W9I=",
+        "owner": "openjournals",
+        "repo": "inara",
+        "rev": "0739fb5f750a900ef917b973c18f239aafacb1fd",
+        "type": "github"
+      },
+      "original": {
+        "owner": "openjournals",
+        "repo": "inara",
+        "type": "github"
+      }
+    },
     "nixpkgs": {
       "locked": {
         "lastModified": 1690875999,
@@ -34,6 +50,7 @@
   "root": {
     "inputs": {
       "cloudflareZlib": "cloudflareZlib",
+      "inara": "inara",
       "nixpkgs": "nixpkgs"
     }
   }
diff --git a/flake.nix b/flake.nix
index 0d5bcb5..55e9212 100644
--- a/flake.nix
+++ b/flake.nix
@@ -4,10 +4,16 @@
     url = "github:cloudflare/zlib";
     flake = false;
   };
+  inputs.inara = {
+    url = "github:openjournals/inara";
+    flake = false;
+  };
+
   outputs = {
     self,
     nixpkgs,
     cloudflareZlib,
+    inara,
   }: let
     system = "x86_64-linux";
     pkgs = import nixpkgs {
@@ -19,7 +25,13 @@
     ];
   };
   in {
-    packages.${system}.default = import ./. {inherit pkgs cloudflareZlib;};
-    devShells.${system}.default = self.packages.${system}.default.env;
+    packages.${system} = {
+      default = import ./. {inherit pkgs cloudflareZlib;};
+      paper = pkgs.callPackage ./paper/paper.nix {inherit inara;};
+    };
+    devShells.${system} = {
+      default = self.packages.${system}.default.env;
+      paper = self.packages.${system}.paper.env;
+    };
   };
 }
diff --git a/paper/paper.md b/paper/paper.md
new file mode 100644
index 0000000..e7b80f2
--- /dev/null
+++ b/paper/paper.md
@@ -0,0 +1,60 @@
+---
+title: 'Dedumi: approximate deduplication of unaligned reads using unique molecular identifiers'
+tags:
+  - Haskell
+  - bioinformatics
+  - genomics
+authors:
+  - name: Justin Bedő
+    affiliation: "1, 2"
+affiliations:
+  - name: The Walter and Eliza Hall Institute of Medical Research
+    index: 1
+  - name: The University of Melbourne
+    index: 2
+bibliography: references.bib
+---
+
+# Summary
+
+Dedumi is an application for approximately deduplicating unaligned reads with unique molecular identifiers (UMIs).
+Deduplication is performed with a Cuckoo filter, and hence the probability of incorrectly filtering out a unique read is bounded.
+Because dedumi operates on unaligned reads, duplicates are removed prior to alignment, yielding significant computational savings.
+
+
+# Statement of Need
+
+Short-read sequencing provides cost-effective DNA sequencing and forms a fundamental technique in many biological assays and studies.
+The technique relies on randomly sequencing short fragments of DNA forming a *library*, with much subsequent analysis depending on counts of how frequently certain sequences are observed.
+The formation of a library typically requires amplification of small amounts of DNA through polymerase chain reaction (PCR) prior to sequencing [@Garber2009].
+Unfortunately, PCR can preferentially amplify certain strands of DNA [@Aird2011], which leads to a bias during analysis.
+One method to reduce this bias is to introduce unique molecular identifiers (UMIs) prior to amplification so that individual DNA fragments can be identified after amplification.
+These UMIs can then be used to remove (deduplicate) fragments that have been repeatedly sequenced, thus mitigating the bias introduced by PCR.
+
+
+There are several existing approaches to deduplication based on UMIs, most of which operate post-alignment: reads are first aligned to a reference genome, and duplicate reads mapping to the same position are then collapsed to a consensus sequence.
+This is the approach taken by UMI-tools [@Smith2017], and many variants of the core approach exist [@Liu2019].
+The downside of an alignment-based approach is that there is no computational saving at the alignment stage: all reads, regardless of whether they are duplicates, are aligned to the genome.
+
+In contrast, deduplicating unaligned reads reduces the computational cost of alignment, as duplicate reads are removed before alignment takes place.
+Some existing methods deduplicate unaligned reads based on UMIs [@UMIc]; however, these are resource-intensive: as reported by @UMIc, 1M reads required 2.2 hours to process.
+Such resource requirements prevent use in sequencing projects where many millions of reads need to be deduplicated.
+
+Dedumi takes an alternative approach and approximately deduplicates unaligned reads.
+UMIs are extracted from reads in a stream, with a Cuckoo filter [@Fan2014] used to detect duplicates.
+A similar approach has recently been implemented in fastp [@Chen2023]; however, unlike dedumi, fastp uses a Bloom filter [@Bloom1970], which does not guarantee an upper bound on the error rate.
+The error rate of the Cuckoo filter is bounded, with memory requirements determined by the chosen error rate.
+
+On synthetic 150 bp paired-end data generated with a duplication rate of 10% and a base substitution error rate of 0.087% to match the Illumina HiSeq X Ten [@Stoler2021],
+dedumi processes 1M reads in 6.852s^[all results are the median of 11 different simulated datasets] on an Intel i7-14700F using a single core.
+A total of 903,012 reads were retained (a 9.7% duplication rate), closely matching the expected 10% duplication rate.
+In comparison, fastp processes 1M reads in 4.259s (multiple threads, 10.667s user+sys) and retains 930,862 reads (a 6.9% duplication rate).
+Dedumi is therefore 36% faster on a single core and more closely matches the expected duplication rate.
+
+Dedumi is open source and published under the BSD 3-clause licence.
+
+# Acknowledgements
+
+We thank Anthony Papenfuss (A.T.P.) for supporting this work and providing feedback on the manuscript.
+J.B. was supported by the Stafford Fox Medical Research Foundation, the Movember Foundation, and by funding to A.T.P. from a National Health and Medical Research Council (NHMRC) Investigator Grant (2026643).
+The research benefitted from support provided by the Victorian State Government Operational Infrastructure Support and the Australian Government NHMRC Independent Research Institute Infrastructure Support schemes.
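The streaming deduplication scheme paper.md describes is simple enough to sketch. Below is a minimal illustration of the core loop, not dedumi's actual implementation: `Data.Set` stands in for the Cuckoo filter (membership is exact rather than approximate, so memory is not bounded), and the UMI is assumed to occupy a fixed-length prefix of each read; `umiLength = 8` is illustrative only.

```haskell
-- Minimal sketch of streaming UMI deduplication: keep a read only if its
-- UMI (here assumed to be a fixed-length read prefix) is unseen so far.
-- Data.Set stands in for the Cuckoo filter, so the membership test is
-- exact rather than approximate and memory is not bounded.
module DedupSketch (dedup) where

import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import qualified Data.Set as Set

umiLength :: Int
umiLength = 8 -- illustrative only

dedup :: [ByteString] -> [ByteString]
dedup = go Set.empty
  where
    go _ [] = []
    go seen (r : rs)
      | umi `Set.member` seen = go seen rs -- duplicate: drop
      | otherwise = r : go (Set.insert umi seen) rs -- first occurrence: keep
      where
        umi = B.take umiLength r
```

Swapping the set for a Cuckoo filter makes the membership test approximate; the false-positive rate, and hence the chance of wrongly discarding a unique read, is then bounded by the filter parameters, which is what fixes the memory/error trade-off the paper relies on.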
diff --git a/paper/paper.nix b/paper/paper.nix
new file mode 100644
index 0000000..9d2fe01
--- /dev/null
+++ b/paper/paper.nix
@@ -0,0 +1,50 @@
+{
+  stdenvNoCC,
+  inara,
+  pandoc,
+  texlive,
+  hack-font,
+}:
+stdenvNoCC.mkDerivation {
+  name = "paper";
+  src = inara;
+  buildInputs = [
+    pandoc
+    (texlive.combine {
+      inherit
+        (texlive)
+        scheme-basic
+        latexmk
+        marginnote
+        xcolor
+        preprint
+        etoolbox
+        titlesec
+        pgf
+        hyperxmp
+        ifmtarg
+        luacode
+        luatexbase
+        caption
+        orcidlink
+        tcolorbox
+        environ
+        seqsplit
+        xstring
+        float
+        fontspec
+        fontsetup
+        unicode-math
+        lualatex-math
+        newcomputermodern
+        selnolig
+        ;
+    })
+  ];
+  buildPhase = ''
+    export HOME=$(mktemp -d)
+    make ARTICLE=${./.}/paper.md
+  '';
+  installPhase = "cp -r publishing-artifacts $out";
+  OSFONTDIR = hack-font;
+}
diff --git a/paper/references.bib b/paper/references.bib
new file mode 100644
index 0000000..4bf2654
--- /dev/null
+++ b/paper/references.bib
@@ -0,0 +1,110 @@
+@article{UMIc,
+  title = {UMIc: A Preprocessing Method for UMI Deduplication and Reads Correction},
+  url = {http://dx.doi.org/10.3389/fgene.2021.660366},
+  DOI = {10.3389/fgene.2021.660366},
+  author = {Maria Tsagiopoulou and Maria Christina Maniou and Nikolaos Pechlivanis and Anastasis Togkousidis and Michaela Kotrová and Tobias Hutzenlaub and Ilias Kappas and Anastasia Chatzidimitriou and Fotis Psomopoulos},
+  year = {2021},
+  journal = {Frontiers in Genetics}
+}
+@article{Liu2019,
+  title = {Algorithms for efficiently collapsing reads with Unique Molecular Identifiers},
+  url = {http://dx.doi.org/10.7717/peerj.8275},
+  DOI = {10.7717/peerj.8275},
+  author = {Daniel Liu},
+  year = {2019},
+  journal = {PeerJ}
+}
+@article{W4283398238,
+  title = {TrieDedup: A fast trie-based deduplication algorithm to handle ambiguous bases in high-throughput sequencing},
+  url = {http://dx.doi.org/10.1101/2022.02.20.481170},
+  DOI = {10.1101/2022.02.20.481170},
+  author = {Jian Hu and Shunlong Luo and Ming Tian and Frederick W. Alt and Adam Yongxin Ye},
+  year = {2022},
+  journal = {bioRxiv (Cold Spring Harbor Laboratory)}
+}
+@article{Smith2017,
+  title = {UMI-tools: modeling sequencing errors in Unique Molecular Identifiers to improve quantification accuracy},
+  url = {http://dx.doi.org/10.1101/gr.209601.116},
+  DOI = {10.1101/gr.209601.116},
+  author = {Tom Smith and Andreas Heger and Ian Sudbery},
+  year = {2017},
+  journal = {Genome Research}
+}
+@article{Chen2023,
+  title = {Ultrafast one‐pass FASTQ data preprocessing, quality control, and deduplication using fastp},
+  volume = {2},
+  ISSN = {2770-596X},
+  url = {http://dx.doi.org/10.1002/imt2.107},
+  DOI = {10.1002/imt2.107},
+  number = {2},
+  journal = {iMeta},
+  publisher = {Wiley},
+  author = {Chen, Shifu},
+  year = {2023},
+  month = may
+}
+@article{Stoler2021,
+  title = {Sequencing error profiles of Illumina sequencing instruments},
+  volume = {3},
+  ISSN = {2631-9268},
+  url = {http://dx.doi.org/10.1093/nargab/lqab019},
+  DOI = {10.1093/nargab/lqab019},
+  number = {1},
+  journal = {NAR Genomics and Bioinformatics},
+  publisher = {Oxford University Press (OUP)},
+  author = {Stoler, Nicholas and Nekrutenko, Anton},
+  year = {2021},
+  month = jan
+}
+@article{Aird2011,
+  title = {Analyzing and minimizing PCR amplification bias in Illumina sequencing libraries},
+  volume = {12},
+  ISSN = {1465-6906},
+  url = {http://dx.doi.org/10.1186/gb-2011-12-2-r18},
+  DOI = {10.1186/gb-2011-12-2-r18},
+  number = {2},
+  journal = {Genome Biology},
+  publisher = {Springer Science and Business Media LLC},
+  author = {Aird, Daniel and Ross, Michael G and Chen, Wei-Sheng and Danielsson, Maxwell and Fennell, Timothy and Russ, Carsten and Jaffe, David B and Nusbaum, Chad and Gnirke, Andreas},
+  year = {2011},
+  pages = {R18}
+}
+@article{Garber2009,
+  title = {Closing gaps in the human genome using sequencing by synthesis},
+  volume = {10},
+  ISSN = {1465-6906},
+  url = {http://dx.doi.org/10.1186/gb-2009-10-6-r60},
+  DOI = {10.1186/gb-2009-10-6-r60},
+  number = {6},
+  journal = {Genome Biology},
+  publisher = {Springer Science and Business Media LLC},
+  author = {Garber, Manuel and Zody, Michael C and Arachchi, Harindra M and Berlin, Aaron and Gnerre, Sante and Green, Lisa M and Lennon, Niall and Nusbaum, Chad},
+  year = {2009},
+  pages = {R60}
+}
+@inproceedings{Fan2014,
+  series = {CoNEXT ’14},
+  title = {Cuckoo Filter: Practically Better Than Bloom},
+  url = {http://dx.doi.org/10.1145/2674005.2674994},
+  DOI = {10.1145/2674005.2674994},
+  booktitle = {Proceedings of the 10th ACM International on Conference on emerging Networking Experiments and Technologies},
+  publisher = {ACM},
+  author = {Fan, Bin and Andersen, Dave G. and Kaminsky, Michael and Mitzenmacher, Michael D.},
+  year = {2014},
+  month = dec,
+  collection = {CoNEXT ’14}
+}
+@article{Bloom1970,
+  title = {Space/time trade-offs in hash coding with allowable errors},
+  volume = {13},
+  ISSN = {1557-7317},
+  url = {http://dx.doi.org/10.1145/362686.362692},
+  DOI = {10.1145/362686.362692},
+  number = {7},
+  journal = {Communications of the ACM},
+  publisher = {Association for Computing Machinery (ACM)},
+  author = {Bloom, Burton H.},
+  year = {1970},
+  month = jul,
+  pages = {422–426}
+}
\ No newline at end of file
diff --git a/paper/src/simulate-reads/flake.lock b/paper/src/simulate-reads/flake.lock
new file mode 100644
index 0000000..c100dfb
--- /dev/null
+++ b/paper/src/simulate-reads/flake.lock
@@ -0,0 +1,26 @@
+{
+  "nodes": {
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1713312815,
+        "narHash": "sha256-vnvYlxrBWLPHbalKqEUEZPd5JF5/uhGWLm/LMagel9g=",
+        "owner": "nixos",
+        "repo": "nixpkgs",
+        "rev": "229626d4d005a5a93bcba8eb5ac5c4d93994a2e0",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nixos",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "nixpkgs": "nixpkgs"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/paper/src/simulate-reads/flake.nix b/paper/src/simulate-reads/flake.nix
new file mode 100644
index 0000000..2e84fd0
--- /dev/null
+++ b/paper/src/simulate-reads/flake.nix
@@ -0,0 +1,9 @@
+{
+  inputs.nixpkgs.url = "github:nixos/nixpkgs";
+  outputs = {self, nixpkgs}:
+    let system = "x86_64-linux";
+        pkgs = import nixpkgs {inherit system;};
+    in
+    {packages.${system}.default = pkgs.haskellPackages.callCabal2nix "simulate-reads" ./. {};
+     devShells.${system}.default = self.packages.${system}.default.env;};
+}
diff --git a/paper/src/simulate-reads/package.yaml b/paper/src/simulate-reads/package.yaml
new file mode 100644
index 0000000..17f9d4d
--- /dev/null
+++ b/paper/src/simulate-reads/package.yaml
@@ -0,0 +1,19 @@
+name: simulate-reads
+version: 0.1
+synopsis: Synthetic read generator with UMIs
+maintainer: Justin Bedo
+license: MIT
+
+dependencies:
+  - base
+  - streamly-core
+  - random
+  - streaming-commons
+  - bytestring
+  - mtl
+
+ghc-options: [-O2, -Wall, -Wno-name-shadowing, -rtsopts, -prof, -fprof-late]
+
+executables:
+  simulate:
+    main-is: simulate.hs
diff --git a/paper/src/simulate-reads/simulate.hs b/paper/src/simulate-reads/simulate.hs
new file mode 100644
index 0000000..77fa74a
--- /dev/null
+++ b/paper/src/simulate-reads/simulate.hs
@@ -0,0 +1,148 @@
+{-# LANGUAGE GHC2021 #-}
+{-# LANGUAGE BlockArguments #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module Main where
+
+import Control.Monad
+import Control.Monad.Trans
+import Data.ByteString (ByteString)
+import Data.ByteString qualified as B
+import Data.ByteString.Char8 qualified as BC
+import Data.Function
+import Data.Streaming.Zlib
+import Streamly.Data.Fold qualified as F
+import Streamly.Data.Stream qualified as S
+import System.Environment
+import System.IO
+import System.Random hiding (uniform)
+import Prelude hiding (Read, read)
+
+-- simulation parameters
+readLength :: Int
+readLength = 150
+
+baseErrorRate :: Double
+baseErrorRate = 0.00087 -- HiSeq X Ten
+
+nreads :: Int
+nreads = 1_000_000
+
+-- Probability monad transformer
+newtype P m a = P {runP :: StdGen -> m (a, StdGen)}
+
+evalP :: (Monad m) => P m a -> StdGen -> m a
+evalP (P f) g = fst <$> f g
+
+instance (Monad m) => Monad (P m) where
+  P f >>= g = P \q -> do
+    (x, q') <- f q
+    runP (g x) q'
+
+instance (Monad m) => Functor (P m) where
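+  -- Functor and Applicative are recovered from the Monad instance (liftM/ap).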
+  fmap = liftM
+
+instance (Monad m) => Applicative (P m) where
+  pure x = P \g -> pure (x, g)
+  (<*>) = ap
+
+instance (MonadIO m) => MonadIO (P m) where
+  liftIO f = P \g -> (,g) <$> liftIO f
+
+uniform :: Applicative m => P m Double
+uniform = P (pure . random)
+
+poisson :: Monad m => Double -> P m Int
+poisson lambda = go 0 0
+  where
+    go i s
+      | s >= 1 = pure (i - 1)
+      | otherwise = do
+          u <- uniform
+          go (i + 1) (s - log u / lambda)
+
+
+-- Reads/nucleotides and sampling them (with errors)
+data Nuc = A | C | G | T
+  deriving (Eq, Ord, Enum, Show, Bounded)
+
+nuc :: (Monad m) => P m Nuc
+nuc = toEnum . round . (* fromIntegral (fromEnum (maxBound :: Nuc))) <$> uniform
+
+type Read = [Nuc]
+
+data Pair a = Pair !a !a
+  deriving (Eq, Ord, Show, Functor)
+
+read :: (Monad m) => P m Read
+read = replicateM readLength nuc
+
+readPair :: (Monad m) => P m (Pair Read)
+readPair = Pair <$> read <*> read
+
+dup :: (Monad m) => Double -> Pair Read -> S.Stream (P m) (Pair Read)
+dup p r = S.concatEffect do
+  n <- poisson $ 1 / (1 - p) - 1
+  pure $ S.replicate (1 + n) r
+
+baseError :: (Monad m) => Double -> Read -> P m Read
+baseError p = mapM error'
+  where
+    error' b = do
+      q <- uniform
+      if q < p
+        then nuc
+        else pure b
+
+-- FastQ writer
+data Entry = Entry ByteString (Pair Read) deriving (Eq, Ord, Show)
+
+unparse :: (MonadIO m) => FilePath -> FilePath -> S.Stream m Entry -> m ()
+unparse l r str = do
+  lh <- liftIO $ openFile l WriteMode
+  rh <- liftIO $ openFile r WriteMode
+  ld <- liftIO $ initDeflate 0 gzipWindow
+  rd <- liftIO $ initDeflate 0 gzipWindow
+  fmap unparse' str & S.fold (F.drainMapM $ writeFiles lh rh ld rd)
+  liftIO $ do
+    flush rd rh
+    flush ld lh
+    hClose rh
+    hClose lh
+  where
+    gzipWindow = WindowBits 31
+    flush d h = finishDeflate d & writePopper h
+
+    writeFiles l r ld rd (Pair a b) = do
+      putCompressed ld l a
+      putCompressed rd r b
+
+    putCompressed d h chunk = liftIO $ do
+      popper <- feedDeflate d chunk
+      writePopper h popper
+
+    writePopper h p =
+      p >>= \case
+        PRNext str -> do
+          B.hPut h str
+          writePopper h p
+        PRDone -> pure ()
+        PRError _ -> error "unparse: compression error"
+
+    unparse' :: Entry -> Pair ByteString
+    unparse' (Entry hdr (Pair l r)) = Pair (BC.unlines ["@" <> hdr, BC.pack $ concatMap show l, "+", qual]) (BC.unlines ["@" <> hdr, BC.pack $ concatMap show r, "+", qual])
+
+    qual = BC.pack $ replicate readLength '~'
+
+main :: IO ()
+main = do
+  [out1, out2] <- getArgs
+  q <- getStdGen
+  S.repeatM readPair -- Generate random reads
+    & S.concatMap (dup 0.1) -- Duplicate reads
+    & S.mapM (\(Pair l r) -> Pair <$> baseError baseErrorRate l <*> baseError baseErrorRate r) -- Introduce base errors
+    & S.take nreads
+    & S.zipWith (\i rp -> Entry (BC.pack $ show i) rp) (S.fromList [0 :: Int ..])
+    & unparse out1 out2
+    & flip evalP q
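A note on the duplication model in simulate.hs: `dup` emits each read pair 1 + n times with n drawn from Poisson(1/(1-p) - 1), so the expected copy count is 1/(1-p) and a fraction p of the emitted reads are duplicates, matching the 10% target used in the benchmark. The following is a standalone sanity check of that parameterisation, a sketch that mirrors the inversion sampler in simulate.hs using only System.Random; the printed mean is approximate.

```haskell
import System.Random

-- Knuth-style inversion sampler, mirroring `poisson` in simulate.hs:
-- count exponential(lambda) arrivals until their running sum exceeds 1.
poisson :: Double -> StdGen -> (Int, StdGen)
poisson lambda = go 0 0
  where
    go i s g
      | s >= 1 = (i - 1, g)
      | otherwise =
          let (u, g') = random g :: (Double, StdGen)
           in go (i + 1) (s - log u / lambda) g'

main :: IO ()
main = do
  let p = 0.1 -- target duplication rate
      lambda = 1 / (1 - p) - 1
      step (acc, g) _ = let (n, g') = poisson lambda g in (acc + 1 + n, g')
      (copies, _) = foldl step (0 :: Int, mkStdGen 42) [1 .. 100000 :: Int]
  -- mean copies per fragment should approach 1/(1-p), i.e. about 1.111
  print (fromIntegral copies / 100000 :: Double)
```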