From 571d04ce54ce2f9ea131bdd2d7497e319de1e860 Mon Sep 17 00:00:00 2001 From: Wen Kokke Date: Wed, 22 Jan 2025 15:55:43 +0000 Subject: [PATCH 1/4] feat: add fromChecksumsFileForWriteBufferFiles --- src/Database/LSMTree/Internal/Paths.hs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Database/LSMTree/Internal/Paths.hs b/src/Database/LSMTree/Internal/Paths.hs index 7f4f3f349..2d5399f9a 100644 --- a/src/Database/LSMTree/Internal/Paths.hs +++ b/src/Database/LSMTree/Internal/Paths.hs @@ -31,6 +31,7 @@ module Database.LSMTree.Internal.Paths ( , fromChecksumsFile -- * Checksums for WriteBuffer files , toChecksumsFileForWriteBufferFiles + , fromChecksumsFileForWriteBufferFiles -- * ForRunFiles abstraction , ForKOps (..) , ForBlob (..) @@ -328,3 +329,9 @@ toChecksumsFileForWriteBufferFiles checksums = where toChecksumsFileName :: String -> CRC.ChecksumsFileName toChecksumsFileName = CRC.ChecksumsFileName . BS.pack + +fromChecksumsFileForWriteBufferFiles :: CRC.ChecksumsFile -> Either String (ForKOps CRC.CRC32C, ForBlob CRC.CRC32C) +fromChecksumsFileForWriteBufferFiles file = do + forKOps <- maybe (Left $ "key not found: " <> writeBufferKOpsExt) Right (Map.lookup (CRC.ChecksumsFileName . fromString $ writeBufferKOpsExt) file) + forBlob <- maybe (Left $ "key not found: " <> writeBufferBlobExt) Right (Map.lookup (CRC.ChecksumsFileName . fromString $ writeBufferBlobExt) file) + pure (ForKOps forKOps, ForBlob forBlob) From acab4d9bff190dce8f82e562372cde2008f63d08 Mon Sep 17 00:00:00 2001 From: Wen Kokke Date: Wed, 22 Jan 2025 13:29:36 +0000 Subject: [PATCH 2/4] chore: enable corruption in prop_flipSnapshotBit --- test/Test/Database/LSMTree/Internal/Snapshot/FS.hs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/test/Test/Database/LSMTree/Internal/Snapshot/FS.hs b/test/Test/Database/LSMTree/Internal/Snapshot/FS.hs index bdecbb64a..a15f57c83 100644 --- a/test/Test/Database/LSMTree/Internal/Snapshot/FS.hs +++ b/test/Test/Database/LSMTree/Internal/Snapshot/FS.hs @@ -211,15 +211,8 @@ prop_flipSnapshotBit tabFlippedBit = tabulate "Flipped bit" [showPowersOf10 j] counterFlippedBit = counterexample ("Flipped bit: " ++ show j) - let isUncheckedFile = - path == getNamedSnapshotDir namedSnapDir FS.mkFsPath ["0.keyops"] - || path == getNamedSnapshotDir namedSnapDir FS.mkFsPath ["0.blobs"] - || path == getNamedSnapshotDir namedSnapDir FS.mkFsPath ["0.checksums"] - - -- TODO: remove once write buffer files have checksum verification - if isUncheckedFile then - pure discard - else if n <= 0 then -- file is empty + -- TODO: check forgotten refs + if n <= 0 then -- file is empty pure $ tabulate "Result" ["No corruption applied"] True else do -- file is non-empty From 7d470474079cc947597199a3b0cf448ce62c3ebc Mon Sep 17 00:00:00 2001 From: Wen Kokke Date: Wed, 22 Jan 2025 15:26:05 +0000 Subject: [PATCH 3/4] feat: validate checksums in openWriteBuffer --- src/Database/LSMTree/Internal/Snapshot.hs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Database/LSMTree/Internal/Snapshot.hs b/src/Database/LSMTree/Internal/Snapshot.hs index c83ea74a5..2bfcdf0f2 100644 --- a/src/Database/LSMTree/Internal/Snapshot.hs +++ b/src/Database/LSMTree/Internal/Snapshot.hs @@ -41,6 +41,8 @@ import Data.Text (Text) import Data.Traversable (for) import qualified Data.Vector as V import Database.LSMTree.Internal.Config +import Database.LSMTree.Internal.CRC32C (checkCRC) +import qualified Database.LSMTree.Internal.CRC32C as CRC import Database.LSMTree.Internal.Entry import Database.LSMTree.Internal.Lookup (ResolveSerialisedValue) import Database.LSMTree.Internal.Merge (MergeType (..)) @@ -48,9 +50,10 @@ import qualified Database.LSMTree.Internal.Merge as Merge import Database.LSMTree.Internal.MergeSchedule import Database.LSMTree.Internal.MergingRun (NumRuns (..)) import qualified Database.LSMTree.Internal.MergingRun as MR -import Database.LSMTree.Internal.Paths (ActiveDir (..), ForKOps (..), - NamedSnapshotDir (..), RunFsPaths (..), - WriteBufferFsPaths (..), pathsForRunFiles, +import Database.LSMTree.Internal.Paths (ActiveDir (..), ForBlob (..), + ForKOps (..), NamedSnapshotDir (..), RunFsPaths (..), + WriteBufferFsPaths (..), + fromChecksumsFileForWriteBufferFiles, pathsForRunFiles, runChecksumsPath, writeBufferBlobPath, writeBufferChecksumsPath, writeBufferKOpsPath) import Database.LSMTree.Internal.Run (Run) @@ -301,6 +304,12 @@ openWriteBuffer :: -> WriteBufferFsPaths -> m (WriteBuffer, Ref (WriteBufferBlobs m h)) openWriteBuffer reg resolve hfs hbio uc activeDir snapWriteBufferPaths = do + -- Check the checksums + (expectedChecksumForKOps, expectedChecksumForBlob) <- + CRC.expectValidFile (writeBufferChecksumsPath snapWriteBufferPaths) . fromChecksumsFileForWriteBufferFiles + =<< CRC.readChecksumsFile hfs (writeBufferChecksumsPath snapWriteBufferPaths) + checkCRC hfs hbio False (unForKOps expectedChecksumForKOps) (writeBufferKOpsPath snapWriteBufferPaths) + checkCRC hfs hbio False (unForBlob expectedChecksumForBlob) (writeBufferBlobPath snapWriteBufferPaths) -- Copy the write buffer blobs file to the active directory and open it. activeWriteBufferNumber <- uniqueToInt <$> incrUniqCounter uc let activeWriteBufferBlobPath = From 1cc59213241449cac12ecc110a4fbf5427a2ab72 Mon Sep 17 00:00:00 2001 From: Wen Kokke Date: Tue, 28 Jan 2025 14:04:50 +0000 Subject: [PATCH 4/4] fix: toChecksumsFileForWriteBufferFiles --- src/Database/LSMTree/Internal/Paths.hs | 20 ++++++++++---------- src/Database/LSMTree/Internal/Snapshot.hs | 2 ++ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/Database/LSMTree/Internal/Paths.hs b/src/Database/LSMTree/Internal/Paths.hs index 2d5399f9a..e1585d056 100644 --- a/src/Database/LSMTree/Internal/Paths.hs +++ b/src/Database/LSMTree/Internal/Paths.hs @@ -52,10 +52,8 @@ module Database.LSMTree.Internal.Paths ( import Control.Applicative (Applicative (..)) import Control.DeepSeq (NFData (..)) -import Data.Bifunctor (Bifunctor (..)) import qualified Data.ByteString.Char8 as BS import Data.Foldable (toList) -import Data.Function ((&)) import qualified Data.Map as Map import Data.Maybe (fromMaybe) import Data.String (IsString (..)) @@ -322,16 +320,18 @@ writeBufferFilePathWithExt (WriteBufferFsPaths dir n) ext = -------------------------------------------------------------------------------} toChecksumsFileForWriteBufferFiles :: (ForKOps CRC.CRC32C, ForBlob CRC.CRC32C) -> CRC.ChecksumsFile -toChecksumsFileForWriteBufferFiles checksums = - Map.fromList . toList $ checksums & bimap - ((toChecksumsFileName writeBufferKOpsExt,) . unForKOps) - ((toChecksumsFileName writeBufferBlobExt,) . unForBlob) +toChecksumsFileForWriteBufferFiles (ForKOps kOpsChecksum, ForBlob blobChecksum) = + Map.fromList + [ (toChecksumsFileName writeBufferKOpsExt, kOpsChecksum) + , (toChecksumsFileName writeBufferBlobExt, blobChecksum) + ] where - toChecksumsFileName :: String -> CRC.ChecksumsFileName toChecksumsFileName = CRC.ChecksumsFileName . BS.pack fromChecksumsFileForWriteBufferFiles :: CRC.ChecksumsFile -> Either String (ForKOps CRC.CRC32C, ForBlob CRC.CRC32C) fromChecksumsFileForWriteBufferFiles file = do - forKOps <- maybe (Left $ "key not found: " <> writeBufferKOpsExt) Right (Map.lookup (CRC.ChecksumsFileName . fromString $ writeBufferKOpsExt) file) - forBlob <- maybe (Left $ "key not found: " <> writeBufferBlobExt) Right (Map.lookup (CRC.ChecksumsFileName . fromString $ writeBufferBlobExt) file) - pure (ForKOps forKOps, ForBlob forBlob) + (,) <$> (ForKOps <$> fromChecksumFile writeBufferKOpsExt) <*> (ForBlob <$> fromChecksumFile writeBufferBlobExt) + where + fromChecksumFile key = + maybe (Left $ "key not found: " <> key) Right $ + Map.lookup (CRC.ChecksumsFileName . fromString $ key) file diff --git a/src/Database/LSMTree/Internal/Snapshot.hs b/src/Database/LSMTree/Internal/Snapshot.hs index 2bfcdf0f2..a4dcaa6ed 100644 --- a/src/Database/LSMTree/Internal/Snapshot.hs +++ b/src/Database/LSMTree/Internal/Snapshot.hs @@ -305,6 +305,8 @@ openWriteBuffer :: -> m (WriteBuffer, Ref (WriteBufferBlobs m h)) openWriteBuffer reg resolve hfs hbio uc activeDir snapWriteBufferPaths = do -- Check the checksums + -- TODO: This reads the blobfile twice: once to check the CRC and once more + -- to copy it from the snapshot directory to the active directory. (expectedChecksumForKOps, expectedChecksumForBlob) <- CRC.expectValidFile (writeBufferChecksumsPath snapWriteBufferPaths) . fromChecksumsFileForWriteBufferFiles =<< CRC.readChecksumsFile hfs (writeBufferChecksumsPath snapWriteBufferPaths)