Skip to content

Commit

Permalink
Merge pull request #542 from IntersectMBO/wenkokke/corrupt-snapshot-wb
Browse files Browse the repository at this point in the history
fix: validate checksums in openWriteBuffer
  • Loading branch information
wenkokke authored Jan 29, 2025
2 parents c202034 + 1cc5921 commit 0036a87
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 19 deletions.
21 changes: 14 additions & 7 deletions src/Database/LSMTree/Internal/Paths.hs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ module Database.LSMTree.Internal.Paths (
, fromChecksumsFile
-- * Checksums for WriteBuffer files
, toChecksumsFileForWriteBufferFiles
, fromChecksumsFileForWriteBufferFiles
-- * ForRunFiles abstraction
, ForKOps (..)
, ForBlob (..)
Expand All @@ -51,10 +52,8 @@ module Database.LSMTree.Internal.Paths (

import Control.Applicative (Applicative (..))
import Control.DeepSeq (NFData (..))
import Data.Bifunctor (Bifunctor (..))
import qualified Data.ByteString.Char8 as BS
import Data.Foldable (toList)
import Data.Function ((&))
import qualified Data.Map as Map
import Data.Maybe (fromMaybe)
import Data.String (IsString (..))
Expand Down Expand Up @@ -321,10 +320,18 @@ writeBufferFilePathWithExt (WriteBufferFsPaths dir n) ext =
-------------------------------------------------------------------------------}

-- | Build a 'CRC.ChecksumsFile' from the pair of write buffer checksums.
--
-- The resulting map has two entries: the checksums-file name derived from
-- 'writeBufferKOpsExt' maps to the 'ForKOps' checksum, and the name derived
-- from 'writeBufferBlobExt' maps to the 'ForBlob' checksum.
toChecksumsFileForWriteBufferFiles :: (ForKOps CRC.CRC32C, ForBlob CRC.CRC32C) -> CRC.ChecksumsFile
-- NOTE(review): this is a diff rendering without +/- markers. The equation
-- directly below (using 'bimap' and '&') appears to be the removed
-- implementation, and the pattern-matching equation after it the added one
-- -- confirm against the commit.
toChecksumsFileForWriteBufferFiles checksums =
Map.fromList . toList $ checksums & bimap
((toChecksumsFileName writeBufferKOpsExt,) . unForKOps)
((toChecksumsFileName writeBufferBlobExt,) . unForBlob)
toChecksumsFileForWriteBufferFiles (ForKOps kOpsChecksum, ForBlob blobChecksum) =
Map.fromList
[ (toChecksumsFileName writeBufferKOpsExt, kOpsChecksum)
, (toChecksumsFileName writeBufferBlobExt, blobChecksum)
]
where
-- Pack a 'String' into a 'BS.ByteString' and wrap it as a key of the
-- checksums file.
toChecksumsFileName :: String -> CRC.ChecksumsFileName
toChecksumsFileName = CRC.ChecksumsFileName . BS.pack

-- | Extract the pair of write buffer checksums from a 'CRC.ChecksumsFile'.
--
-- Looks up the entry keyed by 'writeBufferKOpsExt' first and the entry keyed
-- by 'writeBufferBlobExt' second. If an entry is missing, the result is a
-- 'Left' carrying @"key not found: "@ followed by the missing key; when both
-- are missing, the error for 'writeBufferKOpsExt' is the one reported.
fromChecksumsFileForWriteBufferFiles ::
     CRC.ChecksumsFile
  -> Either String (ForKOps CRC.CRC32C, ForBlob CRC.CRC32C)
fromChecksumsFileForWriteBufferFiles file = do
    kOpsChecksum <- lookupChecksum writeBufferKOpsExt
    blobChecksum <- lookupChecksum writeBufferBlobExt
    pure (ForKOps kOpsChecksum, ForBlob blobChecksum)
  where
    -- Find the checksum stored under the given key, or report the key as
    -- missing.
    lookupChecksum ext =
      case Map.lookup (CRC.ChecksumsFileName (fromString ext)) file of
        Nothing       -> Left ("key not found: " <> ext)
        Just checksum -> Right checksum
17 changes: 14 additions & 3 deletions src/Database/LSMTree/Internal/Snapshot.hs
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,19 @@ import Data.Text (Text)
import Data.Traversable (for)
import qualified Data.Vector as V
import Database.LSMTree.Internal.Config
import Database.LSMTree.Internal.CRC32C (checkCRC)
import qualified Database.LSMTree.Internal.CRC32C as CRC
import Database.LSMTree.Internal.Entry
import Database.LSMTree.Internal.Lookup (ResolveSerialisedValue)
import Database.LSMTree.Internal.Merge (MergeType (..))
import qualified Database.LSMTree.Internal.Merge as Merge
import Database.LSMTree.Internal.MergeSchedule
import Database.LSMTree.Internal.MergingRun (NumRuns (..))
import qualified Database.LSMTree.Internal.MergingRun as MR
import Database.LSMTree.Internal.Paths (ActiveDir (..), ForKOps (..),
NamedSnapshotDir (..), RunFsPaths (..),
WriteBufferFsPaths (..), pathsForRunFiles,
import Database.LSMTree.Internal.Paths (ActiveDir (..), ForBlob (..),
ForKOps (..), NamedSnapshotDir (..), RunFsPaths (..),
WriteBufferFsPaths (..),
fromChecksumsFileForWriteBufferFiles, pathsForRunFiles,
runChecksumsPath, writeBufferBlobPath,
writeBufferChecksumsPath, writeBufferKOpsPath)
import Database.LSMTree.Internal.Run (Run)
Expand Down Expand Up @@ -301,6 +304,14 @@ openWriteBuffer ::
-> WriteBufferFsPaths
-> m (WriteBuffer, Ref (WriteBufferBlobs m h))
openWriteBuffer reg resolve hfs hbio uc activeDir snapWriteBufferPaths = do
-- Check the checksums
-- TODO: This reads the blobfile twice: once to check the CRC and once more
-- to copy it from the snapshot directory to the active directory.
(expectedChecksumForKOps, expectedChecksumForBlob) <-
CRC.expectValidFile (writeBufferChecksumsPath snapWriteBufferPaths) . fromChecksumsFileForWriteBufferFiles
=<< CRC.readChecksumsFile hfs (writeBufferChecksumsPath snapWriteBufferPaths)
checkCRC hfs hbio False (unForKOps expectedChecksumForKOps) (writeBufferKOpsPath snapWriteBufferPaths)
checkCRC hfs hbio False (unForBlob expectedChecksumForBlob) (writeBufferBlobPath snapWriteBufferPaths)
-- Copy the write buffer blobs file to the active directory and open it.
activeWriteBufferNumber <- uniqueToInt <$> incrUniqCounter uc
let activeWriteBufferBlobPath =
Expand Down
11 changes: 2 additions & 9 deletions test/Test/Database/LSMTree/Internal/Snapshot/FS.hs
Original file line number Diff line number Diff line change
Expand Up @@ -211,15 +211,8 @@ prop_flipSnapshotBit
tabFlippedBit = tabulate "Flipped bit" [showPowersOf10 j]
counterFlippedBit = counterexample ("Flipped bit: " ++ show j)

let isUncheckedFile =
path == getNamedSnapshotDir namedSnapDir </> FS.mkFsPath ["0.keyops"]
|| path == getNamedSnapshotDir namedSnapDir </> FS.mkFsPath ["0.blobs"]
|| path == getNamedSnapshotDir namedSnapDir </> FS.mkFsPath ["0.checksums"]

-- TODO: remove once write buffer files have checksum verification
if isUncheckedFile then
pure discard
else if n <= 0 then -- file is empty
-- TODO: check forgotten refs
if n <= 0 then -- file is empty
pure $ tabulate "Result" ["No corruption applied"] True
else do -- file is non-empty

Expand Down

0 comments on commit 0036a87

Please sign in to comment.