Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: validate checksums in openWriteBuffer #542

Merged
merged 4 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions src/Database/LSMTree/Internal/Paths.hs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ module Database.LSMTree.Internal.Paths (
, fromChecksumsFile
-- * Checksums for WriteBuffer files
, toChecksumsFileForWriteBufferFiles
, fromChecksumsFileForWriteBufferFiles
-- * ForRunFiles abstraction
, ForKOps (..)
, ForBlob (..)
Expand All @@ -51,10 +52,8 @@ module Database.LSMTree.Internal.Paths (

import Control.Applicative (Applicative (..))
import Control.DeepSeq (NFData (..))
import Data.Bifunctor (Bifunctor (..))
import qualified Data.ByteString.Char8 as BS
import Data.Foldable (toList)
import Data.Function ((&))
import qualified Data.Map as Map
import Data.Maybe (fromMaybe)
import Data.String (IsString (..))
Expand Down Expand Up @@ -321,10 +320,18 @@ writeBufferFilePathWithExt (WriteBufferFsPaths dir n) ext =
-------------------------------------------------------------------------------}

toChecksumsFileForWriteBufferFiles :: (ForKOps CRC.CRC32C, ForBlob CRC.CRC32C) -> CRC.ChecksumsFile
toChecksumsFileForWriteBufferFiles checksums =
Map.fromList . toList $ checksums & bimap
wenkokke marked this conversation as resolved.
Show resolved Hide resolved
((toChecksumsFileName writeBufferKOpsExt,) . unForKOps)
((toChecksumsFileName writeBufferBlobExt,) . unForBlob)
toChecksumsFileForWriteBufferFiles (ForKOps kOpsChecksum, ForBlob blobChecksum) =
Map.fromList
[ (toChecksumsFileName writeBufferKOpsExt, kOpsChecksum)
, (toChecksumsFileName writeBufferBlobExt, blobChecksum)
]
where
toChecksumsFileName :: String -> CRC.ChecksumsFileName
toChecksumsFileName = CRC.ChecksumsFileName . BS.pack

-- | Extract the checksums for the write buffer's key\/ops file and blob file
-- from a parsed checksums file.
--
-- The entries are looked up under the names 'writeBufferKOpsExt' and
-- 'writeBufferBlobExt'. If an entry is missing, the result is a 'Left' with
-- the message @"key not found: "@ followed by the missing name. The key\/ops
-- entry is looked up first, so it is the one reported when both are absent.
fromChecksumsFileForWriteBufferFiles ::
     CRC.ChecksumsFile
  -> Either String (ForKOps CRC.CRC32C, ForBlob CRC.CRC32C)
fromChecksumsFileForWriteBufferFiles checksums = do
    kOpsChecksum <- lookupChecksum writeBufferKOpsExt
    blobChecksum <- lookupChecksum writeBufferBlobExt
    pure (ForKOps kOpsChecksum, ForBlob blobChecksum)
  where
    -- Look up a single entry by name, turning absence into an error message.
    lookupChecksum ext =
      case Map.lookup (CRC.ChecksumsFileName (fromString ext)) checksums of
        Nothing       -> Left ("key not found: " <> ext)
        Just checksum -> Right checksum
17 changes: 14 additions & 3 deletions src/Database/LSMTree/Internal/Snapshot.hs
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,19 @@ import Data.Text (Text)
import Data.Traversable (for)
import qualified Data.Vector as V
import Database.LSMTree.Internal.Config
import Database.LSMTree.Internal.CRC32C (checkCRC)
import qualified Database.LSMTree.Internal.CRC32C as CRC
import Database.LSMTree.Internal.Entry
import Database.LSMTree.Internal.Lookup (ResolveSerialisedValue)
import Database.LSMTree.Internal.Merge (MergeType (..))
import qualified Database.LSMTree.Internal.Merge as Merge
import Database.LSMTree.Internal.MergeSchedule
import Database.LSMTree.Internal.MergingRun (NumRuns (..))
import qualified Database.LSMTree.Internal.MergingRun as MR
import Database.LSMTree.Internal.Paths (ActiveDir (..), ForKOps (..),
NamedSnapshotDir (..), RunFsPaths (..),
WriteBufferFsPaths (..), pathsForRunFiles,
import Database.LSMTree.Internal.Paths (ActiveDir (..), ForBlob (..),
ForKOps (..), NamedSnapshotDir (..), RunFsPaths (..),
WriteBufferFsPaths (..),
fromChecksumsFileForWriteBufferFiles, pathsForRunFiles,
runChecksumsPath, writeBufferBlobPath,
writeBufferChecksumsPath, writeBufferKOpsPath)
import Database.LSMTree.Internal.Run (Run)
Expand Down Expand Up @@ -301,6 +304,14 @@ openWriteBuffer ::
-> WriteBufferFsPaths
-> m (WriteBuffer, Ref (WriteBufferBlobs m h))
openWriteBuffer reg resolve hfs hbio uc activeDir snapWriteBufferPaths = do
-- Check the checksums
-- TODO: This reads the blobfile twice: once to check the CRC and once more
-- to copy it from the snapshot directory to the active directory.
(expectedChecksumForKOps, expectedChecksumForBlob) <-
CRC.expectValidFile (writeBufferChecksumsPath snapWriteBufferPaths) . fromChecksumsFileForWriteBufferFiles
=<< CRC.readChecksumsFile hfs (writeBufferChecksumsPath snapWriteBufferPaths)
checkCRC hfs hbio False (unForKOps expectedChecksumForKOps) (writeBufferKOpsPath snapWriteBufferPaths)
checkCRC hfs hbio False (unForBlob expectedChecksumForBlob) (writeBufferBlobPath snapWriteBufferPaths)
wenkokke marked this conversation as resolved.
Show resolved Hide resolved
-- Copy the write buffer blobs file to the active directory and open it.
activeWriteBufferNumber <- uniqueToInt <$> incrUniqCounter uc
let activeWriteBufferBlobPath =
Expand Down
11 changes: 2 additions & 9 deletions test/Test/Database/LSMTree/Internal/Snapshot/FS.hs
Original file line number Diff line number Diff line change
Expand Up @@ -211,15 +211,8 @@ prop_flipSnapshotBit
tabFlippedBit = tabulate "Flipped bit" [showPowersOf10 j]
counterFlippedBit = counterexample ("Flipped bit: " ++ show j)

let isUncheckedFile =
path == getNamedSnapshotDir namedSnapDir </> FS.mkFsPath ["0.keyops"]
|| path == getNamedSnapshotDir namedSnapDir </> FS.mkFsPath ["0.blobs"]
|| path == getNamedSnapshotDir namedSnapDir </> FS.mkFsPath ["0.checksums"]

-- TODO: remove once write buffer files have checksum verification
if isUncheckedFile then
pure discard
else if n <= 0 then -- file is empty
-- TODO: check forgotten refs
if n <= 0 then -- file is empty
pure $ tabulate "Result" ["No corruption applied"] True
else do -- file is non-empty

Expand Down