diff --git a/badger/cmd/info.go b/badger/cmd/info.go index 719820fb4..634aca4ce 100644 --- a/badger/cmd/info.go +++ b/badger/cmd/info.go @@ -8,6 +8,7 @@ package cmd import ( "bytes" "encoding/hex" + "errors" "fmt" "io/fs" "os" @@ -21,6 +22,7 @@ import ( "github.com/dgraph-io/badger/v4" "github.com/dgraph-io/badger/v4/options" + "github.com/dgraph-io/badger/v4/pb" "github.com/dgraph-io/badger/v4/table" "github.com/dgraph-io/badger/v4/y" ) @@ -40,10 +42,14 @@ type flagOptions struct { checksumVerificationMode string discard bool externalMagicVersion uint16 + checksumAlgorithm string } var ( opt flagOptions + + // errInvalidChecksumAlgorithm is returned if the checksum algorithm is invalid. + errInvalidChecksumAlgorithm = errors.New("Invalid checksum algorithm. Supported values: crc32c, xxhash64.") ) func init() { @@ -69,6 +75,8 @@ func init() { infoCmd.Flags().StringVar(&opt.encryptionKey, "enc-key", "", "Use the provided encryption key") infoCmd.Flags().StringVar(&opt.checksumVerificationMode, "cv-mode", "none", "[none, table, block, tableAndBlock] Specifies when the db should verify checksum for SST.") + infoCmd.Flags().StringVar(&opt.checksumAlgorithm, "ct", "crc32c", "[crc32c,xxhash64] "+ + "Specifies the checksum algorithm for SST.") infoCmd.Flags().BoolVar(&opt.discard, "discard", false, "Parse and print DISCARD file from value logs.") infoCmd.Flags().Uint16Var(&opt.externalMagicVersion, "external-magic", 0, @@ -89,6 +97,9 @@ to the Dgraph team. func handleInfo(cmd *cobra.Command, args []string) error { cvMode := checksumVerificationMode(opt.checksumVerificationMode) + ct, err := strToChecksumAlgorithm(opt.checksumAlgorithm) + y.Check(err) + bopt := badger.DefaultOptions(sstDir). WithValueDir(vlogDir). WithReadOnly(opt.readOnly). @@ -96,7 +107,8 @@ func handleInfo(cmd *cobra.Command, args []string) error { WithIndexCacheSize(200 << 20). WithEncryptionKey([]byte(opt.encryptionKey)). WithChecksumVerificationMode(cvMode). 
- WithExternalMagic(opt.externalMagicVersion) + WithExternalMagic(opt.externalMagicVersion). + WithChecksumAlgorithm(ct) if opt.discard { ds, err := badger.InitDiscardStats(bopt) @@ -515,6 +527,19 @@ func pluralFiles(count int) string { return "files" } +// strToChecksumAlgorithm maps a --ct flag value to its pb.Checksum_Algorithm. Unsupported values +// yield an error wrapping errInvalidChecksumAlgorithm, alongside pb.Checksum_CRC32C as the fallback. +func strToChecksumAlgorithm(ct string) (pb.Checksum_Algorithm, error) { + switch ct { + case "crc32c": + return pb.Checksum_CRC32C, nil + case "xxhash64": + return pb.Checksum_XXHash64, nil + default: + return pb.Checksum_CRC32C, fmt.Errorf("%q: %w", ct, errInvalidChecksumAlgorithm) + } +} + func checksumVerificationMode(cvMode string) options.ChecksumVerificationMode { switch cvMode { case "none": diff --git a/badger/cmd/info_test.go b/badger/cmd/info_test.go new file mode 100644 index 000000000..3347d4780 --- /dev/null +++ b/badger/cmd/info_test.go @@ -0,0 +1,40 @@ +/* + * Copyright 2019 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package cmd + +import ( + "testing" + + "github.com/dgraph-io/badger/v4/pb" + "github.com/stretchr/testify/require" +) + +// TestStrToChecksumAlgorithm checks that supported names map to their algorithm and that an +// unsupported name returns an error together with the CRC32C fallback value. +func TestStrToChecksumAlgorithm(t *testing.T) { + ctCRC32, err := strToChecksumAlgorithm("crc32c") + require.NoError(t, err) + require.Equal(t, pb.Checksum_CRC32C, ctCRC32) + + ctHash, err := strToChecksumAlgorithm("xxhash64") + require.NoError(t, err) + require.Equal(t, pb.Checksum_XXHash64, ctHash) + + ctOthers, err := strToChecksumAlgorithm("others") + require.Error(t, err) + require.Equal(t, pb.Checksum_CRC32C, ctOthers) +} diff --git a/options.go b/options.go index 54eeed51f..b940f508b 100644 --- a/options.go +++ b/options.go @@ -14,6 +14,7 @@ import ( "time" "github.com/dgraph-io/badger/v4/options" + "github.com/dgraph-io/badger/v4/pb" "github.com/dgraph-io/badger/v4/table" "github.com/dgraph-io/badger/v4/y" "github.com/dgraph-io/ristretto/v2/z" @@ -93,6 +94,9 @@ type Options struct { // ChecksumVerificationMode decides when db should verify checksums for SSTable blocks. ChecksumVerificationMode options.ChecksumVerificationMode + // ChecksumAlgorithm decides which algorithm is used to calculate checksums. + ChecksumAlgorithm pb.Checksum_Algorithm + // DetectConflicts determines whether the transactions would be checked for // conflicts. The transactions can be processed at a higher rate when // conflict detection is disabled. @@ -174,6 +178,7 @@ func DefaultOptions(path string) Options { EncryptionKeyRotationDuration: 10 * 24 * time.Hour, // Default 10 days. 
DetectConflicts: true, NamespaceOffset: -1, + ChecksumAlgorithm: pb.Checksum_CRC32C, } } @@ -188,6 +193,7 @@ func buildTableOptions(db *DB) table.Options { BlockSize: opt.BlockSize, BloomFalsePositive: opt.BloomFalsePositive, ChkMode: opt.ChecksumVerificationMode, + ChkAlgo: opt.ChecksumAlgorithm, Compression: opt.Compression, ZSTDCompressionLevel: opt.ZSTDCompressionLevel, BlockCache: db.blockCache, @@ -669,6 +675,16 @@ func (opt Options) WithChecksumVerificationMode(cvMode options.ChecksumVerificat return opt } +// WithChecksumAlgorithm returns a new Options value with ChecksumAlgorithm set to the given value. +// +// ChecksumAlgorithm decides which algorithm is used to calculate checksums. +// +// The default value of ChecksumAlgorithm is pb.Checksum_CRC32C. +func (opt Options) WithChecksumAlgorithm(ct pb.Checksum_Algorithm) Options { + opt.ChecksumAlgorithm = ct + return opt +} + // WithBlockCacheSize returns a new Options value with BlockCacheSize set to the given value. // // This value specifies how much data cache should hold in memory. A small size diff --git a/table/builder.go b/table/builder.go index 70ebc99bd..027d6b684 100644 --- a/table/builder.go +++ b/table/builder.go @@ -267,7 +267,7 @@ func (b *Builder) finishBlock() { b.append(y.U32SliceToBytes(b.curBlock.entryOffsets)) b.append(y.U32ToBytes(uint32(len(b.curBlock.entryOffsets)))) - checksum := b.calculateChecksum(b.curBlock.data[:b.curBlock.end]) + checksum := b.calculateChecksum(b.curBlock.data[:b.curBlock.end], b.opts.ChkAlgo) // Append the block checksum and its length. 
b.append(checksum) @@ -443,7 +443,7 @@ func (b *Builder) Done() buildData { index, err = b.encrypt(index) y.Check(err) } - checksum := b.calculateChecksum(index) + checksum := b.calculateChecksum(index, b.opts.ChkAlgo) bd.index = index bd.checksum = checksum @@ -451,19 +451,18 @@ func (b *Builder) Done() buildData { return bd } -func (b *Builder) calculateChecksum(data []byte) []byte { +func (b *Builder) calculateChecksum(data []byte, ct pb.Checksum_Algorithm) []byte { // Build checksum for the index. checksum := pb.Checksum{ - // TODO: The checksum type should be configurable from the - // options. + // We chose to use CRC32 as the default option because // it performed better compared to xxHash64. // See the BenchmarkChecksum in table_test.go file // Size => 1024 B 2048 B // CRC32 => 63.7 ns/op 112 ns/op // xxHash64 => 87.5 ns/op 158 ns/op - Sum: y.CalculateChecksum(data, pb.Checksum_CRC32C), - Algo: pb.Checksum_CRC32C, + Sum: y.CalculateChecksum(data, ct), + Algo: ct, } // Write checksum to the file. diff --git a/table/table.go b/table/table.go index a32515e2d..2b7444050 100644 --- a/table/table.go +++ b/table/table.go @@ -51,6 +51,9 @@ type Options struct { // ChkMode is the checksum verification mode for Table. ChkMode options.ChecksumVerificationMode + // ChkAlgo is the checksum algorithm mode for Table. + ChkAlgo pb.Checksum_Algorithm + // Options for Table builder. // BloomFalsePositive is the false positive probabiltiy of bloom filter.