diff --git a/cmd/integration/commands/flags.go b/cmd/integration/commands/flags.go index a61fcaab2d3..d2cfe832c82 100644 --- a/cmd/integration/commands/flags.go +++ b/cmd/integration/commands/flags.go @@ -56,6 +56,8 @@ var ( chainTipMode bool syncCfg = ethconfig.Defaults.Sync + + csvOutput string ) func must(err error) { diff --git a/cmd/integration/commands/idx_optimize.go b/cmd/integration/commands/idx_optimize.go new file mode 100644 index 00000000000..933d287ccbd --- /dev/null +++ b/cmd/integration/commands/idx_optimize.go @@ -0,0 +1,204 @@ +package commands + +import ( + "io/fs" + "log" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/erigontech/erigon-lib/common/background" + "github.com/erigontech/erigon-lib/config3" + lllog "github.com/erigontech/erigon-lib/log/v3" + "github.com/erigontech/erigon-lib/recsplit" + "github.com/erigontech/erigon-lib/recsplit/eliasfano32" + "github.com/erigontech/erigon-lib/recsplit/multiencseq" + "github.com/erigontech/erigon-lib/state" + + "github.com/erigontech/erigon-lib/common" + "github.com/erigontech/erigon-lib/seg" + "github.com/erigontech/erigon/turbo/debug" + "github.com/spf13/cobra" +) + +func parseEFFilename(fileName string) (*efFileInfo, error) { + parts := strings.Split(fileName, ".") + stepParts := strings.Split(parts[1], "-") + startStep, err := strconv.ParseUint(stepParts[0], 10, 64) + if err != nil { + return nil, err + } + endStep, err := strconv.ParseUint(stepParts[1], 10, 64) + if err != nil { + return nil, err + } + + return &efFileInfo{ + prefix: parts[0], + stepSize: endStep - startStep, + startStep: startStep, + endStep: endStep, + }, nil +} + +type efFileInfo struct { + prefix string + stepSize uint64 + startStep uint64 + endStep uint64 +} + +var b []byte + +func doConvert(baseTxNum uint64, v []byte) ([]byte, error) { + ef, _ := eliasfano32.ReadEliasFano(v) + + seqBuilder := multiencseq.NewBuilder(baseTxNum, ef.Count(), ef.Max()) + for it := ef.Iterator(); it.HasNext(); { + n, err := it.Next() + if err != nil { + return nil, err + } + seqBuilder.AddOffset(n) + } + seqBuilder.Build() + + b = seqBuilder.AppendBytes(b[:0]) + return b, nil +} + +var idxOptimize = &cobra.Command{ + Use: "idx_optimize", + Short: "Scan .ef files, backup them up, reencode and optimize them, rebuild .efi files", + Run: func(cmd *cobra.Command, args []string) { + ctx, _ := common.RootContext() + logger := debug.SetupCobra(cmd, "integration") + + // accessorDir := filepath.Join(datadirCli, "snapshots", "accessor") + idxPath := filepath.Join(datadirCli, "snapshots", "idx") + idxDir := os.DirFS(idxPath) + + files, err := fs.ReadDir(idxDir, ".") + if err != nil { + log.Fatalf("Failed to read directory contents: %v", err) + } + + log.Println("Sumarizing idx files...") + cEF := 0 + for _, file := range files { + if file.IsDir() || !strings.HasSuffix(file.Name(), ".ef") { + continue + } + cEF++ + } + + log.Println("Optimizing idx files...") + cOpt := 0 + for _, file := range files { + if file.IsDir() || !strings.HasSuffix(file.Name(), ".ef") { + continue + } + + efInfo, err := parseEFFilename(file.Name()) + if err != nil { + logger.Error("Failed to parse file info: ", err) + } + log.Printf("Optimizing file %s [%d/%d]...", file.Name(), cOpt, cEF) + + // only optimize frozen files for this experiment, because we are not + // implementing collation, merge, etc. support now + // if efInfo.stepSize < 64 { + // log.Printf("Skipping file %s, step size %d < 64", file.Name(), efInfo.stepSize) + // continue + // } + cOpt++ + baseTxNum := efInfo.startStep * config3.DefaultStepSize + + tmpDir := datadirCli + "/temp" + + idxInput, err := seg.NewDecompressor(datadirCli + "/snapshots/idx/" + file.Name()) + if err != nil { + log.Fatalf("Failed to open decompressor: %v", err) + } + defer idxInput.Close() + + idxOutput, err := seg.NewCompressor(ctx, "optimizoor", datadirCli+"/snapshots/idx/"+file.Name()+".new", tmpDir, seg.DefaultCfg, lllog.LvlInfo, logger) + if err != nil { + log.Fatalf("Failed to open compressor: %v", err) + } + defer idxOutput.Close() + + // Summarize 1 idx file + g := idxInput.MakeGetter() + reader := seg.NewReader(g, seg.CompressNone) + reader.Reset(0) + + writer := seg.NewWriter(idxOutput, seg.CompressNone) + ps := background.NewProgressSet() + + for reader.HasNext() { + k, _ := reader.Next(nil) + if !reader.HasNext() { + log.Fatal("reader doesn't have next!") + } + if err := writer.AddWord(k); err != nil { + log.Fatalf("error while writing key %v", err) + } + + v, _ := reader.Next(nil) + v, err := doConvert(baseTxNum, v) + if err != nil { + log.Fatalf("error while optimizing value %v", err) + } + if err := writer.AddWord(v); err != nil { + log.Fatalf("error while writing value %v", err) + } + + select { + case <-ctx.Done(): + return + default: + } + } + if err := writer.Compress(); err != nil { + log.Fatalf("error while writing optimized file %v", err) + } + idxInput.Close() + writer.Close() + idxOutput.Close() + + // rebuid .efi; COPIED FROM InvertedIndex.buildMapAccessor + salt, err := state.GetStateIndicesSalt(datadirCli + "/snapshots/") + if err != nil { + log.Fatalf("Failed to build accessor: %v", err) + } + idxPath := datadirCli + "/snapshots/accessor/" + file.Name() + "i.new" + cfg := recsplit.RecSplitArgs{ + Enums: true, + LessFalsePositives: true, + + BucketSize: recsplit.DefaultBucketSize, + LeafSize: recsplit.DefaultLeafSize, + TmpDir: tmpDir, + IndexFile: idxPath, + Salt: salt, + NoFsync: false, + } + data, err := seg.NewDecompressor(datadirCli + "/snapshots/idx/" + file.Name() + ".new") + if err != nil { + log.Fatalf("Failed to build accessor: %v", err) + } + if err := state.BuildAccessor(ctx, data, seg.CompressNone, idxPath, false, cfg, ps, logger); err != nil { + log.Fatalf("Failed to build accessor: %v", err) + } + } + + log.Printf("Optimized %d of %d files!!!", cOpt, cEF) + }, +} + +func init() { + withDataDir(idxOptimize) + rootCmd.AddCommand(idxOptimize) +} diff --git a/cmd/integration/commands/idx_verify.go b/cmd/integration/commands/idx_verify.go new file mode 100644 index 00000000000..dc9864b9311 --- /dev/null +++ b/cmd/integration/commands/idx_verify.go @@ -0,0 +1,216 @@ +package commands + +import ( + "bytes" + "io/fs" + "log" + "os" + "path/filepath" + "strings" + + "github.com/erigontech/erigon-lib/common" + "github.com/erigontech/erigon-lib/common/hexutility" + "github.com/erigontech/erigon-lib/config3" + "github.com/erigontech/erigon-lib/recsplit" + "github.com/erigontech/erigon-lib/recsplit/eliasfano32" + "github.com/erigontech/erigon-lib/recsplit/multiencseq" + "github.com/erigontech/erigon-lib/seg" + "github.com/spf13/cobra" +) + +var idxVerify = &cobra.Command{ + Use: "idx_verify", + Short: "After a genesis sync + snapshot regen, deep compare original and optimized .ef files of 2 E3 instances", + Run: func(cmd *cobra.Command, args []string) { + ctx, _ := common.RootContext() + + sourceIdxPath := filepath.Join(sourceDirCli, "snapshots", "idx") + sourceIdxDir := os.DirFS(sourceIdxPath) + + files, err := fs.ReadDir(sourceIdxDir, ".") + if err != nil { + log.Fatalf("Failed to read directory contents: %v", err) + } + + log.Println("Comparing idx files:") + F: + for _, file := range files { + if file.IsDir() || !strings.HasSuffix(file.Name(), ".ef") { + continue + } + + efInfo, err := parseEFFilename(file.Name()) + if err != nil { + log.Fatalf("Failed to parse file info: %v", err) + } + baseTxNum := efInfo.startStep * config3.DefaultStepSize + + targetIndexFilename := targetDirCli + "/snapshots/accessor/" + file.Name() + "i" + if manuallyOptimized { + targetIndexFilename = targetDirCli + "/snapshots/accessor/" + file.Name() + "i.new" + } + targetEfi, err := recsplit.OpenIndex(targetIndexFilename) + if err != nil { + log.Fatalf("Failed to open index: %v", err) + } + defer targetEfi.Close() + + targetEfiReader := targetEfi.GetReaderFromPool() + defer targetEfiReader.Close() + + // original .ef file + sourceFilename := sourceDirCli + "/snapshots/idx/" + file.Name() + sourceIdx, err := seg.NewDecompressor(sourceFilename) + if err != nil { + log.Fatalf("Failed to open decompressor: %v", err) + } + defer sourceIdx.Close() + + // reencoded optimized .ef file + targetFilename := targetDirCli + "/snapshots/idx/" + file.Name() + if manuallyOptimized { + targetFilename = targetDirCli + "/snapshots/idx/" + file.Name() + ".new" + } + targetIdx, err := seg.NewDecompressor(targetFilename) + if err != nil { + log.Fatalf("Failed to open decompressor: %v", err) + } + defer targetIdx.Close() + + log.Printf("Deep checking files %s -> %s, %s...", sourceFilename, targetFilename, targetIndexFilename) + + g := sourceIdx.MakeGetter() + sourceReader := seg.NewReader(g, seg.CompressNone) + sourceReader.Reset(0) + + g = targetIdx.MakeGetter() + targetReader := seg.NewReader(g, seg.CompressNone) + targetReader.Reset(0) + + prevKeyOffset := uint64(0) + for sourceReader.HasNext() { + if !targetReader.HasNext() { + log.Printf("target reader doesn't have next!") + log.Println("skipping to next file...") + continue F + } + + sourceK, _ := sourceReader.Next(nil) + targetK, _ := targetReader.Next(nil) + if !bytes.Equal(sourceK, targetK) { + log.Printf("key mismatch!") + log.Println("skipping to next file...") + continue F + } + + if !sourceReader.HasNext() { + log.Println("source reader doesn't have next!") + log.Println("skipping to next file...") + continue F + } + if !targetReader.HasNext() { + log.Println("target reader doesn't have next!") + log.Println("skipping to next file...") + continue F + } + + // source/target semantic value comparison + sourceV, _ := sourceReader.Next(nil) + targetV, nextKeyOffset := targetReader.Next(nil) + if !compareSequences(sourceK, sourceV, targetV, baseTxNum) { + log.Println("value mismatch!") + log.Println("skipping to next file...") + continue F + } + + // checks new efi lookup points to the same value + offset, found := targetEfiReader.TwoLayerLookup(targetK) + if !found { + log.Printf("key %v not found in efi", hexutility.Encode(targetK)) + log.Println("skipping to next file...") + continue F + } + if offset != prevKeyOffset { + log.Printf("offset mismatch: %d != %d", offset, prevKeyOffset) + log.Println("skipping to next file...") + continue F + } + prevKeyOffset = nextKeyOffset + + select { + case <-ctx.Done(): + return + default: + } + } + sourceIdx.Close() + targetIdx.Close() + targetEfiReader.Close() + targetEfi.Close() + } + }, +} + +func compareSequences(sourceK, sourceV, targetV []byte, baseTxNum uint64) bool { + // log.Printf("k=%s sv=%s tv=%s baseTxNum=%d", hexutility.Encode(sourceK), hexutility.Encode(sourceV), hexutility.Encode(targetV), baseTxNum) + sourceEf, _ := eliasfano32.ReadEliasFano(sourceV) + targetSeq := multiencseq.ReadMultiEncSeq(baseTxNum, targetV) + + if targetSeq.EncodingType() == multiencseq.PlainEliasFano { + log.Printf("target encoding type can't be PlainEliasFano") + return false + } + if targetSeq.Count() > sourceEf.Count() { + log.Print("Optimized eliasfano is longer") + log.Printf("key=%s", hexutility.Encode(sourceK)) + log.Printf("source min=%d max=%d count=%d", sourceEf.Min(), sourceEf.Max(), sourceEf.Count()) + log.Printf("target min=%d max=%d count=%d", targetSeq.Min(), targetSeq.Max(), targetSeq.Count()) + return false + } + if sourceEf.Count() > targetSeq.Count() { + log.Print("Optimized eliasfano is shorter") + log.Printf("key=%s", hexutility.Encode(sourceK)) + log.Printf("source min=%d max=%d count=%d", sourceEf.Min(), sourceEf.Max(), sourceEf.Count()) + log.Printf("target min=%d max=%d count=%d", targetSeq.Min(), targetSeq.Max(), targetSeq.Count()) + return false + } + + sourceIt := sourceEf.Iterator() + targetIt := targetSeq.Iterator(0) + for sourceIt.HasNext() { + sourceN, err := sourceIt.Next() + if err != nil { + log.Fatalf("Failed to read next: %v", err) + } + targetN, err := targetIt.Next() + if err != nil { + log.Fatalf("Failed to read next: %v", err) + } + if sourceN != targetN { + log.Printf("values mismatch: source=%d target=%d", sourceN, targetN) + log.Printf("key=%s", hexutility.Encode(sourceK)) + log.Printf("source min=%d max=%d count=%d", sourceEf.Min(), sourceEf.Max(), sourceEf.Count()) + log.Printf("target min=%d max=%d count=%d", targetSeq.Min(), targetSeq.Max(), targetSeq.Count()) + return false + } + } + + return true +} + +func init() { + idxVerify.Flags().StringVar(&sourceDirCli, "sourcedir", "", "data directory of original E3 instance") + must(idxVerify.MarkFlagRequired("sourcedir")) + must(idxVerify.MarkFlagDirname("sourcedir")) + + idxVerify.Flags().StringVar(&targetDirCli, "targetdir", "", "data directory of optimized E3 instance") + must(idxVerify.MarkFlagRequired("targetdir")) + must(idxVerify.MarkFlagDirname("targetdir")) + + idxVerify.Flags().BoolVar(&manuallyOptimized, "manuallyOptimized", false, "set this parameter if you have manually optimized the .ef files ith idx_optimize; set sourcedir/targetdir to the same") + + rootCmd.AddCommand(idxVerify) +} + +var sourceDirCli, targetDirCli string +var manuallyOptimized bool diff --git a/erigon-lib/recsplit/eliasfano32/rebased_elias_fano.go b/erigon-lib/recsplit/eliasfano32/rebased_elias_fano.go new file mode 100644 index 00000000000..0f5cb53adc7 --- /dev/null +++ b/erigon-lib/recsplit/eliasfano32/rebased_elias_fano.go @@ -0,0 +1,95 @@ +package eliasfano32 + +// This is a wrapper of "plain" EliasFano for optimizing scenarios where the number sequence +// is constrained in a closed range [from, to], so we can store the entire sequence as deltas +// of "from" and save space. +// +// This is specially useful when the starting "from" is a huge number, so the binary representation +// of the Elias Fano sequence can be made smaller. +// +// The baseNum stores the base value which is added to each element when it is accessed. It is +// not meant to be stored together with the serialized data, but derived from some other source, +// like the start txNum of a snapshot file, so it can be globally applied to all sequences in the +// same file, resulting in huge space savings. +type RebasedEliasFano struct { + baseNum uint64 + ef EliasFano +} + +func (ref *RebasedEliasFano) Get(i uint64) uint64 { + return ref.baseNum + ref.ef.Get(i) +} + +func (ref *RebasedEliasFano) Min() uint64 { + return ref.baseNum + ref.ef.Min() +} + +func (ref *RebasedEliasFano) Max() uint64 { + return ref.baseNum + ref.ef.Max() +} + +func (ref *RebasedEliasFano) Count() uint64 { + return ref.ef.Count() +} + +func (ref *RebasedEliasFano) Reset(baseNum uint64, raw []byte) { + ref.baseNum = baseNum + ref.ef.Reset(raw) +} + +func (ref *RebasedEliasFano) Search(v uint64) (uint64, bool) { + if v < ref.baseNum { + v = ref.baseNum + } + + n, found := ref.ef.Search(v - ref.baseNum) + return ref.baseNum + n, found +} + +func (ref *RebasedEliasFano) Iterator() *RebasedIterWrapper { + return &RebasedIterWrapper{ + baseNum: ref.baseNum, + it: ref.ef.Iterator(), + reverse: false, + } +} + +func (ref *RebasedEliasFano) ReverseIterator() *RebasedIterWrapper { + return &RebasedIterWrapper{ + baseNum: ref.baseNum, + it: ref.ef.ReverseIterator(), + reverse: true, + } +} + +type RebasedIterWrapper struct { + baseNum uint64 + it *EliasFanoIter + reverse bool +} + +func (it *RebasedIterWrapper) HasNext() bool { + return it.it.HasNext() +} + +func (it *RebasedIterWrapper) Next() (uint64, error) { + n, err := it.it.Next() + return it.baseNum + n, err +} + +func (it *RebasedIterWrapper) Seek(v uint64) { + if v < it.baseNum { + it.it.Seek(0) + if it.reverse { + // force exhaustion as we are seeking before the first elem + it.it.Next() + } + return + } + + it.it.Seek(v - it.baseNum) +} + +func (it *RebasedIterWrapper) Close() { + it.it.Close() +} diff --git a/erigon-lib/recsplit/multiencseq/sequence_builder.go b/erigon-lib/recsplit/multiencseq/sequence_builder.go new file mode 100644 index 00000000000..bca4468a03a --- /dev/null +++ b/erigon-lib/recsplit/multiencseq/sequence_builder.go @@ -0,0 +1,113 @@ +package multiencseq + +import ( + "encoding/binary" + + "github.com/erigontech/erigon-lib/recsplit/eliasfano32" +) + +// Encode sequences up to this length using simple encoding. +// +// The choice of this constant is tightly coupled with the encoding type; we used +// the least significant bits of the encoding byte type to signal simple encoding + +// sequence size. +// +// In this case, we use the range [0b10000000, 0b10001111] to say the sequence must contain +// (N & 0b00001111) + 1 elements, i.e., 0 means 1 element, 1 means 2 elements, etc... +const SIMPLE_SEQUENCE_MAX_THRESHOLD = 16 + +// A SequenceBuilder is used to build serialized number sequences. +// +// It follows the following pattern: +// +// - New builder: NewBuilder() +// - Add offsets: AddOffset() +// - Build: Build() +// - Serialize: AppendBytes() +// +// It contains decision logic to choose the best encoding for the given sequence. +// +// This is the "writer" counterpart of SequenceReader. +type SequenceBuilder struct { + baseNum uint64 + ef *eliasfano32.EliasFano +} + +// Creates a new builder. The builder is not meant to be reused. The construction +// parameters may or may not be used during the build process depending on the +// encoding being used. +// +// The encoding being used depends on the parameters themselves and the characteristics +// of the number sequence. +// +// While non-optimized "legacy mode" is supported (for now) on SequenceReader to be backwards +// compatible with old files, this writer ONLY writes optimized multiencoding sequences. +// +// baseNum: this is used to calculate the deltas on simple encoding and on "rebased elias fano" +// count: this is the number of elements in the sequence, used in case of elias fano +// max: this is maximum value in the sequence, used in case of elias fano +func NewBuilder(baseNum, count, max uint64) *SequenceBuilder { + return &SequenceBuilder{ + baseNum: baseNum, + ef: eliasfano32.NewEliasFano(count, max), + } +} + +func (b *SequenceBuilder) AddOffset(offset uint64) { + // TODO: write offset already substracting baseNum now that PlainEF is gone + b.ef.AddOffset(offset) +} + +func (b *SequenceBuilder) Build() { + b.ef.Build() +} + +func (b *SequenceBuilder) AppendBytes(buf []byte) []byte { + if b.ef.Count() <= SIMPLE_SEQUENCE_MAX_THRESHOLD { + return b.simpleEncoding(buf) + } + + return b.rebasedEliasFano(buf) +} + +func (b *SequenceBuilder) simpleEncoding(buf []byte) []byte { + // Simple encoding type + size: [0x80, 0x8F] + count := b.ef.Count() + enc := byte(count-1) & byte(0b00001111) + enc |= byte(SimpleEncoding) + buf = append(buf, enc) + + // Encode elems + var bn [4]byte + for it := b.ef.Iterator(); it.HasNext(); { + n, err := it.Next() + if err != nil { + // TODO: err + panic(err) + } + n -= b.baseNum + + binary.BigEndian.PutUint32(bn[:], uint32(n)) + buf = append(buf, bn[:]...) + } + + return buf +} + +func (b *SequenceBuilder) rebasedEliasFano(buf []byte) []byte { + // Reserved encoding type 0x90 == rebased elias fano + buf = append(buf, byte(RebasedEliasFano)) + + // Rebased ef + rbef := eliasfano32.NewEliasFano(b.ef.Count(), b.ef.Max()-b.baseNum) + for it := b.ef.Iterator(); it.HasNext(); { + n, err := it.Next() + if err != nil { + panic(err) + } + + rbef.AddOffset(n - b.baseNum) + } + rbef.Build() + return rbef.AppendBytes(buf) +} diff --git a/erigon-lib/recsplit/multiencseq/sequence_builder_test.go b/erigon-lib/recsplit/multiencseq/sequence_builder_test.go new file mode 100644 index 00000000000..bc91940882d --- /dev/null +++ b/erigon-lib/recsplit/multiencseq/sequence_builder_test.go @@ -0,0 +1,102 @@ +package multiencseq + +import ( + "testing" + + "github.com/erigontech/erigon-lib/common/hexutility" + "github.com/erigontech/erigon-lib/recsplit/eliasfano32" + "github.com/stretchr/testify/require" +) + +func TestMultiEncodingSeqBuilder(t *testing.T) { + + t.Run("singleton sequence", func(t *testing.T) { + builder := NewBuilder(1000, 1, 1005) + builder.AddOffset(1005) + builder.Build() + + b := make([]byte, 0) + b = builder.AppendBytes(b) + require.Equal(t, hexutility.MustDecodeHex("0x8000000005"), b) + }) + + t.Run("short sequences must use simple encoding", func(t *testing.T) { + builder := NewBuilder(1000, 16, 1035) + builder.AddOffset(1005) + builder.AddOffset(1007) + builder.AddOffset(1009) + builder.AddOffset(1011) + builder.AddOffset(1013) + builder.AddOffset(1015) + builder.AddOffset(1017) + builder.AddOffset(1019) + builder.AddOffset(1021) + builder.AddOffset(1023) + builder.AddOffset(1025) + builder.AddOffset(1027) + builder.AddOffset(1029) + builder.AddOffset(1031) + builder.AddOffset(1033) + builder.AddOffset(1035) + builder.Build() + + b := make([]byte, 0) + b = builder.AppendBytes(b) + require.Equal(t, hexutility.MustDecodeHex( + "0x8F"+ + "00000005"+ + "00000007"+ + "00000009"+ + "0000000B"+ + "0000000D"+ + "0000000F"+ + "00000011"+ + "00000013"+ + "00000015"+ + "00000017"+ + "00000019"+ + "0000001B"+ + "0000001D"+ + "0000001F"+ + "00000021"+ + "00000023"), b) + }) + + t.Run("large sequences must use rebased elias fano", func(t *testing.T) { + builder := NewBuilder(1000, 17, 1035) + builder.AddOffset(1005) + builder.AddOffset(1007) + builder.AddOffset(1009) + builder.AddOffset(1011) + builder.AddOffset(1013) + builder.AddOffset(1015) + builder.AddOffset(1017) + builder.AddOffset(1019) + builder.AddOffset(1021) + builder.AddOffset(1023) + builder.AddOffset(1025) + builder.AddOffset(1027) + builder.AddOffset(1029) + builder.AddOffset(1031) + builder.AddOffset(1033) + builder.AddOffset(1035) + builder.AddOffset(1037) + builder.Build() + + b := make([]byte, 0) + b = builder.AppendBytes(b) + require.Equal(t, b[0], byte(0x90), "encoding type is not 0x90") + + ef, _ := eliasfano32.ReadEliasFano(b[1:]) + require.Equal(t, uint64(17), ef.Count()) + curr := uint64(5) + for it := ef.Iterator(); it.HasNext(); { + n, err := it.Next() + + require.NoError(t, err) + require.Equal(t, curr, n) + + curr += 2 + } + }) +} diff --git a/erigon-lib/recsplit/multiencseq/sequence_reader.go b/erigon-lib/recsplit/multiencseq/sequence_reader.go new file mode 100644 index 00000000000..9898a85447a --- /dev/null +++ b/erigon-lib/recsplit/multiencseq/sequence_reader.go @@ -0,0 +1,180 @@ +package multiencseq + +import ( + "github.com/erigontech/erigon-lib/kv/stream" + "github.com/erigontech/erigon-lib/recsplit/eliasfano32" + "github.com/erigontech/erigon-lib/recsplit/simpleseq" +) + +type EncodingType uint8 + +const ( + // TODO: remove PlainEliasFano reader support once all snapshots are migrated to + // optimized .ef and E3 alpha/beta version which writes ONLY new format is published + PlainEliasFano EncodingType = 0b0 + SimpleEncoding EncodingType = 0b10000000 + RebasedEliasFano EncodingType = 0b10010000 + + PlainEliasFanoMask byte = 0b10000000 + SimpleEncodingMask byte = 0b11110000 + SimpleEncodingSizeMask byte = ^SimpleEncodingMask +) + +// SequenceReader is used to read serialized number sequences. +// +// It is aware of the different encoding types and can read them transparently. +// +// This is the "reader" counterpart of SequenceBuilder. +type SequenceReader struct { + currentEnc EncodingType + ref eliasfano32.RebasedEliasFano + sseq simpleseq.SimpleSequence +} + +func ReadMultiEncSeq(baseNum uint64, raw []byte) *SequenceReader { + var s SequenceReader + s.Reset(baseNum, raw) + return &s +} + +// This is a specialized "fast" Count method that shouldn't allocate new objects, but read the count directly +// from raw data +func Count(baseNum uint64, data []byte) uint64 { + // plain elias fano (legacy) + if data[0]&PlainEliasFanoMask == 0 { + return eliasfano32.Count(data) + } + + // rebased elias fano + if EncodingType(data[0]) == RebasedEliasFano { + return eliasfano32.Count(data[1:]) + } + + // simple encoding + if EncodingType(data[0]&SimpleEncodingMask) == SimpleEncoding { + return uint64(data[0]&SimpleEncodingSizeMask) + 1 + } + + panic("unknown encoding") +} + +// TODO: optimize me - to avoid object allocation (this TODO was inherited from elias_fano.go) +func Seek(baseNum uint64, data []byte, n uint64) (uint64, bool) { + seq := ReadMultiEncSeq(baseNum, data) + return seq.search(n) +} + +func (s *SequenceReader) EncodingType() EncodingType { + return s.currentEnc +} + +func (s *SequenceReader) Get(i uint64) uint64 { + if s.currentEnc == SimpleEncoding { + return s.sseq.Get(i) + } else if s.currentEnc == PlainEliasFano || s.currentEnc == RebasedEliasFano { + return s.ref.Get(i) + } + + panic("unknown encoding") +} + +func (s *SequenceReader) Min() uint64 { + if s.currentEnc == SimpleEncoding { + return s.sseq.Min() + } else if s.currentEnc == PlainEliasFano || s.currentEnc == RebasedEliasFano { + return s.ref.Min() + } + + panic("unknown encoding") +} + +func (s *SequenceReader) Max() uint64 { + if s.currentEnc == SimpleEncoding { + return s.sseq.Max() + } else if s.currentEnc == PlainEliasFano || s.currentEnc == RebasedEliasFano { + return s.ref.Max() + } + + panic("unknown encoding") +} + +func (s *SequenceReader) Count() uint64 { + if s.currentEnc == SimpleEncoding { + return s.sseq.Count() + } else if s.currentEnc == PlainEliasFano || s.currentEnc == RebasedEliasFano { + return s.ref.Count() + } + + panic("unknown encoding") +} + +func (s *SequenceReader) Reset(baseNum uint64, raw []byte) { + // plain elias fano (legacy) + if raw[0]&PlainEliasFanoMask == 0 { + s.currentEnc = PlainEliasFano + s.ref.Reset(0, raw) + return + } + + // rebased elias fano + if EncodingType(raw[0]) == RebasedEliasFano { + s.currentEnc = RebasedEliasFano + s.ref.Reset(baseNum, raw[1:]) + return + } + + // simple encoding + if EncodingType(raw[0]&SimpleEncodingMask) == SimpleEncoding { + s.currentEnc = SimpleEncoding + s.sseq.Reset(baseNum, raw[1:]) + return + } + + panic("unknown encoding") +} + +func (s *SequenceReader) search(v uint64) (uint64, bool) { + if s.currentEnc == SimpleEncoding { + return s.sseq.Search(v) + } else if s.currentEnc == PlainEliasFano || s.currentEnc == RebasedEliasFano { + return s.ref.Search(v) + } + + panic("unknown encoding") +} + +func (s *SequenceReader) Iterator(v int) stream.U64 { + if s.currentEnc == SimpleEncoding { + it := s.sseq.Iterator() + if v > 0 { + it.Seek(uint64(v)) + } + return it + } else if s.currentEnc == PlainEliasFano || s.currentEnc == RebasedEliasFano { + it := s.ref.Iterator() + if v > 0 { + it.Seek(uint64(v)) + } + return it + } + + panic("unknown encoding") +} + +func (s *SequenceReader) ReverseIterator(v int) stream.U64 { + if s.currentEnc == SimpleEncoding { + it := s.sseq.ReverseIterator() + if v > 0 { + it.Seek(uint64(v)) + } + return it + } else if s.currentEnc == PlainEliasFano || s.currentEnc == RebasedEliasFano { + it := s.ref.ReverseIterator() + if v > 0 { + it.Seek(uint64(v)) + } + return it + } + + panic("unknown encoding") +} diff --git a/erigon-lib/recsplit/multiencseq/sequence_reader_test.go b/erigon-lib/recsplit/multiencseq/sequence_reader_test.go new file mode 100644 index 00000000000..634091e1727 --- /dev/null +++ b/erigon-lib/recsplit/multiencseq/sequence_reader_test.go @@ -0,0 +1,192 @@ +package multiencseq + +import ( + "testing" + + "github.com/erigontech/erigon-lib/recsplit/eliasfano32" + "github.com/erigontech/erigon-lib/recsplit/simpleseq" + "github.com/stretchr/testify/require" +) + +func TestMultiEncSeq(t *testing.T) { + + t.Run("plain elias fano", func(t *testing.T) { + b := make([]byte, 0) + + // append serialized elias fano + ef := eliasfano32.NewEliasFano(3, 1027) + ef.AddOffset(1000) + ef.AddOffset(1015) + ef.AddOffset(1027) + ef.Build() + b = ef.AppendBytes(b) + + // check deserialization + s := ReadMultiEncSeq(1000, b) + require.Equal(t, PlainEliasFano, s.EncodingType()) + requireSequenceChecks(t, s) + requireRawDataChecks(t, b) + }) + + t.Run("simple encoding", func(t *testing.T) { + b := make([]byte, 0) + + // type: simple encoding, count: 3 + b = append(b, 0b10000010) + + // append serialized simple sequence + seq := simpleseq.NewSimpleSequence(1000, 3) + seq.AddOffset(1000) + seq.AddOffset(1015) + seq.AddOffset(1027) + b = seq.AppendBytes(b) + + // check deserialization + s := ReadMultiEncSeq(1000, b) + require.Equal(t, SimpleEncoding, s.EncodingType()) + requireSequenceChecks(t, s) + requireRawDataChecks(t, b) + }) + + t.Run("rebased elias fano", func(t *testing.T) { + b := make([]byte, 0) + + // type: rebased elias fano + b = append(b, 0b10010000) + + // append serialized elias fano (rebased -1000) + ef := eliasfano32.NewEliasFano(3, 27) + ef.AddOffset(0) + ef.AddOffset(15) + ef.AddOffset(27) + ef.Build() + b = ef.AppendBytes(b) + + // check deserialization + s := ReadMultiEncSeq(1000, b) + require.Equal(t, RebasedEliasFano, s.EncodingType()) + requireSequenceChecks(t, s) + requireRawDataChecks(t, b) + }) +} + +func requireSequenceChecks(t *testing.T, s *SequenceReader) { + require.Equal(t, uint64(1000), s.Min()) + require.Equal(t, uint64(1027), s.Max()) + require.Equal(t, uint64(3), s.Count()) + + require.Equal(t, uint64(1000), s.Get(0)) + require.Equal(t, uint64(1015), s.Get(1)) + require.Equal(t, uint64(1027), s.Get(2)) + + // check iterator + it := s.Iterator(0) + require.True(t, it.HasNext()) + n, err := it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1000), n) + + require.True(t, it.HasNext()) + n, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1015), n) + + require.True(t, it.HasNext()) + n, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1027), n) + + require.False(t, it.HasNext()) + + // check iterator + seek + it = s.Iterator(1014) + require.True(t, it.HasNext()) + n, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1015), n) + + require.True(t, it.HasNext()) + n, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1027), n) + + require.False(t, it.HasNext()) + + // check iterator + seek before base num + it = s.Iterator(999) + require.True(t, it.HasNext()) + n, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1000), n) + + require.True(t, it.HasNext()) + n, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1015), n) + + require.True(t, it.HasNext()) + n, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1027), n) + + require.False(t, it.HasNext()) + + // check reverse iterator + it = s.ReverseIterator(2000) + require.True(t, it.HasNext()) + n, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1027), n) + + require.True(t, it.HasNext()) + n, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1015), n) + + require.True(t, it.HasNext()) + n, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1000), n) + + require.False(t, it.HasNext()) + + // check reverse iterator + seek + it = s.ReverseIterator(1016) + require.True(t, it.HasNext()) + n, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1015), n) + + require.True(t, it.HasNext()) + n, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1000), n) + + require.False(t, it.HasNext()) + + // check reverse iterator + seek before base num + it = s.ReverseIterator(999) + require.False(t, it.HasNext()) +} + +func requireRawDataChecks(t *testing.T, b []byte) { + // check fast count + require.Equal(t, uint64(3), Count(1000, b)) + + // check search + n, found := Seek(1000, b, 1014) + require.True(t, found) + require.Equal(t, uint64(1015), n) + + n, found = Seek(1000, b, 1015) + require.True(t, found) + require.Equal(t, uint64(1015), n) + + _, found = Seek(1000, b, 1028) + require.False(t, found) + + // check search before base num + n, found = Seek(1000, b, 999) + require.True(t, found) + require.Equal(t, uint64(1000), n) +} diff --git a/erigon-lib/recsplit/simpleseq/simple_sequence.go b/erigon-lib/recsplit/simpleseq/simple_sequence.go new file mode 100644 index 00000000000..657efaf23ad --- /dev/null +++ b/erigon-lib/recsplit/simpleseq/simple_sequence.go @@ -0,0 +1,182 @@ +package simpleseq + +import ( + "encoding/binary" + "sort" + + "github.com/erigontech/erigon-lib/kv/stream" +) + +// SimpleSequence is a simpler representation of number sequences meant to be a drop-in +// replacement for EliasFano for short sequences. +// +// It stores sequences as deltas of a base number. We assume base number and element values +// as uint64, while the deltas are uint32, reinforcing the assumption that this implementation +// is optimized for short sequences of close values. +type SimpleSequence struct { + baseNum uint64 + raw []byte + pos int +} + +func NewSimpleSequence(baseNum uint64, count uint64) *SimpleSequence { + return &SimpleSequence{ + baseNum: baseNum, + raw: make([]byte, count*4), + pos: 0, + } +} + +// Construct a SimpleSequence from a serialized representation. +// +// Returned object can be reused by calling Reset() on it. +func ReadSimpleSequence(baseNum uint64, raw []byte) *SimpleSequence { + seq := SimpleSequence{} + seq.Reset(baseNum, raw) + return &seq +} + +func (s *SimpleSequence) Get(i uint64) uint64 { + idx := i * 4 + delta := binary.BigEndian.Uint32(s.raw[idx : idx+4]) + return s.baseNum + uint64(delta) +} + +func (s *SimpleSequence) Min() uint64 { + return s.Get(0) +} + +func (s *SimpleSequence) Max() uint64 { + return s.Get(s.Count() - 1) +} + +func (s *SimpleSequence) Count() uint64 { + return uint64(len(s.raw) / 4) +} + +func (s *SimpleSequence) AddOffset(offset uint64) { + binary.BigEndian.PutUint32(s.raw[s.pos*4:(s.pos+1)*4], uint32(offset-s.baseNum)) + s.pos++ +} + +func (s *SimpleSequence) Reset(baseNum uint64, raw []byte) { + s.baseNum = baseNum + s.raw = raw + s.pos = len(raw) / 4 +} + +func (s *SimpleSequence) AppendBytes(buf []byte) []byte { + return append(buf, s.raw...) +} + +func (s *SimpleSequence) search(v uint64) (int, bool) { + c := s.Count() + idx := sort.Search(int(c), func(i int) bool { + return s.Get(uint64(i)) >= v + }) + + if idx >= int(c) { + return 0, false + } + return idx, true +} + +func (s *SimpleSequence) reverseSearch(v uint64) (int, bool) { + c := s.Count() + idx := sort.Search(int(c), func(i int) bool { + return s.Get(c-uint64(i)-1) <= v + }) + + if idx >= int(c) { + return 0, false + } + return int(c) - idx - 1, true +} + +func (s *SimpleSequence) Search(v uint64) (uint64, bool) { + idx, found := s.search(v) + if !found { + return 0, false + } + return s.Get(uint64(idx)), true +} + +func (s *SimpleSequence) Iterator() *SimpleSequenceIterator { + return &SimpleSequenceIterator{ + seq: s, + pos: 0, + } +} + +func (s *SimpleSequence) ReverseIterator() *ReverseSimpleSequenceIterator { + return &ReverseSimpleSequenceIterator{ + seq: s, + pos: int(s.Count()) - 1, + } +} + +type SimpleSequenceIterator struct { + seq *SimpleSequence + pos int +} + +func (it *SimpleSequenceIterator) Next() (uint64, error) { + if !it.HasNext() { + return 0, stream.ErrIteratorExhausted + } + + v := it.seq.Get(uint64(it.pos)) + it.pos++ + return v, nil +} + +func (it *SimpleSequenceIterator) HasNext() bool { + return it.pos < int(it.seq.Count()) +} + +func (it *SimpleSequenceIterator) Close() { + // noop +} + +func (it *SimpleSequenceIterator) Seek(v uint64) { + idx, found := it.seq.search(v) + if !found { + it.pos = int(it.seq.Count()) + return + } + + it.pos = idx +} + +type ReverseSimpleSequenceIterator struct { + seq *SimpleSequence + pos int +} + +func (it *ReverseSimpleSequenceIterator) Next() (uint64, error) { + if !it.HasNext() { + return 0, stream.ErrIteratorExhausted + } + + v := it.seq.Get(uint64(it.pos)) + it.pos-- + return v, nil +} + +func (it *ReverseSimpleSequenceIterator) HasNext() bool { + return it.pos >= 0 +} + +func (it *ReverseSimpleSequenceIterator) Close() { + // noop +} + +func (it *ReverseSimpleSequenceIterator) Seek(v uint64) { + idx, found := it.seq.reverseSearch(v) + if !found { + it.pos = -1 + return + } + + it.pos = idx +} diff --git a/erigon-lib/recsplit/simpleseq/simple_sequence_test.go b/erigon-lib/recsplit/simpleseq/simple_sequence_test.go new file mode 100644 index 00000000000..d93a3832f28 --- /dev/null +++ b/erigon-lib/recsplit/simpleseq/simple_sequence_test.go @@ -0,0 +1,283 @@ +package simpleseq + +import ( + "testing" + + "github.com/erigontech/erigon-lib/common/hexutility" + "github.com/erigontech/erigon-lib/kv/stream" + "github.com/stretchr/testify/require" +) + +func TestSimpleSequence(t *testing.T) { + s := NewSimpleSequence(1000, 4) + s.AddOffset(1001) + s.AddOffset(1007) + s.AddOffset(1015) + s.AddOffset(1027) + + t.Run("basic test", func(t *testing.T) { + require.Equal(t, uint64(1001), s.Get(0)) + require.Equal(t, uint64(1007), s.Get(1)) + require.Equal(t, uint64(1015), s.Get(2)) + require.Equal(t, uint64(1027), s.Get(3)) + + require.Equal(t, uint64(1001), s.Min()) + require.Equal(t, uint64(1027), s.Max()) + require.Equal(t, uint64(4), s.Count()) + }) + + t.Run("serialization", func(t *testing.T) { + b := make([]byte, 0) + b = s.AppendBytes(b) + + require.Equal(t, hexutility.MustDecodeHex("0x"+ + "00000001"+ + "00000007"+ + "0000000f"+ + "0000001b"), b) + }) + + t.Run("search", func(t *testing.T) { + // before baseNum + v, found := s.Search(10) + require.True(t, found) + require.Equal(t, uint64(1001), v) + + // at baseNum + v, found = s.Search(1000) + require.True(t, found) + require.Equal(t, uint64(1001), v) + + // at elem + v, found = s.Search(1007) + require.True(t, found) + require.Equal(t, uint64(1007), v) + + // between elems + v, found = s.Search(1014) + require.True(t, found) + require.Equal(t, uint64(1015), v) + + // at last + v, found = s.Search(1027) + require.True(t, found) + require.Equal(t, uint64(1027), v) + + // after last + v, found = s.Search(1028) + require.False(t, found) + require.Equal(t, uint64(0), v) + }) + + t.Run("iterator", func(t *testing.T) { + it := s.Iterator() + defer it.Close() + + require.True(t, it.HasNext()) + v, err := it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1001), v) + + require.True(t, it.HasNext()) + v, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1007), v) + + require.True(t, it.HasNext()) + v, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1015), v) + + require.True(t, it.HasNext()) + v, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1027), v) + + require.False(t, it.HasNext()) + v, err = it.Next() + require.Error(t, stream.ErrIteratorExhausted, err) + require.Equal(t, uint64(0), v) + }) + + t.Run("iterator seek exact", func(t *testing.T) { + it := s.Iterator() + defer it.Close() + + it.Seek(1015) + + require.True(t, it.HasNext()) + v, err := it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1015), v) + + require.True(t, it.HasNext()) + v, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1027), v) + + require.False(t, it.HasNext()) + v, err = it.Next() + require.Error(t, stream.ErrIteratorExhausted, err) + require.Equal(t, uint64(0), v) + }) + + t.Run("iterator seek", func(t *testing.T) { + it := s.Iterator() + defer it.Close() + + it.Seek(1014) + + require.True(t, it.HasNext()) + v, err := it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1015), v) + + require.True(t, it.HasNext()) + v, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1027), v) + + require.False(t, it.HasNext()) + v, err = it.Next() + require.Error(t, stream.ErrIteratorExhausted, err) + require.Equal(t, uint64(0), v) + }) + + t.Run("iterator seek not found", func(t *testing.T) { + it := s.Iterator() + defer it.Close() + + it.Seek(1029) + require.False(t, it.HasNext()) + v, err := it.Next() + require.Error(t, stream.ErrIteratorExhausted, err) + require.Equal(t, uint64(0), v) + }) + + t.Run("iterator seek before base num", func(t *testing.T) { + it := s.Iterator() + defer it.Close() + + it.Seek(999) + require.True(t, it.HasNext()) + v, err := it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1001), v) + + require.True(t, it.HasNext()) + v, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1007), v) + + require.True(t, it.HasNext()) + v, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1015), v) + + require.True(t, it.HasNext()) + v, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1027), v) + + require.False(t, it.HasNext()) + v, err = it.Next() + require.Error(t, stream.ErrIteratorExhausted, err) + require.Equal(t, uint64(0), v) + }) + + t.Run("reverse iterator", func(t *testing.T) { + it := s.ReverseIterator() + defer it.Close() + + require.True(t, it.HasNext()) + v, err := it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1027), v) + + require.True(t, it.HasNext()) + v, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1015), v) + + require.True(t, it.HasNext()) + v, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1007), v) + + require.True(t, it.HasNext()) + v, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1001), v) + + require.False(t, it.HasNext()) + v, err = it.Next() + require.Error(t, stream.ErrIteratorExhausted, err) + require.Equal(t, uint64(0), v) + }) + + t.Run("reverse iterator seek exact", func(t *testing.T) { + it := s.ReverseIterator() + defer it.Close() + + it.Seek(1007) + + require.True(t, it.HasNext()) + v, err := it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1007), v) + + require.True(t, it.HasNext()) + v, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1001), v) + + require.False(t, it.HasNext()) + v, err = it.Next() + require.Error(t, stream.ErrIteratorExhausted, err) + require.Equal(t, uint64(0), v) + }) + + t.Run("reverse iterator seek", func(t *testing.T) { + it := s.ReverseIterator() + defer it.Close() + + it.Seek(1008) + + require.True(t, it.HasNext()) + v, err := it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1007), v) + + require.True(t, it.HasNext()) + v, err = it.Next() + require.NoError(t, err) + require.Equal(t, uint64(1001), v) + + require.False(t, it.HasNext()) + v, err = it.Next() + require.Error(t, stream.ErrIteratorExhausted, err) + require.Equal(t, uint64(0), v) + }) + + t.Run("reverse iterator seek not found", func(t *testing.T) { + it := s.ReverseIterator() + defer it.Close() + + it.Seek(1000) + require.False(t, it.HasNext()) + v, err := it.Next() + require.Error(t, stream.ErrIteratorExhausted, err) + require.Equal(t, uint64(0), v) + }) + + t.Run("reverse iterator seek before base num", func(t *testing.T) { + it := s.ReverseIterator() + defer it.Close() + + it.Seek(999) + require.False(t, it.HasNext()) + v, err := it.Next() + require.Error(t, stream.ErrIteratorExhausted, err) + require.Equal(t, uint64(0), v) + }) +} diff --git a/erigon-lib/state/aggregator.go b/erigon-lib/state/aggregator.go index 908c5a9e340..ec78543e22c 100644 --- a/erigon-lib/state/aggregator.go +++ b/erigon-lib/state/aggregator.go @@ -149,6 +149,11 @@ func NewAggregator(ctx context.Context, dirs datadir.Dirs, aggregationStep uint6 }, nil } +// TODO: exported for idx_optimize.go +func GetStateIndicesSalt(baseDir string) (salt *uint32, err error) { + return getStateIndicesSalt(baseDir) +} + // getStateIndicesSalt - try read salt for all indices from DB. Or fall-back to new salt creation. // if db is Read-Only (for example remote RPCDaemon or utilities) - we will not create new indices - and existing indices have salt in metadata. func getStateIndicesSalt(baseDir string) (salt *uint32, err error) { diff --git a/erigon-lib/state/domain.go b/erigon-lib/state/domain.go index 58d0e1ffc9e..bed38c226bd 100644 --- a/erigon-lib/state/domain.go +++ b/erigon-lib/state/domain.go @@ -1299,6 +1299,11 @@ func (d *Domain) BuildMissedAccessors(ctx context.Context, g *errgroup.Group, ps } } +// TODO: exported for idx_optimize.go +func BuildAccessor(ctx context.Context, d *seg.Decompressor, compressed seg.FileCompression, idxPath string, values bool, cfg recsplit.RecSplitArgs, ps *background.ProgressSet, logger log.Logger) error { + return buildAccessor(ctx, d, compressed, idxPath, values, cfg, ps, logger) +} + func buildAccessor(ctx context.Context, d *seg.Decompressor, compressed seg.FileCompression, idxPath string, values bool, cfg recsplit.RecSplitArgs, ps *background.ProgressSet, logger log.Logger) error { _, fileName := filepath.Split(idxPath) count := d.Count() diff --git a/erigon-lib/state/history.go b/erigon-lib/state/history.go index f2a0fc6256f..26319fe804c 100644 --- a/erigon-lib/state/history.go +++ b/erigon-lib/state/history.go @@ -42,7 +42,7 @@ import ( "github.com/erigontech/erigon-lib/kv/stream" "github.com/erigontech/erigon-lib/log/v3" "github.com/erigontech/erigon-lib/recsplit" - "github.com/erigontech/erigon-lib/recsplit/eliasfano32" + "github.com/erigontech/erigon-lib/recsplit/multiencseq" "github.com/erigontech/erigon-lib/seg" ) @@ -349,14 +349,14 @@ func (h *History) buildVi(ctx context.Context, item *filesItem, ps *background.P fromStep, toStep := item.startTxNum/h.aggregationStep, item.endTxNum/h.aggregationStep idxPath := h.vAccessorFilePath(fromStep, toStep) - _, err = h.buildVI(ctx, idxPath, item.decompressor, iiItem.decompressor, ps) + _, err = h.buildVI(ctx, idxPath, item.decompressor, iiItem.decompressor, iiItem.startTxNum, ps) if err != nil { return fmt.Errorf("buildVI: %w", err) } return nil } -func (h *History) buildVI(ctx context.Context, historyIdxPath string, hist, efHist *seg.Decompressor, ps *background.ProgressSet) (string, error) { +func (h *History) buildVI(ctx context.Context, historyIdxPath string, hist, efHist *seg.Decompressor, efBaseTxNum uint64, ps *background.ProgressSet) (string, error) { rs, err := recsplit.NewRecSplit(recsplit.RecSplitArgs{ KeyCount: hist.Count(), Enums: false, @@ -399,10 +399,10 @@ func (h *History) buildVI(ctx context.Context, historyIdxPath string, hist, efHi // fmt.Printf("ef key %x\n", keyBuf) - ef, _ := eliasfano32.ReadEliasFano(valBuf) - efIt := ef.Iterator() - for efIt.HasNext() { - txNum, err := efIt.Next() + seq := multiencseq.ReadMultiEncSeq(efBaseTxNum, valBuf) + it := seq.Iterator(0) + for it.HasNext() { + txNum, err := it.Next() if err != nil { return "", err } @@ -564,12 +564,14 @@ func (w *historyBufferedWriter) Flush(ctx context.Context, tx kv.RwTx) error { return nil } +// TODO: rename ef* fields type HistoryCollation struct { historyComp *seg.Writer efHistoryComp *seg.Writer historyPath string efHistoryPath string - historyCount int // same as historyComp.Count() + efBaseTxNum uint64 // TODO: is it necessary or using step later is reliable? + historyCount int // same as historyComp.Count() } func (c HistoryCollation) Close() { @@ -588,10 +590,10 @@ func (h *History) collate(ctx context.Context, step, txFrom, txTo uint64, roTx k } var ( - historyComp *seg.Writer - efHistoryComp *seg.Writer - txKey [8]byte - err error + historyComp *seg.Writer + seqWriter *seg.Writer + txKey [8]byte + err error historyPath = h.vFilePath(step, step+1) efHistoryPath = h.efFilePath(step, step+1) @@ -604,8 +606,8 @@ func (h *History) collate(ctx context.Context, step, txFrom, txTo uint64, roTx k if historyComp != nil { historyComp.Close() } - if efHistoryComp != nil { - efHistoryComp.Close() + if seqWriter != nil { + seqWriter.Close() } } }() @@ -661,26 +663,27 @@ func (h *History) collate(ctx context.Context, step, txFrom, txTo uint64, roTx k defer cd.Close() } - efComp, err := seg.NewCompressor(ctx, "collate idx "+h.filenameBase, efHistoryPath, h.dirs.Tmp, h.compressorCfg, log.LvlTrace, h.logger) + seqComp, err := seg.NewCompressor(ctx, "collate idx "+h.filenameBase, efHistoryPath, h.dirs.Tmp, h.compressorCfg, log.LvlTrace, h.logger) if err != nil { return HistoryCollation{}, fmt.Errorf("create %s ef history compressor: %w", h.filenameBase, err) } if h.noFsync { - efComp.DisableFsync() + seqComp.DisableFsync() } var ( keyBuf = make([]byte, 0, 256) numBuf = make([]byte, 8) bitmap = bitmapdb.NewBitmap64() - prevEf []byte + prevSeq []byte prevKey []byte initialized bool ) - efHistoryComp = seg.NewWriter(efComp, seg.CompressNone) // coll+build must be fast - no compression + seqWriter = seg.NewWriter(seqComp, seg.CompressNone) // coll+build must be fast - no compression collector.SortAndFlushInBackground(true) defer bitmapdb.ReturnToPool64(bitmap) + baseTxNum := step * h.aggregationStep loadBitmapsFunc := func(k, v []byte, table etl.CurrentTableReader, next etl.LoadNextFunc) error { txNum := binary.BigEndian.Uint64(v) if !initialized { @@ -694,7 +697,7 @@ func (h *History) collate(ctx context.Context, step, txFrom, txTo uint64, roTx k return nil } - ef := eliasfano32.NewEliasFano(bitmap.GetCardinality(), bitmap.Maximum()) + seqBuilder := multiencseq.NewBuilder(baseTxNum, bitmap.GetCardinality(), bitmap.Maximum()) it := bitmap.Iterator() for it.HasNext() { @@ -727,17 +730,17 @@ func (h *History) collate(ctx context.Context, step, txFrom, txTo uint64, roTx k } } - ef.AddOffset(vTxNum) + seqBuilder.AddOffset(vTxNum) } bitmap.Clear() - ef.Build() + seqBuilder.Build() - prevEf = ef.AppendBytes(prevEf[:0]) + prevSeq = seqBuilder.AppendBytes(prevSeq[:0]) - if err = efHistoryComp.AddWord(prevKey); err != nil { + if err = seqWriter.AddWord(prevKey); err != nil { return fmt.Errorf("add %s ef history key [%x]: %w", h.filenameBase, prevKey, err) } - if err = efHistoryComp.AddWord(prevEf); err != nil { + if err = seqWriter.AddWord(prevSeq); err != nil { return fmt.Errorf("add %s ef history val: %w", h.filenameBase, err) } @@ -762,8 +765,9 @@ func (h *History) collate(ctx context.Context, step, txFrom, txTo uint64, roTx k mxCollationSizeHist.SetUint64(uint64(historyComp.Count())) return HistoryCollation{ - efHistoryComp: efHistoryComp, + efHistoryComp: seqWriter, efHistoryPath: efHistoryPath, + efBaseTxNum: step * h.aggregationStep, historyPath: historyPath, historyComp: historyComp, historyCount: historyComp.Count(), @@ -883,7 +887,7 @@ func (h *History) buildFiles(ctx context.Context, step uint64, collation History } historyIdxPath := h.vAccessorFilePath(step, step+1) - historyIdxPath, err = h.buildVI(ctx, historyIdxPath, historyDecomp, efHistoryDecomp, ps) + historyIdxPath, err = h.buildVI(ctx, historyIdxPath, historyDecomp, efHistoryDecomp, collation.efBaseTxNum, ps) if err != nil { return HistoryFiles{}, fmt.Errorf("build %s .vi: %w", h.filenameBase, err) } diff --git a/erigon-lib/state/history_stream.go b/erigon-lib/state/history_stream.go index 083eaccefc0..c852d8da486 100644 --- a/erigon-lib/state/history_stream.go +++ b/erigon-lib/state/history_stream.go @@ -28,7 +28,7 @@ import ( "github.com/erigontech/erigon-lib/kv/order" "github.com/erigontech/erigon-lib/kv/stream" "github.com/erigontech/erigon-lib/log/v3" - "github.com/erigontech/erigon-lib/recsplit/eliasfano32" + "github.com/erigontech/erigon-lib/recsplit/multiencseq" "github.com/erigontech/erigon-lib/seg" ) @@ -116,7 +116,7 @@ func (hi *HistoryRangeAsOfFiles) advanceInFiles() error { if bytes.Equal(key, hi.nextKey) { continue } - n, ok := eliasfano32.Seek(idxVal, hi.startTxNum) + n, ok := multiencseq.Seek(top.startTxNum, idxVal, hi.startTxNum) if !ok { continue } @@ -397,7 +397,7 @@ func (hi *HistoryChangesIterFiles) advance() error { if bytes.Equal(key, hi.nextKey) { continue } - n, ok := eliasfano32.Seek(idxVal, hi.startTxNum) + n, ok := multiencseq.Seek(top.startTxNum, idxVal, hi.startTxNum) if !ok { continue } diff --git a/erigon-lib/state/history_test.go b/erigon-lib/state/history_test.go index 24272c8e01e..0eca834c559 100644 --- a/erigon-lib/state/history_test.go +++ b/erigon-lib/state/history_test.go @@ -40,7 +40,7 @@ import ( "github.com/erigontech/erigon-lib/kv/stream" "github.com/erigontech/erigon-lib/log/v3" "github.com/erigontech/erigon-lib/recsplit" - "github.com/erigontech/erigon-lib/recsplit/eliasfano32" + "github.com/erigontech/erigon-lib/recsplit/multiencseq" "github.com/erigontech/erigon-lib/seg" "github.com/stretchr/testify/require" ) @@ -117,8 +117,8 @@ func TestHistoryCollationsAndBuilds(t *testing.T) { keyBuf, _ = efReader.Next(nil) valBuf, _ = efReader.Next(nil) - ef, _ := eliasfano32.ReadEliasFano(valBuf) - efIt := ef.Iterator() + ef := multiencseq.ReadMultiEncSeq(i, valBuf) + efIt := ef.Iterator(0) require.Contains(t, values, string(keyBuf), "key not found in values") seenKeys = append(seenKeys, string(keyBuf)) @@ -240,8 +240,8 @@ func TestHistoryCollationBuild(t *testing.T) { w, _ := g.Next(nil) keyWords = append(keyWords, string(w)) w, _ = g.Next(w[:0]) - ef, _ := eliasfano32.ReadEliasFano(w) - ints, err := stream.ToArrayU64(ef.Iterator()) + ef := multiencseq.ReadMultiEncSeq(0, w) + ints, err := stream.ToArrayU64(ef.Iterator(0)) require.NoError(err) intArrs = append(intArrs, ints) } diff --git a/erigon-lib/state/inverted_index.go b/erigon-lib/state/inverted_index.go index 99df82db5aa..4fa5b9593d4 100644 --- a/erigon-lib/state/inverted_index.go +++ b/erigon-lib/state/inverted_index.go @@ -49,7 +49,7 @@ import ( "github.com/erigontech/erigon-lib/kv/stream" "github.com/erigontech/erigon-lib/log/v3" "github.com/erigontech/erigon-lib/recsplit" - "github.com/erigontech/erigon-lib/recsplit/eliasfano32" + "github.com/erigontech/erigon-lib/recsplit/multiencseq" "github.com/erigontech/erigon-lib/seg" ) @@ -611,8 +611,8 @@ func (iit *InvertedIndexRoTx) seekInFiles(key []byte, txNum uint64) (found bool, if !bytes.Equal(k, key) { continue } - eliasVal, _ := g.Next(nil) - equalOrHigherTxNum, found = eliasfano32.Seek(eliasVal, txNum) + encodedSeq, _ := g.Next(nil) + equalOrHigherTxNum, found = multiencseq.Seek(iit.files[i].startTxNum, encodedSeq, txNum) if !found { continue } @@ -702,7 +702,7 @@ func (iit *InvertedIndexRoTx) iterateRangeOnFiles(key []byte, startTxNum, endTxN indexTable: iit.ii.valuesTable, orderAscend: asc, limit: limit, - ef: eliasfano32.NewEliasFano(1, 1), + seq: &multiencseq.SequenceReader{}, } if asc { for i := len(iit.files) - 1; i >= 0; i-- { @@ -1029,7 +1029,7 @@ func (ii *InvertedIndex) collate(ctx context.Context, step uint64, roTx kv.Tx) ( coll.writer = seg.NewWriter(comp, ii.compression) var ( - prevEf []byte + prevSeq []byte prevKey []byte initialized bool bitmap = bitmapdb.NewBitmap64() @@ -1049,20 +1049,20 @@ func (ii *InvertedIndex) collate(ctx context.Context, step uint64, roTx kv.Tx) ( return nil } - ef := eliasfano32.NewEliasFano(bitmap.GetCardinality(), bitmap.Maximum()) + seqBuilder := multiencseq.NewBuilder(step*ii.aggregationStep, bitmap.GetCardinality(), bitmap.Maximum()) it := bitmap.Iterator() for it.HasNext() { - ef.AddOffset(it.Next()) + seqBuilder.AddOffset(it.Next()) } bitmap.Clear() - ef.Build() + seqBuilder.Build() - prevEf = ef.AppendBytes(prevEf[:0]) + prevSeq = seqBuilder.AppendBytes(prevSeq[:0]) if err = coll.writer.AddWord(prevKey); err != nil { return fmt.Errorf("add %s efi index key [%x]: %w", ii.filenameBase, prevKey, err) } - if err = coll.writer.AddWord(prevEf); err != nil { + if err = coll.writer.AddWord(prevSeq); err != nil { return fmt.Errorf("add %s efi index val: %w", ii.filenameBase, err) } diff --git a/erigon-lib/state/inverted_index_stream.go b/erigon-lib/state/inverted_index_stream.go index 6df0a465153..79b730a02ba 100644 --- a/erigon-lib/state/inverted_index_stream.go +++ b/erigon-lib/state/inverted_index_stream.go @@ -27,7 +27,7 @@ import ( "github.com/erigontech/erigon-lib/kv/bitmapdb" "github.com/erigontech/erigon-lib/kv/order" "github.com/erigontech/erigon-lib/kv/stream" - "github.com/erigontech/erigon-lib/recsplit/eliasfano32" + "github.com/erigontech/erigon-lib/recsplit/multiencseq" ) // InvertedIdxStreamFiles allows iteration over range of txn numbers @@ -40,7 +40,7 @@ type InvertedIdxStreamFiles struct { limit int orderAscend order.By - efIt stream.Uno[uint64] + seqIt stream.Uno[uint64] indexTable string stack []visibleFile @@ -48,7 +48,7 @@ type InvertedIdxStreamFiles struct { hasNext bool err error - ef *eliasfano32.EliasFano + seq *multiencseq.SequenceReader } func (it *InvertedIdxStreamFiles) Close() { @@ -84,7 +84,7 @@ func (it *InvertedIdxStreamFiles) next() uint64 { func (it *InvertedIdxStreamFiles) advanceInFiles() { for { - for it.efIt == nil { + for it.seqIt == nil { if len(it.stack) == 0 { it.hasNext = false return @@ -99,26 +99,23 @@ func (it *InvertedIdxStreamFiles) advanceInFiles() { g.Reset(offset) k, _ := g.NextUncompressed() if bytes.Equal(k, it.key) { - eliasVal, _ := g.NextUncompressed() - it.ef.Reset(eliasVal) - var efiter *eliasfano32.EliasFanoIter + numSeqVal, _ := g.NextUncompressed() + it.seq.Reset(item.startTxNum, numSeqVal) + var seqIt stream.Uno[uint64] if it.orderAscend { - efiter = it.ef.Iterator() + seqIt = it.seq.Iterator(it.startTxNum) } else { - efiter = it.ef.ReverseIterator() + seqIt = it.seq.ReverseIterator(it.startTxNum) } - if it.startTxNum > 0 { - efiter.Seek(uint64(it.startTxNum)) - } - it.efIt = efiter + it.seqIt = seqIt } } //Asc: [from, to) AND from < to //Desc: [from, to) AND from > to if it.orderAscend { - for it.efIt.HasNext() { - n, err := it.efIt.Next() + for it.seqIt.HasNext() { + n, err := it.seqIt.Next() if err != nil { it.err = err return @@ -137,8 +134,8 @@ func (it *InvertedIdxStreamFiles) advanceInFiles() { return } } else { - for it.efIt.HasNext() { - n, err := it.efIt.Next() + for it.seqIt.HasNext() { + n, err := it.seqIt.Next() if err != nil { it.err = err return @@ -157,7 +154,7 @@ func (it *InvertedIdxStreamFiles) advanceInFiles() { return } } - it.efIt = nil // Exhausted this iterator + it.seqIt = nil // Exhausted this iterator } } @@ -339,7 +336,7 @@ func (it *InvertedIterator1) advanceInFiles() { heap.Push(&it.h, top) } if !bytes.Equal(key, it.key) { - ef, _ := eliasfano32.ReadEliasFano(val) + ef := multiencseq.ReadMultiEncSeq(top.startTxNum, val) _min := ef.Get(0) _max := ef.Max() if _min < it.endTxNum && _max >= it.startTxNum { // Intersection of [min; max) and [it.startTxNum; it.endTxNum) diff --git a/erigon-lib/state/inverted_index_test.go b/erigon-lib/state/inverted_index_test.go index e173e4f7df4..72084aeae9f 100644 --- a/erigon-lib/state/inverted_index_test.go +++ b/erigon-lib/state/inverted_index_test.go @@ -38,7 +38,7 @@ import ( "github.com/erigontech/erigon-lib/kv/stream" "github.com/erigontech/erigon-lib/log/v3" "github.com/erigontech/erigon-lib/recsplit" - "github.com/erigontech/erigon-lib/recsplit/eliasfano32" + "github.com/erigontech/erigon-lib/recsplit/multiencseq" "github.com/erigontech/erigon-lib/seg" ) @@ -212,9 +212,9 @@ func TestInvIndexCollationBuild(t *testing.T) { w, _ := g.Next(nil) words = append(words, string(w)) w, _ = g.Next(w[:0]) - ef, _ := eliasfano32.ReadEliasFano(w) + ef := multiencseq.ReadMultiEncSeq(0, w) var ints []uint64 - it := ef.Iterator() + it := ef.Iterator(0) for it.HasNext() { v, _ := it.Next() ints = append(ints, v) diff --git a/erigon-lib/state/merge.go b/erigon-lib/state/merge.go index d6b6baf2c32..1b17b034da0 100644 --- a/erigon-lib/state/merge.go +++ b/erigon-lib/state/merge.go @@ -35,7 +35,7 @@ import ( "github.com/erigontech/erigon-lib/kv" "github.com/erigontech/erigon-lib/log/v3" "github.com/erigontech/erigon-lib/recsplit" - "github.com/erigontech/erigon-lib/recsplit/eliasfano32" + "github.com/erigontech/erigon-lib/recsplit/multiencseq" "github.com/erigontech/erigon-lib/seg" ) @@ -338,28 +338,28 @@ func (ht *HistoryRoTx) staticFilesInRange(r HistoryRanges) (indexFiles, historyF return } -func mergeEfs(preval, val, buf []byte) ([]byte, error) { - preef, _ := eliasfano32.ReadEliasFano(preval) - ef, _ := eliasfano32.ReadEliasFano(val) - preIt := preef.Iterator() - efIt := ef.Iterator() - newEf := eliasfano32.NewEliasFano(preef.Count()+ef.Count(), ef.Max()) +func mergeNumSeqs(preval, val []byte, preBaseNum, baseNum uint64, buf []byte, outBaseNum uint64) ([]byte, error) { + preSeq := multiencseq.ReadMultiEncSeq(preBaseNum, preval) + seq := multiencseq.ReadMultiEncSeq(baseNum, val) + preIt := preSeq.Iterator(0) + efIt := seq.Iterator(0) + newSeq := multiencseq.NewBuilder(outBaseNum, preSeq.Count()+seq.Count(), seq.Max()) for preIt.HasNext() { v, err := preIt.Next() if err != nil { return nil, err } - newEf.AddOffset(v) + newSeq.AddOffset(v) } for efIt.HasNext() { v, err := efIt.Next() if err != nil { return nil, err } - newEf.AddOffset(v) + newSeq.AddOffset(v) } - newEf.Build() - return newEf.AppendBytes(buf), nil + newSeq.Build() + return newSeq.AppendBytes(buf), nil } type valueTransformer func(val []byte, startTxNum, endTxNum uint64) ([]byte, error) @@ -603,12 +603,13 @@ func (iit *InvertedIndexRoTx) mergeFiles(ctx context.Context, files []*filesItem val, _ := g.Next(nil) //fmt.Printf("heap push %s [%d] %x\n", item.decompressor.FilePath(), item.endTxNum, key) heap.Push(&cp, &CursorItem{ - t: FILE_CURSOR, - dg: g, - key: key, - val: val, - endTxNum: item.endTxNum, - reverse: true, + t: FILE_CURSOR, + dg: g, + key: key, + val: val, + startTxNum: item.startTxNum, + endTxNum: item.endTxNum, + reverse: true, }) } } @@ -622,13 +623,28 @@ func (iit *InvertedIndexRoTx) mergeFiles(ctx context.Context, files []*filesItem for cp.Len() > 0 { lastKey := common.Copy(cp[0].key) lastVal := common.Copy(cp[0].val) + + // Pre-rebase the first sequence + preSeq := multiencseq.ReadMultiEncSeq(cp[0].startTxNum, lastVal) + preIt := preSeq.Iterator(0) + newSeq := multiencseq.NewBuilder(startTxNum, preSeq.Count(), preSeq.Max()) + for preIt.HasNext() { + v, err := preIt.Next() + if err != nil { + return nil, err + } + newSeq.AddOffset(v) + } + newSeq.Build() + lastVal = newSeq.AppendBytes(nil) + var mergedOnce bool // Advance all the items that have this key (including the top) for cp.Len() > 0 && bytes.Equal(cp[0].key, lastKey) { ci1 := heap.Pop(&cp).(*CursorItem) if mergedOnce { - if lastVal, err = mergeEfs(ci1.val, lastVal, nil); err != nil { + if lastVal, err = mergeNumSeqs(ci1.val, lastVal, ci1.startTxNum, startTxNum, nil, startTxNum); err != nil { return nil, fmt.Errorf("merge %s inverted index: %w", iit.ii.filenameBase, err) } } else { @@ -768,13 +784,14 @@ func (ht *HistoryRoTx) mergeFiles(ctx context.Context, indexFiles, historyFiles key, _ := g.Next(nil) val, _ := g.Next(nil) heap.Push(&cp, &CursorItem{ - t: FILE_CURSOR, - dg: g, - dg2: g2, - key: key, - val: val, - endTxNum: item.endTxNum, - reverse: false, + t: FILE_CURSOR, + dg: g, + dg2: g2, + key: key, + val: val, + startTxNum: item.startTxNum, + endTxNum: item.endTxNum, + reverse: false, }) } } @@ -790,7 +807,7 @@ func (ht *HistoryRoTx) mergeFiles(ctx context.Context, indexFiles, historyFiles // Advance all the items that have this key (including the top) for cp.Len() > 0 && bytes.Equal(cp[0].key, lastKey) { ci1 := heap.Pop(&cp).(*CursorItem) - count := eliasfano32.Count(ci1.val) + count := multiencseq.Count(ci1.startTxNum, ci1.val) for i := uint64(0); i < count; i++ { if !ci1.dg2.HasNext() { panic(fmt.Errorf("assert: no value??? %s, i=%d, count=%d, lastKey=%x, ci1.key=%x", ci1.dg2.FileName(), i, count, lastKey, ci1.key)) @@ -853,10 +870,10 @@ func (ht *HistoryRoTx) mergeFiles(ctx context.Context, indexFiles, historyFiles for g.HasNext() { keyBuf, _ = g.Next(nil) valBuf, _ = g.Next(nil) - ef, _ := eliasfano32.ReadEliasFano(valBuf) - efIt := ef.Iterator() - for efIt.HasNext() { - txNum, err := efIt.Next() + seq := multiencseq.ReadMultiEncSeq(indexIn.startTxNum, valBuf) + it := seq.Iterator(0) + for it.HasNext() { + txNum, err := it.Next() if err != nil { return nil, nil, err } diff --git a/erigon-lib/state/merge_test.go b/erigon-lib/state/merge_test.go index 794514f5f2e..a1f7d86feda 100644 --- a/erigon-lib/state/merge_test.go +++ b/erigon-lib/state/merge_test.go @@ -485,7 +485,7 @@ func Test_mergeEliasFano(t *testing.T) { require.Contains(t, secondList, int(v)) } - menc, err := mergeEfs(firstBytes, secondBytes, nil) + menc, err := mergeNumSeqs(firstBytes, secondBytes, 0, 0, nil, 0) require.NoError(t, err) merged, _ := eliasfano32.ReadEliasFano(menc) diff --git a/turbo/cli/flags.go b/turbo/cli/flags.go index f0502679cc3..b7d00709264 100644 --- a/turbo/cli/flags.go +++ b/turbo/cli/flags.go @@ -266,6 +266,12 @@ var ( Usage: "How often transactions should be committed to the storage", Value: txpoolcfg.DefaultConfig.CommitEvery, } + + CsvOutput = cli.StringFlag{ + Name: "csv", + Usage: "Output statistics to a CSV file", + Value: "idx_stat.csv", + } ) func ApplyFlagsForEthConfig(ctx *cli.Context, cfg *ethconfig.Config, logger log.Logger) {