From 935aa9d915f158d1c77a1432770b1d54e78e0f04 Mon Sep 17 00:00:00 2001 From: Francesco4203 <100074926+Francesco4203@users.noreply.github.com> Date: Mon, 11 Nov 2024 15:29:09 +0700 Subject: [PATCH] core, cmd, trie: pbss fix release v1.13.6 and v1.13.8 (#618) * trie: remove inconsistent trie nodes during sync in path mode (#28595) This fixes a database corruption issue that could occur during state healing. When sync is aborted while certain modifications were already committed, and a reorg occurs, the database would contain incorrect trie nodes stored by path. These nodes need to detected/deleted in order to obtain a complete and fully correct state after state healing. --------- Co-authored-by: Felix Lange * core, cmd, trie: fix the condition of pathdb initialization (#28718) Original problem was caused by #28595, where we made it so that as soon as we start to sync, the root of the disk layer is deleted. That is not wrong per se, but another part of the code uses the "presence of the root" as an init-check for the pathdb. And, since the init-check now failed, the code tried to re-initialize it which failed since a sync was already ongoing. The total impact being: after a state-sync has begun, if the node for some reason is is shut down, it will refuse to start up again, with the error message: `Fatal: Failed to register the Ethereum service: waiting for sync.`. This change also modifies how `geth removedb` works, so that the user is prompted for two things: `state data` and `ancient chain`. The former includes both the chaindb aswell as any state history stored in ancients. --------- Co-authored-by: Martin HS --------- Co-authored-by: rjl493456442 Co-authored-by: Felix Lange Co-authored-by: Martin HS --- cmd/ronin/dbcmd.go | 99 +++++++++-------- core/rawdb/ancient_scheme.go | 8 +- core/rawdb/ancient_utils.go | 12 +-- core/rawdb/database.go | 2 +- ethdb/dbtest/testsuite.go | 6 +- trie/sync.go | 192 ++++++++++++++++++++++----------- trie/sync_test.go | 125 ++++++++++++++++++++- trie/triedb/pathdb/database.go | 36 +++++-- 8 files changed, 350 insertions(+), 130 deletions(-) diff --git a/cmd/ronin/dbcmd.go b/cmd/ronin/dbcmd.go index 53a83b5031..1de4223294 100644 --- a/cmd/ronin/dbcmd.go +++ b/cmd/ronin/dbcmd.go @@ -248,60 +248,73 @@ WARNING: This is a low-level operation which may cause database corruption!`, func removeDB(ctx *cli.Context) error { stack, config := makeConfigNode(ctx) - // Remove the full node state database - path := stack.ResolvePath("chaindata") - if common.FileExist(path) { - confirmAndRemoveDB(path, "full node state database") - } else { - log.Info("Full node state database missing", "path", path) - } - // Remove the full node ancient database - path = config.Eth.DatabaseFreezer + // Resolve folder paths. + var ( + rootDir = stack.ResolvePath("chaindata") + ancientDir = config.Eth.DatabaseFreezer + ) switch { - case path == "": - path = filepath.Join(stack.ResolvePath("chaindata"), "ancient") - case !filepath.IsAbs(path): - path = config.Node.ResolvePath(path) - } - if common.FileExist(path) { - confirmAndRemoveDB(path, "full node ancient database") - } else { - log.Info("Full node ancient database missing", "path", path) - } - // Remove the light node database - path = stack.ResolvePath("lightchaindata") - if common.FileExist(path) { - confirmAndRemoveDB(path, "light node database") - } else { - log.Info("Light node database missing", "path", path) - } + case ancientDir == "": + ancientDir = filepath.Join(stack.ResolvePath("chaindata"), "ancient") + case !filepath.IsAbs(ancientDir): + ancientDir = config.Node.ResolvePath(ancientDir) + } + // Delete state data + statePaths := []string{rootDir, filepath.Join(ancientDir, rawdb.StateFreezerName)} + confirmAndRemoveDB(statePaths, "state data") + + // Delete ancient chain + chainPaths := []string{filepath.Join(ancientDir, rawdb.ChainFreezerName)} + confirmAndRemoveDB(chainPaths, "ancient chain") return nil } +// removeFolder deletes all files (not folders) inside the directory 'dir' (but +// not files in subfolders). +func removeFolder(dir string) { + filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { + // If we're at the top level folder, recurse into + if path == dir { + return nil + } + // Delete all the files, but not subfolders + if !info.IsDir() { + os.Remove(path) + return nil + } + return filepath.SkipDir + }) +} + // confirmAndRemoveDB prompts the user for a last confirmation and removes the -// folder if accepted. -func confirmAndRemoveDB(database string, kind string) { - confirm, err := prompt.Stdin.PromptConfirm(fmt.Sprintf("Remove %s (%s)?", kind, database)) +// list of folders if accepted. +func confirmAndRemoveDB(paths []string, kind string) { + msg := fmt.Sprintf("Location(s) of '%s': \n", kind) + for _, path := range paths { + msg += fmt.Sprintf("\t- %s\n", path) + } + fmt.Println(msg) + + confirm, err := prompt.Stdin.PromptConfirm(fmt.Sprintf("Remove '%s'?", kind)) switch { case err != nil: utils.Fatalf("%v", err) case !confirm: - log.Info("Database deletion skipped", "path", database) + log.Info("Database deletion skipped", "kind", kind, "paths", paths) default: - start := time.Now() - filepath.Walk(database, func(path string, info os.FileInfo, err error) error { - // If we're at the top level folder, recurse into - if path == database { - return nil + var ( + deleted []string + start = time.Now() + ) + for _, path := range paths { + if common.FileExist(path) { + removeFolder(path) + deleted = append(deleted, path) + } else { + log.Info("Folder is not existent", "path", path) } - // Delete all the files, but not subfolders - if !info.IsDir() { - os.Remove(path) - return nil - } - return filepath.SkipDir - }) - log.Info("Database successfully deleted", "path", database, "elapsed", common.PrettyDuration(time.Since(start))) + } + log.Info("Database successfully deleted", "kind", kind, "paths", deleted, "elapsed", common.PrettyDuration(time.Since(start))) } } diff --git a/core/rawdb/ancient_scheme.go b/core/rawdb/ancient_scheme.go index 0e6d4bea5a..5a173f4915 100644 --- a/core/rawdb/ancient_scheme.go +++ b/core/rawdb/ancient_scheme.go @@ -73,16 +73,16 @@ var chainFreezerNoSnappy = map[string]bool{ // The list of identifiers of ancient stores. It can split more in the futures. var ( - chainFreezerName = "chain" // the folder name of chain segment ancient store. - stateFreezerName = "state" // the folder name of reverse diff ancient store. + ChainFreezerName = "chain" // the folder name of chain segment ancient store. + StateFreezerName = "state" // the folder name of reverse diff ancient store. ) // freezers the collections of all builtin freezers. -var freezers = []string{chainFreezerName, stateFreezerName} +var freezers = []string{ChainFreezerName, StateFreezerName} // NewStateFreezer initializes the freezer for state history. func NewStateFreezer(ancientDir string, readOnly bool) (*ResettableFreezer, error) { return NewResettableFreezer( - filepath.Join(ancientDir, stateFreezerName), namespace, readOnly, + filepath.Join(ancientDir, StateFreezerName), namespace, readOnly, stateHistoryTableSize, stateFreezerNoSnappy) } diff --git a/core/rawdb/ancient_utils.go b/core/rawdb/ancient_utils.go index 2dd37d5b27..f0c0f234f1 100644 --- a/core/rawdb/ancient_utils.go +++ b/core/rawdb/ancient_utils.go @@ -82,14 +82,14 @@ func inspectFreezers(db ethdb.Database) ([]freezerInfo, error) { var infos []freezerInfo for _, freezer := range freezers { switch freezer { - case chainFreezerName: - info, err := inspect(chainFreezerName, chainFreezerNoSnappy, db) + case ChainFreezerName: + info, err := inspect(ChainFreezerName, chainFreezerNoSnappy, db) if err != nil { return nil, err } infos = append(infos, info) - case stateFreezerName: + case StateFreezerName: if ReadStateScheme(db) != PathScheme { log.Info("Skip inspecting state freezer", "reason", "state freezer is supported for PathScheme only") continue @@ -104,7 +104,7 @@ func inspectFreezers(db ethdb.Database) ([]freezerInfo, error) { } defer f.Close() - info, err := inspect(stateFreezerName, stateFreezerNoSnappy, f) + info, err := inspect(StateFreezerName, stateFreezerNoSnappy, f) if err != nil { return nil, err } @@ -128,9 +128,9 @@ func InspectFreezerTable(ancient string, freezerName string, tableName string, s ) switch freezerName { - case chainFreezerName: + case ChainFreezerName: path, tables = resolveChainFreezerDir(ancient), chainFreezerNoSnappy - case stateFreezerName: + case StateFreezerName: path, tables = filepath.Join(ancient, freezerName), stateFreezerNoSnappy default: return fmt.Errorf("unknown freezer, supported ones: %v", freezers) diff --git a/core/rawdb/database.go b/core/rawdb/database.go index 9ad0287d59..6bf5366ea2 100644 --- a/core/rawdb/database.go +++ b/core/rawdb/database.go @@ -173,7 +173,7 @@ func resolveChainFreezerDir(ancient string) string { // - chain freezer is not initialized // - it's legacy location, chain freezer is present in the root ancient folder - freezer := path.Join(ancient, chainFreezerName) + freezer := path.Join(ancient, ChainFreezerName) if !common.FileExist(freezer) { if !common.FileExist(ancient) { // The entire ancient store is not initialized, still use the sub diff --git a/ethdb/dbtest/testsuite.go b/ethdb/dbtest/testsuite.go index a2b7003c27..30cef82bec 100644 --- a/ethdb/dbtest/testsuite.go +++ b/ethdb/dbtest/testsuite.go @@ -272,9 +272,13 @@ func TestDatabaseSuite(t *testing.T, New func() ethdb.KeyValueStore) { b.Put([]byte("5"), nil) b.Delete([]byte("1")) b.Put([]byte("6"), nil) - b.Delete([]byte("3")) + + b.Delete([]byte("3")) // delete then put b.Put([]byte("3"), nil) + b.Put([]byte("7"), nil) // put then delete + b.Delete([]byte("7")) + if err := b.Write(); err != nil { t.Fatal(err) } diff --git a/trie/sync.go b/trie/sync.go index 195395f737..f7f45d3ae1 100644 --- a/trie/sync.go +++ b/trie/sync.go @@ -19,6 +19,7 @@ package trie import ( "errors" "fmt" + "sync" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/prque" @@ -98,10 +99,9 @@ func NewSyncPath(path []byte) SyncPath { // nodeRequest represents a scheduled or already in-flight trie node retrieval request. type nodeRequest struct { - hash common.Hash // Hash of the trie node to retrieve - path []byte // Merkle path leading to this node for prioritization - data []byte // Data content of the node, cached until all subtrees complete - deletes [][]byte // List of internal path segments for trie nodes to delete + hash common.Hash // Hash of the trie node to retrieve + path []byte // Merkle path leading to this node for prioritization + data []byte // Data content of the node, cached until all subtrees complete parent *nodeRequest // Parent state node referencing this entry deps int // Number of dependencies before allowed to commit this node @@ -128,37 +128,69 @@ type CodeSyncResult struct { Data []byte // Data content of the retrieved bytecode } +// nodeOp represents an operation upon the trie node. It can either represent a +// deletion to the specific node or a node write for persisting retrieved node. +type nodeOp struct { + owner common.Hash // identifier of the trie (empty for account trie) + path []byte // path from the root to the specified node. + blob []byte // the content of the node (nil for deletion) + hash common.Hash // hash of the node content (empty for node deletion) +} + +// isDelete indicates if the operation is a database deletion. +func (op *nodeOp) isDelete() bool { + return len(op.blob) == 0 +} + // syncMemBatch is an in-memory buffer of successfully downloaded but not yet // persisted data items. type syncMemBatch struct { - nodes map[string][]byte // In-memory membatch of recently completed nodes - hashes map[string]common.Hash // Hashes of recently completed nodes - deletes map[string]struct{} // List of paths for trie node to delete - codes map[common.Hash][]byte // In-memory membatch of recently completed codes + scheme string // State scheme identifier + nodes []nodeOp // In-memory batch of recently completed/deleted nodes + codes map[common.Hash][]byte // In-memory membatch of recently completed codes } // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes. -func newSyncMemBatch() *syncMemBatch { +func newSyncMemBatch(scheme string) *syncMemBatch { return &syncMemBatch{ - nodes: make(map[string][]byte), - hashes: make(map[string]common.Hash), - deletes: make(map[string]struct{}), - codes: make(map[common.Hash][]byte), + scheme: scheme, + codes: make(map[common.Hash][]byte), } } -// hasNode reports the trie node with specific path is already cached. -func (batch *syncMemBatch) hasNode(path []byte) bool { - _, ok := batch.nodes[string(path)] - return ok -} - // hasCode reports the contract code with specific hash is already cached. func (batch *syncMemBatch) hasCode(hash common.Hash) bool { _, ok := batch.codes[hash] return ok } +// addCode caches a contract code database write operation. +func (batch *syncMemBatch) addCode(hash common.Hash, code []byte) { + batch.codes[hash] = code +} + +// addNode caches a node database write operation. +func (batch *syncMemBatch) addNode(owner common.Hash, path []byte, blob []byte, hash common.Hash) { + batch.nodes = append(batch.nodes, nodeOp{ + owner: owner, + path: path, + blob: blob, + hash: hash, + }) +} + +// delNode caches a node database delete operation. +func (batch *syncMemBatch) delNode(owner common.Hash, path []byte) { + if batch.scheme != rawdb.PathScheme { + log.Error("Unexpected node deletion", "owner", owner, "path", path, "scheme", batch.scheme) + return // deletion is not supported in hash mode. + } + batch.nodes = append(batch.nodes, nodeOp{ + owner: owner, + path: path, + }) +} + // Sync is the main state trie synchronisation scheduler, which provides yet // unknown trie hashes to retrieve, accepts node data associated with said hashes // and reconstructs the trie step by step until all is done. @@ -194,7 +226,7 @@ func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallb ts := &Sync{ scheme: scheme, database: database, - membatch: newSyncMemBatch(), + membatch: newSyncMemBatch(scheme), nodeReqs: make(map[string]*nodeRequest), codeReqs: make(map[common.Hash]*codeRequest), queue: prque.New(nil), @@ -213,16 +245,18 @@ func (s *Sync) AddSubTrie(root common.Hash, path []byte, parent common.Hash, par if root == emptyRoot { return } - if s.membatch.hasNode(path) { - return - } if s.bloom == nil || s.bloom.Contains(root[:]) { // Bloom filter says this might be a duplicate, double check. // If database says yes, then at least the trie node is present // and we hold the assumption that it's NOT legacy contract code. owner, inner := ResolvePath(path) - if rawdb.HasTrieNode(s.database, owner, inner, root, s.scheme) { + exist, inconsistent := s.hasNode(owner, inner, root) + if exist { + // The entire subtrie is already present in the database. return + } else if inconsistent { + // There is a pre-existing node with the wrong hash in DB, remove it. + s.membatch.delNode(owner, inner) } // False positive, bump fault meter bloomFaultMeter.Mark(1) @@ -382,22 +416,32 @@ func (s *Sync) ProcessNode(result NodeSyncResult) error { } // Commit flushes the data stored in the internal membatch out to persistent -// storage, returning any occurred error. +// storage, returning any occurred error. The whole data set will be flushed +// in an atomic database batch. func (s *Sync) Commit(dbw ethdb.Batch) error { - // Flush the pending node writes into database batch. var ( account int storage int ) - for path, value := range s.membatch.nodes { - owner, inner := ResolvePath([]byte(path)) - if owner == (common.Hash{}) { - account += 1 + // Flush the pending node writes into database batch. + for _, op := range s.membatch.nodes { + if op.isDelete() { + // node deletion is only supported in path mode. + if op.owner == (common.Hash{}) { + rawdb.DeleteAccountTrieNode(dbw, op.path) + } else { + rawdb.DeleteStorageTrieNode(dbw, op.owner, op.path) + } + deletionGauge.Inc(1) } else { - storage += 1 + if op.owner == (common.Hash{}) { + account += 1 + } else { + storage += 1 + } + rawdb.WriteTrieNode(dbw, op.owner, op.path, op.hash, op.blob, s.scheme) } - rawdb.WriteTrieNode(dbw, owner, inner, s.membatch.hashes[path], value, s.scheme) - hash := s.membatch.hashes[path] + hash := op.hash if s.bloom != nil { s.bloom.Add(hash[:]) } @@ -405,13 +449,6 @@ func (s *Sync) Commit(dbw ethdb.Batch) error { accountNodeSyncedGauge.Inc(int64(account)) storageNodeSyncedGauge.Inc(int64(storage)) - // Flush the pending node deletes into the database batch. - // Please note that each written and deleted node has a - // unique path, ensuring no duplication occurs. - for path := range s.membatch.deletes { - owner, inner := ResolvePath([]byte(path)) - rawdb.DeleteTrieNode(dbw, owner, inner, common.Hash{} /* unused */, s.scheme) - } // Flush the pending code writes into database batch. for hash, value := range s.membatch.codes { rawdb.WriteCode(dbw, hash, value) @@ -421,7 +458,7 @@ func (s *Sync) Commit(dbw ethdb.Batch) error { } codeSyncedGauge.Inc(int64(len(s.membatch.codes))) - s.membatch = newSyncMemBatch() // reset the batch + s.membatch = newSyncMemBatch(s.scheme) // reset the batch return nil } @@ -489,12 +526,15 @@ func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) { // child as invalid. This is essential in the case of path mode // scheme; otherwise, state healing might overwrite existing child // nodes silently while leaving a dangling parent node within the - // range of this internal path on disk. This would break the - // guarantee for state healing. + // range of this internal path on disk and the persistent state + // ends up with a very weird situation that nodes on the same path + // are not inconsistent while they all present in disk. This property + // would break the guarantee for state healing. // // While it's possible for this shortNode to overwrite a previously // existing full node, the other branches of the fullNode can be - // retained as they remain untouched and complete. + // retained as they are not accessible with the new shortNode, and + // also the whole sub-trie is still untouched and complete. // // This step is only necessary for path mode, as there is no deletion // in hash mode at all. @@ -511,8 +551,7 @@ func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) { exists = rawdb.ExistsStorageTrieNode(s.database, owner, append(inner, key[:i]...)) } if exists { - req.deletes = append(req.deletes, key[:i]) - deletionGauge.Inc(1) + s.membatch.delNode(owner, append(inner, key[:i]...)) log.Debug("Detected dangling node", "owner", owner, "path", append(inner, key[:i]...)) } } @@ -532,6 +571,7 @@ func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) { } // Iterate over the children, and request all unknown ones requests := make([]*nodeRequest, 0, len(children)) + var batchMu sync.Mutex for _, child := range children { // Notify any external watcher of a new key/value node if req.callback != nil { @@ -548,20 +588,24 @@ func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) { } } } - // If the child references another node, resolve or schedule + // If the child references another node, resolve or schedule. + // We check all children concurrently. if node, ok := (child.node).(hashNode); ok { - // Try to resolve the node from the local database - if s.membatch.hasNode(child.path) { - continue - } - chash := common.BytesToHash(node) + path := child.path + hash := common.BytesToHash(node) if s.bloom == nil || s.bloom.Contains(node) { // Bloom filter says this might be a duplicate, double check. // If database says yes, then at least the trie node is present // and we hold the assumption that it's NOT legacy contract code. - owner, inner := ResolvePath(child.path) - if rawdb.HasTrieNode(s.database, owner, inner, chash, s.scheme) { + owner, inner := ResolvePath(path) + exist, inconsistent := s.hasNode(owner, inner, hash) + if exist { continue + } else if inconsistent { + // There is a pre-existing node with the wrong hash in DB, remove it. + batchMu.Lock() + s.membatch.delNode(owner, inner) + batchMu.Unlock() } // False positive, bump fault meter @@ -569,8 +613,8 @@ func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) { } // Locally unknown node, schedule for retrieval requests = append(requests, &nodeRequest{ - path: child.path, - hash: chash, + path: path, + hash: hash, parent: req, callback: req.callback, }) @@ -584,14 +628,10 @@ func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) { // committed themselves. func (s *Sync) commitNodeRequest(req *nodeRequest) error { // Write the node content to the membatch - s.membatch.nodes[string(req.path)] = req.data - s.membatch.hashes[string(req.path)] = req.hash + owner, path := ResolvePath(req.path) + s.membatch.addNode(owner, path, req.data, req.hash) - // Delete the internal nodes which are marked as invalid - for _, segment := range req.deletes { - path := append(req.path, segment...) - s.membatch.deletes[string(path)] = struct{}{} - } + // Removed the completed node request delete(s.nodeReqs, string(req.path)) s.fetches[len(req.path)]-- @@ -612,7 +652,9 @@ func (s *Sync) commitNodeRequest(req *nodeRequest) error { // committed themselves. func (s *Sync) commitCodeRequest(req *codeRequest) error { // Write the node content to the membatch - s.membatch.codes[req.hash] = req.data + s.membatch.addCode(req.hash, req.data) + + // Removed the completed code request delete(s.codeReqs, req.hash) s.fetches[len(req.path)]-- @@ -628,6 +670,28 @@ func (s *Sync) commitCodeRequest(req *codeRequest) error { return nil } +// hasNode reports whether the specified trie node is present in the database. +// 'exists' is true when the node exists in the database and matches the given root +// hash. The 'inconsistent' return value is true when the node exists but does not +// match the expected hash. +func (s *Sync) hasNode(owner common.Hash, path []byte, hash common.Hash) (exists bool, inconsistent bool) { + // If node is running with hash scheme, check the presence with node hash. + if s.scheme == rawdb.HashScheme { + return rawdb.HasLegacyTrieNode(s.database, hash), false + } + // If node is running with path scheme, check the presence with node path. + var blob []byte + var dbHash common.Hash + if owner == (common.Hash{}) { + blob, dbHash = rawdb.ReadAccountTrieNode(s.database, path) + } else { + blob, dbHash = rawdb.ReadStorageTrieNode(s.database, owner, path) + } + exists = hash == dbHash + inconsistent = !exists && len(blob) != 0 + return exists, inconsistent +} + // ResolvePath resolves the provided composite node path by separating the // path in account trie if it's existent. func ResolvePath(path []byte) (common.Hash, []byte) { diff --git a/trie/sync_test.go b/trie/sync_test.go index a07dbccd87..75eb5809a2 100644 --- a/trie/sync_test.go +++ b/trie/sync_test.go @@ -689,8 +689,11 @@ func testSyncOrdering(t *testing.T, scheme string) { } } } - func syncWith(t *testing.T, root common.Hash, db ethdb.Database, srcDb *Database) { + syncWithHookWriter(t, root, db, srcDb, nil) +} + +func syncWithHookWriter(t *testing.T, root common.Hash, db ethdb.Database, srcDb *Database, hookWriter ethdb.KeyValueWriter) { // Create a destination trie and sync with the scheduler sched := NewSync(root, db, nil, NewSyncBloom(1, db), srcDb.Scheme()) @@ -728,8 +731,11 @@ func syncWith(t *testing.T, root common.Hash, db ethdb.Database, srcDb *Database if err := sched.Commit(batch); err != nil { t.Fatalf("failed to commit data: %v", err) } - batch.Write() - + if hookWriter != nil { + batch.Replay(hookWriter) + } else { + batch.Write() + } paths, nodes, _ = sched.Missing(0) elements = elements[:0] for i := 0; i < len(paths); i++ { @@ -899,3 +905,116 @@ func testPivotMove(t *testing.T, scheme string, tiny bool) { syncWith(t, rootC, destDisk, srcTrieDB) checkTrieContents(t, destDisk, scheme, srcTrie.Hash().Bytes(), stateC, true) } + +func TestSyncAbort(t *testing.T) { + testSyncAbort(t, rawdb.PathScheme) + testSyncAbort(t, rawdb.HashScheme) +} + +type hookWriter struct { + db ethdb.KeyValueStore + filter func(key []byte, value []byte) bool +} + +// Put inserts the given value into the key-value data store. +func (w *hookWriter) Put(key []byte, value []byte) error { + if w.filter != nil && w.filter(key, value) { + return nil + } + return w.db.Put(key, value) +} + +// Delete removes the key from the key-value data store. +func (w *hookWriter) Delete(key []byte) error { + return w.db.Delete(key) +} + +func testSyncAbort(t *testing.T, scheme string) { + var ( + srcDisk = rawdb.NewMemoryDatabase() + srcTrieDB = newTestDatabase(srcDisk, scheme) + srcTrie, _ = New(TrieID(types.EmptyRootHash), srcTrieDB) + + deleteFn = func(key []byte, tr *Trie, states map[string][]byte) { + tr.Delete(key) + delete(states, string(key)) + } + writeFn = func(key []byte, val []byte, tr *Trie, states map[string][]byte) { + if val == nil { + val = randBytes(32) + } + tr.Update(key, val) + states[string(key)] = common.CopyBytes(val) + } + copyStates = func(states map[string][]byte) map[string][]byte { + cpy := make(map[string][]byte) + for k, v := range states { + cpy[k] = v + } + return cpy + } + ) + var ( + stateA = make(map[string][]byte) + key = randBytes(32) + val = randBytes(32) + ) + for i := 0; i < 256; i++ { + writeFn(randBytes(32), nil, srcTrie, stateA) + } + writeFn(key, val, srcTrie, stateA) + + rootA, nodesA, _ := srcTrie.Commit(false) + if err := srcTrieDB.Update(rootA, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodesA), nil); err != nil { + panic(err) + } + if err := srcTrieDB.Commit(rootA, false); err != nil { + panic(err) + } + // Create a destination trie and sync with the scheduler + destDisk := rawdb.NewMemoryDatabase() + syncWith(t, rootA, destDisk, srcTrieDB) + checkTrieContents(t, destDisk, scheme, srcTrie.Hash().Bytes(), stateA, true) + + // Delete the element from the trie + stateB := copyStates(stateA) + srcTrie, _ = New(TrieID(rootA), srcTrieDB) + deleteFn(key, srcTrie, stateB) + + rootB, nodesB, _ := srcTrie.Commit(false) + if err := srcTrieDB.Update(rootB, rootA, 0, trienode.NewWithNodeSet(nodesB), nil); err != nil { + panic(err) + } + if err := srcTrieDB.Commit(rootB, false); err != nil { + panic(err) + } + + // Sync the new state, but never persist the new root node. Before the + // fix #28595, the original old root node will still be left in database + // which breaks the next healing cycle. + syncWithHookWriter(t, rootB, destDisk, srcTrieDB, &hookWriter{db: destDisk, filter: func(key []byte, value []byte) bool { + if scheme == rawdb.HashScheme { + return false + } + if len(value) == 0 { + return false + } + ok, path := rawdb.ResolveAccountTrieNodeKey(key) + return ok && len(path) == 0 + }}) + + // Add elements to expand trie + stateC := copyStates(stateB) + srcTrie, _ = New(TrieID(rootB), srcTrieDB) + + writeFn(key, val, srcTrie, stateC) + rootC, nodesC, _ := srcTrie.Commit(false) + if err := srcTrieDB.Update(rootC, rootB, 0, trienode.NewWithNodeSet(nodesC), nil); err != nil { + panic(err) + } + if err := srcTrieDB.Commit(rootC, false); err != nil { + panic(err) + } + syncWith(t, rootC, destDisk, srcTrieDB) + checkTrieContents(t, destDisk, scheme, srcTrie.Hash().Bytes(), stateC, true) +} diff --git a/trie/triedb/pathdb/database.go b/trie/triedb/pathdb/database.go index ea5c50ab6a..7843f78de0 100644 --- a/trie/triedb/pathdb/database.go +++ b/trie/triedb/pathdb/database.go @@ -167,14 +167,31 @@ func New(diskdb ethdb.Database, config *Config) *Database { log.Crit("Failed to open state history freezer", "err", err) } - // Truncate the extra state histories above the current diskLayer - // in freezer in case it's not aligned with the disk layer. - pruned, err := truncateFromHead(db.diskdb, db.freezer, db.tree.bottom().stateID()) - if err != nil { - log.Crit("Failed to truncate state history freezer", "err", err) - } - if pruned > 0 { - log.Warn("Truncated extra state histories from freezer", "count", pruned) + diskLayerID := db.tree.bottom().stateID() + if diskLayerID == 0 { + // Reset the entire state histories in case the trie database is + // not initialized yet, as these state histories are not expected. + frozen, err := db.freezer.Ancients() + if err != nil { + log.Crit("Failed to retrieve head of state history", "err", err) + } + if frozen != 0 { + err := db.freezer.Reset() + if err != nil { + log.Crit("Failed to reset state histories", "err", err) + } + log.Info("Truncated extraneous state history") + } + } else { + // Truncate the extra state histories above in freezer in case + // it's not aligned with the disk layer. + pruned, err := truncateFromHead(db.diskdb, db.freezer, diskLayerID) + if err != nil { + log.Crit("Failed to truncate extra state histories", "err", err) + } + if pruned != 0 { + log.Warn("Truncated extra state histories", "number", pruned) + } } } // Disable database in case node is still in the initial state sync stage. @@ -410,6 +427,9 @@ func (db *Database) Initialized(genesisRoot common.Hash) bool { inited = true } }) + if !inited { + inited = rawdb.ReadSnapSyncStatusFlag(db.diskdb) != rawdb.StateSyncUnknown + } return inited }