diff --git a/Dockerfile b/Dockerfile
index df83d02d7e13..0587df693719 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,7 +15,10 @@ RUN cd /go-ethereum && go mod download
 ADD . /go-ethereum
 RUN cd /go-ethereum && go run build/ci.go install -static ./cmd/geth
 
-RUN go install github.com/go-delve/delve/cmd/dlv@latest
+# Pin delve to a version that still supports Go 1.21. Delve v1.26.0+
+# raised the minimum Go requirement to 1.24 and breaks this image build
+# otherwise. Bump this together with the golang:1.21-alpine base above.
+RUN go install github.com/go-delve/delve/cmd/dlv@v1.22.1
 
 # Pull Geth into a second stage deploy alpine container
 FROM alpine:latest
diff --git a/cmd/geth/snapshot.go b/cmd/geth/snapshot.go
index 39bef1f2d352..a51049d61929 100644
--- a/cmd/geth/snapshot.go
+++ b/cmd/geth/snapshot.go
@@ -74,6 +74,41 @@ WARNING: It's necessary to delete the trie clean cache after the pruning.
 If you specify another directory for the trie clean cache via "--cache.trie.journal"
 during the use of Geth, please also specify it here for correct deletion. Otherwise
 the trie clean cache with default directory will be deleted.
+`,
+		},
+		{
+			Name:      "prune-block",
+			Usage:     "Prune stale ancient block data to reclaim disk space",
+			ArgsUsage: "",
+			Action:    pruneBlock,
+			Flags: flags.Merge([]cli.Flag{
+				utils.BlockAmountReservedFlag,
+			}, utils.NetworkFlags, utils.DatabasePathFlags),
+			Description: `
+geth snapshot prune-block
+
+will prune historical block data (headers, bodies, receipts, total difficulty
+and canonical hashes) from the freezer / ancient store, keeping only the most
+recent blocks specified by --block-amount-reserved.
+
+This command is intended for validator nodes and other non-RPC-serving full
+nodes that do not need to answer historical queries. After pruning, the node
+is still able to produce and validate new blocks, but requests for pruned
+historical data (eth_getBlockByNumber with an old number, eth_getTransactionByHash
+for a pruned transaction, etc.) will fail.
+
+The default reserved window is 1,000,000 blocks (~35 days at 3s/block on KCC
+mainnet). The minimum allowed value is 100,000 blocks, chosen to safely cover
+the KCC POSA consensus layer's worst-case header walk-back depth; lowering
+this bound would risk breaking the validator's ability to verify new headers
+after restart.
+
+WARNING:
+  - This command must be run with geth fully stopped.
+  - The operation advances the freezer tail in place; it is idempotent and
+    can be re-run later with a smaller reserved window to prune further.
+  - Do not run this on an archive node, a node serving public RPC, or a block
+    explorer backend.
 `,
 		},
 		{
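For reviewers, a typical invocation of the new subcommand looks like this (the datadir path is illustrative; the node must be fully stopped first):

```sh
# Stop geth, then prune, keeping the default 1,000,000-block window.
geth snapshot prune-block \
    --datadir /var/lib/kcc \
    --block-amount-reserved 1000000
```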
@@ -194,6 +229,34 @@ func pruneState(ctx *cli.Context) error {
 	return nil
 }
 
+// pruneBlock is the entry point for the `geth snapshot prune-block` command.
+// It opens the chain database in write mode and advances the ancient store
+// tail so that only the last --block-amount-reserved blocks remain in the
+// freezer.
+func pruneBlock(ctx *cli.Context) error {
+	if ctx.NArg() > 0 {
+		log.Error("Unexpected positional arguments")
+		return errors.New("prune-block does not accept positional arguments")
+	}
+	stack, _ := makeConfigNode(ctx)
+	defer stack.Close()
+
+	chaindb := utils.MakeChainDatabase(ctx, stack, false)
+	defer chaindb.Close()
+
+	reserved := ctx.Uint64(utils.BlockAmountReservedFlag.Name)
+	bp, err := pruner.NewBlockPruner(chaindb, reserved)
+	if err != nil {
+		log.Error("Failed to create block pruner", "err", err)
+		return err
+	}
+	if err := bp.Prune(); err != nil {
+		log.Error("Failed to prune blocks", "err", err)
+		return err
+	}
+	return nil
+}
+
 func verifyState(ctx *cli.Context) error {
 	stack, _ := makeConfigNode(ctx)
 	defer stack.Close()
diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go
index 99186fdc3966..bf49a3599e0c 100644
--- a/cmd/utils/flags.go
+++ b/cmd/utils/flags.go
@@ -254,6 +254,12 @@ var (
 		Value:    2048,
 		Category: flags.EthCategory,
 	}
+	BlockAmountReservedFlag = &cli.Uint64Flag{
+		Name:     "block-amount-reserved",
+		Usage:    "Number of recent blocks to retain during offline block pruning (minimum 100000; the default of 1000000 is ~35 days at 3s/block)",
+		Value:    1_000_000,
+		Category: flags.EthCategory,
+	}
 	OverrideTerminalTotalDifficulty = &flags.BigFlag{
 		Name:  "override.terminaltotaldifficulty",
 		Usage: "Manually specify TerminalTotalDifficulty, overriding the bundled setting",
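The heart of the new pruner below is a single piece of tail arithmetic. A minimal standalone sketch of the invariant (function name and numbers are illustrative only, not part of this change):

```go
package main

import "fmt"

// targetTail mirrors the tail computation in BlockPruner.Prune: keep blocks
// [newTail, head], but never advance past the number of frozen items, since
// the newest blocks still live in the key-value store rather than the freezer.
func targetTail(head, ancients, reserved uint64) uint64 {
	newTail := head - reserved + 1
	if newTail > ancients {
		newTail = ancients
	}
	return newTail
}

func main() {
	// Head at 2,000,000 with the most recent ~90k blocks not yet frozen,
	// using the default 1,000,000-block reserved window.
	fmt.Println(targetTail(2_000_000, 1_910_000, 1_000_000)) // 1000001
}
```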
diff --git a/core/state/pruner/block_pruner.go b/core/state/pruner/block_pruner.go
new file mode 100644
index 000000000000..0781cb3217d8
--- /dev/null
+++ b/core/state/pruner/block_pruner.go
@@ -0,0 +1,198 @@
+// Copyright 2024 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package pruner
+
+import (
+	"errors"
+	"fmt"
+	"time"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/params"
+)
+
+// MinBlockAmountReserved is the minimum number of recent blocks that must be
+// retained when performing offline block pruning.
+//
+// The KCC POSA engine reconstructs the validator snapshot by walking headers
+// backward from the current head. In the worst case it walks back up to
+// params.FullImmutabilityThreshold (90,000) headers before the "trusted
+// checkpoint" fallback in consensus/posa/posa.go kicks in. The walker also
+// needs to reach the next epoch boundary (epoch=100 on KCC mainnet) after
+// accumulating that many headers, so we add a small margin to be safe.
+//
+// If a user tries to prune more aggressively than this, the validator could
+// fail to verify incoming headers after restart, which would stop block
+// production. NewBlockPruner rejects any reserved value below this bound.
+const MinBlockAmountReserved uint64 = params.FullImmutabilityThreshold + 10000 // 100,000
+
+// BlockPruner is an offline tool that removes historical block data from the
+// ancient store (freezer) while keeping the most recent blocks.
+//
+// Unlike the state pruner, BlockPruner needs neither a state snapshot nor a
+// bloom filter: it simply advances the freezer tail using the in-place
+// TruncateTail primitive provided by rawdb.Freezer. The operation is
+// idempotent; running it twice at the same head with the same reserved
+// window is a no-op.
+//
+// BlockPruner is meant for validator nodes and other non-RPC-serving full
+// nodes. After pruning, the node cannot answer historical RPC queries for
+// blocks below the new tail (eth_getBlockByNumber, eth_getTransactionByHash,
+// etc.). It remains fully capable of producing and validating new blocks.
+type BlockPruner struct {
+	db             ethdb.Database
+	amountReserved uint64
+}
+
+// NewBlockPruner creates a BlockPruner that will keep the last amountReserved
+// blocks in the ancient store. The returned error is non-nil if amountReserved
+// is below MinBlockAmountReserved.
+func NewBlockPruner(db ethdb.Database, amountReserved uint64) (*BlockPruner, error) {
+	if amountReserved < MinBlockAmountReserved {
+		return nil, fmt.Errorf(
+			"block-amount-reserved %d is below the minimum safe value %d; "+
+				"lowering this bound may break KCC consensus (FullImmutabilityThreshold=%d)",
+			amountReserved, MinBlockAmountReserved, params.FullImmutabilityThreshold)
+	}
+	return &BlockPruner{
+		db:             db,
+		amountReserved: amountReserved,
+	}, nil
+}
+
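A guard-rail test for the constructor's lower bound could look like the following sketch (not part of this diff; it assumes the standard rawdb in-memory database is sufficient, since the bound check never touches disk):

```go
package pruner

import (
	"testing"

	"github.com/ethereum/go-ethereum/core/rawdb"
)

// Verifies that NewBlockPruner enforces MinBlockAmountReserved as documented.
func TestNewBlockPrunerReservedBound(t *testing.T) {
	db := rawdb.NewMemoryDatabase()
	if _, err := NewBlockPruner(db, MinBlockAmountReserved-1); err == nil {
		t.Fatal("expected reserved window below the minimum to be rejected")
	}
	if _, err := NewBlockPruner(db, MinBlockAmountReserved); err != nil {
		t.Fatalf("minimum reserved window should be accepted: %v", err)
	}
}
```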
+// Prune removes historical block data from the ancient store, keeping only
+// the most recent amountReserved blocks. It is safe to call on a database that
+// has been pruned before; in that case it only advances the tail further.
+//
+// The caller is responsible for ensuring that no other process is writing to
+// the database while Prune runs (i.e., geth must be stopped).
+func (p *BlockPruner) Prune() error {
+	start := time.Now()
+
+	// Resolve the current chain head from the key-value store. We use the
+	// head header rather than the head block because only the header is
+	// guaranteed to be present for all full nodes (the body may be missing
+	// on light-ish configurations).
+	headHash := rawdb.ReadHeadHeaderHash(p.db)
+	if headHash == (common.Hash{}) {
+		return errors.New("failed to read head header hash from database")
+	}
+	headNumber := rawdb.ReadHeaderNumber(p.db, headHash)
+	if headNumber == nil {
+		return fmt.Errorf("failed to read head header number for hash %s", headHash.Hex())
+	}
+	head := *headNumber
+
+	// Read the current ancient store state.
+	ancients, err := p.db.Ancients()
+	if err != nil {
+		return fmt.Errorf("failed to read ancients count: %w", err)
+	}
+	oldTail, err := p.db.Tail()
+	if err != nil {
+		return fmt.Errorf("failed to read ancient tail: %w", err)
+	}
+
+	// The chain must have at least amountReserved blocks for pruning to make
+	// any sense. Refuse to touch chains that are too young.
+	if head+1 < p.amountReserved {
+		return fmt.Errorf(
+			"chain head %d is below reserved window %d; nothing to prune",
+			head, p.amountReserved)
+	}
+
+	// Compute the new tail position. We want to keep the last amountReserved
+	// blocks of the chain. Since the most recent blocks are typically still
+	// in the kv store (not yet moved to ancient), the tail we can actually
+	// advance to is clipped to the number of frozen items.
+	newTail := head - p.amountReserved + 1
+	if newTail > ancients {
+		newTail = ancients
+	}
+
+	if newTail <= oldTail {
+		log.Info("Nothing to prune; ancient tail already at or ahead of target",
+			"currentTail", oldTail,
+			"target", newTail,
+			"ancients", ancients,
+			"head", head)
+		return nil
+	}
+
+	log.Info("Block pruning plan",
+		"head", head,
+		"ancients", ancients,
+		"currentTail", oldTail,
+		"newTail", newTail,
+		"blocksToDelete", newTail-oldTail,
+		"amountReserved", p.amountReserved)
+
+	// Perform the in-place tail truncation on the ancient store. This is a
+	// local operation that drops data files on disk once the truncated range
+	// spans an entire file (2 GiB per file by default), and hides partial
+	// files using the freezer's itemHidden metadata. Either way, reads for
+	// items below newTail will fail after this call.
+	if err := p.db.TruncateTail(newTail); err != nil {
+		return fmt.Errorf("failed to truncate ancient tail to %d: %w", newTail, err)
+	}
+	if err := p.db.Sync(); err != nil {
+		return fmt.Errorf("failed to sync ancient store after truncation: %w", err)
+	}
+	log.Info("Ancient tail truncated", "newTail", newTail, "blocksDeleted", newTail-oldTail)
+
+	// Make sure the transaction index tail is not pointing below the new
+	// ancient tail. Otherwise, when the node starts with --txlookuplimit, the
+	// background indexer would try to walk pruned block bodies and crash.
+	if tail := rawdb.ReadTxIndexTail(p.db); tail == nil || *tail < newTail {
+		rawdb.WriteTxIndexTail(p.db, newTail)
+		log.Info("Updated transaction index tail", "tail", newTail)
+	}
+
+	// Run a full-range LevelDB compaction. This is not strictly required for
+	// correctness -- the ancient truncation already reclaimed most of the
+	// disk space -- but it defragments the LSM tree and releases any lingering
+	// tombstones left by earlier key-value deletions. This mirrors the tail
+	// end of `snapshot prune-state` for consistency.
+	log.Info("Compacting database; this may take a while")
+	cstart := time.Now()
+	for b := 0x00; b <= 0xf0; b += 0x10 {
+		var (
+			rs = []byte{byte(b)}
+			re = []byte{byte(b + 0x10)}
+		)
+		if b == 0xf0 {
+			re = nil
+		}
+		log.Info("Compacting database",
+			"range", fmt.Sprintf("%#x-%#x", rs, re),
+			"elapsed", common.PrettyDuration(time.Since(cstart)))
+		if err := p.db.Compact(rs, re); err != nil {
+			log.Error("Database compaction failed", "err", err)
+			return err
+		}
+	}
+	log.Info("Database compaction finished", "elapsed", common.PrettyDuration(time.Since(cstart)))
+
+	log.Info("Block pruning successful",
+		"blocksDeleted", newTail-oldTail,
+		"newTail", newTail,
+		"elapsed", common.PrettyDuration(time.Since(start)))
+	return nil
+}
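After running the command, the result can be sanity-checked offline by reopening the database read-only and inspecting the freezer tail. A sketch under the assumption that the standard rawdb freezer-backed opener is available in this tree (paths and cache sizes are illustrative):

```go
package main

import (
	"fmt"
	"log"

	"github.com/ethereum/go-ethereum/core/rawdb"
)

func main() {
	// Point these at the node's actual chaindata / ancient directories;
	// the final "true" opens the database read-only.
	db, err := rawdb.NewLevelDBDatabaseWithFreezer(
		"/var/lib/kcc/geth/chaindata", 512, 512,
		"/var/lib/kcc/geth/chaindata/ancient", "", true)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	tail, _ := db.Tail()       // first retained ancient item
	frozen, _ := db.Ancients() // total number of frozen items
	fmt.Printf("ancient tail=%d, frozen items=%d\n", tail, frozen)
}
```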