diff --git a/src/config/server.ts b/src/config/server.ts index 2739d40cb..53a883b6e 100644 --- a/src/config/server.ts +++ b/src/config/server.ts @@ -382,6 +382,7 @@ const SERVER_CONFIG: StrictServerConfiguration = { checkDestLimits: true, checkDestLimitCount: 5, globalAccountsReceiptInitiationTimeout: 5000, // 5 seconds default timeout + enableLocalStateConsistencyReportOnExit: false, // Enable printing consistency report to exit log }, sharding: { nodesPerConsensusGroup: 5, nodesPerEdge: 2, executeInOneShard: false }, mode: ServerMode.Release, diff --git a/src/exit-handler/index.ts b/src/exit-handler/index.ts index 3463c89b4..5eaa0670e 100644 --- a/src/exit-handler/index.ts +++ b/src/exit-handler/index.ts @@ -28,6 +28,7 @@ interface ExitHandler { activeStartTime: number lastActiveTime: number lastRotationIndex: { idx: number; total: number } + stateManager?: any // Reference to StateManager for consistency report } class ExitHandler { @@ -94,7 +95,7 @@ class ExitHandler { this.exited = true this._cleanupSync() try { - this.runExitLog(true, exitType, message) + await this.runExitLog(true, exitType, message) await this._cleanupAsync() } catch (e) { console.error(e) @@ -109,7 +110,7 @@ class ExitHandler { this._cleanupSync() try { - this.runExitLog(false, exitType, message) + await this.runExitLog(false, exitType, message) await this._cleanupAsync() } catch (e) { console.error(e) @@ -118,7 +119,7 @@ class ExitHandler { process.exit(1) // exiting with status 1 causes our modified PM2 to not restart the process } - runExitLog(isCleanExit: boolean, exitType: string, msg: string) { + async runExitLog(isCleanExit: boolean, exitType: string, msg: string) { this.exitLogger.fatal(`isCleanExit: ${isCleanExit} exitType: ${exitType} msg: ${msg}`) let log: string[] = [] const fakeStream = { @@ -148,6 +149,51 @@ class ExitHandler { profilerInstance.scopedProfileSectionEnd('counts') this.exitLogger.fatal(log.join('')) + // Add consistency report to exit log if enabled + if ( + this.stateManager && + this.stateManager.config && + this.stateManager.config.stateManager && + this.stateManager.config.stateManager.enableLocalStateConsistencyReportOnExit + ) { + this.exitLogger.fatal('=== LOCAL STATE CONSISTENCY REPORT (EXIT) ===') + + try { + // Generate the full consistency report synchronously for exit log + if (this.stateManager.accountPatcher) { + const report = await this.stateManager.accountPatcher.localStateConsistencyReport({ + recordsPerSecond: 1000, // Higher speed for exit log + consensusRangeOnly: false, + }) + + // Write summary to exit log + this.exitLogger.fatal(`Total accounts processed: ${report.summary.totalAccounts}`) + this.exitLogger.fatal(`Fully matching accounts: ${report.summary.fullyMatching}`) + + if (report.summary.totalAccounts > 0) { + const totalMismatches = report.summary.totalAccounts - report.summary.fullyMatching + if (totalMismatches > 0) { + this.exitLogger.fatal(`Total mismatches found: ${totalMismatches}`) + this.exitLogger.fatal(`Cache-Trie hash matches: ${report.summary.cth}/${report.summary.totalAccounts}`) + this.exitLogger.fatal( + `Cache-Storage timestamp matches: ${report.summary.cst}/${report.summary.totalAccounts}` + ) + this.exitLogger.fatal(`Cache-Storage hash matches: ${report.summary.csh}/${report.summary.totalAccounts}`) + this.exitLogger.fatal(`Trie-Storage hash matches: ${report.summary.tsh}/${report.summary.totalAccounts}`) + } else { + this.exitLogger.fatal('All accounts are fully consistent across cache, trie, and storage!') + } + } + } else { + this.exitLogger.fatal('AccountPatcher not available for exit report') + } + } catch (error) { + this.exitLogger.fatal(`Error generating exit consistency report: ${error}`) + } + + this.exitLogger.fatal('=== END STATE CONSISTENCY REPORT ===') + } + this.writeExitSummary(isCleanExit, exitType, msg) } diff --git a/src/shardus/shardus-types.ts b/src/shardus/shardus-types.ts index a1711b797..51d8eebfb 100644 --- a/src/shardus/shardus-types.ts +++ b/src/shardus/shardus-types.ts @@ -1426,7 +1426,9 @@ export interface ServerConfiguration { // how many times can this destination address show up in the queue before we avoid sending to it checkDestLimitCount: number // timeout for global accounts receipt initiation - globalAccountsReceiptInitiationTimeout?: number + globalAccountsReceiptInitiationTimeout: number + /** Enable printing local state consistency report to exit log */ + enableLocalStateConsistencyReportOnExit?: boolean } /** Options for sharding calculations */ sharding?: { diff --git a/src/state-manager/AccountPatcher.ts b/src/state-manager/AccountPatcher.ts index 7f99c8932..cc403abb4 100644 --- a/src/state-manager/AccountPatcher.ts +++ b/src/state-manager/AccountPatcher.ts @@ -1714,6 +1714,100 @@ class AccountPatcher { res.write(`1.0.1\n`) res.end() }) + + /** + * Local State Consistency Report endpoint + * + * Usage: http://:/local-state-consistency?recordsPerSecond=50&summaryOnly=false&onlyMismatch=true + */ + Context.network.registerExternalGet('local-state-consistency', isDebugModeMiddleware, async (req, res) => { + try { + const recordsPerSecond = parseInt(req.query.recordsPerSecond as string) || 50 + const summaryOnly = (req.query.summaryOnly as string) === 'true' + const onlyMismatch = (req.query.onlyMismatch as string) !== 'false' // default true + + res.write(`Starting local state consistency report...\n`) + res.write(`Records per second: ${recordsPerSecond}\n`) + res.write(`Summary only: ${summaryOnly}\n`) + res.write(`Only mismatches: ${onlyMismatch}\n\n`) + + const startTime = shardusGetTime() + const report = await this.localStateConsistencyReport({ + recordsPerSecond, + consensusRangeOnly: false, // Phase 1: not used + }) + const endTime = shardusGetTime() + const totalTimeMs = endTime - startTime + + // Always write summary + res.write('=== SUMMARY ===\n') + res.write(`Total execution time: ${totalTimeMs}ms\n`) + res.write(`Total accounts processed: ${report.summary.totalAccounts}\n`) + res.write(`Fully matching accounts: ${report.summary.fullyMatching}\n`) + res.write(`Cache-Trie hash matches: ${report.summary.cth}\n`) + res.write(`Cache-Storage timestamp matches: ${report.summary.cst}\n`) + res.write(`Cache-Storage hash matches: ${report.summary.csh}\n`) + res.write(`Trie-Storage hash matches: ${report.summary.tsh}\n\n`) + + res.write('=== CHUNK PERFORMANCE ===\n') + for (const chunk of report.summary.chunks) { + if (chunk.recordsProcessed > 0) { + res.write( + `Chunk ${chunk.chunkIndex}: ${chunk.recordsProcessed} records in ${ + chunk.timeSpentMs + }ms (${chunk.low.substring(0, 4)}...)\n` + ) + } + } + res.write('\n') + + if (!summaryOnly && report.accounts) { + res.write('=== ACCOUNT DETAILS ===\n') + + let accountsToShow = report.accounts + if (onlyMismatch) { + accountsToShow = report.accounts.filter( + (account) => !account.cth || !account.cst || !account.csh || !account.tsh + ) + } + + if (accountsToShow.length === 0) { + res.write('No accounts to display (all match or no mismatches found)\n') + } else { + res.write(`Showing ${accountsToShow.length} accounts:\n\n`) + for (const account of accountsToShow) { + res.write(`Account: ${account.accountId}\n`) + if (account.cache) { + res.write( + ` Cache: hash=${account.cache.hash.substring(0, 8)}... timestamp=${account.cache.timestamp}\n` + ) + } else { + res.write(` Cache: [missing]\n`) + } + if (account.trie) { + res.write(` Trie: hash=${account.trie.hash.substring(0, 8)}...\n`) + } else { + res.write(` Trie: [missing]\n`) + } + if (account.storage) { + res.write( + ` Storage: hash=${account.storage.hash.substring(0, 8)}... timestamp=${account.storage.timestamp}\n` + ) + } else { + res.write(` Storage: [missing]\n`) + } + res.write(` Matches: cth=${account.cth} cst=${account.cst} csh=${account.csh} tsh=${account.tsh}\n\n`) + } + } + } + + res.write('=== REPORT COMPLETE ===\n') + } catch (error) { + res.write(`Error generating consistency report: ${error}\n`) + /* prettier-ignore */ if (logFlags.error) this.mainLogger.error(`local-state-consistency endpoint error: ${error}`) + } + res.end() + }) } getAccountTreeInfo(accountID: string): TrieAccount { @@ -4608,6 +4702,273 @@ class AccountPatcher { minVotes = Math.max(1, minVotes) return minVotes } + + /** + * Generate 256 address range chunks for iteration + * Each chunk covers 1/256th of the address space using the first two hex digits + */ + generateAddressChunks(): { low: string; high: string }[] { + const chunks: { low: string; high: string }[] = [] + + for (let i = 0; i < 256; i++) { + const prefix = i.toString(16).padStart(2, '0') + const low = prefix + '0'.repeat(62) + const high = prefix + 'f'.repeat(62) + chunks.push({ low, high }) + } + + return chunks + } + + /** + * Perform exhaustive analysis of accountsCache, sharded hash trie state, and stored state + * Phase 1: Full address range, non-rotating nodes only + */ + async localStateConsistencyReport(options: { + recordsPerSecond: number + consensusRangeOnly?: boolean // not used in phase 1 + }): Promise<{ + summary: { + totalAccounts: number + fullyMatching: number + cth: number // cache-trie hash matches + cst: number // cache-storage timestamp matches + csh: number // cache-storage hash matches + tsh: number // trie-storage hash matches + chunks: Array<{ + chunkIndex: number + recordsProcessed: number + timeSpentMs: number + low: string + high: string + }> + } + accounts?: Array<{ + accountId: string + cache?: { hash: string; timestamp: number } + trie?: { hash: string } + storage?: { hash: string; timestamp: number } + cth: boolean + cst: boolean + csh: boolean + tsh: boolean + }> + }> { + const chunks = this.generateAddressChunks() + + // Initialize summary counters + const summary = { + totalAccounts: 0, + fullyMatching: 0, + cth: 0, // cache-trie hash matches + cst: 0, // cache-storage timestamp matches + csh: 0, // cache-storage hash matches + tsh: 0, // trie-storage hash matches + chunks: [] as Array<{ + chunkIndex: number + recordsProcessed: number + timeSpentMs: number + low: string + high: string + }>, + } + + const allAccountResults: Array<{ + accountId: string + cache?: { hash: string; timestamp: number } + trie?: { hash: string } + storage?: { hash: string; timestamp: number } + cth: boolean + cst: boolean + csh: boolean + tsh: boolean + }> = [] + + const minWaitTime = 10 // minimum wait time in ms + + // Process each chunk + for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) { + const chunk = chunks[chunkIndex] + const chunkStartTime = shardusGetTime() + + // Maps to store data from each structure for this chunk + const cacheData = new Map() + const trieData = new Map() + const storageData = new Map() + const allAccountIds = new Set() + + // 1. Collect data from cache for this chunk range + // Iterate through accountsCache + for (const [accountId, accountHistory] of this.stateManager.accountCache.accountsHashCache3.accountHashMap) { + if (accountId >= chunk.low && accountId <= chunk.high) { + if (accountHistory.accountHashList.length > 0) { + const latestEntry = accountHistory.accountHashList[0] // newest is at index 0 + cacheData.set(accountId, { hash: latestEntry.h, timestamp: latestEntry.t }) + allAccountIds.add(accountId) + } + } + } + + // 2. Collect data from trie for this chunk range + // For each chunk (2 chars), we need to check all trie nodes that fall within that range + // Since treeMaxDepth=4, we need to check all combinations of the remaining 2 characters + const chunkPrefix2 = chunk.low.substring(0, 2) // First 2 chars of chunk (e.g., "00") + + if (this.treeMaxDepth >= 2) { + // Generate all possible radix combinations for this chunk + const remainingDepth = this.treeMaxDepth - 2 + const numCombinations = Math.pow(16, remainingDepth) + + for (let i = 0; i < numCombinations; i++) { + // Convert i to hex with appropriate padding + const suffix = i.toString(16).padStart(remainingDepth, '0') + const fullRadix = chunkPrefix2 + suffix + + const trieNode = this.shardTrie.layerMaps[this.treeMaxDepth].get(fullRadix) + + if (trieNode) { + // Prefer accountTempMap for performance, fall back to accounts array + if (trieNode.accountTempMap && trieNode.accountTempMap.size > 0) { + for (const [accountId, trieAccount] of trieNode.accountTempMap) { + if (accountId >= chunk.low && accountId <= chunk.high) { + trieData.set(accountId, { hash: trieAccount.hash }) + allAccountIds.add(accountId) + } + } + } else if (trieNode.accounts && trieNode.accounts.length > 0) { + for (const trieAccount of trieNode.accounts) { + if (trieAccount.accountID >= chunk.low && trieAccount.accountID <= chunk.high) { + trieData.set(trieAccount.accountID, { hash: trieAccount.hash }) + allAccountIds.add(trieAccount.accountID) + } + } + } + } + } + } else { + // If treeMaxDepth < 2, just use the chunk prefix directly + const trieNode = this.shardTrie.layerMaps[this.treeMaxDepth].get(chunkPrefix2.substring(0, this.treeMaxDepth)) + + if (trieNode) { + // Prefer accountTempMap for performance, fall back to accounts array + if (trieNode.accountTempMap && trieNode.accountTempMap.size > 0) { + for (const [accountId, trieAccount] of trieNode.accountTempMap) { + if (accountId >= chunk.low && accountId <= chunk.high) { + trieData.set(accountId, { hash: trieAccount.hash }) + allAccountIds.add(accountId) + } + } + } else if (trieNode.accounts && trieNode.accounts.length > 0) { + for (const trieAccount of trieNode.accounts) { + if (trieAccount.accountID >= chunk.low && trieAccount.accountID <= chunk.high) { + trieData.set(trieAccount.accountID, { hash: trieAccount.hash }) + allAccountIds.add(trieAccount.accountID) + } + } + } + } + } + + // 3. Collect data from storage for this chunk range + try { + const storedAccounts = await this.app.getAccountDataByRange( + chunk.low, + chunk.high, + 0, // tsStart + shardusGetTime(), // tsEnd + 10000, // maxRecords - large number for chunk + 0, // offset + '' // accountOffset + ) + + for (const account of storedAccounts) { + storageData.set(account.accountId, { + hash: account.stateId, + timestamp: account.timestamp, + }) + allAccountIds.add(account.accountId) + } + } catch (error) { + /* prettier-ignore */ if (logFlags.error) this.mainLogger.error(`localStateConsistencyReport: Error fetching storage data for chunk ${chunkIndex}: ${error}`) + } + + // 4. Compare data across structures for each account + let chunkRecordsProcessed = 0 + for (const accountId of allAccountIds) { + const cache = cacheData.get(accountId) + const trie = trieData.get(accountId) + const storage = storageData.get(accountId) + + // Calculate comparison flags + const cth = cache && trie ? cache.hash === trie.hash : false + const cst = cache && storage ? cache.timestamp === storage.timestamp : false + const csh = cache && storage ? cache.hash === storage.hash : false + const tsh = trie && storage ? trie.hash === storage.hash : false + + // Update summary counters + summary.totalAccounts++ + if (cth) summary.cth++ + if (cst) summary.cst++ + if (csh) summary.csh++ + if (tsh) summary.tsh++ + + // Check if fully matching (all present and all hash matches) + if (cache && trie && storage && cth && cst && csh && tsh) { + summary.fullyMatching++ + } + + // Store account result + allAccountResults.push({ + accountId, + cache, + trie, + storage, + cth, + cst, + csh, + tsh, + }) + + chunkRecordsProcessed++ + } + + const chunkEndTime = shardusGetTime() + const chunkTimeSpent = chunkEndTime - chunkStartTime + + // Record chunk stats + summary.chunks.push({ + chunkIndex, + recordsProcessed: chunkRecordsProcessed, + timeSpentMs: chunkTimeSpent, + low: chunk.low, + high: chunk.high, + }) + + // Rate limiting logic + if (chunkRecordsProcessed > 0) { + const recordsPerMs = chunkRecordsProcessed / chunkTimeSpent + const targetRecordsPerMs = options.recordsPerSecond / 1000 + + if (recordsPerMs > targetRecordsPerMs) { + // We're going too fast, calculate additional wait time + const idealTimeMs = chunkRecordsProcessed / targetRecordsPerMs + const additionalWaitMs = Math.max(minWaitTime, idealTimeMs - chunkTimeSpent) + await new Promise((resolve) => setTimeout(resolve, additionalWaitMs)) + } else { + // We're going slower than target, just wait the minimum + await new Promise((resolve) => setTimeout(resolve, minWaitTime)) + } + } else { + // No records processed, wait minimum time + await new Promise((resolve) => setTimeout(resolve, minWaitTime)) + } + } + + return { + summary, + accounts: allAccountResults, + } + } } type BadAccountStats = { diff --git a/src/state-manager/index.ts b/src/state-manager/index.ts index e6a2b8500..996e809a1 100644 --- a/src/state-manager/index.ts +++ b/src/state-manager/index.ts @@ -334,6 +334,71 @@ class StateManager { this.accountPatcher = new AccountPatcher(this, profiler, app, logger, p2p, crypto, config) this.cachedAppDataManager = new CachedAppDataManager(this, profiler, app, logger, crypto, p2p, config) + // Register exit handler for local state consistency report (only if enabled) + if (config.stateManager.enableLocalStateConsistencyReportOnExit) { + // Set reference to StateManager for exit handler access + this.shardus.exitHandler.stateManager = this + + this.shardus.exitHandler.registerAsync('stateConsistencyReport', async () => { + try { + /* prettier-ignore */ if (logFlags.console) console.log('Generating local state consistency report for exit log...') + const startTime = shardusGetTime() + const report = await this.accountPatcher.localStateConsistencyReport({ + recordsPerSecond: 1000, // Higher speed for exit log + consensusRangeOnly: false, + }) + const endTime = shardusGetTime() + + // Log summary to exit log - focus on differences only + this.logger.getLogger('main').info('=== LOCAL STATE CONSISTENCY REPORT (EXIT) ===') + this.logger.getLogger('main').info(`Total execution time: ${endTime - startTime}ms`) + this.logger.getLogger('main').info(`Total accounts processed: ${report.summary.totalAccounts}`) + this.logger.getLogger('main').info(`Fully matching accounts: ${report.summary.fullyMatching}`) + + if (report.summary.totalAccounts > 0) { + const totalMismatches = report.summary.totalAccounts - report.summary.fullyMatching + if (totalMismatches > 0) { + this.logger.getLogger('main').info(`Total mismatches found: ${totalMismatches}`) + this.logger + .getLogger('main') + .info(`Cache-Trie hash matches: ${report.summary.cth}/${report.summary.totalAccounts}`) + this.logger + .getLogger('main') + .info(`Cache-Storage timestamp matches: ${report.summary.cst}/${report.summary.totalAccounts}`) + this.logger + .getLogger('main') + .info(`Cache-Storage hash matches: ${report.summary.csh}/${report.summary.totalAccounts}`) + this.logger + .getLogger('main') + .info(`Trie-Storage hash matches: ${report.summary.tsh}/${report.summary.totalAccounts}`) + + // Log a few example mismatched accounts (first 5) + if (report.accounts) { + const mismatchedAccounts = report.accounts + .filter((account) => !account.cth || !account.cst || !account.csh || !account.tsh) + .slice(0, 5) + + for (const account of mismatchedAccounts) { + this.logger + .getLogger('main') + .info( + `Mismatch example: ${account.accountId.substring(0, 16)}... - Matches: cth=${account.cth} cst=${ + account.cst + } csh=${account.csh} tsh=${account.tsh}` + ) + } + } + } else { + this.logger.getLogger('main').info('All accounts are fully consistent across cache, trie, and storage!') + } + } + this.logger.getLogger('main').info('=== END STATE CONSISTENCY REPORT ===') + } catch (error) { + this.logger.getLogger('main').error(`Error generating exit consistency report: ${error}`) + } + }) + } + // feature controls. // this.oldFeature_TXHashsetTest = true // this.oldFeature_GeneratePartitionReport = false diff --git a/test/unit/src/network/debugMiddleware.test.ts b/test/unit/src/network/debugMiddleware.test.ts index 267a04c4d..95201f2af 100644 --- a/test/unit/src/network/debugMiddleware.test.ts +++ b/test/unit/src/network/debugMiddleware.test.ts @@ -446,6 +446,7 @@ describe('debugMiddleware', () => { // Use proper DevSecurityLevel values const mockPublicKeys = { owner1: DevSecurityLevel.High } mockGetDevPublicKeys.mockReturnValue(mockPublicKeys) + mockGetPublicKey.mockReturnValue('abcd1234') // Return full public key mockHash.mockReturnValue('hash-value') mockSafeStringify.mockReturnValue('{"stringified":"payload"}') mockVerify.mockReturnValue(false) diff --git a/test/unit/state-manager/AccountPatcher.test.ts b/test/unit/state-manager/AccountPatcher.test.ts index c83df4b08..c5b3435fb 100644 --- a/test/unit/state-manager/AccountPatcher.test.ts +++ b/test/unit/state-manager/AccountPatcher.test.ts @@ -540,7 +540,7 @@ describe('AccountPatcher', () => { ) // Verify it was called multiple times - expect(Context.network.registerExternalGet).toHaveBeenCalledTimes(14) + expect(Context.network.registerExternalGet).toHaveBeenCalledTimes(15) }) }) })