Skip to content

Commit 607506e

Browse files
committed
add localStateConsistencyReport - phase 1
1 parent 1d6b52a commit 607506e

5 files changed

Lines changed: 341 additions & 4 deletions

File tree

src/config/server.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,8 @@ const SERVER_CONFIG: StrictServerConfiguration = {
242242
robustQueryDebug: false,
243243
forwardTXToSyncingNeighbors: false,
244244
recordAcceptedTx: false,
245-
recordAccountStates: false,
245+
recordAccountStates: true,
246+
printStateConsistencyOnExit: false, // whether to include local state consistency report in exit log
246247
useShardusMemoryPatterns: true,
247248
sanitizeInput: false,
248249
checkTxGroupChanges: true,

src/debug/debug.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import * as Context from '../p2p/Context'
44
import zlib from 'zlib'
55
import Trie from 'trie-prefix-tree'
66
import { isDebugModeMiddleware, isDebugModeMiddlewareMedium } from '../network/debugMiddleware'
7+
import { localStateConsistencyReport } from './stateConsistency'
78
import { nestedCountersInstance } from '../utils/nestedCounters'
89
import { logFlags } from '../logger'
910
import * as ProblemNodeHandler from '../p2p/ProblemNodeHandler'
@@ -322,6 +323,30 @@ class Debug {
322323
res.json({ success: false, error: e.message })
323324
}
324325
})
326+
327+
this.network.registerExternalGet('state-consistency', isDebugModeMiddleware, async (req, res) => {
328+
try {
329+
const rps =
330+
req.query.recordsPerSecond && typeof req.query.recordsPerSecond === 'string'
331+
? Number(req.query.recordsPerSecond)
332+
: 5000
333+
const summaryOnly = req.query.summaryOnly === 'true'
334+
const onlyMismatch = req.query.onlyMismatch !== 'false' // default true
335+
336+
const report = await localStateConsistencyReport({
337+
recordsPerSecond: rps,
338+
summaryOnly,
339+
onlyMismatch,
340+
maxChunks: 4, // Very limited chunks for fast response
341+
})
342+
343+
res.json(report)
344+
} catch (e) {
345+
const errorMessage = e instanceof Error ? e.message : String(e)
346+
res.status(500).json({ success: false, error: errorMessage })
347+
}
348+
})
349+
325350
//NEVER EVER RELEASE THIS... can only uncomment for test branches
326351
// this.network.registerExternalGet('unsafe_unlock', (req, res) => {
327352
// try {

src/debug/stateConsistency.ts

Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,294 @@
1+
import * as Context from '../p2p/Context'
2+
import { sleep } from '../utils/functions/time'
3+
4+
/**
 * Tuning knobs accepted by localStateConsistencyReport.
 * Every member is optional; omitted members fall back to the defaults
 * chosen by the report function.
 */
export interface ConsistencyOptions {
  /** Upper bound on how many records should be handled per second. */
  recordsPerSecond?: number
  /** Cap on the number of address chunks that get processed (default 256). */
  maxChunks?: number
  /** When true, the caller receives the summary without per-account details. */
  summaryOnly?: boolean
  /** When true, only accounts with at least one mismatch are listed in the details. */
  onlyMismatch?: boolean
  /** Reserved for a future feature: restrict the scan to consensus ranges only. */
  consensusRangeOnly?: boolean
}
19+
20+
/**
 * Outcome of comparing a single account across the three local sources.
 *
 * Flag naming: the first two letters name the sources being compared
 * (c = cache, t = trie, s = storage) and the last letter names the compared
 * field (h = hash, t = timestamp). For example `csh` is "cache vs storage, hash".
 */
export interface AccountConsistencyResult {
  /** Id of the account this row describes. */
  accountId: string

  /** Entry found in the account cache, if any. */
  cache?: { hash: string; timestamp: number }
  /** Entry found in the trie, if any (hash only). */
  trie?: { hash: string }
  /** Entry found in storage, if any. */
  storage?: { hash: string; timestamp: number }

  // --- hash comparisons ---
  /** cache vs trie, hash */
  cth: boolean
  /** cache vs storage, hash */
  csh: boolean
  /** trie vs storage, hash */
  tsh: boolean

  // --- timestamp comparisons ---
  /** cache vs trie, timestamp */
  ctt: boolean
  /** cache vs storage, timestamp */
  cst: boolean
  /** trie vs storage, timestamp */
  tst: boolean
}
36+
37+
/**
 * Aggregated counters describing one consistency report run.
 */
export interface ConsistencySummary {
  // --- account totals ---
  /** Number of accounts that were compared. */
  totalAccounts: number
  /** Accounts for which every required comparison matched. */
  matchingAll: number
  /** Accounts with at least one failed comparison. */
  mismatching: number

  // --- per-comparison failure counters (see AccountConsistencyResult flag names) ---
  cthFalse: number
  cttFalse: number
  cshFalse: number
  cstFalse: number
  tshFalse: number
  tstFalse: number

  // --- chunk and timing statistics ---
  /** Number of chunks selected for the run. */
  totalChunks: number
  /** Number of chunks that were actually worked through. */
  chunksProcessed: number
  /** Wall-clock duration of the run in milliseconds. */
  totalTimeMs: number
  /** totalTimeMs divided by chunksProcessed (0 when no chunk was processed). */
  averageTimePerChunk: number
  /** Observed throughput in accounts per second. */
  recordsPerSecondActual: number
}
58+
59+
/**
 * Splits the 64-character hex address space into 256 ranges, one per possible
 * two-character hex prefix ("00" through "ff").
 *
 * @returns one entry per prefix, in ascending order, holding the prefix plus the
 *          lowest (`low`, padded with "0") and highest (`high`, padded with "f")
 *          address that start with it.
 */
function generate256Chunks(): { low: string; high: string; prefix: string }[] {
  // The 62 trailing characters are the same for every chunk, so build them once.
  const lowestTail = '0'.repeat(62)
  const highestTail = 'f'.repeat(62)

  return Array.from({ length: 256 }, (_unused, index) => {
    const prefix = index.toString(16).padStart(2, '0')
    return {
      prefix,
      low: prefix + lowestTail, // 64-char address
      high: prefix + highestTail,
    }
  })
}
74+
75+
76+
77+
/**
 * Main diagnostic function – compares, account by account, what the account
 * cache, the shard trie and storage hold, and produces a report.
 *
 * The address space is walked in up to 256 prefix chunks. For each chunk the
 * function collects the cache and trie entries whose account id starts with the
 * chunk prefix, looks those ids up in storage in batches, compares the three
 * views, and then sleeps so the overall pace stays near `recordsPerSecond`.
 *
 * Notes on the result:
 * - A comparison flag is false when the values differ OR when either side is missing.
 * - `ctt` and `tst` are always false because the trie view carries no timestamp;
 *   they are therefore not part of the "all match" decision.
 * - Storage is only queried for ids found in the cache or the trie.
 *
 * @param opts optional tuning; see ConsistencyOptions. A `recordsPerSecond` that is
 *             not a positive number falls back to 5000, and a `maxChunks` that is
 *             not a positive number falls back to all 256 chunks.
 * @returns the summary counters and, unless `summaryOnly` is set, the per-account
 *          results (restricted to mismatches when `onlyMismatch` is set).
 * @throws Error when `Context.stateManager` is not available yet.
 */
export async function localStateConsistencyReport(opts: ConsistencyOptions = {}): Promise<{
  summary: ConsistencySummary
  details: AccountConsistencyResult[]
}> {
  // Guard the throttle: NaN, 0 or a negative rate would turn the pacing maths below
  // into NaN/Infinity and effectively disable rate limiting.
  const requestedRate = opts.recordsPerSecond
  const recordsPerSecond = typeof requestedRate === 'number' && requestedRate > 0 ? requestedRate : 5000 // default fairly fast

  const reportStartTime = Date.now()
  const summary: ConsistencySummary = {
    totalAccounts: 0,
    matchingAll: 0,
    mismatching: 0,
    cthFalse: 0,
    cttFalse: 0,
    cshFalse: 0,
    cstFalse: 0,
    tshFalse: 0,
    tstFalse: 0,
    totalChunks: 0,
    chunksProcessed: 0,
    totalTimeMs: 0,
    averageTimePerChunk: 0,
    recordsPerSecondActual: 0,
  }

  const details: AccountConsistencyResult[] = []

  const stateManager = Context.stateManager
  if (!stateManager) {
    throw new Error('stateManager not initialised yet')
  }

  const cacheMapGlobal = stateManager.accountCache?.accountsHashCache3?.accountHashMap
  const trie = stateManager.accountPatcher
  const storage = Context.storage
  const crypto = Context.crypto

  // Looks the logger up lazily so nothing is touched unless there is something to report.
  const warn = (message: string): void => {
    const logger = Context.logger?.getLogger('stateConsistency')
    if (logger) {
      logger.warn(message)
    }
  }

  // Debug: Test if storage has ANY account states at all.
  // NOTE(review): the result is only logged and never used by the report itself.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  if (storage && typeof (storage as any).queryAccountStateTable === 'function') {
    try {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const testQuery = await (storage as any).queryAccountStateTable('0', 'f', 0, Date.now(), 10)
      console.log(`Debug: Storage test query returned ${testQuery?.length || 0} total account states`)
    } catch (e) {
      console.log('Debug: Storage test query failed:', e instanceof Error ? e.message : String(e))
    }
  }

  // Resolve how many chunks to process. Anything that is not a positive number means
  // "all chunks"; a negative value must not reach slice(), where it would count from the end.
  const allChunks = generate256Chunks()
  const requestedChunks = opts.maxChunks
  const chunkLimit =
    typeof requestedChunks === 'number' && requestedChunks > 0
      ? Math.min(allChunks.length, Math.floor(requestedChunks))
      : allChunks.length
  const chunks = allChunks.slice(0, chunkLimit)
  summary.totalChunks = chunks.length

  for (const chunk of chunks) {
    const chunkStartTime = Date.now()

    const cacheMap: Map<string, { hash: string; timestamp: number }> = new Map()
    const trieMap: Map<string, { hash: string }> = new Map()
    const storageMap: Map<string, { hash: string; timestamp: number }> = new Map()

    // 1) CACHE – iterate global map once per chunk (plain loop for perf)
    if (cacheMapGlobal) {
      for (const [accountId, history] of cacheMapGlobal) {
        if (!accountId.startsWith(chunk.prefix)) continue
        // index 0 is treated as the newest entry of the account's hash history
        const latest = history.accountHashList?.[0]
        if (latest) {
          cacheMap.set(accountId, { hash: latest.h, timestamp: latest.t })
        }
      }
    }

    // 2) TRIE – iterate leaf nodes whose radix overlaps the chunk prefix
    if (trie?.shardTrie?.layerMaps) {
      const leafDepth = trie.treeMaxDepth ?? 4
      const leafLayer = trie.shardTrie.layerMaps[leafDepth]
      if (leafLayer) {
        for (const [radix, node] of leafLayer) {
          // A leaf belongs to this chunk when its radix extends the prefix, or – if the
          // leaf radix is shorter than the two-character prefix – when the prefix extends
          // the radix. Individual accounts are still filtered by prefix below.
          if (!radix.startsWith(chunk.prefix) && !chunk.prefix.startsWith(radix)) continue

          // accountTempMap is preferred (contains most recent hashes)
          const acctMap = node.accountTempMap ?? null
          if (acctMap) {
            for (const [accId, acc] of acctMap) {
              if (accId.startsWith(chunk.prefix)) {
                trieMap.set(accId, { hash: acc.hash })
              }
            }
          }

          // Fall back to the static accounts array, but only for accounts the preferred
          // map did not provide, so the fallback never overwrites a preferred hash.
          if (node.accounts) {
            for (const acc of node.accounts) {
              const accountId = acc.accountID as string
              if (accountId.startsWith(chunk.prefix) && !trieMap.has(accountId)) {
                trieMap.set(accountId, { hash: acc.hash })
              }
            }
          }
        }
      }
    }

    // Build union set and prepare for DB query
    const unionIds: string[] = Array.from(new Set<string>([...cacheMap.keys(), ...trieMap.keys()]))

    // 3) STORAGE – fetch newest state row per account in batches of 800 to stay below SQLite parameter limits
    const batchSize = 800
    let storageErrors = 0
    for (let offset = 0; offset < unionIds.length; offset += batchSize) {
      const batch = unionIds.slice(offset, offset + batchSize)
      try {
        if (!storage) {
          throw new Error('Storage not initialized')
        }
        // storage.queryAccountStateTableByListNewest returns rows with accountId, txTimestamp, stateAfter
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const rows: any[] = await (storage as any).queryAccountStateTableByListNewest(batch)
        if (rows && Array.isArray(rows)) {
          for (const row of rows) {
            if (row && row.stateAfter && row.accountId && row.txTimestamp) {
              const rowHash = crypto.hash(row.stateAfter)
              storageMap.set(row.accountId, { hash: rowHash, timestamp: Number(row.txTimestamp) })
            }
          }
        } else if (batch.length > 0) {
          // Log when we query for accounts but get no results - this helps debug the issue
          console.log(`Storage query returned no data for ${batch.length} accounts in chunk ${chunk.prefix}. Sample account: ${batch[0]?.substring(0, 8)}...`)
        }
      } catch (e) {
        storageErrors++
        const reason = e instanceof Error ? e.message : String(e)
        warn(`Storage query failed for chunk ${chunk.prefix}, batch ${Math.floor(offset / batchSize)}: ${reason}`)
        // Continue processing other batches
      }
    }

    // Log storage errors if any occurred
    if (storageErrors > 0) {
      warn(`Total storage query errors for chunk ${chunk.prefix}: ${storageErrors}`)
    }

    // 4) Compare
    const allIds = new Set<string>([...unionIds, ...storageMap.keys()])
    for (const accountId of allIds) {
      const c = cacheMap.get(accountId)
      const t = trieMap.get(accountId)
      const s = storageMap.get(accountId)

      // A flag is true only when both sides exist and agree.
      const res: AccountConsistencyResult = {
        accountId,
        cache: c,
        trie: t,
        storage: s,
        cth: c !== undefined && t !== undefined && c.hash === t.hash,
        ctt: false, // trie doesn't have timestamp in v1
        csh: c !== undefined && s !== undefined && c.hash === s.hash,
        cst: c !== undefined && s !== undefined && c.timestamp === s.timestamp,
        tsh: t !== undefined && s !== undefined && t.hash === s.hash,
        tst: false, // trie timestamp not tracked in v1
      }

      summary.totalAccounts++

      // all hash comparisons plus the cache-storage timestamp must agree
      const allMatch = res.cth && res.csh && res.tsh && res.cst

      if (allMatch) {
        summary.matchingAll++
      } else {
        summary.mismatching++
        if (!res.cth) summary.cthFalse++
        if (!res.ctt) summary.cttFalse++
        if (!res.csh) summary.cshFalse++
        if (!res.cst) summary.cstFalse++
        if (!res.tsh) summary.tshFalse++
        if (!res.tst) summary.tstFalse++
      }

      // Add to details if requested
      const wanted = !opts.onlyMismatch || !allMatch
      if (wanted && !opts.summaryOnly) {
        details.push(res)
      }
    }

    // 5) Rate limiting and pacing
    const recordsProcessed = allIds.size
    if (recordsProcessed > 0) {
      const elapsed = Date.now() - chunkStartTime
      const targetTimeMs = (recordsProcessed / recordsPerSecond) * 1000
      // always yield for at least 10 ms, even when the chunk already took long enough
      await sleep(Math.max(10, targetTimeMs - elapsed))
    } else {
      await sleep(50) // minimum sleep when no records processed
    }

    summary.chunksProcessed++
  }

  // Calculate final statistics
  summary.totalTimeMs = Date.now() - reportStartTime
  summary.averageTimePerChunk = summary.chunksProcessed > 0 ? summary.totalTimeMs / summary.chunksProcessed : 0
  summary.recordsPerSecondActual =
    summary.totalAccounts > 0 && summary.totalTimeMs > 0 ? (summary.totalAccounts / summary.totalTimeMs) * 1000 : 0

  // details stays empty when summaryOnly is set because nothing is pushed above
  return {
    summary,
    details,
  }
}
294+

src/exit-handler/index.ts

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ class ExitHandler {
9494
this.exited = true
9595
this._cleanupSync()
9696
try {
97-
this.runExitLog(true, exitType, message)
97+
await this.runExitLog(true, exitType, message)
9898
await this._cleanupAsync()
9999
} catch (e) {
100100
console.error(e)
@@ -109,7 +109,7 @@ class ExitHandler {
109109
this._cleanupSync()
110110

111111
try {
112-
this.runExitLog(false, exitType, message)
112+
await this.runExitLog(false, exitType, message)
113113
await this._cleanupAsync()
114114
} catch (e) {
115115
console.error(e)
@@ -118,7 +118,7 @@ class ExitHandler {
118118
process.exit(1) // exiting with status 1 causes our modified PM2 to not restart the process
119119
}
120120

121-
runExitLog(isCleanExit: boolean, exitType: string, msg: string) {
121+
async runExitLog(isCleanExit: boolean, exitType: string, msg: string) {
122122
this.exitLogger.fatal(`isCleanExit: ${isCleanExit} exitType: ${exitType} msg: ${msg}`)
123123
let log: string[] = []
124124
const fakeStream = {
@@ -148,6 +148,21 @@ class ExitHandler {
148148
profilerInstance.scopedProfileSectionEnd('counts')
149149
this.exitLogger.fatal(log.join(''))
150150

151+
// ----------- State consistency report (optional) ------------
152+
try {
153+
const cfg = Context.config?.debug
154+
if (cfg?.printStateConsistencyOnExit) {
155+
const { localStateConsistencyReport } = await import('../debug/stateConsistency')
156+
const rep = await localStateConsistencyReport({ summaryOnly: false, onlyMismatch: true })
157+
this.exitLogger.fatal('State consistency summary:\n' + JSON.stringify(rep.summary, null, 2))
158+
if (rep.details && rep.details.length > 0) {
159+
this.exitLogger.fatal('State consistency mismatches:\n' + JSON.stringify(rep.details, null, 2))
160+
}
161+
}
162+
} catch (e) {
163+
this.exitLogger.fatal('Error generating state consistency report: ' + e.message)
164+
}
165+
151166
this.writeExitSummary(isCleanExit, exitType, msg)
152167
}
153168

0 commit comments

Comments
 (0)