diff --git a/src/App/Program.fs b/src/App/Program.fs index cf84cc0e..52dbeb47 100644 --- a/src/App/Program.fs +++ b/src/App/Program.fs @@ -110,6 +110,8 @@ type MissionOptions pubnetParallelCatchupNumWorkers: int, tag: string option, numRuns: int option, + numPregeneratedTxs: int option, + genesisTestAccountCount: int option, catchupSkipKnownResultsForTesting: bool option ) = @@ -459,6 +461,16 @@ type MissionOptions Required = false)>] member self.NumRuns = numRuns + [] + member self.NumPregeneratedTxs = numPregeneratedTxs + + [] + member self.GenesisTestAccountCount = genesisTestAccountCount + [] @@ -577,6 +589,8 @@ let main argv = pubnetParallelCatchupNumWorkers = 128 tag = None numRuns = None + numPregeneratedTxs = None + genesisTestAccountCount = None enableTailLogging = true catchupSkipKnownResultsForTesting = None updateSorobanCosts = None } @@ -715,9 +729,11 @@ let main argv = pubnetParallelCatchupNumWorkers = mission.PubnetParallelCatchupNumWorkers tag = mission.Tag numRuns = mission.NumRuns + numPregeneratedTxs = mission.NumPregeneratedTxs enableTailLogging = true catchupSkipKnownResultsForTesting = mission.CatchupSkipKnownResultsForTesting - updateSorobanCosts = None } + updateSorobanCosts = None + genesisTestAccountCount = mission.GenesisTestAccountCount } allMissions.[m] missionContext diff --git a/src/FSLibrary.Tests/Tests.fs b/src/FSLibrary.Tests/Tests.fs index 9bb0b90e..9591ea52 100644 --- a/src/FSLibrary.Tests/Tests.fs +++ b/src/FSLibrary.Tests/Tests.fs @@ -117,9 +117,11 @@ let ctx : MissionContext = pubnetParallelCatchupNumWorkers = 128 tag = None numRuns = None + numPregeneratedTxs = None enableTailLogging = true catchupSkipKnownResultsForTesting = None - updateSorobanCosts = None } + updateSorobanCosts = None + genesisTestAccountCount = None } let netdata = __SOURCE_DIRECTORY__ + "/../../../data/public-network-data-2024-08-01.json" let pubkeys = __SOURCE_DIRECTORY__ + "/../../../data/tier1keys.json" diff --git a/src/FSLibrary/MaxTPSTest.fs 
b/src/FSLibrary/MaxTPSTest.fs index 0fdf3696..d6ab1737 100644 --- a/src/FSLibrary/MaxTPSTest.fs +++ b/src/FSLibrary/MaxTPSTest.fs @@ -132,35 +132,110 @@ let maxTPSTest (context: MissionContext) (baseLoadGen: LoadGen) (setupCfg: LoadG context.image (if context.flatQuorum.IsSome then context.flatQuorum.Value else false) + // PayPregenerated requires node restart between failed iterations to ensure validity of the pregenerated transactions. + // However, large-scale simulation restarts can be slow, so for now only use the new mode on small networks. + let lgMode = if List.length allNodes > 30 then GeneratePaymentLoad else PayPregenerated + let baseLoadGen = { baseLoadGen with mode = lgMode } + + let context = + { context with + genesisTestAccountCount = + if baseLoadGen.mode = PayPregenerated then + Some(context.genesisTestAccountCount |> Option.defaultValue 500000) + else + None + numPregeneratedTxs = + if baseLoadGen.mode = PayPregenerated then + Some(context.numPregeneratedTxs |> Option.defaultValue 500000) + else + None } + let sdf = List.find (fun (cs: CoreSet) -> cs.name.StringName = "stellar" || cs.name.StringName = "sdf") allNodes let tier1 = List.filter (fun (cs: CoreSet) -> cs.options.tier1 = Some true) allNodes + // On smaller networks, run loadgen on all nodes to better balance the overhead of load generation + let loadGenNodes = if List.length allNodes > smallNetworkSize then tier1 else allNodes + let isLoadGenNode cs = List.exists (fun (cs': CoreSet) -> cs' = cs) loadGenNodes + + // Assign pre-generated transaction information to each load generator node. + // Specifically, partition all available accounts evenly across nodes, + // and assign appropriate offsets to prevent conflicts. 
+ let allNodes = + match context.numPregeneratedTxs, context.genesisTestAccountCount, lgMode with + | Some txs, Some accounts, PayPregenerated -> + let loadGenCount = List.length loadGenNodes + let accountsPerNode = accounts / loadGenCount + let mutable j = 0 + + List.map + (fun (cs: CoreSet) -> + if isLoadGenNode cs then + let i = j + j <- j + 1 + + { cs with + options = + { cs.options with + initialization = + { cs.options.initialization with + pregenerateTxs = Some(txs, accountsPerNode, accountsPerNode * i) } } } + else + cs) + allNodes + | _ -> allNodes + context.ExecuteWithOptionalConsistencyCheck allNodes None false (fun (formation: StellarFormation) -> - let numAccounts = 30000 + let numAccounts = + match context.genesisTestAccountCount with + | Some x -> x + | None -> 30000 let upgradeMaxTxSetSize (coreSets: CoreSet list) (rate: int) = // Max tx size to avoid overflowing the transaction queue let size = rate * limitMultiplier formation.UpgradeMaxTxSetSize coreSets size - // Setup overlay connections first before manually closing - // ledger, which kick off consensus - formation.WaitUntilConnected allNodes - formation.ManualClose allNodes + let setupCoreSets (coreSets: CoreSet list) = + // Setup overlay connections first before manually closing + // ledger, which kick off consensus + formation.WaitUntilConnected coreSets + formation.ManualClose coreSets + + // Wait until the whole network is synced before proceeding, + // to fail asap in case of a misconfiguration + formation.WaitUntilSynced coreSets + formation.UpgradeProtocolToLatest coreSets + + let restartCoreSetsOrWait (coreSets: CoreSet list) = + if lgMode = PayPregenerated then + // Stop the core sets passed by the caller (not the captured allNodes) in parallel + coreSets + |> List.map (fun set -> async { formation.Stop set.name }) + |> Async.Parallel + |> Async.RunSynchronously + |> ignore + + // Start the same core sets again in parallel + coreSets + |> List.map (fun set -> async { formation.Start set.name }) + |> Async.Parallel + |> Async.RunSynchronously + |> 
ignore + else + System.Threading.Thread.Sleep(5 * 60 * 1000) + + setupCoreSets allNodes - // Wait until the whole network is synced before proceeding, - // to fail asap in case of a misconfiguration - formation.WaitUntilSynced allNodes - formation.UpgradeProtocolToLatest allNodes - upgradeMaxTxSetSize allNodes 10000 - formation.RunLoadgen sdf { context.GenerateAccountCreationLoad with accounts = numAccounts } + if lgMode <> PayPregenerated then + upgradeMaxTxSetSize allNodes 10000 + formation.RunLoadgen sdf { context.GenerateAccountCreationLoad with accounts = numAccounts } // Perform setup (if requested) match setupCfg with @@ -171,26 +246,28 @@ let maxTPSTest (context: MissionContext) (baseLoadGen: LoadGen) (setupCfg: LoadG formation.RunLoadgen cs { cfg with accounts = numAccounts; minSorobanPercentSuccess = Some 100 } | None -> () - let wait () = System.Threading.Thread.Sleep(5 * 60 * 1000) - let getMiddle (low: int) (high: int) = low + (high - low) / 2 let binarySearchWithThreshold (low: int) (high: int) (threshold: int) = let mutable lowerBound = low let mutable upperBound = high - let mutable shouldWait = false + let mutable shouldRestartOrWait = false let mutable finalTxRate = None while upperBound - lowerBound > threshold do let middle = getMiddle lowerBound upperBound - if shouldWait then wait () + if shouldRestartOrWait then + restartCoreSetsOrWait allNodes + setupCoreSets allNodes formation.clearMetrics allNodes upgradeMaxTxSetSize allNodes middle - upgradeSorobanLedgerLimits context formation allNodes middle - upgradeSorobanTxLimits context formation allNodes + + if baseLoadGen.mode <> PayPregenerated && baseLoadGen.mode <> GeneratePaymentLoad then + upgradeSorobanLedgerLimits context formation allNodes middle + upgradeSorobanTxLimits context formation allNodes try LogInfo "Run started at tx rate %i" middle @@ -202,8 +279,6 @@ let maxTPSTest (context: MissionContext) (baseLoadGen: LoadGen) (setupCfg: LoadG txs = middle * 1000 txrate = middle } - // On 
smaller networks, run loadgen on all nodes to better balance the overhead of load generation - let loadGenNodes = if List.length allNodes > smallNetworkSize then tier1 else allNodes formation.RunMultiLoadgen loadGenNodes loadGen formation.CheckNoErrorsAndPairwiseConsistency() formation.EnsureAllNodesInSync allNodes @@ -212,12 +287,12 @@ let maxTPSTest (context: MissionContext) (baseLoadGen: LoadGen) (setupCfg: LoadG lowerBound <- middle finalTxRate <- Some middle LogInfo "Run succeeded at tx rate %i" middle - shouldWait <- false + shouldRestartOrWait <- false with e -> LogInfo "Run failed at tx rate %i: %s" middle e.Message upperBound <- middle - shouldWait <- true + shouldRestartOrWait <- true if finalTxRate.IsSome then LogInfo "Found max tx rate %i" finalTxRate.Value @@ -239,7 +314,10 @@ let maxTPSTest (context: MissionContext) (baseLoadGen: LoadGen) (setupCfg: LoadG LogInfo "Starting max TPS run %i out of %i" run numRuns let resultRate = binarySearchWithThreshold context.txRate context.maxTxRate threshold results <- List.append results [ resultRate ] - if run < numRuns then wait () + if run < numRuns then + // Redo the setup after the restart, as the search loop does after a failed iteration + restartCoreSetsOrWait allNodes + setupCoreSets allNodes LogInfo "Final tx rate averaged to %i over %i runs for image %s" diff --git a/src/FSLibrary/MissionDatabaseInplaceUpgrade.fs b/src/FSLibrary/MissionDatabaseInplaceUpgrade.fs index 7d21716b..a81439c1 100644 --- a/src/FSLibrary/MissionDatabaseInplaceUpgrade.fs +++ b/src/FSLibrary/MissionDatabaseInplaceUpgrade.fs @@ -42,7 +42,8 @@ let databaseInplaceUpgrade (context: MissionContext) = newHist = false initialCatchup = false waitForConsensus = true - fetchDBFromPeer = fetchFromPeer } } + fetchDBFromPeer = fetchFromPeer + pregenerateTxs = None } } context.Execute [ beforeUpgradeCoreSet; coreSet; afterUpgradeCoreSet ] diff --git a/src/FSLibrary/MissionVersionMixConsensus.fs b/src/FSLibrary/MissionVersionMixConsensus.fs index eeb5bccb..5b8293db 100644 --- a/src/FSLibrary/MissionVersionMixConsensus.fs +++ 
b/src/FSLibrary/MissionVersionMixConsensus.fs @@ -40,7 +40,8 @@ let versionMixConsensus (context: MissionContext) = newHist = true initialCatchup = false waitForConsensus = false - fetchDBFromPeer = fetchFromPeer } } + fetchDBFromPeer = fetchFromPeer + pregenerateTxs = None } } let oldCoreSet = MakeDeferredCoreSet @@ -54,7 +55,8 @@ let versionMixConsensus (context: MissionContext) = newHist = true initialCatchup = false waitForConsensus = false - fetchDBFromPeer = fetchFromPeer } } + fetchDBFromPeer = fetchFromPeer + pregenerateTxs = None } } context.Execute [ beforeSet; newCoreSet; oldCoreSet ] diff --git a/src/FSLibrary/StellarCoreCfg.fs b/src/FSLibrary/StellarCoreCfg.fs index 506cc5ae..652a0f2a 100644 --- a/src/FSLibrary/StellarCoreCfg.fs +++ b/src/FSLibrary/StellarCoreCfg.fs @@ -194,6 +194,10 @@ type StellarCoreCfg = t.Add("DEPRECATED_SQL_LEDGER_STATE", self.deprecatedSQLState) |> ignore t.Add("METADATA_DEBUG_LEDGERS", 0) |> ignore + match self.network.missionContext.genesisTestAccountCount with + | Some count -> t.Add("GENESIS_TEST_ACCOUNT_COUNT", count) |> ignore + | None -> () + match self.containerType with // REVERTME: temporarily use same nonzero port for both container types. 
| _ -> t.Add("HTTP_PORT", int64 (CfgVal.httpPort)) |> ignore diff --git a/src/FSLibrary/StellarCoreHTTP.fs b/src/FSLibrary/StellarCoreHTTP.fs index b7648ee8..7364d812 100644 --- a/src/FSLibrary/StellarCoreHTTP.fs +++ b/src/FSLibrary/StellarCoreHTTP.fs @@ -51,6 +51,7 @@ type LoadGenMode = | SorobanInvoke | MixedClassicSoroban | StopRun + | PayPregenerated override self.ToString() = match self with @@ -64,6 +65,7 @@ type LoadGenMode = | SorobanInvoke -> "soroban_invoke" | MixedClassicSoroban -> "mixed_classic_soroban" | StopRun -> "stop" + | PayPregenerated -> "pay_pregenerated" type LoadGen = { mode: LoadGenMode diff --git a/src/FSLibrary/StellarCoreSet.fs b/src/FSLibrary/StellarCoreSet.fs index 5dafd2b0..13e953ce 100644 --- a/src/FSLibrary/StellarCoreSet.fs +++ b/src/FSLibrary/StellarCoreSet.fs @@ -84,42 +84,49 @@ type CoreSetInitialization = newHist: bool initialCatchup: bool waitForConsensus: bool - fetchDBFromPeer: (CoreSetName * int) option } + fetchDBFromPeer: (CoreSetName * int) option + // Loadgen pregeneration request as (numTxs, numAccounts, offset), passed to pregenerate-loadgen-txs as --count/--accounts/--offset; None skips it + pregenerateTxs: (int * int * int) option } static member Default = { newDb = true newHist = true initialCatchup = false waitForConsensus = false - fetchDBFromPeer = None } + fetchDBFromPeer = None + pregenerateTxs = None } static member DefaultNoForceSCP = { newDb = true newHist = true initialCatchup = false waitForConsensus = true - fetchDBFromPeer = None } + fetchDBFromPeer = None + pregenerateTxs = None } static member CatchupNoForceSCP = { newDb = true newHist = true initialCatchup = true waitForConsensus = true - fetchDBFromPeer = None } + fetchDBFromPeer = None + pregenerateTxs = None } static member OnlyNewDb = { newDb = true newHist = false initialCatchup = false waitForConsensus = true - fetchDBFromPeer = None } + fetchDBFromPeer = None + pregenerateTxs = None } static member NoInitCmds = { newDb = false newHist = false initialCatchup = false waitForConsensus = true - fetchDBFromPeer = None } + fetchDBFromPeer = None + 
pregenerateTxs = None } type GeoLoc = { lat: float; lon: float } diff --git a/src/FSLibrary/StellarKubeSpecs.fs b/src/FSLibrary/StellarKubeSpecs.fs index 8f7a4756..5cd8ed38 100644 --- a/src/FSLibrary/StellarKubeSpecs.fs +++ b/src/FSLibrary/StellarKubeSpecs.fs @@ -339,6 +339,7 @@ let WithProbes (container: V1Container) (probeTimeout: int) : V1Container = periodSeconds = System.Nullable(1), failureThreshold = System.Nullable(60), timeoutSeconds = System.Nullable(probeTimeout), + initialDelaySeconds = System.Nullable(60), httpGet = V1HTTPGetAction(path = "/info", port = httpPortStr) ) @@ -568,6 +569,16 @@ type NetworkCfg with // we want. let newHistIgnoreError = ignoreError newHist + let pregenerate = + match init.pregenerateTxs with + | None -> None + | Some (txs, accounts, offset) -> + runCoreIf + true + [| "pregenerate-loadgen-txs" + "--count " + txs.ToString() + "--accounts " + accounts.ToString() + "--offset " + offset.ToString() |] let initialCatchup = runCoreIf init.initialCatchup [| "catchup"; "current/0" |] @@ -580,6 +591,7 @@ type NetworkCfg with setPgHost waitForTime newDb + pregenerate newHistIgnoreError initialCatchup |]) createDbs) diff --git a/src/FSLibrary/StellarMissionContext.fs b/src/FSLibrary/StellarMissionContext.fs index 1d4ae84e..b10e3e33 100644 --- a/src/FSLibrary/StellarMissionContext.fs +++ b/src/FSLibrary/StellarMissionContext.fs @@ -99,10 +99,12 @@ type MissionContext = randomSeed: int tag: string option numRuns: int option + numPregeneratedTxs: int option networkSizeLimit: int pubnetParallelCatchupStartingLedger: int pubnetParallelCatchupEndLedger: int option pubnetParallelCatchupNumWorkers: int + genesisTestAccountCount: int option // Tail logging can cause the pubnet simulation missions like SorobanLoadGeneration // and SimulatePubnet to fail on the heartbeat handler due to what looks like a