Skip to content

Commit

Permalink
Introduced verification of big ledger peer snapshot file
Browse files Browse the repository at this point in the history
  • Loading branch information
crocodile-dentist committed Jul 23, 2024
1 parent 2244210 commit 4bf418e
Show file tree
Hide file tree
Showing 10 changed files with 148 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ module Ouroboros.Network.PeerSelection.LedgerPeers.Type
, LedgerPeersKind (..)
, LedgerPeerSnapshot (.., LedgerPeerSnapshot)
, isLedgerPeersEnabled
, compareLedgerPeerSnapshotApproximate
) where

import Control.Monad (forM)
Expand Down Expand Up @@ -64,6 +65,26 @@ pattern LedgerPeerSnapshot payload <- LedgerPeerSnapshotV1 payload where

{-# COMPLETE LedgerPeerSnapshot #-}

-- | Approximate equality of two ledger peer snapshots.
--
-- A serialised snapshot stores fully qualified domain names and floating
-- point approximations of stake, so it cannot be compared exactly with
-- freshly computed 'faithful' data.  To compensate, the @candidate@ is
-- pushed through a JSON round trip ('toJSON' followed by 'fromJSON')
-- before it is compared with the @baseline@.  The @baseline@ is assumed
-- to have already gone through the same treatment when the snapshot was
-- created, and is therefore approximate as well.  Two snapshots derived
-- from the same underlying data should then compare equal.
--
-- A candidate which fails to decode after the round trip is reported as
-- a mismatch ('False').
--
compareLedgerPeerSnapshotApproximate :: LedgerPeerSnapshot
                                     -> LedgerPeerSnapshot
                                     -> Bool
compareLedgerPeerSnapshotApproximate baseline candidate
  | Success roundTripped <- fromJSON (toJSON candidate)
  = roundTripped == baseline
  | otherwise
  = False

-- | In case the format changes in the future, this function provides a migration functionality
-- when possible.
--
Expand Down
4 changes: 3 additions & 1 deletion ouroboros-network/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@
peers
* Implemented separate configurable peer selection targets for Praos and
Genesis consensus modes. Genesis mode may use more big ledger peers when
a node is syncing up.
* Implemented verification of big ledger peer snapshot when syncing reaches
the point at which the snapshot was taken. An error is raised when a
mismatch is detected.

## 0.16.1.1 -- 2024-06-28

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ import Ouroboros.Network.PeerSelection.State.KnownPeers qualified as KnownPeers
import Ouroboros.Network.PeerSelection.State.LocalRootPeers (HotValency (..),
LocalRootPeers (..), WarmValency (..))
import Ouroboros.Network.PeerSelection.State.LocalRootPeers qualified as LocalRootPeers
import Ouroboros.Network.Point
import Ouroboros.Network.Protocol.PeerSharing.Type (PeerSharingResult (..))

import Ouroboros.Network.Testing.Data.Script
Expand Down Expand Up @@ -917,6 +918,7 @@ traceNum TraceOutboundGovernorCriticalFailure {} = 53
traceNum TraceDebugState {} = 54
traceNum TraceChurnAction {} = 55
traceNum TraceChurnTimeout {} = 56
traceNum TraceVerifyPeerSnapshot {} = 57

allTraceNames :: Map Int String
allTraceNames =
Expand Down Expand Up @@ -978,6 +980,7 @@ allTraceNames =
, (54, "TraceDebugState")
, (55, "TraceChurnAction")
, (56, "TraceChurnTimeout")
, (57, "TraceVerifyPeerSnapshot")
]


Expand Down Expand Up @@ -3788,12 +3791,17 @@ _governorFindingPublicRoots targetNumberOfRootPeers readDomains readUseBootstrap
closePeerConnection = error "closePeerConnection"
},
readUseBootstrapPeers,
readLedgerStateJudgement,
readInboundPeers = pure Map.empty,
updateOutboundConnectionsState = \a -> do
a' <- readTVar olocVar
when (a /= a') $
writeTVar olocVar a
writeTVar olocVar a,
getLedgerStateCtx =
LedgerPeersConsensusInterface {
lpGetLatestSlot = pure Origin,
lpGetLedgerStateJudgement = readLedgerStateJudgement,
lpGetLedgerPeers = pure [] },
readLedgerPeerSnapshot = pure Nothing
}

targets :: PeerSelectionTargets
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ import Ouroboros.Network.ExitPolicy
import Ouroboros.Network.PeerSelection.Governor hiding (PeerSelectionState (..))
import Ouroboros.Network.PeerSelection.Governor qualified as Governor
import Ouroboros.Network.PeerSelection.State.LocalRootPeers qualified as LocalRootPeers
import Ouroboros.Network.Point

import Ouroboros.Network.Testing.Data.Script (PickScript, Script (..),
ScriptDelay (..), TimedScript, arbitraryPickScript,
Expand Down Expand Up @@ -444,13 +445,17 @@ mockPeerSelectionActions' tracer
closePeerConnection
},
readUseBootstrapPeers,
readLedgerStateJudgement,
getLedgerStateCtx = LedgerPeersConsensusInterface {
lpGetLedgerStateJudgement = readLedgerStateJudgement,
lpGetLatestSlot = pure Origin,
lpGetLedgerPeers = pure [] },
readInboundPeers = pure Map.empty,
updateOutboundConnectionsState = \a -> do
a' <- readTVar outboundConnectionsStateVar
when (a /= a') $
writeTVar outboundConnectionsStateVar a,
peerTargets
peerTargets,
readLedgerPeerSnapshot = pure Nothing
}
where
-- TODO: make this dynamic
Expand Down Expand Up @@ -757,6 +762,7 @@ tracerTracePeerSelection = contramap f tracerTestTraceEvent
f a@(TraceDebugState !_ !_) = GovernorEvent a
f a@(TraceChurnAction !_ !_ !_) = GovernorEvent a
f a@(TraceChurnTimeout !_ !_ !_) = GovernorEvent a
f a@(TraceVerifyPeerSnapshot !_) = GovernorEvent a

tracerDebugPeerSelection :: Tracer (IOSim s) (DebugPeerSelection PeerAddr)
tracerDebugPeerSelection = GovernorDebug `contramap` tracerTestTraceEvent
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1096,7 +1096,8 @@ prop_peer_selection_trace_coverage defaultBearerInfo diffScript =
show a
peerSelectionTraceMap a@TraceChurnTimeout {} =
show a

peerSelectionTraceMap (TraceVerifyPeerSnapshot result) =
"TraceVerifyPeerSnapshot " ++ show result
eventsSeenNames = map peerSelectionTraceMap events

-- TODO: Add checkCoverage here
Expand Down
9 changes: 4 additions & 5 deletions ouroboros-network/src/Ouroboros/Network/Diffusion/P2P.hs
Original file line number Diff line number Diff line change
Expand Up @@ -659,9 +659,7 @@ runM Interfaces
{ daApplicationInitiatorMode
, daApplicationInitiatorResponderMode
, daLocalResponderApplication
, daLedgerPeersCtx =
daLedgerPeersCtx@LedgerPeersConsensusInterface
{ lpGetLedgerStateJudgement }
, daLedgerPeersCtx
, daUpdateOutboundConnectionsState
}
ApplicationsExtra
Expand Down Expand Up @@ -990,7 +988,7 @@ runM Interfaces
psLocalRootPeersTracer = dtTraceLocalRootPeersTracer,
psPublicRootPeersTracer = dtTracePublicRootPeersTracer,
psReadTargets = readTVar peerSelectionTargetsVar,
psJudgement = lpGetLedgerStateJudgement,
getLedgerStateCtx = daLedgerPeersCtx,
psReadLocalRootPeers = daReadLocalRootPeers,
psReadPublicRootPeers = daReadPublicRootPeers,
psReadUseBootstrapPeers = daReadUseBootstrapPeers,
Expand All @@ -1002,7 +1000,8 @@ runM Interfaces
PeerSharingDisabled -> pure Map.empty
PeerSharingEnabled -> readInboundPeers,
psUpdateOutboundConnectionsState = daUpdateOutboundConnectionsState,
peerTargets = daPeerTargets }
peerTargets = daPeerTargets,
readLedgerPeerSnapshot = daReadLedgerPeerSnapshot }
WithLedgerPeersArgs {
wlpRng = ledgerPeersRng,
wlpConsensusInterface = daLedgerPeersCtx,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,13 @@ peerSelectionGovernorLoop tracer

<> Monitor.connections actions st
<> Monitor.jobs jobPool st
-- This job monitors for changes in big ledger peer snapshot file (eg. reload)
-- and copies it into the governor's private state. When a change is detected,
-- it also flips private state LedgerStateJudgement to YoungEnough so that it
-- can launch the appropriate verification task in the job pool when external
-- LedgerStateJudgement is TooOld. If the verification job detects a discrepancy
-- vs. big peers on the ledger, it throws and the node is shut down.
<> Monitor.ledgerPeerSnapshotChange st actions
-- In Genesis consensus mode, this is responsible for settings targets on the basis
-- of the ledger state judgement. It takes into account whether
-- the churn governor is running via a tmvar such that targets are set
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
module Ouroboros.Network.PeerSelection.Governor.Monitor
( targetPeers
, jobs
, jobVerifyPeerSnapshot
, connections
, localRoots
, monitorLedgerStateJudgement
, monitorBootstrapPeersFlag
, waitForSystemToQuiesce
, ledgerPeerSnapshotChange
) where

import Data.Map.Strict (Map)
Expand All @@ -25,7 +27,7 @@ import Data.Maybe (fromMaybe, isJust)
import Data.Set (Set)
import Data.Set qualified as Set

import Control.Concurrent.JobPool (JobPool)
import Control.Concurrent.JobPool (Job (..), JobPool)
import Control.Concurrent.JobPool qualified as JobPool
import Control.Exception (assert)
import Control.Monad.Class.MonadSTM
Expand All @@ -42,7 +44,9 @@ import Ouroboros.Network.PeerSelection.Governor.ActivePeers
import Ouroboros.Network.PeerSelection.Governor.Types hiding
(PeerSelectionCounters)
import Ouroboros.Network.PeerSelection.LedgerPeers.Type
(LedgerStateJudgement (..))
(LedgerPeerSnapshot (..), LedgerPeersConsensusInterface (..),
LedgerStateJudgement (..), compareLedgerPeerSnapshotApproximate)
import Ouroboros.Network.PeerSelection.LedgerPeers.Utils
import Ouroboros.Network.PeerSelection.PeerTrustable (PeerTrustable (..))
import Ouroboros.Network.PeerSelection.PublicRootPeers qualified as PublicRootPeers
import Ouroboros.Network.PeerSelection.State.EstablishedPeers qualified as EstablishedPeers
Expand Down Expand Up @@ -600,15 +604,17 @@ monitorLedgerStateJudgement :: ( MonadSTM m
=> PeerSelectionActions peeraddr peerconn m
-> PeerSelectionState peeraddr peerconn
-> Guarded (STM m) (TimedDecision m peeraddr peerconn)
monitorLedgerStateJudgement PeerSelectionActions{ readLedgerStateJudgement }
monitorLedgerStateJudgement PeerSelectionActions{ getLedgerStateCtx = ledgerCtx@LedgerPeersConsensusInterface {
lpGetLedgerStateJudgement = readLedgerStateJudgement } }
st@PeerSelectionState{ bootstrapPeersFlag,
publicRootPeers,
knownPeers,
establishedPeers,
inProgressPromoteCold,
inProgressPromoteWarm,
ledgerStateJudgement,
consensusMode }
consensusMode,
ledgerPeerSnapshot }
| GenesisMode <- consensusMode =
Guarded Nothing $ do
lsj <- readLedgerStateJudgement
Expand All @@ -617,7 +623,10 @@ monitorLedgerStateJudgement PeerSelectionActions{ readLedgerStateJudgement }
return $ \_now ->
Decision {
decisionTrace = [TraceLedgerStateJudgementChanged lsj],
decisionJobs = [],
decisionJobs = case (lsj, ledgerPeerSnapshot) of
(TooOld, Just ledgerPeerSnapshot') ->
[jobVerifyPeerSnapshot ledgerPeerSnapshot' ledgerCtx]
_otherwise -> [],
decisionState = st {
ledgerStateJudgement = lsj } }

Expand Down Expand Up @@ -744,3 +753,59 @@ waitForSystemToQuiesce st@PeerSelectionState{
}
}
| otherwise = GuardedSkip Nothing

-- | Verify a big ledger peer snapshot against the ledger.
--
-- This job is scheduled by 'monitorLedgerStateJudgement'.  It blocks (via
-- STM 'check') until 'lpGetLatestSlot' reports a slot at or past the slot
-- recorded in the snapshot, i.e. until the node has caught up to the point
-- at which the snapshot was ostensibly taken.  It then reads
-- 'lpGetLedgerPeers', reduces them with 'accumulateBigLedgerStake' and
-- compares the result with the provided snapshot using
-- 'compareLedgerPeerSnapshotApproximate'.
--
-- The outcome is only reported through 'TraceVerifyPeerSnapshot'; the
-- governor state is returned unchanged and no follow-up jobs are scheduled.
--
jobVerifyPeerSnapshot :: ( MonadSTM m )
                      => LedgerPeerSnapshot
                      -> LedgerPeersConsensusInterface m
                      -> Job () m (Completion m peeraddr peerconn)
jobVerifyPeerSnapshot baseline@(LedgerPeerSnapshot (slot, _))
                      LedgerPeersConsensusInterface {
                        lpGetLatestSlot,
                        lpGetLedgerPeers }
  -- NOTE(review): the second argument of 'Job' is presumably the exception
  -- handler (confirm against "Control.Concurrent.JobPool"); if so, a job
  -- which throws is reported as a failed verification.
  = Job job (const (completion False)) () "jobVerifyPeerSnapshot"
  where
    -- Wrap the verification result in a completion which only emits a trace.
    completion result = return . Completion $ \st _now ->
      Decision {
        decisionTrace = [TraceVerifyPeerSnapshot result],
        decisionState = st,
        decisionJobs  = [] }

    job = do
      ledgerPeers <-
        atomically $ do
          -- retry the transaction until the snapshot's slot has been reached
          check . (>= slot) =<< lpGetLatestSlot
          accumulateBigLedgerStake <$> lpGetLedgerPeers
      -- Reusing the baseline's @slot@ here (rather than the latest slot
      -- observed above) is intentional.
      let candidate = LedgerPeerSnapshot (slot, ledgerPeers)
      completion $ compareLedgerPeerSnapshotApproximate baseline candidate

-- | Watch for a new big ledger peer snapshot.
--
-- The STM action reads 'readLedgerPeerSnapshot' and retries while there is
-- no snapshot at all, or while the snapshot it reads carries the same slot
-- as the one already held in the governor's state.  Otherwise it stores
-- the newly read snapshot in 'ledgerPeerSnapshot' and resets the
-- governor's private 'ledgerStateJudgement' to 'YoungEnough', so that the
-- ledger state judgement monitoring action can launch
-- `jobVerifyPeerSnapshot`.
--
ledgerPeerSnapshotChange :: (MonadSTM m)
                         => PeerSelectionState peeraddr peerconn
                         -> PeerSelectionActions peeraddr peerconn m
                         -> Guarded (STM m) (TimedDecision m peeraddr peerconn)
ledgerPeerSnapshotChange st@PeerSelectionState {
                           ledgerPeerSnapshot = current }
                         PeerSelectionActions {
                           readLedgerPeerSnapshot } =
    Guarded Nothing $ do
      latest <- readLedgerPeerSnapshot
      case latest of
        -- nothing has been loaded (yet): wait
        Nothing -> retry
        -- same slot as the snapshot we already hold: wait
        Just (LedgerPeerSnapshot (newSlot, _))
          | Just (LedgerPeerSnapshot (oldSlot, _)) <- current
          , newSlot == oldSlot
          -> retry
        -- first snapshot, or a snapshot taken at a different slot
        Just _ -> return (adopt latest)
  where
    adopt latest _now =
      Decision {
        decisionTrace = [],
        decisionJobs  = [],
        decisionState = st { ledgerStateJudgement = YoungEnough,
                             ledgerPeerSnapshot   = latest } }

Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,7 @@ import Control.Concurrent.Class.MonadSTM.Strict
import Ouroboros.Network.ConsensusMode
import Ouroboros.Network.ExitPolicy
import Ouroboros.Network.PeerSelection.Bootstrap (UseBootstrapPeers (..))
import Ouroboros.Network.PeerSelection.LedgerPeers (IsBigLedgerPeer,
LedgerPeersKind)
import Ouroboros.Network.PeerSelection.LedgerPeers.Type
(LedgerStateJudgement (..), UseLedgerPeers (..))
import Ouroboros.Network.PeerSelection.LocalRootPeers (OutboundConnectionsState)
import Ouroboros.Network.PeerSelection.PeerSharing (PeerSharing)
import Ouroboros.Network.PeerSelection.PublicRootPeers (PublicRootPeers)
Expand Down Expand Up @@ -394,9 +391,9 @@ data PeerSelectionActions peeraddr peerconn m = PeerSelectionActions {
-- | Read the current bootstrap peers flag
readUseBootstrapPeers :: STM m UseBootstrapPeers,

-- | Read the current ledger state judgement
-- | Read the current ledger state
--
readLedgerStateJudgement :: STM m LedgerStateJudgement,
getLedgerStateCtx :: LedgerPeersConsensusInterface m,

-- | Callback provided by consensus to inform it if the node is
-- connected to only local roots or also some external peers.
Expand All @@ -405,8 +402,11 @@ data PeerSelectionActions peeraddr peerconn m = PeerSelectionActions {
-- simply refuse to transition from TooOld to YoungEnough while
-- it only has local peers.
--
updateOutboundConnectionsState :: OutboundConnectionsState -> STM m ()
updateOutboundConnectionsState :: OutboundConnectionsState -> STM m (),

-- | Read the current state of ledger peer snapshot
--
readLedgerPeerSnapshot :: STM m (Maybe LedgerPeerSnapshot)
}

-- | Interfaces required by the peer selection governor, which do not need to
Expand Down Expand Up @@ -579,9 +579,11 @@ data PeerSelectionState peeraddr peerconn = PeerSelectionState {

-- | Time at which to retry querying inbound peers.
--
inboundPeersRetryTime :: !Time

inboundPeersRetryTime :: !Time,

-- | Internal state of ledger peer snapshot
--
ledgerPeerSnapshot :: Maybe LedgerPeerSnapshot
-- TODO: need something like this to distinguish between lots of bad peers
-- and us getting disconnected from the network locally. We don't want a
-- network disconnect to cause us to flush our full known peer set by
Expand Down Expand Up @@ -1232,7 +1234,8 @@ emptyPeerSelectionState rng consensusMode =
bootstrapPeersFlag = DontUseBootstrapPeers,
hasOnlyBootstrapPeers = False,
bootstrapPeersTimeout = Nothing,
inboundPeersRetryTime = Time 0
inboundPeersRetryTime = Time 0,
ledgerPeerSnapshot = Nothing
}


Expand Down Expand Up @@ -1707,6 +1710,7 @@ data TracePeerSelection peeraddr =
| TraceOnlyBootstrapPeers
| TraceBootstrapPeersFlagChangedWhilstInSensitiveState
| TraceUseBootstrapPeersChanged UseBootstrapPeers
| TraceVerifyPeerSnapshot Bool

--
-- Critical Failures
Expand Down
Loading

0 comments on commit 4bf418e

Please sign in to comment.