@@ -128,15 +128,35 @@ func (s *Service) Run(ctx context.Context) error {
128128 return nil
129129 }
130130
131- if err := s .initClients (ctx ); err != nil {
131+ if err := s .initClients (); err != nil {
132132 return err
133133 }
134134
135+ // Effective knobs (production defaults). Jitter is bounded by the epoch length
136+ // to avoid sleeping past the epoch window on short epochs.
137+ lookbackEpochs := scCandidateKeysLookbackEpochs
138+ respTimeout := scResponseTimeout
139+ affirmTimeout := scAffirmationTimeout
140+ logtrace .Debug (ctx , "storage challenge runtime knobs" , logtrace.Fields {
141+ "start_jitter_ms" : scStartJitterMs ,
142+ "response_timeout_ms" : respTimeout .Milliseconds (),
143+ "affirmation_timeout_ms" : affirmTimeout .Milliseconds (),
144+ "submit_evidence_config" : s .cfg .SubmitEvidence ,
145+ "poll_interval_ms" : s .cfg .PollInterval .Milliseconds (),
146+ "sc_files_per_challenger" : scFilesPerChallenger ,
147+ "sc_replica_count" : scReplicaCount ,
148+ "sc_observer_threshold" : scObserverThreshold ,
149+ "sc_keys_lookback_epochs" : lookbackEpochs ,
150+ })
151+
135152 ticker := time .NewTicker (s .cfg .PollInterval )
136153 defer ticker .Stop ()
137154
138155 var lastRunEpoch uint64
139156 var lastRunOK bool
157+ var loggedAlreadyRanEpoch uint64
158+ var loggedNotSelectedEpoch uint64
159+ var loggedDisabledEpoch uint64
140160
141161 for {
142162 select {
@@ -158,11 +178,19 @@ func (s *Service) Run(ctx context.Context) error {
158178 continue
159179 }
160180 if ! params .ScEnabled {
181+ if loggedDisabledEpoch != epochID {
182+ logtrace .Debug (ctx , "storage challenge disabled by on-chain params" , logtrace.Fields {"epoch_id" : epochID })
183+ loggedDisabledEpoch = epochID
184+ }
161185 lastRunEpoch = epochID
162186 lastRunOK = true
163187 continue
164188 }
165189 if lastRunOK && lastRunEpoch == epochID {
190+ if loggedAlreadyRanEpoch != epochID {
191+ logtrace .Debug (ctx , "storage challenge already ran this epoch; skipping" , logtrace.Fields {"epoch_id" : epochID })
192+ loggedAlreadyRanEpoch = epochID
193+ }
166194 continue
167195 }
168196
@@ -175,13 +203,36 @@ func (s *Service) Run(ctx context.Context) error {
175203
176204 challengers := deterministic .SelectChallengers (anchor .ActiveSupernodeAccounts , anchor .Seed , epochID , params .ScChallengersPerEpoch )
177205 if ! containsString (challengers , s .identity ) {
206+ if loggedNotSelectedEpoch != epochID {
207+ logtrace .Debug (ctx , "storage challenge: not selected challenger; skipping" , logtrace.Fields {
208+ "epoch_id" : epochID ,
209+ "identity" : s .identity ,
210+ "selected" : len (challengers ),
211+ "sc_param" : params .ScChallengersPerEpoch ,
212+ })
213+ loggedNotSelectedEpoch = epochID
214+ }
178215 lastRunEpoch = epochID
179216 lastRunOK = true
180217 continue
181218 }
182219
183- jitterMs := deterministic .DeterministicJitterMs (anchor .Seed , epochID , s .identity , scStartJitterMs )
220+ // Bound jitter by a conservative estimate of epoch duration (assume ~1s blocks).
221+ // This is intentionally simple and is primarily to avoid sleeping past the epoch window.
222+ jitterMaxMs := scStartJitterMs
223+ epochBudgetMs := uint64 (params .EpochLengthBlocks ) * 1000
224+ if epochBudgetMs > 0 && epochBudgetMs / 2 < jitterMaxMs {
225+ jitterMaxMs = epochBudgetMs / 2
226+ }
227+
228+ jitterMs := deterministic .DeterministicJitterMs (anchor .Seed , epochID , s .identity , jitterMaxMs )
184229 if jitterMs > 0 {
230+ logtrace .Debug (ctx , "storage challenge jitter sleep" , logtrace.Fields {
231+ "epoch_id" : epochID ,
232+ "jitter_ms" : jitterMs ,
233+ "jitter_max_ms" : jitterMaxMs ,
234+ "challenger_id" : s .identity ,
235+ })
185236 timer := time .NewTimer (time .Duration (jitterMs ) * time .Millisecond )
186237 select {
187238 case <- ctx .Done ():
@@ -191,7 +242,7 @@ func (s *Service) Run(ctx context.Context) error {
191242 }
192243 }
193244
194- if err := s .runEpoch (ctx , anchor , params ); err != nil {
245+ if err := s .runEpoch (ctx , anchor , params , lookbackEpochs , respTimeout , affirmTimeout ); err != nil {
195246 logtrace .Warn (ctx , "storage challenge epoch run error" , logtrace.Fields {
196247 "epoch_id" : epochID ,
197248 "error" : err .Error (),
@@ -207,7 +258,7 @@ func (s *Service) Run(ctx context.Context) error {
207258 }
208259}
209260
210- func (s * Service ) initClients (ctx context. Context ) error {
261+ func (s * Service ) initClients () error {
211262 validator := lumera .NewSecureKeyExchangeValidator (s .lumera )
212263
213264 grpcCreds , err := credentials .NewClientCreds (& credentials.ClientOptions {
@@ -254,10 +305,10 @@ func (s *Service) auditParams(ctx context.Context) (audittypes.Params, bool) {
254305 return p , true
255306}
256307
257- func (s * Service ) runEpoch (ctx context.Context , anchor audittypes.EpochAnchor , params audittypes.Params ) error {
308+ func (s * Service ) runEpoch (ctx context.Context , anchor audittypes.EpochAnchor , params audittypes.Params , lookbackEpochs uint32 , respTimeout time. Duration , affirmTimeout time. Duration ) error {
258309 epochID := anchor .EpochId
259310
260- lookback := s .candidateKeysLookbackDuration (ctx , params )
311+ lookback := s .candidateKeysLookbackDuration (ctx , params , lookbackEpochs )
261312 to := time .Now ().UTC ()
262313 from := to .Add (- lookback )
263314
@@ -275,9 +326,15 @@ func (s *Service) runEpoch(ctx context.Context, anchor audittypes.EpochAnchor, p
275326 if len (fileKeys ) == 0 {
276327 return nil
277328 }
329+ logtrace .Debug (ctx , "storage challenge selected file keys" , logtrace.Fields {
330+ "epoch_id" : epochID ,
331+ "challenger_id" : s .identity ,
332+ "keys_total" : len (keys ),
333+ "file_keys" : strings .Join (fileKeys , "," ),
334+ })
278335
279336 for _ , fileKey := range fileKeys {
280- if err := s .runChallengeForFile (ctx , anchor , params , fileKey ); err != nil {
337+ if err := s .runChallengeForFile (ctx , anchor , params , fileKey , respTimeout , affirmTimeout ); err != nil {
281338 logtrace .Warn (ctx , "storage challenge file run error" , logtrace.Fields {
282339 "epoch_id" : epochID ,
283340 "file_key" : fileKey ,
@@ -289,7 +346,7 @@ func (s *Service) runEpoch(ctx context.Context, anchor audittypes.EpochAnchor, p
289346 return nil
290347}
291348
292- func (s * Service ) runChallengeForFile (ctx context.Context , anchor audittypes.EpochAnchor , params audittypes.Params , fileKey string ) error {
349+ func (s * Service ) runChallengeForFile (ctx context.Context , anchor audittypes.EpochAnchor , params audittypes.Params , fileKey string , respTimeout time. Duration , affirmTimeout time. Duration ) error {
293350 epochID := anchor .EpochId
294351
295352 replicas , err := deterministic .SelectReplicaSet (anchor .ActiveSupernodeAccounts , fileKey , scReplicaCount )
@@ -301,6 +358,13 @@ func (s *Service) runChallengeForFile(ctx context.Context, anchor audittypes.Epo
301358 if recipient == "" {
302359 return nil
303360 }
361+ logtrace .Debug (ctx , "storage challenge selected recipient/observers" , logtrace.Fields {
362+ "epoch_id" : epochID ,
363+ "file_key" : fileKey ,
364+ "challenger_id" : s .identity ,
365+ "recipient_id" : recipient ,
366+ "observers" : strings .Join (observers , "," ),
367+ })
304368
305369 recipientAddr , err := s .supernodeGRPCAddr (ctx , recipient )
306370 if err != nil {
@@ -342,7 +406,7 @@ func (s *Service) runChallengeForFile(ctx context.Context, anchor audittypes.Epo
342406 ObserverIds : append ([]string (nil ), observers ... ),
343407 }
344408
345- resp , err := s .callGetSliceProof (ctx , recipientAddr , req , scResponseTimeout )
409+ resp , err := s .callGetSliceProof (ctx , recipient , recipientAddr , req , respTimeout )
346410 if err != nil || resp == nil || ! resp .Ok {
347411 failure := "RECIPIENT_ERROR"
348412 if err != nil {
@@ -377,9 +441,12 @@ func (s *Service) runChallengeForFile(ctx context.Context, anchor audittypes.Epo
377441 }
378442
379443 timeout := scAffirmationTimeout
444+ if affirmTimeout > 0 {
445+ timeout = affirmTimeout
446+ }
380447
381448 for _ , peer := range observerPeers {
382- vr , verr := s .callVerifySliceProof (ctx , peer .addr , verifyReq , timeout )
449+ vr , verr := s .callVerifySliceProof (ctx , peer .id , peer . addr , verifyReq , timeout )
383450 if verr == nil && vr != nil && vr .Ok {
384451 okCount ++
385452 }
@@ -453,11 +520,13 @@ func parseHostAndPort(address string, defaultPort int) (host string, port int, o
453520 return address , defaultPort , true
454521}
455522
456- func (s * Service ) callGetSliceProof (ctx context.Context , address string , req * supernode.GetSliceProofRequest , timeout time.Duration ) (* supernode.GetSliceProofResponse , error ) {
523+ func (s * Service ) callGetSliceProof (ctx context.Context , remoteIdentity string , address string , req * supernode.GetSliceProofRequest , timeout time.Duration ) (* supernode.GetSliceProofResponse , error ) {
457524 cctx , cancel := context .WithTimeout (ctx , timeout )
458525 defer cancel ()
459526
460- conn , err := s .grpcClient .Connect (cctx , address , s .grpcOpts )
527+ // secure gRPC requires the peer identity in the dial target
528+ // (format: "<remoteIdentity>@<host:port>") so the handshake can authenticate the peer.
529+ conn , err := s .grpcClient .Connect (cctx , fmt .Sprintf ("%s@%s" , strings .TrimSpace (remoteIdentity ), address ), s .grpcOpts )
461530 if err != nil {
462531 return nil , err
463532 }
@@ -467,11 +536,12 @@ func (s *Service) callGetSliceProof(ctx context.Context, address string, req *su
467536 return client .GetSliceProof (cctx , req )
468537}
469538
470- func (s * Service ) callVerifySliceProof (ctx context.Context , address string , req * supernode.VerifySliceProofRequest , timeout time.Duration ) (* supernode.VerifySliceProofResponse , error ) {
539+ func (s * Service ) callVerifySliceProof (ctx context.Context , remoteIdentity string , address string , req * supernode.VerifySliceProofRequest , timeout time.Duration ) (* supernode.VerifySliceProofResponse , error ) {
471540 cctx , cancel := context .WithTimeout (ctx , timeout )
472541 defer cancel ()
473542
474- conn , err := s .grpcClient .Connect (cctx , address , s .grpcOpts )
543+ // Production behavior: secure gRPC requires "<remoteIdentity>@<host:port>" (see callGetSliceProof).
544+ conn , err := s .grpcClient .Connect (cctx , fmt .Sprintf ("%s@%s" , strings .TrimSpace (remoteIdentity ), address ), s .grpcOpts )
475545 if err != nil {
476546 return nil , err
477547 }
@@ -483,6 +553,14 @@ func (s *Service) callVerifySliceProof(ctx context.Context, address string, req
483553
484554func (s * Service ) maybeSubmitEvidence (ctx context.Context , params audittypes.Params , epochID uint64 , challengeID , fileKey , recipient , failureType , transcriptHashHex string ) error {
485555 if ! s .cfg .SubmitEvidence || ! params .ScEnabled {
556+ logtrace .Debug (ctx , "storage challenge: evidence submission skipped" , logtrace.Fields {
557+ "epoch_id" : epochID ,
558+ "challenge_id" : challengeID ,
559+ "recipient_id" : recipient ,
560+ "failure_type" : failureType ,
561+ "submit_evidence_config" : s .cfg .SubmitEvidence ,
562+ "sc_enabled_param" : params .ScEnabled ,
563+ })
486564 return nil
487565 }
488566
@@ -571,8 +649,7 @@ func containsString(list []string, v string) bool {
571649 return false
572650}
573651
574- func (s * Service ) candidateKeysLookbackDuration (ctx context.Context , params audittypes.Params ) time.Duration {
575- epochs := scCandidateKeysLookbackEpochs
652+ func (s * Service ) candidateKeysLookbackDuration (ctx context.Context , params audittypes.Params , epochs uint32 ) time.Duration {
576653 if epochs == 0 {
577654 epochs = 1
578655 }