From 742e6248ac235d89ba78ffdc22eb6bb5c4345032 Mon Sep 17 00:00:00 2001 From: BrandonCheng0121 Date: Tue, 16 Jun 2026 14:47:44 +0800 Subject: [PATCH] [Accton][wedge800cact] Fix VerifyCallbacksOnMacEntryChange warmboot timeout After warmboot, the flood-prevention port may remain DOWN (preserved from coldboot state) or may be in a transient state due to link flap. Calling bringDownPort directly on an already-DOWN port causes LinkStateToggler to wait indefinitely because SDK won't generate a new DOWN notification when state hasn't changed. Add bringDownPortIfUp() helper that polls for the port to come UP (10s timeout, 100ms interval) before calling bringDownPort. If the port stays DOWN (already in desired state), skip the bringDown call and proceed with the test. --- .../agent_hw_tests/AgentMacLearningTests.cpp | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/fboss/agent/test/agent_hw_tests/AgentMacLearningTests.cpp b/fboss/agent/test/agent_hw_tests/AgentMacLearningTests.cpp index 4698dcf636d7e..67c303f34a56b 100644 --- a/fboss/agent/test/agent_hw_tests/AgentMacLearningTests.cpp +++ b/fboss/agent/test/agent_hw_tests/AgentMacLearningTests.cpp @@ -376,6 +376,38 @@ class AgentMacLearningTest : public AgentHwTest { L2LearningUpdateObserverUtil l2LearningObserver_; + /* + * Helper to safely bring down a port that may already be DOWN after warmboot. + * Polls for the port to come UP (with timeout), then calls bringDownPort(). + * If the port stays DOWN (already in desired state), skips the call to avoid + * LinkStateToggler hanging indefinitely waiting for a DOWN notification that + * SDK won't generate. 
+ */ + void bringDownPortIfUp(PortID port) { + constexpr int kMaxWaitMs = 10000; + constexpr int kPollIntervalMs = 100; + XLOG(DBG2) << "Waiting up to " << kMaxWaitMs << "ms for port " << port + << " to come UP before bringDown"; + for (int elapsed = 0; elapsed < kMaxWaitMs; elapsed += kPollIntervalMs) { + auto ports = getProgrammedState()->getPorts(); + auto portState = ports->getNodeIf(port); + if (portState && portState->isUp()) { + XLOG(DBG2) << "Port " << port << " is UP after " << elapsed + << "ms, proceeding with bringDownPort"; + getAgentEnsemble()->bringDownPort(port); + return; + } + /* sleep override */ + std::this_thread::sleep_for(std::chrono::milliseconds(kPollIntervalMs)); + } + // Port never came UP - this is expected after warmboot if the port was + // already DOWN in coldboot state. Log at WARN level so it's visible in + // test output for debugging warmboot regressions. + XLOG(WARN) << "Port " << port << " still DOWN after " << kMaxWaitMs + << "ms - skipping bringDownPort. If this port should have come " + << "UP, investigate a potential warmboot regression."; + } + private: bool wasMacLearntInHw(bool shouldExist, MacAddress mac) { bringUpPort(masterLogicalPortIds()[1]); @@ -911,7 +943,14 @@ TEST_F(AgentMacSwLearningModeTest, VerifyCallbacksOnMacEntryChange) { // Disable aging, so entry stays in L2 table when we verify. utility::setMacAgeTimerSeconds(getAgentEnsemble(), 0); enum class MacOp { ASSOCIATE, DISSOASSOCIATE, DELETE }; - getAgentEnsemble()->bringDownPort(masterLogicalPortIds()[1]); + + // After warmboot, the flood-prevention port may remain DOWN (preserved from + // coldboot state) or may be in a transient state due to link flap. If we + // call bringDownPort directly and the port is already DOWN, SDK won't + // generate a new DOWN notification, causing LinkStateToggler to wait + // indefinitely. 
+ bringDownPortIfUp(masterLogicalPortIds()[1]); + l2LearningObserver_.startObserving(getAgentEnsemble()); induceMacLearning(physPortDescr()); auto doMacOp = [this, isTH3](MacOp op) {