Skip to content

Commit db7a2fb

Browse files
committed
test(bootstrap): add tests for namespace-not-ready diagnostic fallback
Cover the three key behaviors introduced by the diagnostic fix:

- generic_failure_diagnosis suggests doctor logs/check commands
- Plain namespace timeout returns None from diagnose_failure (confirming the generic fallback is necessary)
- Container logs enable pattern matching for namespace errors that would otherwise go undiagnosed (node pressure, corrupted state, no route, network connectivity)
- End-to-end fallback pattern mirrors the actual CLI unwrap_or_else chain
1 parent b74da9d commit db7a2fb

File tree

1 file changed

+192
-0
lines changed

1 file changed

+192
-0
lines changed

crates/openshell-bootstrap/src/errors.rs

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -654,4 +654,196 @@ mod tests {
654654
);
655655
assert!(d.retryable);
656656
}
657+
658+
// -- generic_failure_diagnosis tests --
659+
660+
// The generic diagnosis must list at least one recovery command that
// contains `openshell doctor logs`.
#[test]
fn generic_diagnosis_suggests_doctor_logs() {
    let diagnosis = generic_failure_diagnosis("my-gw");

    // Only recovery steps that carry a command are relevant here.
    let commands: Vec<&str> = diagnosis
        .recovery_steps
        .iter()
        .filter_map(|step| step.command.as_deref())
        .collect();

    let mentions_doctor_logs = commands
        .iter()
        .any(|cmd| cmd.contains("openshell doctor logs"));
    assert!(
        mentions_doctor_logs,
        "expected 'openshell doctor logs' in recovery commands, got: {commands:?}"
    );
}
673+
674+
// The generic diagnosis must list at least one recovery command that
// contains `openshell doctor check`.
#[test]
fn generic_diagnosis_suggests_doctor_check() {
    let diagnosis = generic_failure_diagnosis("my-gw");

    // Only recovery steps that carry a command are relevant here.
    let commands: Vec<&str> = diagnosis
        .recovery_steps
        .iter()
        .filter_map(|step| step.command.as_deref())
        .collect();

    let mentions_doctor_check = commands
        .iter()
        .any(|cmd| cmd.contains("openshell doctor check"));
    assert!(
        mentions_doctor_check,
        "expected 'openshell doctor check' in recovery commands, got: {commands:?}"
    );
}
689+
690+
// The gateway name passed to `generic_failure_diagnosis` must appear in the
// text of the recovery commands it returns.
#[test]
fn generic_diagnosis_includes_gateway_name() {
    let diagnosis = generic_failure_diagnosis("custom-name");

    // Flatten every present command into one space-separated string so a
    // single substring check covers all of them.
    let step_commands: Vec<&str> = diagnosis
        .recovery_steps
        .iter()
        .filter_map(|step| step.command.as_deref())
        .collect();
    let all_text: String = step_commands.join(" ");

    assert!(
        all_text.contains("custom-name"),
        "expected gateway name in recovery commands, got: {all_text}"
    );
}
704+
705+
// -- fallback behavior tests --
706+
707+
// A plain namespace timeout carrying only kubectl output, with no container
// logs, is described by the original author as the most common user-facing
// error. It must NOT match any specific pattern, so that the caller can fall
// back to `generic_failure_diagnosis`.
#[test]
fn namespace_timeout_without_logs_returns_none() {
    let error_message = "K8s namespace not ready\n\nCaused by:\n \
        timed out waiting for namespace 'openshell' to exist: \
        error: the server doesn't have a resource type \"namespace\"";
    let no_logs: Option<&str> = None;

    let diagnosis = diagnose_failure("test", error_message, no_logs);

    assert!(
        diagnosis.is_none(),
        "plain namespace timeout should not match any specific pattern, got: {:?}",
        diagnosis.map(|d| d.summary)
    );
}
725+
726+
// The error message alone is generic; the node-pressure marker is present
// only in the container logs. The diagnosis must still be produced and its
// summary must mention "pressure".
#[test]
fn namespace_timeout_with_pressure_logs_matches() {
    let error_message = "K8s namespace not ready\n\nCaused by:\n \
        timed out waiting for namespace 'openshell' to exist: <kubectl output>";
    let container_logs = Some("HEALTHCHECK_NODE_PRESSURE: DiskPressure");

    let found = diagnose_failure("test", error_message, container_logs)
        .expect("expected node pressure diagnosis");

    assert!(
        found.summary.contains("pressure"),
        "expected pressure in summary, got: {}",
        found.summary
    );
}
744+
745+
// Container logs showing a forbidden configmap read (RBAC corruption, per the
// original author's note) must yield a diagnosis whose summary contains
// "Corrupted".
#[test]
fn namespace_timeout_with_corrupted_state_logs_matches() {
    let error_message = "K8s namespace not ready\n\nCaused by:\n \
        timed out waiting for namespace 'openshell' to exist: <output>";
    let container_logs = Some(
        "configmaps \"extension-apiserver-authentication\" is forbidden: \
        User cannot get resource",
    );

    let found = diagnose_failure("test", error_message, container_logs)
        .expect("expected corrupted state diagnosis");

    assert!(
        found.summary.contains("Corrupted"),
        "expected Corrupted in summary, got: {}",
        found.summary
    );
}
765+
766+
// Container logs reporting a missing default route must yield a diagnosis
// whose summary contains "networking".
#[test]
fn namespace_timeout_with_no_route_logs_matches() {
    let container_logs = Some("Error: no default route present before starting k3s");

    let found = diagnose_failure("test", "K8s namespace not ready", container_logs)
        .expect("expected networking diagnosis");

    assert!(
        found.summary.contains("networking"),
        "expected networking in summary, got: {}",
        found.summary
    );
}
781+
782+
// The phrase "connection refused" occurs only in the container logs, not in
// the error message, so a diagnosis here shows that `diagnose_failure` also
// considers the logs when matching patterns.
#[test]
fn diagnose_failure_with_logs_uses_combined_text() {
    let container_logs = Some("dial tcp 127.0.0.1:6443: connect: connection refused");

    let found = diagnose_failure("test", "K8s namespace not ready", container_logs)
        .expect("expected diagnosis from container logs pattern");

    // Either wording is accepted as evidence of a network-related diagnosis.
    let looks_like_network = ["Network", "connectivity"]
        .iter()
        .any(|&needle| found.summary.contains(needle));
    assert!(
        looks_like_network,
        "expected network diagnosis, got: {}",
        found.summary
    );
}
803+
804+
// -- end-to-end fallback pattern (mirrors CLI code) --
805+
806+
// End-to-end check of the fallback chain the original author says the CLI
// uses:
//   diagnose_failure(...).unwrap_or_else(|| generic_failure_diagnosis(name))
// A plain namespace timeout with uninformative container logs should match no
// specific pattern, so the generic diagnosis (with its doctor commands) is
// what the user ends up with.
#[test]
fn fallback_to_generic_produces_actionable_diagnosis() {
    let gateway = "my-gw";
    let err_str = "K8s namespace not ready\n\nCaused by:\n \
        timed out waiting for namespace 'openshell' to exist: \
        error: the server doesn't have a resource type \"namespace\"";
    let container_logs = Some("k3s is starting\nwaiting for kube-apiserver");

    let diagnosis = diagnose_failure(gateway, err_str, container_logs)
        .unwrap_or_else(|| generic_failure_diagnosis(gateway));

    // The generic summary proves that no specific pattern matched.
    assert_eq!(diagnosis.summary, "Gateway failed to start");

    // The fallback must still give the user something to do.
    assert!(
        !diagnosis.recovery_steps.is_empty(),
        "generic diagnosis should have recovery steps"
    );

    // Join every present command, one per line, for the substring checks.
    let step_commands: Vec<&str> = diagnosis
        .recovery_steps
        .iter()
        .filter_map(|step| step.command.as_deref())
        .collect();
    let all_commands: String = step_commands.join("\n");

    assert!(
        all_commands.contains("doctor logs"),
        "should suggest 'doctor logs', got: {all_commands}"
    );
    assert!(
        all_commands.contains("doctor check"),
        "should suggest 'doctor check', got: {all_commands}"
    );
    assert!(
        all_commands.contains("my-gw"),
        "commands should include gateway name, got: {all_commands}"
    );
}
657849
}

0 commit comments

Comments
 (0)