Skip to content

Commit 5607fc2

Browse files
committed
fix(sandbox): defer symlink resolution until container filesystem is ready
The one-shot resolve ran immediately after ProcessHandle::spawn, before the child's mount namespace and /proc/<pid>/root/ were populated. This caused symlink_metadata to fail with ENOENT on every binary, and the poll loop never retried because it only reloads when the policy hash changes on the server. Replace the synchronous resolve with an async task that probes /proc/<pid>/root/ with retries (10 attempts, 500ms apart, 5s total). The child's mount namespace is typically ready within a few hundred ms. Also inline error values into warning message strings so they appear in default log output (not just as structured tracing fields that may be elided), and add debug-level logs before each symlink_metadata call to aid diagnosis.
1 parent 907b9fe commit 5607fc2

File tree

2 files changed

+73
-44
lines changed

2 files changed

+73
-44
lines changed

crates/openshell-sandbox/src/lib.rs

Lines changed: 54 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -714,30 +714,64 @@ pub async fn run_sandbox(
714714
.build()
715715
);
716716

717-
// Resolve policy binary symlinks now that the container filesystem is
718-
// accessible via /proc/<pid>/root/. This expands symlinks like
719-
// /usr/bin/python3 → /usr/bin/python3.11 in the OPA policy data so that
720-
// either path matches at evaluation time.
717+
// Spawn a task to resolve policy binary symlinks after the container
718+
// filesystem becomes accessible via /proc/<pid>/root/. This expands
719+
// symlinks like /usr/bin/python3 → /usr/bin/python3.11 in the OPA
720+
// policy data so that either path matches at evaluation time.
721721
//
722-
// If /proc/<pid>/root/ is inaccessible (restricted ptrace, rootless
723-
// container, etc.), resolve_binary_in_container logs a warning per binary
724-
// and falls back to literal path matching. The reload itself still
725-
// succeeds — only the symlink expansion is skipped.
722+
// We cannot do this synchronously here because the child process has
723+
// just been spawned and its mount namespace / procfs entries may not
724+
// be fully populated yet. Instead, we probe with retries until
725+
// /proc/<pid>/root/ is accessible or we exhaust attempts.
726726
if let (Some(engine), Some(proto)) = (&opa_engine, &retained_proto) {
727-
let pid = handle.pid();
728-
if let Err(e) = engine.reload_from_proto_with_pid(proto, pid) {
727+
let resolve_engine = engine.clone();
728+
let resolve_proto = proto.clone();
729+
let resolve_pid = entrypoint_pid.clone();
730+
tokio::spawn(async move {
731+
let pid = resolve_pid.load(Ordering::Acquire);
732+
let probe_path = format!("/proc/{pid}/root/");
733+
// Retry up to 10 times with 500ms intervals (5s total).
734+
// The child's mount namespace is typically ready within a
735+
// few hundred ms of spawn.
736+
for attempt in 1..=10 {
737+
tokio::time::sleep(Duration::from_millis(500)).await;
738+
if std::fs::metadata(&probe_path).is_ok() {
739+
info!(
740+
pid = pid,
741+
attempt = attempt,
742+
"Container filesystem accessible, resolving policy binary symlinks"
743+
);
744+
match resolve_engine.reload_from_proto_with_pid(&resolve_proto, pid) {
745+
Ok(()) => {
746+
info!(
747+
pid = pid,
748+
"Policy binary symlink resolution complete \
749+
(check logs above for per-binary results)"
750+
);
751+
}
752+
Err(e) => {
753+
warn!(
754+
"Failed to rebuild OPA engine with symlink resolution \
755+
(non-fatal, falling back to literal path matching): {e}"
756+
);
757+
}
758+
}
759+
return;
760+
}
761+
debug!(
762+
pid = pid,
763+
attempt = attempt,
764+
probe_path = %probe_path,
765+
"Container filesystem not yet accessible, retrying symlink resolution"
766+
);
767+
}
729768
warn!(
730-
error = %e,
731-
"Failed to rebuild OPA engine with symlink resolution (non-fatal, \
732-
falling back to literal path matching)"
733-
);
734-
} else {
735-
info!(
736-
pid = pid,
737-
"Policy binary symlink resolution attempted via container filesystem \
738-
(check logs above for per-binary results)"
769+
"Container filesystem /proc/{pid}/root/ not accessible after 10 attempts (5s); \
770+
binary symlink resolution skipped. Policy binary paths will be matched literally. \
771+
If binaries are symlinks, use canonical paths in your policy \
772+
(run 'readlink -f <path>' inside the sandbox)"
739773
);
740-
}
774+
});
741775
}
742776

743777
// Spawn background policy poll task (gRPC mode only).

crates/openshell-sandbox/src/opa.rs

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,10 @@ fn resolve_binary_in_container(policy_path: &str, entrypoint_pid: u32) -> Option
670670
for _ in 0..40 {
671671
let container_path = format!("/proc/{entrypoint_pid}/root{}", resolved.display());
672672

673+
tracing::debug!(
674+
"Symlink resolution: probing container_path={container_path} for policy_path={policy_path} pid={entrypoint_pid}"
675+
);
676+
673677
let meta = match std::fs::symlink_metadata(&container_path) {
674678
Ok(m) => m,
675679
Err(e) => {
@@ -678,23 +682,18 @@ fn resolve_binary_in_container(policy_path: &str, entrypoint_pid: u32) -> Option
678682
// legitimately not exist (broken symlink chain).
679683
if resolved.as_os_str() == policy_path {
680684
tracing::warn!(
681-
path = %policy_path,
682-
container_path = %container_path,
683-
pid = entrypoint_pid,
684-
error = %e,
685-
"Cannot access container filesystem for symlink resolution; \
686-
binary paths in policy will be matched literally. If a policy \
687-
binary is a symlink (e.g., /usr/bin/python3 -> python3.11), \
688-
use the canonical path instead, or run with CAP_SYS_PTRACE"
685+
"Cannot access container filesystem for symlink resolution: \
686+
path={policy_path} container_path={container_path} pid={entrypoint_pid} \
687+
error={e}. Binary paths in policy will be matched literally. \
688+
If this binary is a symlink (e.g., /usr/bin/python3 -> python3.11), \
689+
use the canonical path instead, or run with CAP_SYS_PTRACE."
689690
);
690691
} else {
691692
tracing::warn!(
692-
original = %policy_path,
693-
current = %resolved.display(),
694-
pid = entrypoint_pid,
695-
error = %e,
696-
"Symlink chain broken during resolution; \
697-
binary will be matched by original path only"
693+
"Symlink chain broken during resolution: \
694+
original={policy_path} current={} pid={entrypoint_pid} error={e}. \
695+
Binary will be matched by original path only.",
696+
resolved.display()
698697
);
699698
}
700699
return None;
@@ -710,12 +709,10 @@ fn resolve_binary_in_container(policy_path: &str, entrypoint_pid: u32) -> Option
710709
Ok(t) => t,
711710
Err(e) => {
712711
tracing::warn!(
713-
path = %policy_path,
714-
current = %resolved.display(),
715-
pid = entrypoint_pid,
716-
error = %e,
717-
"Symlink detected but read_link failed; \
718-
binary will be matched by original path only"
712+
"Symlink detected but read_link failed: \
713+
path={policy_path} current={} pid={entrypoint_pid} error={e}. \
714+
Binary will be matched by original path only.",
715+
resolved.display()
719716
);
720717
return None;
721718
}
@@ -740,10 +737,8 @@ fn resolve_binary_in_container(policy_path: &str, entrypoint_pid: u32) -> Option
740737
None
741738
} else {
742739
tracing::info!(
743-
original = %policy_path,
744-
resolved = %resolved_str,
745-
pid = entrypoint_pid,
746-
"Resolved policy binary symlink via container filesystem"
740+
"Resolved policy binary symlink via container filesystem: \
741+
original={policy_path} resolved={resolved_str} pid={entrypoint_pid}"
747742
);
748743
Some(resolved_str)
749744
}

0 commit comments

Comments
 (0)