Skip to content

Commit 0815f82

Browse files
koikerjohntmyers
andauthored
perf(sandbox): streaming SHA256 and spawn_blocking for identity resolution (#555)
* perf(sandbox): streaming SHA256, spawn_blocking for identity resolution Key changes: - Replace full file read + SHA256 with streaming 64KB-buffered hash (saves 124MB allocation for node binary) - Wrap evaluate_opa_tcp in spawn_blocking to prevent blocking tokio runtime during heavy /proc I/O and SHA256 computation - Add file-based perf logging for profiling proxy latency phases Profiling data (node binary, 124MB): - Cold TOFU: ~890ms (read+hash), warm: 0ms (cache hit) - evaluate_opa_tcp: cold=1002ms, warm=11ms - OPA evaluation: 1ms - DNS+TCP connect: 166-437ms Made-with: Cursor Signed-off-by: Rafael Koike <koike.rafael@gmail.com> * refactor(sandbox): replace perf_log with tracing::debug Replace the custom file-based perf_log() helper with standard tracing::debug!() macros as requested in PR review. This removes the custom log file writes to /var/log/openshell-perf.log and routes all performance timing through the tracing framework at DEBUG level, consistent with the rest of the codebase. Made-with: Cursor * refactor(sandbox): reduce tracing to 6 key diagnostic logs Address PR review feedback: 1. Remove ~20 inner-phase timing logs, keeping only the 6 that tell the full diagnostic story: - evaluate_opa_tcp TOTAL (proxy.rs) - dns_resolve_and_tcp_connect (proxy.rs) - file_sha256 (procfs.rs) - verify_or_cache CACHE HIT / CACHE MISS / TOTAL cold (identity.rs) 2. Restore intent-describing comments that were replaced by timing logs: - "TOFU verify the immediate binary" (proxy.rs) - "Walk the process tree upward to collect ancestor binaries" (proxy.rs) - "Collect cmdline paths for script-based binary detection." (proxy.rs) - "First: scan descendants of the entrypoint process" (procfs.rs) - "Fallback: scan all of /proc in case the process isn't in the tree" (procfs.rs) - "Skip PIDs we already checked" (procfs.rs) 3. Preserve file path in file_sha256 read errors instead of losing context via into_diagnostic(). 4. Tests: 293 passed, 1 pre-existing failure (drop_privileges), 1 ignored. Made-with: Cursor * style(sandbox): apply rustfmt formatting to debug macros --------- Signed-off-by: Rafael Koike <koike.rafael@gmail.com> Co-authored-by: John Myers <johntmyers@users.noreply.github.com>
1 parent e8950e6 commit 0815f82

File tree

3 files changed

+98
-26
lines changed

3 files changed

+98
-26
lines changed

crates/openshell-sandbox/src/identity.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use std::fs::Metadata;
1616
use std::os::unix::fs::MetadataExt;
1717
use std::path::{Path, PathBuf};
1818
use std::sync::Mutex;
19+
use tracing::debug;
1920

2021
#[derive(Clone)]
2122
struct FileFingerprint {
@@ -100,6 +101,7 @@ impl BinaryIdentityCache {
100101
where
101102
F: FnMut(&Path) -> Result<String>,
102103
{
104+
let start = std::time::Instant::now();
103105
let metadata = std::fs::metadata(path)
104106
.map_err(|error| miette::miette!("Failed to stat {}: {error}", path.display()))?;
105107
let fingerprint = FileFingerprint::from_metadata(&metadata);
@@ -114,9 +116,20 @@ impl BinaryIdentityCache {
114116
if let Some(cached_binary) = &cached
115117
&& cached_binary.fingerprint == fingerprint
116118
{
119+
debug!(
120+
" verify_or_cache: {}ms CACHE HIT path={}",
121+
start.elapsed().as_millis(),
122+
path.display()
123+
);
117124
return Ok(cached_binary.hash.clone());
118125
}
119126

127+
debug!(
128+
" verify_or_cache: CACHE MISS size={} path={}",
129+
metadata.len(),
130+
path.display()
131+
);
132+
120133
let current_hash = hash_file(path)?;
121134

122135
let mut hashes = self
@@ -143,6 +156,12 @@ impl BinaryIdentityCache {
143156
},
144157
);
145158

159+
debug!(
160+
" verify_or_cache TOTAL (cold): {}ms path={}",
161+
start.elapsed().as_millis(),
162+
path.display()
163+
);
164+
146165
Ok(current_hash)
147166
}
148167
}

crates/openshell-sandbox/src/procfs.rs

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66
//! Provides functions to resolve binary paths and compute file hashes
77
//! for process-identity binding in the OPA proxy policy engine.
88
9-
use miette::{IntoDiagnostic, Result};
9+
use miette::Result;
1010
use std::path::Path;
1111
#[cfg(target_os = "linux")]
1212
use std::path::PathBuf;
13+
use tracing::debug;
1314

1415
/// Read the binary path of a process via `/proc/{pid}/exe` symlink.
1516
///
@@ -229,16 +230,16 @@ fn parse_proc_net_tcp(pid: u32, peer_port: u16) -> Result<u64> {
229230
fn find_pid_by_socket_inode(inode: u64, entrypoint_pid: u32) -> Result<u32> {
230231
let target = format!("socket:[{inode}]");
231232

232-
// First: scan descendants of the entrypoint process (targeted, most likely to succeed)
233+
// First: scan descendants of the entrypoint process
233234
let descendants = collect_descendant_pids(entrypoint_pid);
235+
234236
for &pid in &descendants {
235237
if let Some(found) = check_pid_fds(pid, &target) {
236238
return Ok(found);
237239
}
238240
}
239241

240242
// Fallback: scan all of /proc in case the process isn't in the tree
241-
// (e.g., if /proc/<pid>/task/<tid>/children wasn't available)
242243
if let Ok(proc_dir) = std::fs::read_dir("/proc") {
243244
for entry in proc_dir.flatten() {
244245
let name = entry.file_name();
@@ -318,9 +319,32 @@ fn collect_descendant_pids(root_pid: u32) -> Vec<u32> {
318319
/// same hash, or the request is denied.
319320
pub fn file_sha256(path: &Path) -> Result<String> {
320321
use sha2::{Digest, Sha256};
322+
use std::io::Read;
323+
324+
let start = std::time::Instant::now();
325+
let mut file = std::fs::File::open(path)
326+
.map_err(|e| miette::miette!("Failed to open {}: {e}", path.display()))?;
327+
let mut hasher = Sha256::new();
328+
let mut buf = [0u8; 65536];
329+
let mut total_read = 0u64;
330+
loop {
331+
let n = file
332+
.read(&mut buf)
333+
.map_err(|e| miette::miette!("Failed to read {}: {e}", path.display()))?;
334+
if n == 0 {
335+
break;
336+
}
337+
total_read += n as u64;
338+
hasher.update(&buf[..n]);
339+
}
321340

322-
let bytes = std::fs::read(path).into_diagnostic()?;
323-
let hash = Sha256::digest(&bytes);
341+
let hash = hasher.finalize();
342+
debug!(
343+
" file_sha256: {}ms size={} path={}",
344+
start.elapsed().as_millis(),
345+
total_read,
346+
path.display()
347+
);
324348
Ok(hex::encode(hash))
325349
}
326350

crates/openshell-sandbox/src/proxy.rs

Lines changed: 50 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -336,15 +336,25 @@ async fn handle_tcp_connection(
336336
let peer_addr = client.peer_addr().into_diagnostic()?;
337337
let local_addr = client.local_addr().into_diagnostic()?;
338338

339-
// Evaluate OPA policy with process-identity binding
340-
let decision = evaluate_opa_tcp(
341-
peer_addr,
342-
&opa_engine,
343-
&identity_cache,
344-
&entrypoint_pid,
345-
&host_lc,
346-
port,
347-
);
339+
// Evaluate OPA policy with process-identity binding.
340+
// Wrapped in spawn_blocking because identity resolution does heavy sync I/O:
341+
// /proc scanning + SHA256 hashing of binaries (e.g. node at 124MB).
342+
let opa_clone = opa_engine.clone();
343+
let cache_clone = identity_cache.clone();
344+
let pid_clone = entrypoint_pid.clone();
345+
let host_clone = host_lc.clone();
346+
let decision = tokio::task::spawn_blocking(move || {
347+
evaluate_opa_tcp(
348+
peer_addr,
349+
&opa_clone,
350+
&cache_clone,
351+
&pid_clone,
352+
&host_clone,
353+
port,
354+
)
355+
})
356+
.await
357+
.map_err(|e| miette::miette!("identity resolution task panicked: {e}"))?;
348358

349359
// Extract action string and matched policy for logging
350360
let (matched_policy, deny_reason) = match &decision.action {
@@ -426,6 +436,7 @@ async fn handle_tcp_connection(
426436
}
427437

428438
// Defense-in-depth: resolve DNS and reject connections to internal IPs.
439+
let dns_connect_start = std::time::Instant::now();
429440
let mut upstream = if !raw_allowed_ips.is_empty() {
430441
// allowed_ips mode: validate resolved IPs against CIDR allowlist.
431442
// Loopback and link-local are still always blocked.
@@ -502,6 +513,11 @@ async fn handle_tcp_connection(
502513
}
503514
};
504515

516+
debug!(
517+
"handle_tcp_connection dns_resolve_and_tcp_connect: {}ms host={host_lc}",
518+
dns_connect_start.elapsed().as_millis()
519+
);
520+
505521
respond(&mut client, b"HTTP/1.1 200 Connection Established\r\n\r\n").await?;
506522

507523
// Check if endpoint has L7 config for protocol-aware inspection
@@ -736,7 +752,9 @@ fn evaluate_opa_tcp(
736752
);
737753
}
738754

755+
let total_start = std::time::Instant::now();
739756
let peer_port = peer_addr.port();
757+
740758
let (bin_path, binary_pid) = match crate::procfs::resolve_tcp_peer_identity(pid, peer_port) {
741759
Ok(r) => r,
742760
Err(e) => {
@@ -767,7 +785,6 @@ fn evaluate_opa_tcp(
767785
// Walk the process tree upward to collect ancestor binaries
768786
let ancestors = crate::procfs::collect_ancestor_binaries(binary_pid, pid);
769787

770-
// TOFU verify each ancestor binary
771788
for ancestor in &ancestors {
772789
if let Err(e) = identity_cache.verify_or_cache(ancestor) {
773790
return deny(
@@ -784,7 +801,6 @@ fn evaluate_opa_tcp(
784801
}
785802

786803
// Collect cmdline paths for script-based binary detection.
787-
// Excludes exe paths already captured in bin_path/ancestors to avoid duplicates.
788804
let mut exclude = ancestors.clone();
789805
exclude.push(bin_path.clone());
790806
let cmdline_paths = crate::procfs::collect_cmdline_paths(binary_pid, pid, &exclude);
@@ -798,7 +814,7 @@ fn evaluate_opa_tcp(
798814
cmdline_paths: cmdline_paths.clone(),
799815
};
800816

801-
match engine.evaluate_network_action(&input) {
817+
let result = match engine.evaluate_network_action(&input) {
802818
Ok(action) => ConnectDecision {
803819
action,
804820
binary: Some(bin_path),
@@ -813,7 +829,12 @@ fn evaluate_opa_tcp(
813829
ancestors,
814830
cmdline_paths,
815831
),
816-
}
832+
};
833+
debug!(
834+
"evaluate_opa_tcp TOTAL: {}ms host={host} port={port}",
835+
total_start.elapsed().as_millis()
836+
);
837+
result
817838
}
818839

819840
/// Non-Linux stub: OPA identity binding requires /proc.
@@ -1728,14 +1749,22 @@ async fn handle_forward_proxy(
17281749
let peer_addr = client.peer_addr().into_diagnostic()?;
17291750
let local_addr = client.local_addr().into_diagnostic()?;
17301751

1731-
let decision = evaluate_opa_tcp(
1732-
peer_addr,
1733-
&opa_engine,
1734-
&identity_cache,
1735-
&entrypoint_pid,
1736-
&host_lc,
1737-
port,
1738-
);
1752+
let opa_clone = opa_engine.clone();
1753+
let cache_clone = identity_cache.clone();
1754+
let pid_clone = entrypoint_pid.clone();
1755+
let host_clone = host_lc.clone();
1756+
let decision = tokio::task::spawn_blocking(move || {
1757+
evaluate_opa_tcp(
1758+
peer_addr,
1759+
&opa_clone,
1760+
&cache_clone,
1761+
&pid_clone,
1762+
&host_clone,
1763+
port,
1764+
)
1765+
})
1766+
.await
1767+
.map_err(|e| miette::miette!("identity resolution task panicked: {e}"))?;
17391768

17401769
// Build log context
17411770
let binary_str = decision

0 commit comments

Comments
 (0)