tinyhumansai · senamakel · Jul 3, 2026 · Jul 3, 2026 · Jul 3, 2026
diff --git a/src/openhuman/agent_registry/agents/loader.rs b/src/openhuman/agent_registry/agents/loader.rs
@@ -291,13 +291,13 @@ pub const BUILTINS: &[BuiltinAgent] = &[
         id: "frontend_agent",
         toml: include_str!("../../orchestration/frontend_agent/agent.toml"),
         prompt_fn: crate::openhuman::orchestration::frontend_agent::prompt::build,
-        graph_fn: crate::openhuman::orchestration::frontend_agent::graph::graph,
+        graph_fn: Some(crate::openhuman::orchestration::frontend_agent::graph::graph),
     },
     BuiltinAgent {
         id: "reasoning_agent",
         toml: include_str!("../../orchestration/reasoning_agent/agent.toml"),
         prompt_fn: crate::openhuman::orchestration::reasoning_agent::prompt::build,
-        graph_fn: crate::openhuman::orchestration::reasoning_agent::graph::graph,
+        graph_fn: Some(crate::openhuman::orchestration::reasoning_agent::graph::graph),
     },
 ];
 

diff --git a/src/openhuman/app_state/ops.rs b/src/openhuman/app_state/ops.rs
@@ -35,14 +35,15 @@ const LOG_PREFIX: &str = "[app_state]";
 const APP_STATE_FILENAME: &str = "app-state.json";
 const CURRENT_USER_REFRESH_TTL: Duration = Duration::from_secs(5);
 // Runtime-status widgets (screen intelligence / local AI / autocomplete /
-// service) tolerate ~10s of staleness. A short TTL (was 2s < the ~2.4s build
-// time) meant the cache was stale before it was even written, so the frontend's
-// ~4s `app_state_snapshot` poll never hit the fast path and every poll re-ran
-// the full 4-way fan-out (issue #4249 profiling: this, combined with the lack
-// of a single-flight gate, pegged ~2 cores and starved the shared tokio runtime
-// the agent harness runs on — the agent's turns stalled 50-100s between model
-// calls even though inference itself was idle).
-const RUNTIME_SNAPSHOT_TTL: Duration = Duration::from_secs(10);
+// service). The real fix for the rebuild stampede (issue #4249 profiling: ~2
+// cores pegged, ~4s `app_state_snapshot` polls each launching their own 4-way
+// fan-out, starving the shared tokio runtime the agent harness runs on) is the
+// single-flight gate below — it collapses N concurrent rebuilds into one. The
+// TTL only controls how often a *coalesced* rebuild runs, so a short window is
+// fine and keeps the runtime status fresh. Kept at 2s so callers that need a
+// post-change snapshot only have to age the cache out briefly (see the
+// `app_state_snapshot_degrades_runtime_service_status_failures` e2e).
+const RUNTIME_SNAPSHOT_TTL: Duration = Duration::from_secs(2);
 const AUTH_FETCH_TIMEOUT: Duration = Duration::from_secs(5);
 const RUNTIME_SNAPSHOT_TIMEOUT: Duration = Duration::from_secs(10);
 const SNAPSHOT_SUB_OP_TIMEOUT: Duration = Duration::from_secs(5);

diff --git a/src/openhuman/mcp_server/resources.rs b/src/openhuman/mcp_server/resources.rs
@@ -271,6 +271,21 @@ const RESOURCE_CATALOG: &[PromptResource] = &[
         description: "Sandboxed worker that runs installed skill packages.",
         content: include_str!("../skill_runtime/agent/skill_executor/prompt.md"),
     },
+    // ── Subconscious orchestration wake-graph agents (#4430) ───────────────
+    PromptResource {
+        uri: "openhuman://prompts/agents/frontend_agent",
+        name: "frontend_agent",
+        description: "Subconscious wake-graph worker that normalizes and routes \
+                      an incoming channel message (command detection, reply framing).",
+        content: include_str!("../orchestration/frontend_agent/prompt.md"),
+    },
+    PromptResource {
+        uri: "openhuman://prompts/agents/reasoning_agent",
+        name: "reasoning_agent",
+        description: "Subconscious wake-graph worker that runs the reasoning / \
+                      execution stage of a wake cycle.",
+        content: include_str!("../orchestration/reasoning_agent/prompt.md"),
+    },
 ];
 
 /// Returns the `resources/list` result payload listing every catalog entry.

diff --git a/src/openhuman/orchestration/graph/mod.rs b/src/openhuman/orchestration/graph/mod.rs
@@ -39,7 +39,7 @@ use tinyagents::graph::{
 
 use crate::openhuman::config::Config;
 use crate::openhuman::tinyagents::observability::GraphTracingSink;
-use crate::openhuman::tinyagents::SqlRunLedgerCheckpointer;
+use tinyagents::graph::SqliteCheckpointer;
 
 pub use state::{CompressedEntry, OrchestrationState, WorldDiff, WorldDiffEntry};
 
@@ -381,7 +381,15 @@ pub async fn run_orchestration_graph(
     let threshold = config.orchestration.effective_evict_threshold();
     let thread_id = format!("orchestration:{}", state.session_id);
     let label = thread_id.clone();
-    let checkpointer = Arc::new(SqlRunLedgerCheckpointer::<OrchestrationState>::new(config));
+    // Durable graph checkpoints ride the crate's `SqliteCheckpointer` (issue
+    // #4249, 04.3) at a dedicated `orchestration_checkpoints.db` under the
+    // workspace — the old run-ledger `SqlRunLedgerCheckpointer` was retired, so
+    // pre-swap in-flight wake graphs simply expire.
+    let checkpoint_db = config.workspace_dir.join("orchestration_checkpoints.db");
+    let checkpointer = Arc::new(
+        SqliteCheckpointer::<OrchestrationState>::open(&checkpoint_db)
+            .map_err(|e| anyhow::anyhow!("open orchestration checkpoint store: {e}"))?,
+    );
 
     tracing::debug!(
         target: LOG, session_id = %state.session_id, %thread_id,

diff --git a/src/openhuman/orchestration/graph/state.rs b/src/openhuman/orchestration/graph/state.rs
@@ -3,8 +3,8 @@
 //! `OrchestrationState` is the spec's single `StateGraph` state object: one
 //! value flows through the whole wake path (normalize → frontend → execute →
 //! frontend → send_dm → context_guard → END) and is checkpointed at every
-//! super-step boundary by [`SqlRunLedgerCheckpointer`](crate::openhuman::tinyagents::SqlRunLedgerCheckpointer)
-//! under the thread id `orchestration:<session_id>`.
+//! super-step boundary by the crate `tinyagents::graph::SqliteCheckpointer`
+//! (issue #4249, 04.3) under the thread id `orchestration:<session_id>`.
 //!
 //! Every field is serde-serializable so a mid-cycle crash can resume from the
 //! last persisted boundary with an identical state. Fields the later stages own

diff --git a/src/openhuman/orchestration/ops.rs b/src/openhuman/orchestration/ops.rs
@@ -727,9 +727,9 @@ impl OrchestrationRuntime for ProductionRuntime {
 mod tests {
     use super::*;
     use crate::openhuman::orchestration::types::OrchestrationMessage;
-    use crate::openhuman::tinyagents::SqlRunLedgerCheckpointer;
     use std::sync::atomic::{AtomicUsize, Ordering};
     use tinyagents::graph::checkpoint::Checkpointer;
+    use tinyagents::graph::SqliteCheckpointer;
 
     fn test_config(tmp: &tempfile::TempDir) -> Config {
         Config {
@@ -910,7 +910,12 @@ mod tests {
         .unwrap();
 
         // Checkpoints persisted → kill/restart could resume without re-sending.
-        let cp = SqlRunLedgerCheckpointer::<OrchestrationState>::new(config);
+        // Opens the same dedicated `orchestration_checkpoints.db` the wake graph
+        // writes to (issue #4249, 04.3 — crate `SqliteCheckpointer`).
+        let cp = SqliteCheckpointer::<OrchestrationState>::open(
+            &config.workspace_dir.join("orchestration_checkpoints.db"),
+        )
+        .expect("open orchestration checkpoint store");
         let list = cp.list("orchestration:h1").await.expect("list checkpoints");
         assert!(!list.is_empty(), "wake cycle persisted checkpoints");
     }