Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/openhuman/agent_registry/agents/loader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,13 +291,13 @@ pub const BUILTINS: &[BuiltinAgent] = &[
id: "frontend_agent",
toml: include_str!("../../orchestration/frontend_agent/agent.toml"),
prompt_fn: crate::openhuman::orchestration::frontend_agent::prompt::build,
graph_fn: crate::openhuman::orchestration::frontend_agent::graph::graph,
graph_fn: Some(crate::openhuman::orchestration::frontend_agent::graph::graph),
},
BuiltinAgent {
id: "reasoning_agent",
toml: include_str!("../../orchestration/reasoning_agent/agent.toml"),
prompt_fn: crate::openhuman::orchestration::reasoning_agent::prompt::build,
graph_fn: crate::openhuman::orchestration::reasoning_agent::graph::graph,
graph_fn: Some(crate::openhuman::orchestration::reasoning_agent::graph::graph),
},
];

Expand Down
17 changes: 9 additions & 8 deletions src/openhuman/app_state/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,15 @@ const LOG_PREFIX: &str = "[app_state]";
const APP_STATE_FILENAME: &str = "app-state.json";
const CURRENT_USER_REFRESH_TTL: Duration = Duration::from_secs(5);
// Runtime-status widgets (screen intelligence / local AI / autocomplete /
// service) tolerate ~10s of staleness. A short TTL (was 2s < the ~2.4s build
// time) meant the cache was stale before it was even written, so the frontend's
// ~4s `app_state_snapshot` poll never hit the fast path and every poll re-ran
// the full 4-way fan-out (issue #4249 profiling: this, combined with the lack
// of a single-flight gate, pegged ~2 cores and starved the shared tokio runtime
// the agent harness runs on — the agent's turns stalled 50-100s between model
// calls even though inference itself was idle).
const RUNTIME_SNAPSHOT_TTL: Duration = Duration::from_secs(10);
// service). The real fix for the rebuild stampede (issue #4249 profiling: ~2
// cores pegged, ~4s `app_state_snapshot` polls each launching their own 4-way
// fan-out, starving the shared tokio runtime the agent harness runs on) is the
// single-flight gate below — it collapses N concurrent rebuilds into one. The
// TTL only controls how often a *coalesced* rebuild runs, so a short window is
// fine and keeps the runtime status fresh. Kept at 2s so callers that need a
// post-change snapshot only have to age the cache out briefly (see the
// `app_state_snapshot_degrades_runtime_service_status_failures` e2e).
const RUNTIME_SNAPSHOT_TTL: Duration = Duration::from_secs(2);
const AUTH_FETCH_TIMEOUT: Duration = Duration::from_secs(5);
const RUNTIME_SNAPSHOT_TIMEOUT: Duration = Duration::from_secs(10);
const SNAPSHOT_SUB_OP_TIMEOUT: Duration = Duration::from_secs(5);
Expand Down
15 changes: 15 additions & 0 deletions src/openhuman/mcp_server/resources.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,21 @@ const RESOURCE_CATALOG: &[PromptResource] = &[
description: "Sandboxed worker that runs installed skill packages.",
content: include_str!("../skill_runtime/agent/skill_executor/prompt.md"),
},
// ── Subconscious orchestration wake-graph agents (#4430) ───────────────
PromptResource {
uri: "openhuman://prompts/agents/frontend_agent",
name: "frontend_agent",
description: "Subconscious wake-graph worker that normalizes and routes \
an incoming channel message (command detection, reply framing).",
content: include_str!("../orchestration/frontend_agent/prompt.md"),
},
PromptResource {
uri: "openhuman://prompts/agents/reasoning_agent",
name: "reasoning_agent",
description: "Subconscious wake-graph worker that runs the reasoning / \
execution stage of a wake cycle.",
content: include_str!("../orchestration/reasoning_agent/prompt.md"),
},
];

/// Returns the `resources/list` result payload listing every catalog entry.
Expand Down
12 changes: 10 additions & 2 deletions src/openhuman/orchestration/graph/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ use tinyagents::graph::{

use crate::openhuman::config::Config;
use crate::openhuman::tinyagents::observability::GraphTracingSink;
use crate::openhuman::tinyagents::SqlRunLedgerCheckpointer;
use tinyagents::graph::SqliteCheckpointer;

pub use state::{CompressedEntry, OrchestrationState, WorldDiff, WorldDiffEntry};

Expand Down Expand Up @@ -381,7 +381,15 @@ pub async fn run_orchestration_graph(
let threshold = config.orchestration.effective_evict_threshold();
let thread_id = format!("orchestration:{}", state.session_id);
let label = thread_id.clone();
let checkpointer = Arc::new(SqlRunLedgerCheckpointer::<OrchestrationState>::new(config));
// Durable graph checkpoints ride the crate's `SqliteCheckpointer` (issue
// #4249, 04.3) at a dedicated `orchestration_checkpoints.db` under the
// workspace — the old run-ledger `SqlRunLedgerCheckpointer` was retired, so
// pre-swap in-flight wake graphs simply expire.
let checkpoint_db = config.workspace_dir.join("orchestration_checkpoints.db");
let checkpointer = Arc::new(
SqliteCheckpointer::<OrchestrationState>::open(&checkpoint_db)
.map_err(|e| anyhow::anyhow!("open orchestration checkpoint store: {e}"))?,
);

tracing::debug!(
target: LOG, session_id = %state.session_id, %thread_id,
Expand Down
4 changes: 2 additions & 2 deletions src/openhuman/orchestration/graph/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
//! `OrchestrationState` is the spec's single `StateGraph` state object: one
//! value flows through the whole wake path (normalize → frontend → execute →
//! frontend → send_dm → context_guard → END) and is checkpointed at every
//! super-step boundary by [`SqlRunLedgerCheckpointer`](crate::openhuman::tinyagents::SqlRunLedgerCheckpointer)
//! under the thread id `orchestration:<session_id>`.
//! super-step boundary by the crate `tinyagents::graph::SqliteCheckpointer`
//! (issue #4249, 04.3) under the thread id `orchestration:<session_id>`.
//!
//! Every field is serde-serializable so a mid-cycle crash can resume from the
//! last persisted boundary with an identical state. Fields the later stages own
Expand Down
9 changes: 7 additions & 2 deletions src/openhuman/orchestration/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -727,9 +727,9 @@ impl OrchestrationRuntime for ProductionRuntime {
mod tests {
use super::*;
use crate::openhuman::orchestration::types::OrchestrationMessage;
use crate::openhuman::tinyagents::SqlRunLedgerCheckpointer;
use std::sync::atomic::{AtomicUsize, Ordering};
use tinyagents::graph::checkpoint::Checkpointer;
use tinyagents::graph::SqliteCheckpointer;

fn test_config(tmp: &tempfile::TempDir) -> Config {
Config {
Expand Down Expand Up @@ -910,7 +910,12 @@ mod tests {
.unwrap();

// Checkpoints persisted → kill/restart could resume without re-sending.
let cp = SqlRunLedgerCheckpointer::<OrchestrationState>::new(config);
// Opens the same dedicated `orchestration_checkpoints.db` the wake graph
// writes to (issue #4249, 04.3 — crate `SqliteCheckpointer`).
let cp = SqliteCheckpointer::<OrchestrationState>::open(
&config.workspace_dir.join("orchestration_checkpoints.db"),
)
.expect("open orchestration checkpoint store");
let list = cp.list("orchestration:h1").await.expect("list checkpoints");
assert!(!list.is_empty(), "wake cycle persisted checkpoints");
}
Expand Down
Loading