diff --git a/cli.py b/cli.py index 3dc019f54f0..ac7a9741e22 100644 --- a/cli.py +++ b/cli.py @@ -70,7 +70,7 @@ # Load .env from ~/.hermes/.env first, then project root as dev fallback. # User-managed env files should override stale shell exports on restart. -from hermes_constants import get_hermes_home, OPENROUTER_BASE_URL +from hermes_constants import get_hermes_home, display_hermes_home, OPENROUTER_BASE_URL from hermes_cli.env_loader import load_hermes_dotenv _hermes_home = get_hermes_home() @@ -144,8 +144,8 @@ def load_cli_config() -> Dict[str, Any]: # Default configuration defaults = { "model": { - "default": "anthropic/claude-opus-4.6", - "base_url": OPENROUTER_BASE_URL, + "default": "", + "base_url": "", "provider": "auto", }, "terminal": { @@ -205,6 +205,7 @@ def load_cli_config() -> Dict[str, Any]: "resume_display": "full", "show_reasoning": False, "streaming": True, + "busy_input_mode": "interrupt", "skin": "default", }, @@ -261,6 +262,29 @@ def load_cli_config() -> Dict[str, Any]: elif isinstance(file_config["model"], dict): # Old format: model is a dict with default/base_url defaults["model"].update(file_config["model"]) + # If the user config sets model.model but not model.default, + # promote model.model to model.default so the user's explicit + # choice isn't shadowed by the hardcoded default. Without this, + # profile configs that only set "model:" (not "default:") silently + # fall back to claude-opus because the merge preserves the + # hardcoded default and HermesCLI.__init__ checks "default" first. + if "model" in file_config["model"] and "default" not in file_config["model"]: + defaults["model"]["default"] = file_config["model"]["model"] + + # Legacy root-level provider/base_url fallback. + # Some users (or old code) put provider: / base_url: at the + # config root instead of inside the model: section. These are + # only used as a FALLBACK when model.provider / model.base_url + # is not already set — never as an override. 
The canonical + # location is model.provider (written by `hermes model`). + if not defaults["model"].get("provider"): + root_provider = file_config.get("provider") + if root_provider: + defaults["model"]["provider"] = root_provider + if not defaults["model"].get("base_url"): + root_base_url = file_config.get("base_url") + if root_base_url: + defaults["model"]["base_url"] = root_base_url # Deep merge file_config into defaults. # First: merge keys that exist in both (deep-merge dicts, overwrite scalars) @@ -436,6 +460,25 @@ def load_cli_config() -> Dict[str, Any]: except Exception: pass # Skin engine is optional — default skin used if unavailable +# Initialize tool preview length from config +try: + from agent.display import set_tool_preview_max_len + _tpl = CLI_CONFIG.get("display", {}).get("tool_preview_length", 0) + set_tool_preview_max_len(int(_tpl) if _tpl else 0) +except Exception: + pass + +# Neuter AsyncHttpxClientWrapper.__del__ before any AsyncOpenAI clients are +# created. The SDK's __del__ schedules aclose() on asyncio.get_running_loop() +# which, during CLI idle time, finds prompt_toolkit's event loop and tries to +# close TCP transports bound to dead worker loops — producing +# "Event loop is closed" / "Press ENTER to continue..." errors. 
+try: + from agent.auxiliary_client import neuter_async_httpx_del + neuter_async_httpx_del() +except Exception: + pass + from rich import box as rich_box from rich.console import Console from rich.markup import escape as _escape @@ -465,6 +508,8 @@ def load_cli_config() -> Dict[str, Any]: # Guard to prevent cleanup from running multiple times on exit _cleanup_done = False +# Weak reference to the active AIAgent for memory provider shutdown at exit +_active_agent_ref = None def _run_cleanup(): """Run resource cleanup exactly once.""" @@ -493,6 +538,15 @@ def _run_cleanup(): shutdown_cached_clients() except Exception: pass + # Shut down memory provider (on_session_end + shutdown_all) at actual + # session boundary — NOT per-turn inside run_conversation(). + try: + if _active_agent_ref and hasattr(_active_agent_ref, 'shutdown_memory_provider'): + _active_agent_ref.shutdown_memory_provider( + getattr(_active_agent_ref, 'conversation_history', None) or [] + ) + except Exception: + pass # ============================================================================= @@ -787,6 +841,63 @@ def _cprint(text: str): _pt_print(_PT_ANSI(text)) +# --------------------------------------------------------------------------- +# File-drop detection — extracted as a pure function for testability. +# --------------------------------------------------------------------------- + +_IMAGE_EXTENSIONS = frozenset({ + '.png', '.jpg', '.jpeg', '.gif', '.webp', + '.bmp', '.tiff', '.tif', '.svg', '.ico', +}) + + +def _detect_file_drop(user_input: str) -> "dict | None": + """Detect if *user_input* is a dragged/pasted file path, not a slash command. + + When a user drags a file into the terminal, macOS pastes the absolute path + (e.g. ``/Users/roland/Desktop/file.png``) which starts with ``/`` and would + otherwise be mistaken for a slash command. 
+ + Returns a dict on match:: + + { + "path": Path, # resolved file path + "is_image": bool, # True when suffix is a known image type + "remainder": str, # any text after the path + } + + Returns ``None`` when the input is not a real file path. + """ + if not isinstance(user_input, str) or not user_input.startswith("/"): + return None + + # Walk the string absorbing backslash-escaped spaces ("\ "). + raw = user_input + pos = 0 + while pos < len(raw): + ch = raw[pos] + if ch == '\\' and pos + 1 < len(raw) and raw[pos + 1] == ' ': + pos += 2 # skip escaped space + elif ch == ' ': + break + else: + pos += 1 + + first_token_raw = raw[:pos] + first_token = first_token_raw.replace('\\ ', ' ') + drop_path = Path(first_token) + + if not drop_path.exists() or not drop_path.is_file(): + return None + + remainder = raw[pos:].strip() + return { + "path": drop_path, + "is_image": drop_path.suffix.lower() in _IMAGE_EXTENSIONS, + "remainder": remainder, + } + + class ChatConsole: """Rich Console adapter for prompt_toolkit's patch_stdout context. @@ -872,6 +983,28 @@ def _build_compact_banner() -> str: +# ============================================================================ +# Slash-command detection helper +# ============================================================================ + +def _looks_like_slash_command(text: str) -> bool: + """Return True if *text* looks like a slash command, not a file path. + + Slash commands are ``/help``, ``/model gpt-4``, ``/q``, etc. + File paths like ``/Users/ironin/file.md:45-46 can you fix this?`` + also start with ``/`` but contain additional ``/`` characters in + the first whitespace-delimited word. This helper distinguishes + the two so that pasted paths are sent to the agent instead of + triggering "Unknown command". + """ + if not text or not text.startswith("/"): + return False + first_word = text.split()[0] + # After stripping the leading /, a command name has no slashes. + # A path like /Users/foo/bar.md always does. 
+ return "/" not in first_word[1:] + + # ============================================================================ # Skill Slash Commands — dynamic commands generated from installed skills # ============================================================================ @@ -959,9 +1092,10 @@ def save_config_value(key_path: str, value: any) -> bool: current = current[key] current[keys[-1]] = value - # Save back - with open(config_path, 'w') as f: - yaml.dump(config, f, default_flow_style=False, sort_keys=False) + # Save back atomically — write to temp file + fsync + os.replace + # so an interrupt never leaves config.yaml truncated or empty. + from utils import atomic_yaml_write + atomic_yaml_write(config_path, config) # Enforce owner-only permissions on config files (contain API keys) try: @@ -1023,25 +1157,34 @@ def __init__( self.config = CLI_CONFIG self.compact = compact if compact is not None else CLI_CONFIG["display"].get("compact", False) # tool_progress: "off", "new", "all", "verbose" (from config.yaml display section) - self.tool_progress_mode = CLI_CONFIG["display"].get("tool_progress", "all") + # YAML 1.1 parses bare `off` as boolean False — normalise to string. 
+ _raw_tp = CLI_CONFIG["display"].get("tool_progress", "all") + self.tool_progress_mode = "off" if _raw_tp is False else str(_raw_tp) # resume_display: "full" (show history) | "minimal" (one-liner only) self.resume_display = CLI_CONFIG["display"].get("resume_display", "full") # bell_on_complete: play terminal bell (\a) when agent finishes a response self.bell_on_complete = CLI_CONFIG["display"].get("bell_on_complete", False) # show_reasoning: display model thinking/reasoning before the response self.show_reasoning = CLI_CONFIG["display"].get("show_reasoning", False) + # busy_input_mode: "interrupt" (Enter interrupts current run) or "queue" (Enter queues for next turn) + _bim = CLI_CONFIG["display"].get("busy_input_mode", "interrupt") + self.busy_input_mode = "queue" if str(_bim).strip().lower() == "queue" else "interrupt" self.verbose = verbose if verbose is not None else (self.tool_progress_mode == "verbose") # streaming: stream tokens to the terminal as they arrive (display.streaming in config.yaml) self.streaming_enabled = CLI_CONFIG["display"].get("streaming", False) + # Inline diff previews for write actions (display.inline_diffs in config.yaml) + self._inline_diffs_enabled = CLI_CONFIG["display"].get("inline_diffs", True) + # Streaming display state self._stream_buf = "" # Partial line buffer for line-buffered rendering self._stream_started = False # True once first delta arrives self._stream_box_opened = False # True once the response box header is printed self._reasoning_stream_started = False # True once live reasoning starts streaming self._reasoning_preview_buf = "" # Coalesce tiny reasoning chunks for [thinking] output + self._pending_edit_snapshots = {} # Configuration - priority: CLI args > env vars > config file # Model comes from: CLI arg or config.yaml (single source of truth). @@ -1049,12 +1192,12 @@ def __init__( # authoritative. This avoids conflicts in multi-agent setups where # env vars would stomp each other. 
_model_config = CLI_CONFIG.get("model", {}) - _config_model = _model_config.get("default", "") if isinstance(_model_config, dict) else (_model_config or "") - _FALLBACK_MODEL = "anthropic/claude-opus-4.6" - self.model = model or _config_model or _FALLBACK_MODEL - # Auto-detect model from local server if still on fallback - if self.model == _FALLBACK_MODEL: - _base_url = _model_config.get("base_url", "") if isinstance(_model_config, dict) else "" + _config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "") + _DEFAULT_CONFIG_MODEL = "" + self.model = model or _config_model or _DEFAULT_CONFIG_MODEL + # Auto-detect model from local server if still on default + if self.model == _DEFAULT_CONFIG_MODEL: + _base_url = (_model_config.get("base_url") or "") if isinstance(_model_config, dict) else "" if "localhost" in _base_url or "127.0.0.1" in _base_url: from hermes_cli.runtime_provider import _auto_detect_local_model _detected = _auto_detect_local_model(_base_url) @@ -1067,7 +1210,7 @@ def __init__( # explicit choice — the user just never changed it. But a config model # like "gpt-5.3-codex" IS explicit and must be preserved. self._model_is_default = not model and ( - not _config_model or _config_model == _FALLBACK_MODEL + not _config_model or _config_model == _DEFAULT_CONFIG_MODEL ) self._explicit_api_key = api_key @@ -1087,9 +1230,9 @@ def __init__( self.acp_args: list[str] = [] self.base_url = ( base_url - or os.getenv("OPENAI_BASE_URL") - or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"]) - ) + or CLI_CONFIG["model"].get("base_url", "") + or os.getenv("OPENROUTER_BASE_URL", "") + ) or None # Match key to resolved base_url: OpenRouter URL → prefer OPENROUTER_API_KEY, # custom endpoint → prefer OPENAI_API_KEY (issue #560). # Note: _ensure_runtime_credentials() re-resolves this before first use. 
@@ -1153,9 +1296,13 @@ def __init__( self._provider_require_params = pr.get("require_parameters", False) self._provider_data_collection = pr.get("data_collection") - # Fallback model config — tried when primary provider fails after retries - fb = CLI_CONFIG.get("fallback_model") or {} - self._fallback_model = fb if fb.get("provider") and fb.get("model") else None + # Fallback provider chain — tried in order when primary fails after retries. + # Supports new list format (fallback_providers) and legacy single-dict (fallback_model). + fb = CLI_CONFIG.get("fallback_providers") or CLI_CONFIG.get("fallback_model") or [] + # Normalize legacy single-dict to a one-element list + if isinstance(fb, dict): + fb = [fb] if fb.get("provider") and fb.get("model") else [] + self._fallback_model = fb # Optional cheap-vs-strong routing for simple turns self._smart_model_routing = CLI_CONFIG.get("smart_model_routing", {}) or {} @@ -1314,20 +1461,69 @@ def _get_status_bar_snapshot(self) -> Dict[str, Any]: return snapshot + @staticmethod + def _status_bar_display_width(text: str) -> int: + """Return terminal cell width for status-bar text. + + len() is not enough for prompt_toolkit layout decisions because some + glyphs can render wider than one Python codepoint. Keeping the status + bar within the real display width prevents it from wrapping onto a + second line and leaving behind duplicate rows. + """ + try: + from prompt_toolkit.utils import get_cwidth + return get_cwidth(text or "") + except Exception: + return len(text or "") + + @classmethod + def _trim_status_bar_text(cls, text: str, max_width: int) -> str: + """Trim status-bar text to a single terminal row.""" + if max_width <= 0: + return "" + try: + from prompt_toolkit.utils import get_cwidth + except Exception: + get_cwidth = None + + if cls._status_bar_display_width(text) <= max_width: + return text + + ellipsis = "..." 
+ ellipsis_width = cls._status_bar_display_width(ellipsis) + if max_width <= ellipsis_width: + return ellipsis[:max_width] + + out = [] + width = 0 + for ch in text: + ch_width = get_cwidth(ch) if get_cwidth else len(ch) + if width + ch_width + ellipsis_width > max_width: + break + out.append(ch) + width += ch_width + return "".join(out).rstrip() + ellipsis + def _build_status_bar_text(self, width: Optional[int] = None) -> str: try: snapshot = self._get_status_bar_snapshot() - width = width or shutil.get_terminal_size((80, 24)).columns + if width is None: + try: + from prompt_toolkit.application import get_app + width = get_app().output.get_size().columns + except Exception: + width = shutil.get_terminal_size((80, 24)).columns percent = snapshot["context_percent"] percent_label = f"{percent}%" if percent is not None else "--" duration_label = snapshot["duration"] if width < 52: - return f"⚕ {snapshot['model_short']} · {duration_label}" + text = f"⚕ {snapshot['model_short']} · {duration_label}" + return self._trim_status_bar_text(text, width) if width < 76: parts = [f"⚕ {snapshot['model_short']}", percent_label] parts.append(duration_label) - return " · ".join(parts) + return self._trim_status_bar_text(" · ".join(parts), width) if snapshot["context_length"]: ctx_total = _format_context_length(snapshot["context_length"]) @@ -1338,7 +1534,7 @@ def _build_status_bar_text(self, width: Optional[int] = None) -> str: parts = [f"⚕ {snapshot['model_short']}", context_label, percent_label] parts.append(duration_label) - return " │ ".join(parts) + return self._trim_status_bar_text(" │ ".join(parts), width) except Exception: return f"⚕ {self.model if getattr(self, 'model', None) else 'Hermes'}" @@ -1347,57 +1543,67 @@ def _get_status_bar_fragments(self): return [] try: snapshot = self._get_status_bar_snapshot() - width = shutil.get_terminal_size((80, 24)).columns + # Use prompt_toolkit's own terminal width when running inside the + # TUI — shutil.get_terminal_size() can return 
stale or fallback + # values (especially on SSH) that differ from what prompt_toolkit + # actually renders, causing the fragments to overflow to a second + # line and produce duplicated status bar rows over long sessions. + try: + from prompt_toolkit.application import get_app + width = get_app().output.get_size().columns + except Exception: + width = shutil.get_terminal_size((80, 24)).columns duration_label = snapshot["duration"] if width < 52: - return [ - ("class:status-bar", " ⚕ "), - ("class:status-bar-strong", snapshot["model_short"]), - ("class:status-bar-dim", " · "), - ("class:status-bar-dim", duration_label), - ("class:status-bar", " "), - ] - - percent = snapshot["context_percent"] - percent_label = f"{percent}%" if percent is not None else "--" - if width < 76: frags = [ ("class:status-bar", " ⚕ "), ("class:status-bar-strong", snapshot["model_short"]), - ("class:status-bar-dim", " · "), - (self._status_bar_context_style(percent), percent_label), - ] - frags.extend([ ("class:status-bar-dim", " · "), ("class:status-bar-dim", duration_label), ("class:status-bar", " "), - ]) - return frags - - if snapshot["context_length"]: - ctx_total = _format_context_length(snapshot["context_length"]) - ctx_used = format_token_count_compact(snapshot["context_tokens"]) - context_label = f"{ctx_used}/{ctx_total}" + ] else: - context_label = "ctx --" + percent = snapshot["context_percent"] + percent_label = f"{percent}%" if percent is not None else "--" + if width < 76: + frags = [ + ("class:status-bar", " ⚕ "), + ("class:status-bar-strong", snapshot["model_short"]), + ("class:status-bar-dim", " · "), + (self._status_bar_context_style(percent), percent_label), + ("class:status-bar-dim", " · "), + ("class:status-bar-dim", duration_label), + ("class:status-bar", " "), + ] + else: + if snapshot["context_length"]: + ctx_total = _format_context_length(snapshot["context_length"]) + ctx_used = format_token_count_compact(snapshot["context_tokens"]) + context_label = 
f"{ctx_used}/{ctx_total}" + else: + context_label = "ctx --" + + bar_style = self._status_bar_context_style(percent) + frags = [ + ("class:status-bar", " ⚕ "), + ("class:status-bar-strong", snapshot["model_short"]), + ("class:status-bar-dim", " │ "), + ("class:status-bar-dim", context_label), + ("class:status-bar-dim", " │ "), + (bar_style, self._build_context_bar(percent)), + ("class:status-bar-dim", " "), + (bar_style, percent_label), + ("class:status-bar-dim", " │ "), + ("class:status-bar-dim", duration_label), + ("class:status-bar", " "), + ] - bar_style = self._status_bar_context_style(percent) - frags = [ - ("class:status-bar", " ⚕ "), - ("class:status-bar-strong", snapshot["model_short"]), - ("class:status-bar-dim", " │ "), - ("class:status-bar-dim", context_label), - ("class:status-bar-dim", " │ "), - (bar_style, self._build_context_bar(percent)), - ("class:status-bar-dim", " "), - (bar_style, percent_label), - ] - frags.extend([ - ("class:status-bar-dim", " │ "), - ("class:status-bar-dim", duration_label), - ("class:status-bar", " "), - ]) + total_width = sum(self._status_bar_display_width(text) for _, text in frags) + if total_width > width: + plain_text = "".join(text for _, text in frags) + trimmed = self._trim_status_bar_text(plain_text, width) + return [("class:status-bar", trimmed)] return frags except Exception: return [("class:status-bar", f" {self._build_status_bar_text()} ")] @@ -1429,6 +1635,28 @@ def _normalize_model_for_provider(self, resolved_provider: str) -> bool: pass return changed + if resolved_provider in {"opencode-zen", "opencode-go"}: + try: + from hermes_cli.models import normalize_opencode_model_id, opencode_model_api_mode + + canonical = normalize_opencode_model_id(resolved_provider, current_model) + if canonical and canonical != current_model: + if not self._model_is_default: + self.console.print( + f"[yellow]⚠️ Stripped provider prefix from '{current_model}'; using '{canonical}' for {resolved_provider}.[/]" + ) + self.model = 
canonical + current_model = canonical + changed = True + + resolved_mode = opencode_model_api_mode(resolved_provider, current_model) + if resolved_mode != self.api_mode: + self.api_mode = resolved_mode + changed = True + except Exception: + pass + return changed + if resolved_provider != "openai-codex": return False @@ -1582,6 +1810,7 @@ def _stream_reasoning_delta(self, text: str) -> None: if not text: return self._reasoning_stream_started = True + self._reasoning_shown_this_turn = True if getattr(self, "_stream_box_opened", False): return @@ -1854,6 +2083,7 @@ def _ensure_runtime_credentials(self) -> bool: resolved_api_mode = runtime.get("api_mode", self.api_mode) resolved_acp_command = runtime.get("command") resolved_acp_args = list(runtime.get("args") or []) + resolved_credential_pool = runtime.get("credential_pool") if not isinstance(api_key, str) or not api_key: # Custom / local endpoints (llama.cpp, ollama, vLLM, etc.) often # don't require authentication. When a base_url IS configured but @@ -1869,10 +2099,12 @@ def _ensure_runtime_credentials(self) -> bool: base_url, _source, ) else: - self.console.print("[bold red]Provider resolver returned an empty API key.[/]") + print("\n⚠️ Provider resolver returned an empty API key. " + "Set OPENROUTER_API_KEY or run: hermes setup") return False if not isinstance(base_url, str) or not base_url: - self.console.print("[bold red]Provider resolver returned an empty base URL.[/]") + print("\n⚠️ Provider resolver returned an empty base URL. 
" + "Check your provider config or run: hermes setup") return False credentials_changed = api_key != self.api_key or base_url != self.base_url @@ -1886,6 +2118,7 @@ def _ensure_runtime_credentials(self) -> bool: self.api_mode = resolved_api_mode self.acp_command = resolved_acp_command self.acp_args = resolved_acp_args + self._credential_pool = resolved_credential_pool self._provider_source = runtime.get("source") self.api_key = api_key self.base_url = base_url @@ -1917,6 +2150,7 @@ def _resolve_turn_agent_config(self, user_message: str) -> dict: "api_mode": self.api_mode, "command": self.acp_command, "args": list(self.acp_args or []), + "credential_pool": getattr(self, "_credential_pool", None), }, ) @@ -1954,6 +2188,7 @@ def _init_agent(self, *, model_override: str = None, runtime_override: dict = No return False restored = self._session_db.get_messages_as_conversation(self.session_id) if restored: + restored = [m for m in restored if m.get("role") != "session_meta"] self.conversation_history = restored msg_count = len([m for m in restored if m.get("role") == "user"]) title_part = "" @@ -1987,6 +2222,7 @@ def _init_agent(self, *, model_override: str = None, runtime_override: dict = No "api_mode": self.api_mode, "command": self.acp_command, "args": list(self.acp_args or []), + "credential_pool": getattr(self, "_credential_pool", None), } effective_model = model_override or self.model self.agent = AIAgent( @@ -1997,6 +2233,7 @@ def _init_agent(self, *, model_override: str = None, runtime_override: dict = No api_mode=runtime.get("api_mode"), acp_command=runtime.get("command"), acp_args=runtime.get("args"), + credential_pool=runtime.get("credential_pool"), max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, verbose_logging=self.verbose, @@ -2015,16 +2252,21 @@ def _init_agent(self, *, model_override: str = None, runtime_override: dict = No session_db=self._session_db, clarify_callback=self._clarify_callback, 
reasoning_callback=self._current_reasoning_callback(), - honcho_session_key=None, # resolved by run_agent via config sessions map / title + fallback_model=self._fallback_model, thinking_callback=self._on_thinking, checkpoints_enabled=self.checkpoints_enabled, checkpoint_max_snapshots=self.checkpoint_max_snapshots, pass_session_id=self.pass_session_id, tool_progress_callback=self._on_tool_progress, + tool_start_callback=self._on_tool_start if self._inline_diffs_enabled else None, + tool_complete_callback=self._on_tool_complete if self._inline_diffs_enabled else None, stream_delta_callback=self._stream_delta if self.streaming_enabled else None, tool_gen_callback=self._on_tool_gen_start if self.streaming_enabled else None, ) + # Store reference for atexit memory provider shutdown + global _active_agent_ref + _active_agent_ref = self.agent # Route agent status output through prompt_toolkit so ANSI escape # sequences aren't garbled by patch_stdout's StdoutProxy (#2262). self.agent._print_fn = _cprint @@ -2053,6 +2295,12 @@ def _init_agent(self, *, model_override: str = None, runtime_override: dict = No def show_banner(self): """Display the welcome banner in Claude Code style.""" self.console.clear() + + # Get context length for display before branching so it remains + # available to the low-context warning logic in compact mode too. + ctx_len = None + if hasattr(self, 'agent') and self.agent and hasattr(self.agent, 'context_compressor'): + ctx_len = self.agent.context_compressor.context_length # Auto-compact for narrow terminals — the full banner with caduceus # + tool list needs ~80 columns minimum to render without wrapping. 
@@ -2069,11 +2317,6 @@ def show_banner(self): # Get terminal working directory (where commands will execute) cwd = os.getenv("TERMINAL_CWD", os.getcwd()) - # Get context length for display - ctx_len = None - if hasattr(self, 'agent') and self.agent and hasattr(self.agent, 'context_compressor'): - ctx_len = self.agent.context_compressor.context_length - # Build and display the banner build_welcome_banner( console=self.console, @@ -2087,7 +2330,31 @@ def show_banner(self): # Show tool availability warnings if any tools are disabled self._show_tool_availability_warnings() - + + # Warn about very low context lengths (common with local servers) + if ctx_len and ctx_len <= 8192: + self.console.print() + self.console.print( + f"[yellow]⚠️ Context length is only {ctx_len:,} tokens — " + f"this is likely too low for agent use with tools.[/]" + ) + self.console.print( + "[dim] Hermes needs 16k–32k minimum. Tool schemas + system prompt alone use ~4k–8k.[/]" + ) + base_url = getattr(self, "base_url", "") or "" + if "11434" in base_url or "ollama" in base_url.lower(): + self.console.print( + "[dim] Ollama fix: OLLAMA_CONTEXT_LENGTH=32768 ollama serve[/]" + ) + elif "1234" in base_url: + self.console.print( + "[dim] LM Studio fix: Set context length in model settings → reload model[/]" + ) + else: + self.console.print( + "[dim] Fix: Set model.context_length in config.yaml, or increase your server's context setting[/]" + ) + self.console.print() def _preload_resumed_session(self) -> bool: @@ -2117,6 +2384,7 @@ def _preload_resumed_session(self) -> bool: restored = self._session_db.get_messages_as_conversation(self.session_id) if restored: + restored = [m for m in restored if m.get("role") != "session_meta"] self.conversation_history = restored msg_count = len([m for m in restored if m.get("role") == "user"]) title_part = "" @@ -2402,13 +2670,13 @@ def _handle_rollback_command(self, command: str): print(f" ✅ Restored {file_path} from checkpoint {result['restored_to']}: 
{result['reason']}") else: print(f" ✅ Restored to checkpoint {result['restored_to']}: {result['reason']}") - print(f" A pre-rollback snapshot was saved automatically.") + print(" A pre-rollback snapshot was saved automatically.") # Also undo the last conversation turn so the agent's context # matches the restored filesystem state if self.conversation_history: self.undo_last() - print(f" Chat turn undone to match restored file state.") + print(" Chat turn undone to match restored file state.") else: print(f" ❌ {result['error']}") @@ -2688,22 +2956,12 @@ def _handle_tools_command(self, cmd: str): print(f" MCP tool: /tools {subcommand} github:create_issue") return - # Confirm session reset before applying - verb = "Disable" if subcommand == "disable" else "Enable" + # Apply the change directly — the user typing the command is implicit + # consent. Do NOT use input() here; it hangs inside prompt_toolkit's + # TUI event loop (known pitfall). + verb = "Disabling" if subcommand == "disable" else "Enabling" label = ", ".join(names) - _cprint(f"{_GOLD}{verb} {label}?{_RST}") - _cprint(f"{_DIM}This will save to config and reset your session so the " - f"change takes effect cleanly.{_RST}") - try: - answer = input(" Continue? 
[y/N] ").strip().lower() - except (EOFError, KeyboardInterrupt): - print() - _cprint(f"{_DIM}Cancelled.{_RST}") - return - - if answer not in ("y", "yes"): - _cprint(f"{_DIM}Cancelled.{_RST}") - return + _cprint(f"{_GOLD}{verb} {label}...{_RST}") tools_disable_enable_command( Namespace(tools_action=subcommand, names=names, platform="cli")) @@ -2746,6 +3004,28 @@ def show_toolsets(self): print(" Example: python cli.py --toolsets web,terminal") print() + def _handle_profile_command(self): + """Display active profile name and home directory.""" + from hermes_constants import get_hermes_home, display_hermes_home + + home = get_hermes_home() + display = display_hermes_home() + + profiles_parent = Path.home() / ".hermes" / "profiles" + try: + rel = home.relative_to(profiles_parent) + profile_name = str(rel).split("/")[0] + except ValueError: + profile_name = None + + print() + if profile_name: + print(f" Profile: {profile_name}") + else: + print(" Profile: default") + print(f" Home: {display}") + print() + def show_config(self): """Display current configuration with kawaii ASCII art.""" # Get terminal config from environment (which was set from cli-config.yaml) @@ -2796,10 +3076,54 @@ def show_config(self): print(f" Config File: {config_path} {config_status}") print() + def _list_recent_sessions(self, limit: int = 10) -> list[dict[str, Any]]: + """Return recent CLI sessions for in-chat browsing/resume affordances.""" + if not self._session_db: + return [] + try: + sessions = self._session_db.list_sessions_rich( + source="cli", + exclude_sources=["tool"], + limit=limit, + ) + except Exception: + return [] + return [s for s in sessions if s.get("id") != self.session_id] + + def _show_recent_sessions(self, *, reason: str = "history", limit: int = 10) -> bool: + """Render recent sessions inline from the active chat TUI. + + Returns True when something was shown, False if no session list was available. 
+ """ + sessions = self._list_recent_sessions(limit=limit) + if not sessions: + return False + + from hermes_cli.main import _relative_time + + print() + if reason == "history": + print("(._.) No messages in the current chat yet — here are recent sessions you can resume:") + else: + print(" Recent sessions:") + print() + print(f" {'Title':<32} {'Preview':<40} {'Last Active':<13} {'ID'}") + print(f" {'─' * 32} {'─' * 40} {'─' * 13} {'─' * 24}") + for session in sessions: + title = (session.get("title") or "—")[:30] + preview = (session.get("preview") or "")[:38] + last_active = _relative_time(session.get("last_active")) + print(f" {title:<32} {preview:<40} {last_active:<13} {session['id']}") + print() + print(" Use /resume to continue where you left off.") + print() + return True + def show_history(self): """Display conversation history.""" if not self.conversation_history: - print("(._.) No conversation history yet.") + if not self._show_recent_sessions(reason="history"): + print("(._.) No conversation history yet.") return preview_limit = 400 @@ -2904,7 +3228,7 @@ def new_session(self, silent=False): try: self._session_db.create_session( session_id=self.session_id, - source="cli", + source=os.environ.get("HERMES_SESSION_SOURCE", "cli"), model=self.model, model_config={ "max_iterations": self.max_turns, @@ -2917,8 +3241,90 @@ def new_session(self, silent=False): if not silent: print("(^_^)v New session started!") + def _handle_resume_command(self, cmd_original: str) -> None: + """Handle /resume — switch to a previous session mid-conversation.""" + parts = cmd_original.split(None, 1) + target = parts[1].strip() if len(parts) > 1 else "" + + if not target: + _cprint(" Usage: /resume ") + if self._show_recent_sessions(reason="resume"): + return + _cprint(" Tip: Use /history or `hermes sessions list` to find sessions.") + return + + if not self._session_db: + _cprint(" Session database not available.") + return + + # Resolve title or ID + from hermes_cli.main import 
_resolve_session_by_name_or_id + resolved = _resolve_session_by_name_or_id(target) + target_id = resolved or target + + session_meta = self._session_db.get_session(target_id) + if not session_meta: + _cprint(f" Session not found: {target}") + _cprint(" Use /history or `hermes sessions list` to see available sessions.") + return + + if target_id == self.session_id: + _cprint(" Already on that session.") + return + + # End current session + try: + self._session_db.end_session(self.session_id, "resumed_other") + except Exception: + pass + + # Switch to the target session + self.session_id = target_id + self._resumed = True + self._pending_title = None + + # Load conversation history (strip transcript-only metadata entries) + restored = self._session_db.get_messages_as_conversation(target_id) + restored = [m for m in (restored or []) if m.get("role") != "session_meta"] + self.conversation_history = restored + + # Re-open the target session so it's not marked as ended + try: + self._session_db.reopen_session(target_id) + except Exception: + pass + + # Sync the agent if already initialised + if self.agent: + self.agent.session_id = target_id + self.agent.reset_session_state() + if hasattr(self.agent, "_last_flushed_db_idx"): + self.agent._last_flushed_db_idx = len(self.conversation_history) + if hasattr(self.agent, "_todo_store"): + try: + from tools.todo_tool import TodoStore + self.agent._todo_store = TodoStore() + except Exception: + pass + if hasattr(self.agent, "_invalidate_system_prompt"): + self.agent._invalidate_system_prompt() + + title_part = f" \"{session_meta['title']}\"" if session_meta.get("title") else "" + msg_count = len([m for m in self.conversation_history if m.get("role") == "user"]) + if self.conversation_history: + _cprint( + f" ↻ Resumed session {target_id}{title_part}" + f" ({msg_count} user message{'s' if msg_count != 1 else ''}," + f" {len(self.conversation_history)} total)" + ) + else: + _cprint(f" ↻ Resumed session {target_id}{title_part} — no 
messages, starting fresh.") + def reset_conversation(self): """Reset the conversation by starting a new session.""" + # Shut down memory provider before resetting — actual session boundary + if hasattr(self, 'agent') and self.agent: + self.agent.shutdown_memory_provider(self.conversation_history) self.new_session() def save_conversation(self): @@ -3050,20 +3456,20 @@ def _show_model_and_providers(self): print(f" {mid}{current_marker}") elif p["id"] == "custom": from hermes_cli.models import _get_custom_base_url - custom_url = _get_custom_base_url() or os.getenv("OPENAI_BASE_URL", "") + custom_url = _get_custom_base_url() if custom_url: print(f" endpoint: {custom_url}") if is_active: print(f" model: {self.model} ← current") - print(f" (use hermes model to change)") + print(" (use hermes model to change)") else: - print(f" (use hermes model to change)") + print(" (use hermes model to change)") print() if unauthed: names = ", ".join(p["label"] for p in unauthed) print(f" Not configured: {names}") - print(f" Run: hermes setup") + print(" Run: hermes setup") print() print(" To change model or provider, use: hermes model") @@ -3087,9 +3493,9 @@ def _handle_prompt_command(self, cmd: str): self.system_prompt = new_prompt self.agent = None # Force re-init if save_config_value("agent.system_prompt", new_prompt): - print(f"(^_^)b System prompt set (saved to config)") + print("(^_^)b System prompt set (saved to config)") else: - print(f"(^_^) System prompt set (session only)") + print("(^_^) System prompt set (session only)") print(f" \"{new_prompt[:60]}{'...' if len(new_prompt) > 60 else ''}\"") else: # Show current prompt @@ -3474,7 +3880,7 @@ def _show_gateway_status(self): print(" To start the gateway:") print(" python cli.py --gateway") print() - print(" Configuration file: ~/.hermes/config.yaml") + print(f" Configuration file: {display_hermes_home()}/config.yaml") print() except Exception as e: @@ -3484,7 +3890,7 @@ def _show_gateway_status(self): print(" 1. 
Set environment variables:") print(" TELEGRAM_BOT_TOKEN=your_token") print(" DISCORD_BOT_TOKEN=your_token") - print(" 2. Or configure settings in ~/.hermes/config.yaml") + print(f" 2. Or configure settings in {display_hermes_home()}/config.yaml") print() def process_command(self, command: str) -> bool: @@ -3512,6 +3918,8 @@ def process_command(self, command: str) -> bool: return False elif canonical == "help": self.show_help() + elif canonical == "profile": + self._handle_profile_command() elif canonical == "tools": self._handle_tools_command(cmd_original) elif canonical == "toolsets": @@ -3581,28 +3989,6 @@ def process_command(self, command: str) -> bool: try: if self._session_db.set_session_title(self.session_id, new_title): _cprint(f" Session title set: {new_title}") - # Re-map Honcho session key to new title - if self.agent and getattr(self.agent, '_honcho', None): - try: - hcfg = self.agent._honcho_config - new_key = ( - hcfg.resolve_session_name( - session_title=new_title, - session_id=self.agent.session_id, - ) - if hcfg else new_title - ) - if new_key and new_key != self.agent._honcho_session_key: - old_key = self.agent._honcho_session_key - self.agent._honcho.get_or_create(new_key) - self.agent._honcho_session_key = new_key - from tools.honcho_tools import set_session_context - set_session_context(self.agent._honcho, new_key) - from agent.display import honcho_session_line, write_tty - write_tty(honcho_session_line(hcfg.workspace_id, new_key) + "\n") - _cprint(f" Honcho session: {old_key} → {new_key}") - except Exception: - pass else: _cprint(" Session not found in database.") except ValueError as e: @@ -3630,11 +4016,13 @@ def process_command(self, command: str) -> bool: elif self._pending_title: _cprint(f" Title (pending): {self._pending_title}") else: - _cprint(f" No title set. Usage: /title ") + _cprint(" No title set. 
Usage: /title ") else: _cprint(" Session database not available.") elif canonical == "new": self.new_session() + elif canonical == "resume": + self._handle_resume_command(cmd_original) elif canonical == "provider": self._show_model_and_providers() elif canonical == "prompt": @@ -3667,6 +4055,8 @@ def process_command(self, command: str) -> bool: self.console.print(f" Status bar {state}") elif canonical == "verbose": self._toggle_verbose() + elif canonical == "yolo": + self._toggle_yolo() elif canonical == "reasoning": self._handle_reasoning_command(cmd_original) elif canonical == "compress": @@ -3689,7 +4079,7 @@ def process_command(self, command: str) -> bool: plugins = mgr.list_plugins() if not plugins: print("No plugins installed.") - print(f"Drop plugin directories into ~/.hermes/plugins/ to get started.") + print(f"Drop plugin directories into {display_hermes_home()}/plugins/ to get started.") else: print(f"Plugins ({len(plugins)}):") for p in plugins: @@ -3709,18 +4099,20 @@ def process_command(self, command: str) -> bool: self._handle_stop_command() elif canonical == "background": self._handle_background_command(cmd_original) + elif canonical == "btw": + self._handle_btw_command(cmd_original) elif canonical == "queue": - if not self._agent_running: - _cprint(" /queue only works while Hermes is busy. Just type your message normally.") + # Extract prompt after "/queue " or "/q " + parts = cmd_original.split(None, 1) + payload = parts[1].strip() if len(parts) > 1 else "" + if not payload: + _cprint(" Usage: /queue ") else: - # Extract prompt after "/queue " or "/q " - parts = cmd_original.split(None, 1) - payload = parts[1].strip() if len(parts) > 1 else "" - if not payload: - _cprint(" Usage: /queue ") - else: - self._pending_input.put(payload) + self._pending_input.put(payload) + if self._agent_running: _cprint(f" Queued for the next turn: {payload[:80]}{'...' if len(payload) > 80 else ''}") + else: + _cprint(f" Queued: {payload[:80]}{'...' 
if len(payload) > 80 else ''}") elif canonical == "skin": self._handle_skin_command(cmd_original) elif canonical == "voice": @@ -3882,7 +4274,7 @@ def _handle_background_command(self, cmd: str): _cprint(f" 🔄 Background task #{task_num} started: \"{prompt[:60]}{'...' if len(prompt) > 60 else ''}\"") _cprint(f" Task ID: {task_id}") - _cprint(f" You can continue chatting — results will appear when done.\n") + _cprint(" You can continue chatting — results will appear when done.\n") turn_route = self._resolve_turn_agent_config(prompt) @@ -3912,6 +4304,17 @@ def run_background(): provider_data_collection=self._provider_data_collection, fallback_model=self._fallback_model, ) + # Silence raw spinner; route thinking through TUI widget when no foreground agent is active. + bg_agent._print_fn = lambda *_a, **_kw: None + + def _bg_thinking(text: str) -> None: + # Concurrent bg tasks may race on _spinner_text; acceptable for best-effort UI. + if not self._agent_running: + self._spinner_text = text + if self._app: + self._app.invalidate() + + bg_agent.thinking_callback = _bg_thinking result = bg_agent.run_conversation( user_message=prompt, @@ -3974,6 +4377,9 @@ def run_background(): _cprint(f" ❌ Background task #{task_num} failed: {e}") finally: self._background_tasks.pop(task_id, None) + # Clear spinner only if no foreground agent owns it + if not self._agent_running: + self._spinner_text = "" if self._app: self._invalidate(min_interval=0) @@ -3981,6 +4387,120 @@ def run_background(): self._background_tasks[task_id] = thread thread.start() + def _handle_btw_command(self, cmd: str): + """Handle /btw — ephemeral side question using session context. + + Snapshots the current conversation history, spawns a no-tools agent in + a background thread, and prints the answer without persisting anything + to the main session. 
+ """ + parts = cmd.strip().split(maxsplit=1) + if len(parts) < 2 or not parts[1].strip(): + _cprint(" Usage: /btw ") + _cprint(" Example: /btw what module owns session title sanitization?") + _cprint(" Answers using session context. No tools, not persisted.") + return + + question = parts[1].strip() + task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{uuid.uuid4().hex[:6]}" + + if not self._ensure_runtime_credentials(): + _cprint(" (>_<) Cannot start /btw: no valid credentials.") + return + + turn_route = self._resolve_turn_agent_config(question) + history_snapshot = list(self.conversation_history) + + preview = question[:60] + ("..." if len(question) > 60 else "") + _cprint(f' 💬 /btw: "{preview}"') + + def run_btw(): + try: + btw_agent = AIAgent( + model=turn_route["model"], + api_key=turn_route["runtime"].get("api_key"), + base_url=turn_route["runtime"].get("base_url"), + provider=turn_route["runtime"].get("provider"), + api_mode=turn_route["runtime"].get("api_mode"), + acp_command=turn_route["runtime"].get("command"), + acp_args=turn_route["runtime"].get("args"), + max_iterations=8, + enabled_toolsets=[], + quiet_mode=True, + verbose_logging=False, + session_id=task_id, + platform="cli", + reasoning_config=self.reasoning_config, + providers_allowed=self._providers_only, + providers_ignored=self._providers_ignore, + providers_order=self._providers_order, + provider_sort=self._provider_sort, + provider_require_parameters=self._provider_require_params, + provider_data_collection=self._provider_data_collection, + fallback_model=self._fallback_model, + session_db=None, + skip_memory=True, + skip_context_files=True, + persist_session=False, + ) + + btw_prompt = ( + "[Ephemeral /btw side question. Answer using the conversation " + "context. No tools available. 
Be direct and concise.]\n\n" + + question + ) + result = btw_agent.run_conversation( + user_message=btw_prompt, + conversation_history=history_snapshot, + task_id=task_id, + ) + + response = (result.get("final_response") or "") if result else "" + if not response and result and result.get("error"): + response = f"Error: {result['error']}" + + # TUI refresh before printing + if self._app: + self._app.invalidate() + time.sleep(0.05) + print() + + if response: + try: + from hermes_cli.skin_engine import get_active_skin + _skin = get_active_skin() + _resp_color = _skin.get_color("response_border", "#4F6D4A") + except Exception: + _resp_color = "#4F6D4A" + + ChatConsole().print(Panel( + _rich_text_from_ansi(response), + title=f"[{_resp_color} bold]⚕ /btw[/]", + title_align="left", + border_style=_resp_color, + box=rich_box.HORIZONTALS, + padding=(1, 2), + )) + else: + _cprint(" 💬 /btw: (no response)") + + if self.bell_on_complete: + sys.stdout.write("\a") + sys.stdout.flush() + + except Exception as e: + if self._app: + self._app.invalidate() + time.sleep(0.05) + print() + _cprint(f" ❌ /btw failed: {e}") + finally: + if self._app: + self._invalidate(min_interval=0) + + thread = threading.Thread(target=run_btw, daemon=True, name=f"btw-{task_id}") + thread.start() + @staticmethod def _try_launch_chrome_debug(port: int, system: str) -> bool: """Try to launch Chrome/Chromium with remote debugging enabled. 
@@ -4092,7 +4612,7 @@ def _handle_browser_command(self, cmd: str): print(f" ⚠ Chrome launched but port {_port} isn't responding yet") print(" You may need to close existing Chrome windows first and retry") else: - print(f" ⚠ Could not auto-launch Chrome") + print(" ⚠ Could not auto-launch Chrome") # Show manual instructions as fallback sys_name = _plat.system() if sys_name == "Darwin": @@ -4149,7 +4669,7 @@ def _handle_browser_command(self, cmd: str): elif sub == "status": print() if current: - print(f"🌐 Browser: connected to live Chrome via CDP") + print("🌐 Browser: connected to live Chrome via CDP") print(f" Endpoint: {current}") _port = 9222 @@ -4163,9 +4683,9 @@ def _handle_browser_command(self, cmd: str): s.settimeout(1) s.connect(("127.0.0.1", _port)) s.close() - print(f" Status: ✓ reachable") + print(" Status: ✓ reachable") except (OSError, Exception): - print(f" Status: ⚠ not reachable (Chrome may not be running)") + print(" Status: ⚠ not reachable (Chrome may not be running)") elif os.environ.get("BROWSERBASE_API_KEY"): print("🌐 Browser: Browserbase (cloud)") else: @@ -4198,13 +4718,13 @@ def _handle_skin_command(self, cmd: str): current = get_active_skin_name() skins = list_skins() print(f"\n Current skin: {current}") - print(f" Available skins:") + print(" Available skins:") for s in skins: marker = " ●" if s["name"] == current else " " source = f" ({s['source']})" if s["source"] == "user" else "" print(f" {marker} {s['name']}{source} — {s['description']}") - print(f"\n Usage: /skin ") - print(f" Custom skins: drop a YAML file in ~/.hermes/skins/\n") + print("\n Usage: /skin ") + print(f" Custom skins: drop a YAML file in {display_hermes_home()}/skins/\n") return new_skin = parts[1].strip().lower() @@ -4251,6 +4771,17 @@ def _toggle_verbose(self): } _cprint(labels.get(self.tool_progress_mode, "")) + def _toggle_yolo(self): + """Toggle YOLO mode — skip all dangerous command approval prompts.""" + import os + current = 
bool(os.environ.get("HERMES_YOLO_MODE")) + if current: + os.environ.pop("HERMES_YOLO_MODE", None) + self.console.print(" ⚠ YOLO mode [bold red]OFF[/] — dangerous commands will require approval.") + else: + os.environ["HERMES_YOLO_MODE"] = "1" + self.console.print(" ⚡ YOLO mode [bold green]ON[/] — all commands auto-approved. Use with caution.") + def _handle_reasoning_command(self, cmd: str): """Handle /reasoning — manage effort level and display toggle. @@ -4351,12 +4882,7 @@ def _manual_compress(self): f" ✅ Compressed: {original_count} → {new_count} messages " f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)" ) - # Flush Honcho async queue so queued messages land before context resets - if self.agent and getattr(self.agent, '_honcho', None): - try: - self.agent._honcho.flush_all() - except Exception: - pass + except Exception as e: print(f" ❌ Compression failed: {e}") @@ -4384,7 +4910,7 @@ def _show_usage(self): compressor = agent.context_compressor last_prompt = compressor.last_prompt_tokens ctx_len = compressor.context_length - pct = (last_prompt / ctx_len * 100) if ctx_len else 0 + pct = min(100, (last_prompt / ctx_len * 100)) if ctx_len else 0 compressions = compressor.compression_count msg_count = len(self.conversation_history) @@ -4401,7 +4927,7 @@ def _show_usage(self): ) elapsed = format_duration_compact((datetime.now() - self.session_start).total_seconds()) - print(f" 📊 Session Token Usage") + print(" 📊 Session Token Usage") print(f" {'─' * 40}") print(f" Model: {agent.model}") print(f" Input tokens: {input_tokens:>10,}") @@ -4515,11 +5041,18 @@ def _check_config_mcp_changes(self) -> None: return # mcp_servers unchanged (some other section was edited) self._config_mcp_servers = new_mcp - # Notify user and reload + # Notify user and reload. Run in a separate thread with a hard + # timeout so a hung MCP server cannot block the process_loop + # indefinitely (which would freeze the entire TUI). 
print() print("🔄 MCP server config changed — reloading connections...") - with self._busy_command(self._slow_command_status("/reload-mcp")): - self._reload_mcp() + _reload_thread = threading.Thread( + target=self._reload_mcp, daemon=True + ) + _reload_thread.start() + _reload_thread.join(timeout=30) + if _reload_thread.is_alive(): + print(" ⚠️ MCP reload timed out (30s). Some servers may not have reconnected.") def _reload_mcp(self): """Reload MCP servers: disconnect all, re-read config.yaml, reconnect. @@ -4642,8 +5175,10 @@ def _on_tool_progress(self, function_name: str, preview: str, function_args: dic from agent.display import get_tool_emoji emoji = get_tool_emoji(function_name) label = preview or function_name - if len(label) > 50: - label = label[:47] + "..." + from agent.display import get_tool_preview_max_len + _pl = get_tool_preview_max_len() + if _pl > 0 and len(label) > _pl: + label = label[:_pl - 3] + "..." self._spinner_text = f"{emoji} {label}" self._invalidate() @@ -4661,6 +5196,33 @@ def _on_tool_progress(self, function_name: str, preview: str, function_args: dic except Exception: pass + def _on_tool_start(self, tool_call_id: str, function_name: str, function_args: dict): + """Capture local before-state for write-capable tools.""" + try: + from agent.display import capture_local_edit_snapshot + + snapshot = capture_local_edit_snapshot(function_name, function_args) + if snapshot is not None: + self._pending_edit_snapshots[tool_call_id] = snapshot + except Exception: + logger.debug("Edit snapshot capture failed for %s", function_name, exc_info=True) + + def _on_tool_complete(self, tool_call_id: str, function_name: str, function_args: dict, function_result: str): + """Render file edits with inline diff after write-capable tools complete.""" + snapshot = self._pending_edit_snapshots.pop(tool_call_id, None) + try: + from agent.display import render_edit_diff_with_delta + + render_edit_diff_with_delta( + function_name, + function_result, + 
function_args=function_args, + snapshot=snapshot, + print_fn=_cprint, + ) + except Exception: + logger.debug("Edit diff preview failed for %s", function_name, exc_info=True) + # ==================================================================== # Voice mode methods # ==================================================================== @@ -5375,6 +5937,8 @@ def chat(self, message, images: list = None) -> Optional[str]: self.agent = None # Initialize agent if needed + if self.agent is None: + _cprint(f"{_DIM}Initializing agent...{_RST}") if not self._init_agent( model_override=turn_route["model"], runtime_override=turn_route["runtime"], @@ -5412,6 +5976,13 @@ def chat(self, message, images: list = None) -> Optional[str]: except Exception as e: logging.debug("@ context reference expansion failed: %s", e) + # Sanitize surrogate characters that can arrive via clipboard paste from + # rich-text editors (Google Docs, Word, etc.). Lone surrogates are invalid + # UTF-8 and crash JSON serialization in the OpenAI SDK. + if isinstance(message, str): + from run_agent import _sanitize_surrogates + message = _sanitize_surrogates(message) + # Add user message to history self.conversation_history.append({"role": "user", "content": message}) @@ -5424,6 +5995,10 @@ def chat(self, message, images: list = None) -> Optional[str]: # Reset streaming display state for this turn self._reset_stream_state() + # Separate from _reset_stream_state because this must persist + # across intermediate turn boundaries (tool-calling loops) — only + # reset at the start of each user turn. 
+ self._reasoning_shown_this_turn = False # --- Streaming TTS setup --- # When ElevenLabs is the TTS provider and sounddevice is available, @@ -5496,13 +6071,25 @@ def stream_callback(delta: str): def run_agent(): nonlocal result agent_message = _voice_prefix + message if _voice_prefix else message - result = self.agent.run_conversation( - user_message=agent_message, - conversation_history=self.conversation_history[:-1], # Exclude the message we just added - stream_callback=stream_callback, - task_id=self.session_id, - persist_user_message=message if _voice_prefix else None, - ) + try: + result = self.agent.run_conversation( + user_message=agent_message, + conversation_history=self.conversation_history[:-1], # Exclude the message we just added + stream_callback=stream_callback, + task_id=self.session_id, + persist_user_message=message if _voice_prefix else None, + ) + except Exception as exc: + logging.error("run_conversation raised: %s", exc, exc_info=True) + _summary = getattr(self.agent, '_summarize_api_error', lambda e: str(e)[:300])(exc) + result = { + "final_response": f"Error: {_summary}", + "messages": [], + "api_calls": 0, + "completed": False, + "failed": True, + "error": _summary, + } # Start agent in background thread agent_thread = threading.Thread(target=run_agent) @@ -5525,7 +6112,7 @@ def run_agent(): # But if it does (race condition), don't interrupt. if self._clarify_state or self._clarify_freetext: continue - print(f"\n⚡ New message detected, interrupting...") + print("\n⚡ New message detected, interrupting...") # Signal TTS to stop on interrupt if stop_event is not None: stop_event.set() @@ -5556,6 +6143,16 @@ def run_agent(): agent_thread.join() # Ensure agent thread completes + # Proactively clean up async clients whose event loop is dead. + # The agent thread may have created AsyncOpenAI clients bound + # to a per-thread event loop; if that loop is now closed, those + # clients' __del__ would crash prompt_toolkit's loop on GC. 
+ try: + from agent.auxiliary_client import cleanup_stale_async_clients + cleanup_stale_async_clients() + except Exception: + pass + # Flush any remaining streamed text and close the box self._flush_stream() @@ -5616,8 +6213,13 @@ def run_agent(): response_previewed = result.get("response_previewed", False) if result else False # Display reasoning (thinking) box if enabled and available. - # Skip when streaming already showed reasoning live. - if self.show_reasoning and result and not self._reasoning_stream_started: + # Skip when streaming already showed reasoning live. Use the + # turn-persistent flag (_reasoning_shown_this_turn) instead of + # _reasoning_stream_started — the latter gets reset during + # intermediate turn boundaries (tool-calling loops), which caused + # the reasoning box to re-render after the final response. + _reasoning_already_shown = getattr(self, '_reasoning_shown_this_turn', False) + if self.show_reasoning and result and not _reasoning_already_shown: reasoning = result.get("last_reasoning") if reasoning: w = shutil.get_terminal_size().columns @@ -5686,8 +6288,11 @@ def run_agent(): ).start() - # Combine all interrupt messages (user may have typed multiple while waiting) - # and re-queue as one prompt for process_loop + # Re-queue the interrupt message (and any that arrived while we were + # processing the first) as the next prompt for process_loop. + # Only reached when busy_input_mode == "interrupt" (the default). + # In "queue" mode Enter routes directly to _pending_input so this + # block is never hit. if pending_message and hasattr(self, '_pending_input'): all_parts = [pending_message] while not self._interrupt_queue.empty(): @@ -5698,7 +6303,12 @@ def run_agent(): except queue.Empty: break combined = "\n".join(all_parts) - print(f"\n📨 Queued: '{combined[:50]}{'...' if len(combined) > 50 else ''}'") + n = len(all_parts) + preview = combined[:50] + ("..." 
if len(combined) > 50 else "") + if n > 1: + print(f"\n⚡ Sending {n} messages after interrupt: '{preview}'") + else: + print(f"\n⚡ Sending after interrupt: '{preview}'") self._pending_input.put(combined) return response @@ -5738,10 +6348,22 @@ def _print_exit_summary(self): else: duration_str = f"{seconds}s" - print(f"Resume this session with:") + # Look up session title for resume-by-name hint + session_title = None + if self._session_db: + try: + session_title = self._session_db.get_session_title(self.session_id) + except Exception: + pass + + print("Resume this session with:") print(f" hermes --resume {self.session_id}") + if session_title: + print(f" hermes -c \"{session_title}\"") print() print(f"Session: {self.session_id}") + if session_title: + print(f"Title: {session_title}") print(f"Duration: {duration_str}") print(f"Messages: {msg_count} ({user_msgs} user, {tool_calls} tool calls)") else: @@ -5758,6 +6380,9 @@ def _get_tui_prompt_symbols(self) -> tuple[str, str]: ``normal_prompt`` is the full ``branding.prompt_symbol``. ``state_suffix`` is what special states (sudo/secret/approval/agent) should render after their leading icon. + + When a profile is active (not "default"), the profile name is + prepended to the prompt symbol: ``coder ❯`` instead of ``❯``. 
""" try: from hermes_cli.skin_engine import get_active_prompt_symbol @@ -5766,6 +6391,15 @@ def _get_tui_prompt_symbols(self) -> tuple[str, str]: symbol = "❯ " symbol = (symbol or "❯ ").rstrip() + " " + + # Prepend profile name when not default + try: + from hermes_cli.profiles import get_active_profile_name + profile = get_active_profile_name() + if profile not in ("default", "custom"): + symbol = f"{profile} {symbol}" + except Exception: + pass stripped = symbol.rstrip() if not stripped: return "❯ ", "❯ " @@ -5908,22 +6542,22 @@ def _build_tui_layout_children( def run(self): """Run the interactive CLI loop with persistent input at bottom.""" + # Push the entire TUI to the bottom of the terminal so the banner, + # responses, and prompt all appear pinned to the bottom — empty + # space stays above, not below. This prints enough blank lines to + # scroll the cursor to the last row before any content is rendered. + try: + _term_lines = shutil.get_terminal_size().lines + if _term_lines > 2: + print("\n" * (_term_lines - 1), end="", flush=True) + except Exception: + pass + self.show_banner() # One-line Honcho session indicator (TTY-only, not captured by agent). # Only show when the user explicitly configured Honcho for Hermes # (not auto-enabled from a stray HONCHO_API_KEY env var). - try: - from honcho_integration.client import HonchoClientConfig - from agent.display import honcho_session_line, write_tty - hcfg = HonchoClientConfig.from_global_config() - if hcfg.enabled and hcfg.api_key and hcfg.explicitly_configured: - sname = hcfg.resolve_session_name(session_id=self.session_id) - if sname: - write_tty(honcho_session_line(hcfg.workspace_id, sname) + "\n") - except Exception: - pass - # If resuming a session, load history and display it immediately # so the user has context before typing their first message. 
if self._resumed: @@ -5953,6 +6587,11 @@ def run(self): self._interrupt_queue = queue.Queue() # For messages typed while agent is running self._should_exit = False self._last_ctrl_c_time = 0 # Track double Ctrl+C for force exit + + # Give plugin manager a CLI reference so plugins can inject messages + from hermes_cli.plugins import get_plugin_manager + get_plugin_manager()._cli_ref = self + # Config file watcher — detect mcp_servers changes and auto-reload from hermes_cli.config import get_config_path as _get_config_path _cfg_path = _get_config_path() @@ -6004,10 +6643,18 @@ def run(self): set_approval_callback(self._approval_callback) set_secret_capture_callback(self._secret_capture_callback) - # Ensure tirith security scanner is available (downloads if needed) + # Ensure tirith security scanner is available (downloads if needed). + # Warn the user if tirith is enabled in config but not available, + # so they know command security scanning is degraded. try: from tools.tirith_security import ensure_installed - ensure_installed(log_failures=False) + tirith_path = ensure_installed(log_failures=False) + if tirith_path is None: + security_cfg = self.config.get("security", {}) or {} + tirith_enabled = security_cfg.get("tirith_enabled", True) + if tirith_enabled: + _cprint(f" {_DIM}⚠ tirith security scanner enabled but not available " + f"— command scanning will use pattern matching only{_RST}") except Exception: pass # Non-fatal — fail-open at scan time if unavailable @@ -6078,7 +6725,9 @@ def handle_enter(event): return # --- Normal input routing --- - text = event.app.current_buffer.text.strip() + # Expand any [Pasted text #N +X lines] markers to real content before submit + raw_text = event.app.current_buffer.text + text = _reconstruct_pastes(raw_text).strip() has_images = bool(self._attached_images) if text or has_images: # Snapshot and clear attached images @@ -6087,17 +6736,23 @@ def handle_enter(event): event.app.invalidate() # Bundle text + images as a tuple 
when images are present payload = (text, images) if images else text - if self._agent_running and not (text and text.startswith("/")): - self._interrupt_queue.put(payload) - # Debug: log to file when message enters interrupt queue - try: - _dbg = _hermes_home / "interrupt_debug.log" - with open(_dbg, "a") as _f: - import time as _t - _f.write(f"{_t.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, " - f"agent_running={self._agent_running}\n") - except Exception: - pass + if self._agent_running and not (text and _looks_like_slash_command(text)): + if self.busy_input_mode == "queue": + # Queue for the next turn instead of interrupting + self._pending_input.put(payload) + preview = text if text else f"[{len(images)} image{'s' if len(images) != 1 else ''} attached]" + _cprint(f" Queued for the next turn: {preview[:80]}{'...' if len(preview) > 80 else ''}") + else: + self._interrupt_queue.put(payload) + # Debug: log to file when message enters interrupt queue + try: + _dbg = _hermes_home / "interrupt_debug.log" + with open(_dbg, "a") as _f: + import time as _t + _f.write(f"{_t.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, " + f"agent_running={self._agent_running}\n") + except Exception: + pass else: self._pending_input.put(payload) event.app.current_buffer.reset(append_to_history=True) @@ -6288,6 +6943,24 @@ def handle_ctrl_d(event): self._should_exit = True event.app.exit() + @kb.add('c-z') + def handle_ctrl_z(event): + """Handle Ctrl+Z - suspend process to background (Unix only).""" + import sys + if sys.platform == 'win32': + _cprint(f"\n{_DIM}Suspend (Ctrl+Z) is not supported on Windows.{_RST}") + event.app.invalidate() + return + import os, signal as _sig + from prompt_toolkit.application import run_in_terminal + from hermes_cli.skin_engine import get_active_skin + agent_name = get_active_skin().get_branding("agent_name", "Hermes Agent") + msg = f"\n{agent_name} has been suspended. 
Run `fg` to bring {agent_name} back." + def _suspend(): + os.write(1, msg.encode()) + os.kill(0, _sig.SIGTSTP) + run_in_terminal(_suspend) + # Voice push-to-talk key: configurable via config.yaml (voice.record_key) # Default: Ctrl+B (avoids conflict with Ctrl+R readline reverse-search) # Config uses "ctrl+b" format; prompt_toolkit expects "c-b" format. @@ -6367,28 +7040,40 @@ def handle_paste(event): triggers this with the pasted text. We also check the clipboard for an image on every paste event. - Large pastes (5+ lines) are collapsed to a file reference - placeholder while preserving any existing user text in the - buffer. + Large pastes (5+ lines) are collapsed to a compact marker + placeholder while preserving the full pasted text in memory. + The real content is restored at submit time. """ pasted_text = event.data or "" + # Normalise line endings — Windows \r\n and old Mac \r both become \n + # so the 5-line collapse threshold and display are consistent. + pasted_text = pasted_text.replace('\r\n', '\n').replace('\r', '\n') if self._try_attach_clipboard_image(): event.app.invalidate() if pasted_text: - line_count = pasted_text.count('\n') + char_count = len(pasted_text) buf = event.current_buffer - if line_count >= 5 and not buf.text.strip().startswith('/'): - _paste_counter[0] += 1 - paste_dir = _hermes_home / "pastes" - paste_dir.mkdir(parents=True, exist_ok=True) - paste_file = paste_dir / f"paste_{_paste_counter[0]}_{datetime.now().strftime('%H%M%S')}.txt" - paste_file.write_text(pasted_text, encoding="utf-8") - placeholder = f"[Pasted text #{_paste_counter[0]}: {line_count + 1} lines \u2192 {paste_file}]" - prefix = "" - if buf.cursor_position > 0 and buf.text[buf.cursor_position - 1] != '\n': - prefix = "\n" - _paste_just_collapsed[0] = True - buf.insert_text(prefix + placeholder) + # Bypass collapsing only when the pasted text itself starts with / + # (e.g. pasting a full slash command like "/effort max" verbatim). 
+ # Do NOT bypass just because the buffer has existing content — + # that existing content might be a [Pasted text #N] marker and + # the new paste is regular text that should still be collapsed. + will_collapse = char_count >= 20 and not pasted_text.strip().startswith('/') + if will_collapse: + # Exceedingly large pastes are inserted as plain text — + # no point hoarding them in memory at the cost of OOM risk. + if char_count > MAX_PASTE_SIZE: + buf.insert_text(pasted_text) + else: + _paste_counter[0] += 1 + paste_id = str(_paste_counter[0]) + # Store full content in memory (no temp file) + _paste_store[paste_id] = pasted_text + _active_paste_ids.add(paste_id) + placeholder = _make_paste_marker(paste_id, pasted_text) + _paste_just_collapsed[0] = True + buf.text = buf.text + placeholder + buf.cursor_position = len(buf.text) else: buf.insert_text(pasted_text) @@ -6474,38 +7159,138 @@ def _input_height(): input_area.window.height = _input_height - # Paste collapsing: detect large pastes and save to temp file + # Paste collapsing: detect large pastes and collapse to a compact marker. + # Full pasted text is stored in memory and restored at submit time. _paste_counter = [0] _prev_text_len = [0] + _prev_newline_count = [0] _paste_just_collapsed = [False] + _paste_store: dict[str, str] = {} # paste_id -> full pasted text + _active_paste_ids: set[str] = set() # paste_ids currently in the buffer + MAX_PASTE_SIZE = 100_000 # chars — pastes larger than this are inserted as plain text + + # Regex that matches BOTH [+X line(s)] and [+X char(s)] marker formats. + # Used by _reconstruct_pastes to expand markers regardless of which path + # created them (line-based handle_paste vs char-based handle_paste). + _PASTE_MARKER_RE = re.compile( + r'\[Pasted text #(\d+) \+(\d+) (line|char)\(s\)\]' + ) - def _on_text_changed(buf): - """Detect large pastes and collapse them to a file reference. 
+ def _make_paste_marker(paste_id: str, stored_text: str) -> str: + """Return the compact marker string for a given paste_id and stored text. + + Format is [+X char(s)] for single-line pastes and [+X line(s)] for + multi-line pastes. Defined once here so all 3 call sites (handle_paste, + _reconstruct_pastes, _on_text_changed) use the identical format. + """ + lc = stored_text.count('\n') + if lc == 0: + return f"[Pasted text #{paste_id} +{len(stored_text)} chars]" + return f"[Pasted text #{paste_id} +{lc + 1} lines]" + + def _reconstruct_pastes(text: str) -> str: + """Replace any [Pasted text #N +X lines/chars] markers with stored content. + Called at submit time (Enter) so the agent sees the full pasted text. + + Uses _PASTE_MARKER_RE so it handles BOTH [+X line(s)] and [+X char(s)] + marker formats — robust against any inconsistency between creation and + reconstruction paths. + """ + if not _active_paste_ids: + return text + + def _replace_marker(match: re.Match) -> str: + pid = match.group(1) + if pid in _active_paste_ids: + stored = _paste_store.pop(pid, "") + _active_paste_ids.discard(pid) + return stored + return match.group(0) # no-op if already cleared + + result = _PASTE_MARKER_RE.sub(_replace_marker, text) - When bracketed paste is available, handle_paste collapses - large pastes directly. This handler is a fallback for - terminals without bracketed paste support. + # Reset paste counter when all pastes are reconstructed (submitted) + if not _active_paste_ids: + _paste_counter[0] = 0 + return result + + def _on_text_changed(buf): + """Detect large pastes and collapse them to a compact marker. + Also detects when user deletes any part of a paste marker — + the entire marker block is wiped from the buffer. """ text = buf.text - chars_added = len(text) - _prev_text_len[0] - _prev_text_len[0] = len(text) + + # Guard: suppress processing immediately after a collapse or deletion. 
def _on_text_changed(buf):
    """Fallback paste collapsing + paste-marker deletion handling.

    Two responsibilities:
      1. Deletion detection: when the user deletes any part of a
         [Pasted text #N ...] marker, the whole leftover fragment (and
         the stored text) is wiped so a half-marker never reaches the
         agent.
      2. Fallback paste detection for terminals WITHOUT bracketed paste
         (where handle_paste never fires): a burst of 20+ characters in
         one change event is collapsed to a compact marker.

    Fixes vs the previous revision:
      * Removed the unreachable `if full_marker in text:` branch that was
        nested inside `if full_marker not in text:` — it could never run.
      * `text` is refreshed after each marker wipe. Previously every wipe
        recomputed from the stale pre-loop text, so when one keystroke
        removed several markers, later wipes resurrected fragments of the
        earlier ones.
    """
    text = buf.text

    # Re-entrancy guard: the collapse/wipe paths below assign buf.text,
    # which fires this handler again; swallow that synthetic event.
    if _paste_just_collapsed[0]:
        _paste_just_collapsed[0] = False
        _prev_text_len[0] = len(text)
        _prev_newline_count[0] = text.count('\n')
        return

    # --- Deletion detection: wipe every marker that is no longer intact ---
    # Process every active paste_id — a single keystroke can delete
    # multiple markers simultaneously.
    any_deleted = False
    for pid in list(_active_paste_ids):
        stored = _paste_store.get(pid, "")
        full_marker = _make_paste_marker(pid, stored) if stored else f"[Pasted text #{pid} +1 lines]"
        if full_marker in text:
            continue
        # Marker partially or fully deleted — remove the remaining
        # fragment, from "[Pasted text #pid" to the end of that line.
        any_deleted = True
        _paste_just_collapsed[0] = True
        frag_start = text.find(f"[Pasted text #{pid}")
        if frag_start != -1:
            frag_end = text.find("\n", frag_start)
            if frag_end == -1:
                frag_end = len(text)
            text = text[:frag_start] + text[frag_end:]
        # NOTE(review): trailing newlines are stripped along with the
        # marker — intentional per the original wipe behavior.
        text = text.rstrip("\n")
        buf.text = text
        buf.cursor_position = len(text)
        _paste_store.pop(pid, None)
        _active_paste_ids.discard(pid)
        if not _active_paste_ids:
            _paste_counter[0] = 0
    if any_deleted:
        _prev_text_len[0] = len(buf.text)
        _prev_newline_count[0] = buf.text.count('\n')
        return

    # --- Fallback paste detection (terminals without bracketed paste) ---
    prev_len = _prev_text_len[0]
    prev_newlines = _prev_newline_count[0]
    chars_added = len(text) - prev_len
    line_count = text.count('\n')
    newlines_added = line_count - prev_newlines
    _prev_text_len[0] = len(text)
    _prev_newline_count[0] = line_count

    # Heuristic: a real paste adds many characters (or several newlines)
    # in one change event, unlike single keystrokes. Assumes the paste
    # landed at the END of the buffer (cursor at end) — TODO confirm the
    # behavior for mid-buffer pastes.
    is_paste = chars_added > 1 or newlines_added >= 4
    # Collapse pastes of 20+ chars unless the pasted content itself
    # starts with '/' (lets full slash commands paste verbatim).
    if is_paste and chars_added >= 20 and not text[prev_len:].strip().startswith('/'):
        pasted_portion = text[prev_len:]
        if len(pasted_portion) > MAX_PASTE_SIZE:
            # Too large to hold in memory — leave it in the buffer as-is.
            pass
        else:
            _paste_counter[0] += 1
            paste_id = str(_paste_counter[0])
            # Store ONLY the newly pasted tail — not the full buffer —
            # so text typed before the paste survives and multi-paste works.
            _paste_store[paste_id] = pasted_portion
            _active_paste_ids.add(paste_id)
            _paste_just_collapsed[0] = True
            buf.text = text[:prev_len] + _make_paste_marker(paste_id, pasted_portion)
            buf.cursor_position = len(buf.text)
def _signal_handler(signum, frame):
    """Handle SIGHUP/SIGTERM by triggering graceful cleanup.

    Translating the signal into KeyboardInterrupt lets the existing
    `except (EOFError, KeyboardInterrupt, BrokenPipeError)` around
    app.run() unwind normally, so the same finally/atexit cleanup path
    runs on SSH disconnect or SIGTERM as on a user Ctrl+C.

    Args:
        signum: Signal number delivered by the OS.
        frame: Current stack frame at delivery time (unused).
    """
    logger.debug("Received signal %s, triggering graceful shutdown", signum)
    raise KeyboardInterrupt()
+ def _suppress_closed_loop_errors(loop, context): + exc = context.get("exception") + if isinstance(exc, RuntimeError) and "Event loop is closed" in str(exc): + return # silently suppress + # Fall back to default handler for everything else + loop.default_exception_handler(context) + # Run the application with patch_stdout for proper output handling try: with patch_stdout(): + # Set the custom handler on prompt_toolkit's event loop + try: + import asyncio as _aio + _loop = _aio.get_event_loop() + _loop.set_exception_handler(_suppress_closed_loop_errors) + except Exception: + pass app.run() - except (EOFError, KeyboardInterrupt): + except (EOFError, KeyboardInterrupt, BrokenPipeError): pass finally: self._should_exit = True @@ -7135,18 +7979,29 @@ def _restart_recording(): set_sudo_password_callback(None) set_approval_callback(None) set_secret_capture_callback(None) - # Flush + shut down Honcho async writer (drains queue before exit) - if self.agent and getattr(self.agent, '_honcho', None): - try: - self.agent._honcho.shutdown() - except Exception: - pass # Close session in SQLite if hasattr(self, '_session_db') and self._session_db and self.agent: try: self._session_db.end_session(self.agent.session_id, "cli_close") - except Exception as e: + except (Exception, KeyboardInterrupt) as e: logger.debug("Could not close session in DB: %s", e) + # Plugin hook: on_session_end — safety net for interrupted exits. + # run_conversation() already fires this per-turn on normal completion, + # so only fire here if the agent was mid-turn (_agent_running) when + # the exit occurred, meaning run_conversation's hook didn't fire. 
+ if self.agent and getattr(self, '_agent_running', False): + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook( + "on_session_end", + session_id=self.agent.session_id, + completed=False, + interrupted=True, + model=getattr(self.agent, 'model', None), + platform=getattr(self.agent, 'platform', None) or "cli", + ) + except Exception: + pass _run_cleanup() self._print_exit_summary() @@ -7264,12 +8119,9 @@ def main( else: toolsets_list.append(str(t)) else: - # Check config for CLI toolsets, fallback to hermes-cli - config_cli_toolsets = CLI_CONFIG.get("platform_toolsets", {}).get("cli") - if config_cli_toolsets and isinstance(config_cli_toolsets, list): - toolsets_list = config_cli_toolsets - else: - toolsets_list = ["hermes-cli"] + # Use the shared resolver so MCP servers are included at runtime + from hermes_cli.tools_config import _get_platform_tools + toolsets_list = sorted(_get_platform_tools(CLI_CONFIG, "cli")) parsed_skills = _parse_skills_argument(skills) @@ -7351,6 +8203,12 @@ def main( if response: print(response) print(f"\nsession_id: {cli.session_id}") + + # Ensure proper exit code for automation wrappers + sys.exit(1 if isinstance(result, dict) and result.get("failed") else 0) + + # Exit with error code if credentials or agent init fails + sys.exit(1) else: cli.show_banner() cli.console.print(f"[bold blue]Query:[/] {query}")