From 32d67b55ae4a6747628ecb53c435c5b39e78b2ed Mon Sep 17 00:00:00 2001 From: GitRicko Bot Date: Tue, 23 Jun 2026 07:10:28 +0000 Subject: [PATCH 1/2] fix: gateway health check in self-check misreports connection refused as pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gateway probe uses `|| echo "000"` to default the HTTP code on failure. When curl returns `000` (connection refused) AND the `|| echo "000"` fallback fires, stdout gets concatenated as `000000` (6 chars). The bash string comparison against `000` then evaluates as true, causing the gateway to be reported as ✅ up when it is actually down. Fix: replace `|| echo "000"` with `|| true` + bash parameter expansion `${has_gateway:-000}`. This avoids stdout concatenation entirely — curl's output stays clean, and only empty results get the 000 default. --- docker/self-check.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/self-check.sh b/docker/self-check.sh index da4956f..c832070 100755 --- a/docker/self-check.sh +++ b/docker/self-check.sh @@ -278,7 +278,8 @@ if ! 
should_skip "hermes"; then if [ -f "$HERMES_CONFIG" ]; then cfg_model=$(grep -A1 '^model:' "$HERMES_CONFIG" 2>/dev/null | grep 'default' | head -1 | sed 's/.*default: *//' || echo "") cfg_provider=$(grep -A1 '^model:' "$HERMES_CONFIG" 2>/dev/null | grep 'provider' | head -1 | sed 's/.*provider: *//' || echo "") - has_gateway=$(curl -s -o /dev/null -w "%{http_code}" --max-time 3 "$HERMES_GATEWAY_URL" 2>/dev/null || echo "000") + has_gateway=$(curl -s -o /dev/null -w "%{http_code}" --max-time 3 "$HERMES_GATEWAY_URL" 2>/dev/null || true) + has_gateway="${has_gateway:-000}" if [ -n "$cfg_model" ]; then _ok "Config" "model=${cfg_model}, provider=${cfg_provider:-unset}" From 98fac58671b846563a4e46caf20da649f8e6e4cd Mon Sep 17 00:00:00 2001 From: gitricko Date: Tue, 23 Jun 2026 06:27:24 -0400 Subject: [PATCH 2/2] feat: expose Hermes Gateway port 9119 and update infrastructure utilities and health checks --- .github/workflows/docker-publish.yml | 7 +-- docker-compose.yml | 2 + docker/Dockerfile | 2 +- docker/self-check.sh | 76 ++-------------------------- docker/start-hermes.sh | 4 +- 5 files changed, 14 insertions(+), 77 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 165d331..d793ef5 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -54,6 +54,7 @@ jobs: -p 8888:8888 \ -p 7352:7352 \ -p 20128:20128 \ + -p 9119:9119 \ hermes-webtop:test # Wait for init scripts to complete (poll for known log markers) @@ -109,7 +110,7 @@ jobs: if [ "$ELAPSED" -gt "$PORT_POLL_TIMEOUT" ]; then echo "" echo "→ FAIL: Port poll timeout after ${PORT_POLL_TIMEOUT}s — not all services responded" - for pair in "3000:WebTop" "8888:CodeServer" "7352:ModelRelay" "20128:OmniRoute"; do + for pair in "3000:WebTop" "8888:CodeServer" "7352:ModelRelay" "20128:OmniRoute" "9119:HermesGateway"; do PORT="${pair%%:*}" NAME="${pair##*:}" if [ "${RESPONDED[$PORT]}" != "true" ]; then @@ -122,7 +123,7 @@ jobs: 
exit 1 fi - for pair in "3000:WebTop" "8888:CodeServer" "7352:ModelRelay" "20128:OmniRoute"; do + for pair in "3000:WebTop" "8888:CodeServer" "7352:ModelRelay" "20128:OmniRoute" "9119:HermesGateway"; do PORT="${pair%%:*}" NAME="${pair##*:}" @@ -141,7 +142,7 @@ jobs: # Check if all ports responded ALL_RESPONDED=true - for port in 3000 8888 7352 20128; do + for port in 3000 8888 7352 20128 9119; do if [ "${RESPONDED[$port]}" != "true" ]; then ALL_RESPONDED=false break diff --git a/docker-compose.yml b/docker-compose.yml index 04c9515..89e05be 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -32,6 +32,8 @@ services: - 3000:3000 # hermes dashboard specific ports (adjust as needed) - 9119:9119 + # hermes api server (if enabled) + - 8642:8642 # modelrelay specific ports (adjust as needed) - 7352:7352 # OmniRoute specific ports (adjust as needed) diff --git a/docker/Dockerfile b/docker/Dockerfile index 8cee694..ec839d9 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -28,7 +28,7 @@ RUN GH_ARCH="${GH_ARCH:-${TARGETARCH:-amd64}}"; \ && echo "deb [arch=${GH_ARCH} signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" > /etc/apt/sources.list.d/github-cli.list # Install help utilities and clean up apt cache to reduce image size -RUN apt-get update && apt-get install -y htop zsh ripgrep gh remmina && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y htop zsh ripgrep gh remmina iputils-ping net-tools socat && rm -rf /var/lib/apt/lists/* # Copy Node.js binaries and libraries COPY --from=node-bin --chown=abc:abc /usr/local/bin/node /usr/local/bin/ diff --git a/docker/self-check.sh b/docker/self-check.sh index c832070..e4268cd 100755 --- a/docker/self-check.sh +++ b/docker/self-check.sh @@ -90,74 +90,6 @@ echo " ${BOLD}HERMES-WEBTOP HEALTH REPORT${NC}" echo " $(date -u)" echo " ════════════════════════════════════════════════════════════" -# ── 0. 
Cleanup ─────────────────────────────────────────────────────────────── -# Purge stale ACP sessions from state.db. ACP sessions are VSCode/IDE extension -# sessions that persist in state.db across container reboots. Some may still be -# recoverable (if their saved billing_provider still exists in the current -# config), others are unrecoverable (provider was removed/renamed). -# -# This checks each session individually: only deletes sessions whose -# billing_provider no longer matches a configured provider, so recoverable -# sessions from the same container lifetime are preserved. -# section "Cleanup" -# STATE_DB="${HOME:-/config}/.hermes/state.db" -# if [ -f "$STATE_DB" ]; then -# python3 -c " -# import sqlite3, os, yaml - -# db_path = os.path.expanduser('$STATE_DB') -# cfg_path = os.path.expanduser('~/.hermes/config.yaml') - -# try: -# # 1. Build set of configured providers -# configured = set() -# with open(cfg_path) as f: -# cfg = yaml.safe_load(f) or {} -# configured.update((cfg.get('providers') or {}).keys()) -# configured.update((cfg.get('custom_providers') or {}).keys()) -# mc = cfg.get('model', {}) -# if isinstance(mc, dict) and mc.get('provider'): -# configured.add(mc['provider']) - -# # 2. 
Check each ACP session -# db = sqlite3.connect(db_path) -# rows = db.execute(''' -# SELECT id, billing_provider FROM sessions -# WHERE source='acp' -# ''').fetchall() - -# purged = 0 -# kept = 0 -# for sid, bp in rows: -# if bp is None or bp in configured: -# kept += 1 -# else: -# purged += 1 -# db.execute('DELETE FROM sessions WHERE id=?', (sid,)) -# db.commit() -# db.close() -# print(f'{purged} {kept}') -# except Exception: -# print('-1 -1') -# " 2>/dev/null | while read purged kept; do -# [ -z "$purged" ] && purged=0 -# [ -z "$kept" ] && kept=0 -# if [ "$purged" -gt 0 ] 2>/dev/null; then -# _ok "ACP sessions" "purged ${purged} stale session(s), kept ${kept}" -# json_add "cleanup:acp-sessions" "ok" "purged ${purged} stale, kept ${kept}" "{\"purged\":${purged},\"kept\":${kept}}" -# elif [ "$purged" = "0" ] 2>/dev/null && [ "$kept" -ge 0 ] 2>/dev/null; then -# _ok "ACP sessions" "none to clean (${kept} kept)" -# json_add "cleanup:acp-sessions" "ok" "no stale sessions" "{\"kept\":${kept}}" -# else -# _warn "ACP sessions" "cleanup query failed" -# json_add "cleanup:acp-sessions" "warn" "cleanup failed" "{}" -# fi -# done -# else -# _ok "ACP sessions" "no state.db yet" -# json_add "cleanup:acp-sessions" "ok" "state.db not found" "{}" -# fi - # ── 1. Services ────────────────────────────────────────────────────────────── section "Services" @@ -165,7 +97,7 @@ if ! should_skip "services"; then # Poll all service ports until all respond or timeout PORT_POLL_TIMEOUT=60 POLL_STARTED_AT=$(date +%s) - declare -A RESPONDED=([3000]="" [8888]="" [7352]="" [20128]="") + declare -A RESPONDED=([3000]="" [8888]="" [7352]="" [20128]="" [9119]="") echo "" echo "=== Polling Service Ports (${PORT_POLL_TIMEOUT}s timeout) ===" @@ -176,7 +108,7 @@ if ! 
should_skip "services"; then if [ "$ELAPSED" -gt "$PORT_POLL_TIMEOUT" ]; then echo "" echo "→ FAIL: Port poll timeout after ${PORT_POLL_TIMEOUT}s — not all services responded" - for pair in "3000:WebTop" "8888:CodeServer" "7352:ModelRelay" "20128:OmniRoute"; do + for pair in "3000:WebTop" "8888:CodeServer" "7352:ModelRelay" "20128:OmniRoute" "9119:HermesGateway"; do PORT="${pair%%:*}" NAME="${pair##*:}" if [ "${RESPONDED[$PORT]}" != "true" ]; then @@ -185,7 +117,7 @@ if ! should_skip "services"; then done fi - for pair in "3000:WebTop" "8888:CodeServer" "7352:ModelRelay" "20128:OmniRoute"; do + for pair in "3000:WebTop" "8888:CodeServer" "7352:ModelRelay" "20128:OmniRoute" "9119:HermesGateway"; do PORT="${pair%%:*}" NAME="${pair##*:}" @@ -204,7 +136,7 @@ if ! should_skip "services"; then # Check if all ports responded ALL_RESPONDED=true - for port in 3000 8888 7352 20128; do + for port in 3000 8888 7352 20128 9119; do if [ "${RESPONDED[$port]}" != "true" ]; then ALL_RESPONDED=false break diff --git a/docker/start-hermes.sh b/docker/start-hermes.sh index 666dfa7..d08e756 100755 --- a/docker/start-hermes.sh +++ b/docker/start-hermes.sh @@ -76,7 +76,9 @@ runuser -l abc <<'EOF' ensure_ownership "/usr/local/lib/hermes-agent/hermes_cli" ensure_ownership "/usr/local/lib/hermes-agent/web" echo "[start-hermes] Starting Hermes Dashboard..." - nohup hermes dashboard --host 0.0.0.0 --insecure --no-open > ~/.hermes/logs/dashboard.log 2>&1 & + # Start Hermes Dashboard in background and expose it on port 9119 via socat + nohup socat TCP4-LISTEN:9119,fork,reuseaddr TCP4:127.0.0.1:9009 > ~/.hermes/logs/socat-9119.log 2>&1 & + nohup hermes dashboard --port 9009 --no-open > ~/.hermes/logs/dashboard.log 2>&1 & # Remind Hermes on Mnemon setup if needed if [ ! -f "$HOME/.hermes/memories/USER.md" ]; then