From 7599c5ae9659616220baee9f20864c38edd2cbf3 Mon Sep 17 00:00:00 2001 From: Tommy Lin Date: Fri, 3 Apr 2026 13:07:33 +0800 Subject: [PATCH 1/2] fix(telegram): add socket timeout to prevent poll loop from hanging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tgApi() had no client-side socket timeout. When the TCP connection silently dies (e.g. NAT timeout, ISP routing change, network blip during the 30s long-poll window), the HTTPS request hangs forever and the poll loop never recovers — the bot stops responding until the process is restarted. Add a 60s socket timeout with req.destroy() on the timeout event. The existing poll() catch block already continues to the next iteration, so the loop self-heals after a transient failure. Signed-off-by: Tommy Lin --- scripts/telegram-bridge.js | 4 + test/telegram-bridge-timeout.test.js | 191 +++++++++++++++++++++++++++ 2 files changed, 195 insertions(+) create mode 100644 test/telegram-bridge-timeout.test.js diff --git a/scripts/telegram-bridge.js b/scripts/telegram-bridge.js index 96a29fd88..155a98106 100755 --- a/scripts/telegram-bridge.js +++ b/scripts/telegram-bridge.js @@ -54,6 +54,7 @@ function tgApi(method, body) { hostname: "api.telegram.org", path: `/bot${TOKEN}/${method}`, method: "POST", + timeout: 60000, headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(data) }, }, (res) => { @@ -64,6 +65,9 @@ function tgApi(method, body) { }); }, ); + req.on("timeout", () => { + req.destroy(new Error(`Telegram API ${method} timed out`)); + }); req.on("error", reject); req.write(data); req.end(); diff --git a/test/telegram-bridge-timeout.test.js b/test/telegram-bridge-timeout.test.js new file mode 100644 index 000000000..d258cb0be --- /dev/null +++ b/test/telegram-bridge-timeout.test.js @@ -0,0 +1,191 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Tests for the telegram-bridge socket timeout fix. + * + * Verifies that tgApi() properly handles: + * 1. Normal responses still work with the timeout in place + * 2. Socket timeout when server stops responding (simulates network hang) + * 3. Timeout fires within expected window + * 4. The poll loop recovers after a timeout error + * 5. Partial response with socket destroy (known limitation, documented) + * 6. Connection refused + */ + +import { describe, it, expect, afterEach } from "vitest"; +import https from "node:https"; +import net from "node:net"; +import { execFileSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; + +// ── Generate a self-signed cert for the local test server ──────────── +const tmpDir = fs.mkdtempSync("/tmp/tg-bridge-test-"); +const keyPath = path.join(tmpDir, "key.pem"); +const certPath = path.join(tmpDir, "cert.pem"); +execFileSync( + "openssl", + [ + "req", + "-x509", + "-newkey", + "rsa:2048", + "-keyout", + keyPath, + "-out", + certPath, + "-days", + "1", + "-nodes", + "-subj", + "/CN=localhost", + ], + { stdio: "ignore" }, +); +const key = fs.readFileSync(keyPath); +const cert = fs.readFileSync(certPath); +fs.rmSync(tmpDir, { recursive: true }); + +// ── tgApi WITH timeout fix (mirrors telegram-bridge.js) ────────────── +function tgApi(baseUrl, method, body, timeoutMs = 2000) { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const url = new URL(`${baseUrl}/${method}`); + const req = https.request( + { + hostname: url.hostname, + port: url.port, + path: url.pathname, + method: "POST", + timeout: timeoutMs, + headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(data) }, + rejectUnauthorized: false, + }, + (res) => { + let buf = ""; + res.on("data", (c) => (buf += c)); + res.on("end", () => { + try { + resolve(JSON.parse(buf)); + } catch { + resolve({ ok: false, error: buf }); + } + }); + }, + ); + req.on("timeout", () => { + req.destroy(new Error(`Telegram API ${method} timed out`)); + }); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +// ── Helpers ────────────────────────────────────────────────────────── +const servers = []; + +function createServer(handler) { + return new Promise((resolve) => { + const server = https.createServer({ key, cert }, handler); + server.listen(0, "127.0.0.1", () => { + servers.push(server); + const { port } = server.address(); + resolve({ server, port, baseUrl: `https://127.0.0.1:${port}` }); + }); + }); +} + +afterEach(() => { + while (servers.length) { + const s = servers.pop(); + if (s.closeAllConnections) s.closeAllConnections(); + s.close(); + } +}); + +// ── Tests ──────────────────────────────────────────────────────────── + +describe("telegram-bridge tgApi timeout behavior", () => { + it("resolves normally when server responds promptly", async () => { + const { baseUrl } = await createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ ok: true, result: { update_id: 1 } })); + }); + const result = await tgApi(baseUrl, "getUpdates", { offset: 0 }); + expect(result.ok).toBe(true); + }); + + it("rejects with timeout when server hangs (simulates network drop)", async () => { + const { baseUrl } = await createServer(() => { + // never respond — simulates dead TCP connection + }); + const start = Date.now(); + await expect(tgApi(baseUrl, "getUpdates", { offset: 0 }, 1000)).rejects.toThrow("timed out"); + const elapsed = Date.now() - start; + expect(elapsed).toBeGreaterThanOrEqual(900); + expect(elapsed).toBeLessThan(5000); + }); + + it("timeout fires within expected window", async () => { + const { baseUrl } = await createServer(() => { + /* never respond */ + }); + const start = Date.now(); + await expect(tgApi(baseUrl, "getUpdates", { offset: 0 }, 500)).rejects.toThrow("timed out"); + const elapsed = Date.now() - start; + expect(elapsed).toBeGreaterThanOrEqual(450); + expect(elapsed).toBeLessThan(2000); + }); + + it("poll loop recovers after timeout", async () => { + let reqCount = 0; + const { baseUrl } = await createServer((_req, res) => { + reqCount++; + if (reqCount === 1) return; // first: hang + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ ok: true, result: [] })); + }); + + // First call: timeout + await expect(tgApi(baseUrl, "getUpdates", { offset: 0 }, 500)).rejects.toThrow("timed out"); + + // Second call: should succeed (poll loop recovery) + const result = await tgApi(baseUrl, "getUpdates", { offset: 0 }, 500); + expect(result.ok).toBe(true); + }); + + it("handles server closing connection mid-response (known limitation)", async () => { + // Node.js `timeout` only fires on idle sockets — once the server + // has started responding, timeout won't fire. This documents the + // behavior. The primary fix covers the real-world scenario (dead + // connection before any response, e.g. after machine sleep). + const { baseUrl } = await createServer((req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.write('{"ok":'); + setTimeout(() => req.socket.destroy(), 50); + }); + + const result = await Promise.race([ + tgApi(baseUrl, "getUpdates", { offset: 0 }, 1000) + .then(() => "resolved") + .catch(() => "rejected"), + new Promise((r) => setTimeout(() => r("timeout-fallback"), 2000)), + ]); + // Accept any outcome — documenting that mid-response hangs are a + // known limitation not covered by socket timeout alone. + expect(["resolved", "rejected", "timeout-fallback"]).toContain(result); + }); + + it("handles connection refused (server down)", async () => { + const tempServer = net.createServer(); + await new Promise((r) => tempServer.listen(0, "127.0.0.1", r)); + const { port } = tempServer.address(); + tempServer.close(); + + await expect( + tgApi(`https://127.0.0.1:${port}`, "getUpdates", { offset: 0 }, 2000), + ).rejects.toThrow(); + }); +}); From e95f13d667ff1b504f513469205658011c254565 Mon Sep 17 00:00:00 2001 From: Tommy Lin Date: Fri, 3 Apr 2026 13:20:19 +0800 Subject: [PATCH 2/2] chore: remove duplicated tgApi test, will follow up with shared module The test validated a copy of tgApi rather than the production code. A follow-up PR will extract tgApi into a shared module so tests import the real implementation. Signed-off-by: Tommy Lin --- test/telegram-bridge-timeout.test.js | 191 --------------------------- 1 file changed, 191 deletions(-) delete mode 100644 test/telegram-bridge-timeout.test.js diff --git a/test/telegram-bridge-timeout.test.js b/test/telegram-bridge-timeout.test.js deleted file mode 100644 index d258cb0be..000000000 --- a/test/telegram-bridge-timeout.test.js +++ /dev/null @@ -1,191 +0,0 @@ -// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 - -/** - * Tests for the telegram-bridge socket timeout fix. - * - * Verifies that tgApi() properly handles: - * 1. Normal responses still work with the timeout in place - * 2. Socket timeout when server stops responding (simulates network hang) - * 3. Timeout fires within expected window - * 4. The poll loop recovers after a timeout error - * 5. Partial response with socket destroy (known limitation, documented) - * 6. Connection refused - */ - -import { describe, it, expect, afterEach } from "vitest"; -import https from "node:https"; -import net from "node:net"; -import { execFileSync } from "node:child_process"; -import fs from "node:fs"; -import path from "node:path"; - -// ── Generate a self-signed cert for the local test server ──────────── -const tmpDir = fs.mkdtempSync("/tmp/tg-bridge-test-"); -const keyPath = path.join(tmpDir, "key.pem"); -const certPath = path.join(tmpDir, "cert.pem"); -execFileSync( - "openssl", - [ - "req", - "-x509", - "-newkey", - "rsa:2048", - "-keyout", - keyPath, - "-out", - certPath, - "-days", - "1", - "-nodes", - "-subj", - "/CN=localhost", - ], - { stdio: "ignore" }, -); -const key = fs.readFileSync(keyPath); -const cert = fs.readFileSync(certPath); -fs.rmSync(tmpDir, { recursive: true }); - -// ── tgApi WITH timeout fix (mirrors telegram-bridge.js) ────────────── -function tgApi(baseUrl, method, body, timeoutMs = 2000) { - return new Promise((resolve, reject) => { - const data = JSON.stringify(body); - const url = new URL(`${baseUrl}/${method}`); - const req = https.request( - { - hostname: url.hostname, - port: url.port, - path: url.pathname, - method: "POST", - timeout: timeoutMs, - headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(data) }, - rejectUnauthorized: false, - }, - (res) => { - let buf = ""; - res.on("data", (c) => (buf += c)); - res.on("end", () => { - try { - resolve(JSON.parse(buf)); - } catch { - resolve({ ok: false, error: buf }); - } - }); - }, - ); - req.on("timeout", () => { - req.destroy(new Error(`Telegram API ${method} timed out`)); - }); - req.on("error", reject); - req.write(data); - req.end(); - }); -} - -// ── Helpers ────────────────────────────────────────────────────────── -const servers = []; - -function createServer(handler) { - return new Promise((resolve) => { - const server = https.createServer({ key, cert }, handler); - server.listen(0, "127.0.0.1", () => { - servers.push(server); - const { port } = server.address(); - resolve({ server, port, baseUrl: `https://127.0.0.1:${port}` }); - }); - }); -} - -afterEach(() => { - while (servers.length) { - const s = servers.pop(); - if (s.closeAllConnections) s.closeAllConnections(); - s.close(); - } -}); - -// ── Tests ──────────────────────────────────────────────────────────── - -describe("telegram-bridge tgApi timeout behavior", () => { - it("resolves normally when server responds promptly", async () => { - const { baseUrl } = await createServer((_req, res) => { - res.writeHead(200, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ ok: true, result: { update_id: 1 } })); - }); - const result = await tgApi(baseUrl, "getUpdates", { offset: 0 }); - expect(result.ok).toBe(true); - }); - - it("rejects with timeout when server hangs (simulates network drop)", async () => { - const { baseUrl } = await createServer(() => { - // never respond — simulates dead TCP connection - }); - const start = Date.now(); - await expect(tgApi(baseUrl, "getUpdates", { offset: 0 }, 1000)).rejects.toThrow("timed out"); - const elapsed = Date.now() - start; - expect(elapsed).toBeGreaterThanOrEqual(900); - expect(elapsed).toBeLessThan(5000); - }); - - it("timeout fires within expected window", async () => { - const { baseUrl } = await createServer(() => { - /* never respond */ - }); - const start = Date.now(); - await expect(tgApi(baseUrl, "getUpdates", { offset: 0 }, 500)).rejects.toThrow("timed out"); - const elapsed = Date.now() - start; - expect(elapsed).toBeGreaterThanOrEqual(450); - expect(elapsed).toBeLessThan(2000); - }); - - it("poll loop recovers after timeout", async () => { - let reqCount = 0; - const { baseUrl } = await createServer((_req, res) => { - reqCount++; - if (reqCount === 1) return; // first: hang - res.writeHead(200, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ ok: true, result: [] })); - }); - - // First call: timeout - await expect(tgApi(baseUrl, "getUpdates", { offset: 0 }, 500)).rejects.toThrow("timed out"); - - // Second call: should succeed (poll loop recovery) - const result = await tgApi(baseUrl, "getUpdates", { offset: 0 }, 500); - expect(result.ok).toBe(true); - }); - - it("handles server closing connection mid-response (known limitation)", async () => { - // Node.js `timeout` only fires on idle sockets — once the server - // has started responding, timeout won't fire. This documents the - // behavior. The primary fix covers the real-world scenario (dead - // connection before any response, e.g. after machine sleep). - const { baseUrl } = await createServer((req, res) => { - res.writeHead(200, { "Content-Type": "application/json" }); - res.write('{"ok":'); - setTimeout(() => req.socket.destroy(), 50); - }); - - const result = await Promise.race([ - tgApi(baseUrl, "getUpdates", { offset: 0 }, 1000) - .then(() => "resolved") - .catch(() => "rejected"), - new Promise((r) => setTimeout(() => r("timeout-fallback"), 2000)), - ]); - // Accept any outcome — documenting that mid-response hangs are a - // known limitation not covered by socket timeout alone. - expect(["resolved", "rejected", "timeout-fallback"]).toContain(result); - }); - - it("handles connection refused (server down)", async () => { - const tempServer = net.createServer(); - await new Promise((r) => tempServer.listen(0, "127.0.0.1", r)); - const { port } = tempServer.address(); - tempServer.close(); - - await expect( - tgApi(`https://127.0.0.1:${port}`, "getUpdates", { offset: 0 }, 2000), - ).rejects.toThrow(); - }); -});