From 58578a456a83f3480a5fe21f8b8cfdd38dfa3152 Mon Sep 17 00:00:00 2001
From: Noah Zweben
Date: Fri, 10 Apr 2026 12:01:01 -0700
Subject: [PATCH] fix(telegram): prevent zombie pollers from blocking new sessions with 409 Conflict (#1349)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(telegram): prevent zombie pollers from blocking new sessions

The MCP server runs as a grandchild of the CLI (via `bun run start` →
shell → `bun server.ts`). When the CLI is killed uncleanly (SIGKILL,
crash, terminal close), the grandchild survives as an orphan and keeps
long-polling getUpdates indefinitely. Telegram allows only one consumer
per token, so every subsequent session sees 409 Conflict and the
existing retry loop spins forever.

Three layered mitigations:

- PID lockfile (STATE_DIR/bot.pid): on startup, SIGTERM any stale holder
  before claiming the slot, so a fresh session always wins.
- Orphan watchdog: every 5s check for parent reparenting (POSIX ppid
  change) or a dead stdin pipe, and self-terminate. Covers cases where
  the existing stdin end/close events never fire through the wrapper.
- 409 retry cap: give up after 8 attempts (~28s) instead of looping
  forever, and bail immediately if shutdown has begun.

Also adds a SIGHUP handler and removes the pidfile on clean shutdown
(only if still owned by this process).

* chore(telegram): bump version to 0.0.5

---------

Co-authored-by: Claude
---
 .../telegram/.claude-plugin/plugin.json |  2 +-
 external_plugins/telegram/server.ts     | 40 +++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/external_plugins/telegram/.claude-plugin/plugin.json b/external_plugins/telegram/.claude-plugin/plugin.json
index 2763481..9e3c96a 100644
--- a/external_plugins/telegram/.claude-plugin/plugin.json
+++ b/external_plugins/telegram/.claude-plugin/plugin.json
@@ -1,7 +1,7 @@
 {
   "name": "telegram",
   "description": "Telegram channel for Claude Code \u2014 messaging bridge with built-in access control. Manage pairing, allowlists, and policy via /telegram:access.",
-  "version": "0.0.4",
+  "version": "0.0.5",
   "keywords": [
     "telegram",
     "messaging",
diff --git a/external_plugins/telegram/server.ts b/external_plugins/telegram/server.ts
index 3211bba..6a07e35 100644
--- a/external_plugins/telegram/server.ts
+++ b/external_plugins/telegram/server.ts
@@ -51,6 +51,22 @@ if (!TOKEN) {
   process.exit(1)
 }
 const INBOX_DIR = join(STATE_DIR, 'inbox')
+const PID_FILE = join(STATE_DIR, 'bot.pid')
+
+// Telegram allows exactly one getUpdates consumer per token. If a previous
+// session crashed (SIGKILL, terminal closed) its server.ts grandchild can
+// survive as an orphan and hold the slot forever, so every new session sees
+// 409 Conflict. Kill any stale holder before we start polling.
+mkdirSync(STATE_DIR, { recursive: true, mode: 0o700 })
+try {
+  const stale = parseInt(readFileSync(PID_FILE, 'utf8'), 10)
+  if (stale > 1 && stale !== process.pid) {
+    process.kill(stale, 0)
+    process.stderr.write(`telegram channel: replacing stale poller pid=${stale}\n`)
+    process.kill(stale, 'SIGTERM')
+  }
+} catch {}
+writeFileSync(PID_FILE, String(process.pid))
 
 // Last-resort safety net — without these the process dies silently on any
 // unhandled promise rejection. With them it logs and keeps serving tools.
@@ -621,6 +637,9 @@ function shutdown(): void {
   if (shuttingDown) return
   shuttingDown = true
   process.stderr.write('telegram channel: shutting down\n')
+  try {
+    if (parseInt(readFileSync(PID_FILE, 'utf8'), 10) === process.pid) rmSync(PID_FILE)
+  } catch {}
   // bot.stop() signals the poll loop to end; the current getUpdates request
   // may take up to its long-poll timeout to return. Force-exit after 2s.
   setTimeout(() => process.exit(0), 2000)
@@ -630,6 +649,19 @@ process.stdin.on('end', shutdown)
 process.stdin.on('close', shutdown)
 process.on('SIGTERM', shutdown)
 process.on('SIGINT', shutdown)
+process.on('SIGHUP', shutdown)
+
+// Orphan watchdog: stdin events above don't reliably fire when the parent
+// chain (`bun run` wrapper → shell → us) is severed by a crash. Poll for
+// reparenting (POSIX) or a dead stdin pipe and self-terminate.
+const bootPpid = process.ppid
+setInterval(() => {
+  const orphaned =
+    (process.platform !== 'win32' && process.ppid !== bootPpid) ||
+    process.stdin.destroyed ||
+    process.stdin.readableEnded
+  if (orphaned) shutdown()
+}, 5000).unref()
 
 // Commands are DM-only. Responding in groups would: (1) leak pairing codes via
 // /status to other group members, (2) confirm bot presence in non-allowlisted
@@ -975,7 +1007,15 @@ void (async () => {
       })
       return // bot.stop() was called — clean exit from the loop
     } catch (err) {
+      if (shuttingDown) return
       if (err instanceof GrammyError && err.error_code === 409) {
+        if (attempt >= 8) {
+          process.stderr.write(
+            `telegram channel: 409 Conflict persists after ${attempt} attempts — ` +
+              `another poller is holding the bot token (stray 'bun server.ts' process or a second session). Exiting.\n`,
+          )
+          return
+        }
         const delay = Math.min(1000 * attempt, 15000)
         const detail = attempt === 1
           ? ' — another instance is polling (zombie session, or a second Claude Code running?)'