mirror of
https://github.com/anthropics/claude-plugins-official.git
synced 2026-04-16 04:12:43 +00:00
fix(telegram): prevent zombie pollers from blocking new sessions with 409 Conflict (#1349)
* fix(telegram): prevent zombie pollers from blocking new sessions

  The MCP server runs as a grandchild of the CLI (via `bun run start` → shell → `bun server.ts`). When the CLI is killed uncleanly (SIGKILL, crash, terminal close), the grandchild survives as an orphan and keeps long-polling getUpdates indefinitely. Telegram allows only one consumer per token, so every subsequent session sees 409 Conflict and the existing retry loop spins forever.

  Three layered mitigations:

  - PID lockfile (STATE_DIR/bot.pid): on startup, SIGTERM any stale holder before claiming the slot, so a fresh session always wins.
  - Orphan watchdog: every 5s check for parent reparenting (POSIX ppid change) or a dead stdin pipe, and self-terminate. Covers cases where the existing stdin end/close events never fire through the wrapper.
  - 409 retry cap: give up after 8 attempts (~28s) instead of looping forever, and bail immediately if shutdown has begun.

  Also adds a SIGHUP handler and removes the pidfile on clean shutdown (only if still owned by this process).

* chore(telegram): bump version to 0.0.5

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "telegram",
|
||||
"description": "Telegram channel for Claude Code \u2014 messaging bridge with built-in access control. Manage pairing, allowlists, and policy via /telegram:access.",
|
||||
"version": "0.0.4",
|
||||
"version": "0.0.5",
|
||||
"keywords": [
|
||||
"telegram",
|
||||
"messaging",
|
||||
|
||||
@@ -51,6 +51,22 @@ if (!TOKEN) {
|
||||
process.exit(1)
|
||||
}
|
||||
const INBOX_DIR = join(STATE_DIR, 'inbox')
|
||||
const PID_FILE = join(STATE_DIR, 'bot.pid')
|
||||
|
||||
// Telegram allows exactly one getUpdates consumer per token. If a previous
|
||||
// session crashed (SIGKILL, terminal closed) its server.ts grandchild can
|
||||
// survive as an orphan and hold the slot forever, so every new session sees
|
||||
// 409 Conflict. Kill any stale holder before we start polling.
|
||||
mkdirSync(STATE_DIR, { recursive: true, mode: 0o700 })
|
||||
try {
|
||||
const stale = parseInt(readFileSync(PID_FILE, 'utf8'), 10)
|
||||
if (stale > 1 && stale !== process.pid) {
|
||||
process.kill(stale, 0)
|
||||
process.stderr.write(`telegram channel: replacing stale poller pid=${stale}\n`)
|
||||
process.kill(stale, 'SIGTERM')
|
||||
}
|
||||
} catch {}
|
||||
writeFileSync(PID_FILE, String(process.pid))
|
||||
|
||||
// Last-resort safety net — without these the process dies silently on any
|
||||
// unhandled promise rejection. With them it logs and keeps serving tools.
|
||||
@@ -621,6 +637,9 @@ function shutdown(): void {
|
||||
if (shuttingDown) return
|
||||
shuttingDown = true
|
||||
process.stderr.write('telegram channel: shutting down\n')
|
||||
try {
|
||||
if (parseInt(readFileSync(PID_FILE, 'utf8'), 10) === process.pid) rmSync(PID_FILE)
|
||||
} catch {}
|
||||
// bot.stop() signals the poll loop to end; the current getUpdates request
|
||||
// may take up to its long-poll timeout to return. Force-exit after 2s.
|
||||
setTimeout(() => process.exit(0), 2000)
|
||||
@@ -630,6 +649,19 @@ process.stdin.on('end', shutdown)
|
||||
process.stdin.on('close', shutdown)
|
||||
process.on('SIGTERM', shutdown)
|
||||
process.on('SIGINT', shutdown)
|
||||
process.on('SIGHUP', shutdown)
|
||||
|
||||
// Orphan watchdog: stdin events above don't reliably fire when the parent
|
||||
// chain (`bun run` wrapper → shell → us) is severed by a crash. Poll for
|
||||
// reparenting (POSIX) or a dead stdin pipe and self-terminate.
|
||||
const bootPpid = process.ppid
|
||||
setInterval(() => {
|
||||
const orphaned =
|
||||
(process.platform !== 'win32' && process.ppid !== bootPpid) ||
|
||||
process.stdin.destroyed ||
|
||||
process.stdin.readableEnded
|
||||
if (orphaned) shutdown()
|
||||
}, 5000).unref()
|
||||
|
||||
// Commands are DM-only. Responding in groups would: (1) leak pairing codes via
|
||||
// /status to other group members, (2) confirm bot presence in non-allowlisted
|
||||
@@ -975,7 +1007,15 @@ void (async () => {
|
||||
})
|
||||
return // bot.stop() was called — clean exit from the loop
|
||||
} catch (err) {
|
||||
if (shuttingDown) return
|
||||
if (err instanceof GrammyError && err.error_code === 409) {
|
||||
if (attempt >= 8) {
|
||||
process.stderr.write(
|
||||
`telegram channel: 409 Conflict persists after ${attempt} attempts — ` +
|
||||
`another poller is holding the bot token (stray 'bun server.ts' process or a second session). Exiting.\n`,
|
||||
)
|
||||
return
|
||||
}
|
||||
const delay = Math.min(1000 * attempt, 15000)
|
||||
const detail = attempt === 1
|
||||
? ' — another instance is polling (zombie session, or a second Claude Code running?)'
|
||||
|
||||
Reference in New Issue
Block a user