From 7e401edac7ca6449c4f549fda9e1b43385496bef Mon Sep 17 00:00:00 2001 From: Noah Zweben Date: Tue, 14 Apr 2026 12:47:13 -0700 Subject: [PATCH] fix(telegram): retry polling on all transient errors, not just 409 (#1397) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A single ETIMEDOUT/ECONNRESET/DNS failure during long-polling rejected bot.start(); the catch block returned and polling stopped permanently. The MCP server process stayed alive (stdin keeps it running), so outbound reply/react tools kept working — but the bot was deaf to inbound messages until a full restart. Users see 'typing...' then nothing, indistinguishable from the harness-side gate bug. Now all errors retry with the same capped backoff (max 15s); the only remaining give-up path is a 409 Conflict that persists for 8 attempts, which still exits as before. attempt resets to 0 in onStart so backoff doesn't accumulate across a long-running session. Co-authored-by: Claude --- .../telegram/.claude-plugin/plugin.json | 2 +- external_plugins/telegram/server.ts | 43 +++++++++---------- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/external_plugins/telegram/.claude-plugin/plugin.json b/external_plugins/telegram/.claude-plugin/plugin.json index 9e3c96a..e1edd21 100644 --- a/external_plugins/telegram/.claude-plugin/plugin.json +++ b/external_plugins/telegram/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "telegram", "description": "Telegram channel for Claude Code \u2014 messaging bridge with built-in access control. 
Manage pairing, allowlists, and policy via /telegram:access.", - "version": "0.0.5", + "version": "0.0.6", "keywords": [ "telegram", "messaging", diff --git a/external_plugins/telegram/server.ts b/external_plugins/telegram/server.ts index 6a07e35..bfc551f 100644 --- a/external_plugins/telegram/server.ts +++ b/external_plugins/telegram/server.ts @@ -985,14 +985,17 @@ bot.catch(err => { process.stderr.write(`telegram channel: handler error (polling continues): ${err.error}\n`) }) -// 409 Conflict = another getUpdates consumer is still active (zombie from a -// previous session, or a second Claude Code instance). Retry with backoff -// until the slot frees up instead of crashing on the first rejection. +// Retry polling with backoff on any error. Previously only 409 was retried — +// a single ETIMEDOUT/ECONNRESET/DNS failure rejected bot.start(), the catch +// returned, and polling stopped permanently while the process stayed alive +// (MCP stdin keeps it running). Outbound tools kept working but the bot was +// deaf to inbound messages until a full restart. void (async () => { for (let attempt = 1; ; attempt++) { try { await bot.start({ onStart: info => { + attempt = 0 botUsername = info.username process.stderr.write(`telegram channel: polling as @${info.username}\n`) void bot.api.setMyCommands( @@ -1008,28 +1011,22 @@ void (async () => { return // bot.stop() was called — clean exit from the loop } catch (err) { if (shuttingDown) return - if (err instanceof GrammyError && err.error_code === 409) { - if (attempt >= 8) { - process.stderr.write( - `telegram channel: 409 Conflict persists after ${attempt} attempts — ` + - `another poller is holding the bot token (stray 'bun server.ts' process or a second session). Exiting.\n`, - ) - return - } - const delay = Math.min(1000 * attempt, 15000) - const detail = attempt === 1 - ? 
' — another instance is polling (zombie session, or a second Claude Code running?)' - : '' - process.stderr.write( - `telegram channel: 409 Conflict${detail}, retrying in ${delay / 1000}s\n`, - ) - await new Promise(r => setTimeout(r, delay)) - continue - } // bot.stop() mid-setup rejects with grammy's "Aborted delay" — expected, not an error. if (err instanceof Error && err.message === 'Aborted delay') return - process.stderr.write(`telegram channel: polling failed: ${err}\n`) - return + const is409 = err instanceof GrammyError && err.error_code === 409 + if (is409 && attempt >= 8) { + process.stderr.write( + `telegram channel: 409 Conflict persists after ${attempt} attempts — ` + + `another poller is holding the bot token (stray 'bun server.ts' process or a second session). Exiting.\n`, + ) + return + } + const delay = Math.min(1000 * attempt, 15000) + const detail = is409 + ? `409 Conflict${attempt === 1 ? ' — another instance is polling (zombie session, or a second Claude Code running?)' : ''}` + : `polling error: ${err}` + process.stderr.write(`telegram channel: ${detail}, retrying in ${delay / 1000}s\n`) + await new Promise(r => setTimeout(r, delay)) } } })()