fix(telegram): retry polling on all transient errors, not just 409 (#1397)

A single ETIMEDOUT/ECONNRESET/DNS failure during long-polling rejected
bot.start(); the catch block returned and polling stopped permanently.
The MCP server process stayed alive (stdin keeps it running), so outbound
reply/react tools kept working — but the bot was deaf to inbound messages
until a full restart. Users see 'typing...' then nothing, indistinguishable
from the harness-side gate bug.

Now every polling error is retried with the same capped backoff (max 15s).
The `attempt` counter is reset to 0 in onStart so the backoff doesn't
accumulate across a long-running session.

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Noah Zweben
2026-04-14 12:47:13 -07:00
committed by GitHub
parent 7f3389d21f
commit 7e401edac7
2 changed files with 21 additions and 24 deletions

View File

@@ -1,7 +1,7 @@
{
"name": "telegram",
"description": "Telegram channel for Claude Code \u2014 messaging bridge with built-in access control. Manage pairing, allowlists, and policy via /telegram:access.",
"version": "0.0.5",
"version": "0.0.6",
"keywords": [
"telegram",
"messaging",

View File

@@ -985,14 +985,17 @@ bot.catch(err => {
process.stderr.write(`telegram channel: handler error (polling continues): ${err.error}\n`)
})
// 409 Conflict = another getUpdates consumer is still active (zombie from a
// previous session, or a second Claude Code instance). Retry with backoff
// until the slot frees up instead of crashing on the first rejection.
// Retry polling with backoff on any error. Previously only 409 was retried —
// a single ETIMEDOUT/ECONNRESET/DNS failure rejected bot.start(), the catch
// returned, and polling stopped permanently while the process stayed alive
// (MCP stdin keeps it running). Outbound tools kept working but the bot was
// deaf to inbound messages until a full restart.
void (async () => {
for (let attempt = 1; ; attempt++) {
try {
await bot.start({
onStart: info => {
attempt = 0
botUsername = info.username
process.stderr.write(`telegram channel: polling as @${info.username}\n`)
void bot.api.setMyCommands(
@@ -1008,28 +1011,22 @@ void (async () => {
return // bot.stop() was called — clean exit from the loop
} catch (err) {
if (shuttingDown) return
if (err instanceof GrammyError && err.error_code === 409) {
if (attempt >= 8) {
process.stderr.write(
`telegram channel: 409 Conflict persists after ${attempt} attempts — ` +
`another poller is holding the bot token (stray 'bun server.ts' process or a second session). Exiting.\n`,
)
return
}
const delay = Math.min(1000 * attempt, 15000)
const detail = attempt === 1
? ' — another instance is polling (zombie session, or a second Claude Code running?)'
: ''
process.stderr.write(
`telegram channel: 409 Conflict${detail}, retrying in ${delay / 1000}s\n`,
)
await new Promise(r => setTimeout(r, delay))
continue
}
// bot.stop() mid-setup rejects with grammy's "Aborted delay" — expected, not an error.
if (err instanceof Error && err.message === 'Aborted delay') return
process.stderr.write(`telegram channel: polling failed: ${err}\n`)
return
const is409 = err instanceof GrammyError && err.error_code === 409
if (is409 && attempt >= 8) {
process.stderr.write(
`telegram channel: 409 Conflict persists after ${attempt} attempts — ` +
`another poller is holding the bot token (stray 'bun server.ts' process or a second session). Exiting.\n`,
)
return
}
const delay = Math.min(1000 * attempt, 15000)
const detail = is409
? `409 Conflict${attempt === 1 ? ' — another instance is polling (zombie session, or a second Claude Code running?)' : ''}`
: `polling error: ${err}`
process.stderr.write(`telegram channel: ${detail}, retrying in ${delay / 1000}s\n`)
await new Promise(r => setTimeout(r, delay))
}
}
})()