Align mcp-server-dev skills with claude.com/docs connector guidance

- build-mcp-server: load llms-full.txt for Claude-specific context; add Phase 6 (test in Claude, review checklist, submit, ship plugin)
- references/auth.md: add Claude auth-type table, callback URL, not-supported list
- references/tool-design.md: add Anthropic Directory hard requirements (annotations, name length, read/write split, prompt-injection rule)
- build-mcp-app: add Claude host specifics (prefersBorder, safeAreaInsets, CSP) and submission asset specs; testing via custom connector
- build-mcpb: note remote servers are the recommended directory path
2026-07-06 20:53:28 +00:00 · 2026-04-15 01:31:23 +00:00
12 changed files with 185 additions and 1009 deletions
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
--- a/.github/scripts/discover_bumps.py
+++ b/.github/scripts/discover_bumps.py
@@ -1,229 +0,0 @@
-#!/usr/bin/env python3
-"""Discover plugins in marketplace.json whose upstream repo has moved past
-their pinned SHA, update the file in place, and emit a summary.
-
-Adapted from claude-plugins-community-internal's discover_bumps.py for the
-single-file marketplace.json format used by claude-plugins-official.
-
-Usage: discover_bumps.py [--plugin NAME] [--max N] [--dry-run]
-"""
-
-import argparse
-import json
-import os
-import re
-import subprocess
-import sys
-from datetime import datetime, timezone
-from typing import Any
-
-
-MARKETPLACE_PATH = ".claude-plugin/marketplace.json"
-
-
-def gh_api(path: str) -> Any:
-    """GET from the GitHub API. None on not-found; raises on other errors.
-
-    "Not found" covers both 404 (resource gone) and 422 "No commit found
-    for SHA" (force-pushed away). Both mean the thing we asked for isn't
-    there — treating them the same lets callers handle dead refs uniformly.
-    """
-    r = subprocess.run(
-        ["gh", "api", path], capture_output=True, text=True
-    )
-    if r.returncode != 0:
-        combined = r.stdout + r.stderr
-        if any(s in combined for s in ("404", "Not Found", "No commit found")):
-            return None
-        raise RuntimeError(f"gh api {path}: {r.stderr.strip() or r.stdout.strip()}")
-    return json.loads(r.stdout)
-
-
-def parse_github_repo(url: str) -> tuple[str, str] | None:
-    """Extract (owner, repo) from a URL or owner/repo shorthand."""
-    # Full URL: https://github.com/owner/repo(.git)(/...)
-    m = re.match(r"https?://github\.com/([^/]+)/([^/]+?)(?:\.git)?(?:/|$)", url)
-    if m:
-        return m.group(1), m.group(2)
-    # Shorthand: owner/repo
-    m = re.match(r"^([\w.-]+)/([\w.-]+)$", url)
-    if m:
-        return m.group(1), m.group(2)
-    return None
-
-
-def latest_sha(owner: str, repo: str, *, ref: str | None, path: str | None) -> str | None:
-    """Latest commit SHA for the repo, optionally scoped to a ref and/or path."""
-    if path:
-        # Scoped to a subdirectory — use the commits list endpoint with path filter.
-        q = f"repos/{owner}/{repo}/commits?per_page=1&path={path}"
-        if ref:
-            q += f"&sha={ref}"
-        commits = gh_api(q)
-        if not commits:
-            return None
-        return commits[0]["sha"]
-    # Whole repo — the single-ref endpoint is cheaper.
-    if not ref:
-        meta = gh_api(f"repos/{owner}/{repo}")
-        if not meta:
-            return None
-        ref = meta["default_branch"]
-    c = gh_api(f"repos/{owner}/{repo}/commits/{ref}")
-    return c["sha"] if c else None
-
-
-def pinned_age_days(owner: str, repo: str, sha: str) -> int | None:
-    """Days since the pinned commit was authored. Used for oldest-first rotation."""
-    c = gh_api(f"repos/{owner}/{repo}/commits/{sha}")
-    if not c:
-        return None
-    dt = datetime.fromisoformat(
-        c["commit"]["committer"]["date"].replace("Z", "+00:00")
-    )
-    return (datetime.now(timezone.utc) - dt).days
-
-
-def main() -> int:
-    ap = argparse.ArgumentParser()
-    ap.add_argument("--plugin", help="only check this plugin")
-    ap.add_argument("--max", type=int, default=20, help="cap bumps emitted")
-    ap.add_argument("--dry-run", action="store_true", help="don't write marketplace.json")
-    args = ap.parse_args()
-
-    with open(MARKETPLACE_PATH) as f:
-        marketplace = json.load(f)
-
-    plugins = marketplace.get("plugins", [])
-    bumps: list[dict] = []
-    dead: list[str] = []
-    skipped_non_github = 0
-    checked = 0
-
-    for plugin in plugins:
-        name = plugin.get("name", "?")
-        src = plugin.get("source")
-
-        # Only process object sources with a sha field
-        if not isinstance(src, dict) or "sha" not in src:
-            continue
-
-        # Filter to specific plugin if requested
-        if args.plugin and name != args.plugin:
-            continue
-
-        checked += 1
-        kind = src.get("source")
-        url = src.get("url", "")
-        path = src.get("path")
-        ref = src.get("ref")
-        pinned = src.get("sha")
-
-        slug = parse_github_repo(url)
-        if not slug:
-            skipped_non_github += 1
-            continue
-        owner, repo = slug
-
-        try:
-            latest = latest_sha(owner, repo, ref=ref, path=path)
-        except RuntimeError as e:
-            print(f"::warning::{name}: {e}", file=sys.stderr)
-            continue
-
-        if latest is None:
-            dead.append(f"{name} ({owner}/{repo})")
-            continue
-
-        if latest == pinned:
-            continue  # up to date
-
-        # Age lookup for rotation — oldest-pinned first prevents starvation.
-        try:
-            age = pinned_age_days(owner, repo, pinned) if pinned else None
-        except RuntimeError as e:
-            print(f"::warning::{name}: age lookup failed: {e}", file=sys.stderr)
-            age = None
-
-        bumps.append({
-            "name": name,
-            "kind": kind,
-            "url": url,
-            "path": path or "",
-            "ref": ref or "",
-            "old_sha": pinned or "",
-            "new_sha": latest,
-            "age_days": age if age is not None else 10**6,
-        })
-
-    # Oldest-pinned first so nothing starves under the cap.
-    bumps.sort(key=lambda b: -b["age_days"])
-    emitted = bumps[: args.max]
-
-    # Apply bumps to marketplace data
-    if emitted and not args.dry_run:
-        bump_map = {b["name"]: b["new_sha"] for b in emitted}
-        for plugin in plugins:
-            name = plugin.get("name")
-            src = plugin.get("source")
-            if isinstance(src, dict) and name in bump_map:
-                src["sha"] = bump_map[name]
-
-        with open(MARKETPLACE_PATH, "w") as f:
-            json.dump(marketplace, f, indent=2, ensure_ascii=False)
-            f.write("\n")
-
-    # Write GitHub outputs
-    out = os.environ.get("GITHUB_OUTPUT")
-    if out:
-        bumped_names = ",".join(b["name"] for b in emitted)
-        with open(out, "a") as fh:
-            fh.write(f"count={len(emitted)}\n")
-            fh.write(f"bumped_names={bumped_names}\n")
-
-    # Write GitHub step summary
-    summary = os.environ.get("GITHUB_STEP_SUMMARY")
-    if summary:
-        with open(summary, "a") as fh:
-            fh.write("## SHA Bump Discovery\n\n")
-            fh.write(f"- Checked: {checked} SHA-pinned entries\n")
-            fh.write(f"- Stale: {len(bumps)} (applying {len(emitted)}, cap {args.max})\n")
-            if skipped_non_github:
-                fh.write(f"- Skipped non-GitHub: {skipped_non_github}\n")
-            if dead:
-                fh.write(f"- **Dead upstream** ({len(dead)}): {', '.join(dead)}\n")
-            if emitted:
-                fh.write("\n| Plugin | Old | New | Age |\n|---|---|---|---|\n")
-                for b in emitted:
-                    old = b["old_sha"][:8] if b["old_sha"] else "(unpinned)"
-                    fh.write(f"| {b['name']} | `{old}` | `{b['new_sha'][:8]}` | {b['age_days']}d |\n")
-
-    # Write PR body for the workflow to use
-    pr_body_path = os.environ.get("PR_BODY_PATH", "/tmp/bump-pr-body.md")
-    if emitted:
-        with open(pr_body_path, "w") as fh:
-            fh.write("Upstream repos moved. Bumping pinned SHAs so plugins track latest.\n\n")
-            fh.write("| Plugin | Old | New | Upstream |\n")
-            fh.write("|--------|-----|-----|----------|\n")
-            for b in emitted:
-                old = b["old_sha"][:8] if b["old_sha"] else "(unpinned)"
-                slug_str = re.sub(r"https?://github\.com/", "", b["url"])
-                slug_str = re.sub(r"\.git$", "", slug_str)
-                compare = f"https://github.com/{slug_str}/compare/{b['old_sha'][:12]}...{b['new_sha'][:12]}"
-                fh.write(f"| `{b['name']}` | `{old}` | `{b['new_sha'][:8]}` | [diff]({compare}) |\n")
-            fh.write(f"\n---\n_Auto-generated by `bump-plugin-shas.yml` on {datetime.now(timezone.utc).strftime('%Y-%m-%d')}_\n")
-
-    # Console summary
-    print(f"Checked {checked} SHA-pinned plugins", file=sys.stderr)
-    print(f"Stale: {len(bumps)}, applying: {len(emitted)}", file=sys.stderr)
-    if dead:
-        print(f"Dead upstream: {', '.join(dead)}", file=sys.stderr)
-    for b in emitted:
-        old = b["old_sha"][:8] if b["old_sha"] else "unpinned"
-        print(f"  {b['name']}: {old} -> {b['new_sha'][:8]} ({b['age_days']}d)", file=sys.stderr)
-
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
--- a/.github/workflows/bump-plugin-shas.yml
+++ b/.github/workflows/bump-plugin-shas.yml
@@ -1,133 +0,0 @@
-name: Bump plugin SHAs
-
-# Weekly sweep of marketplace.json — for each entry whose upstream repo has
-# moved past its pinned SHA, open a PR against main with updated SHAs. The
-# validate-marketplace workflow then runs on the PR to confirm the file is
-# still well-formed.
-#
-# Adapted from claude-plugins-community-internal's bump-plugin-shas.yml
-# for the single-file marketplace.json format. Key difference: all bumps
-# are batched into one PR (since they all modify the same file).
-
-on:
-  schedule:
-    - cron: '23 7 * * 1'  # Monday 07:23 UTC
-  workflow_dispatch:
-    inputs:
-      plugin:
-        description: Only bump this plugin (for testing)
-        required: false
-      max_bumps:
-        description: Cap on plugins bumped this run
-        required: false
-        default: '20'
-      dry_run:
-        description: Discover only, don't open PR
-        type: boolean
-        default: true
-
-concurrency:
-  group: bump-plugin-shas
-  cancel-in-progress: false
-
-permissions:
-  contents: write
-  pull-requests: write
-
-jobs:
-  bump:
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Check for existing bump PR
-        id: existing
-        env:
-          GH_TOKEN: ${{ github.token }}
-        run: |
-          existing=$(gh pr list --label sha-bump --state open --json number --jq 'length')
-          echo "count=$existing" >> "$GITHUB_OUTPUT"
-          if [ "$existing" -gt 0 ]; then
-            echo "::notice::Open sha-bump PR already exists — skipping"
-          fi
-
-      - name: Ensure sha-bump label exists
-        if: steps.existing.outputs.count == '0'
-        env:
-          GH_TOKEN: ${{ github.token }}
-        run: gh label create sha-bump --color 0e8a16 --description "Automated SHA bump" 2>/dev/null || true
-
-      - name: Overlay marketplace data from main
-        if: steps.existing.outputs.count == '0'
-        run: |
-          git fetch origin main --depth=1 --quiet
-          git checkout origin/main -- .claude-plugin/marketplace.json
-
-      - name: Discover and apply SHA bumps
-        if: steps.existing.outputs.count == '0'
-        id: discover
-        env:
-          GH_TOKEN: ${{ github.token }}
-          PR_BODY_PATH: /tmp/bump-pr-body.md
-          PLUGIN: ${{ inputs.plugin }}
-          MAX_BUMPS: ${{ inputs.max_bumps }}
-          DRY_RUN: ${{ inputs.dry_run }}
-        run: |
-          args=(--max "${MAX_BUMPS:-20}")
-          [[ -n "$PLUGIN" ]] && args+=(--plugin "$PLUGIN")
-          [[ "$DRY_RUN" = "true" ]] && args+=(--dry-run)
-          python3 .github/scripts/discover_bumps.py "${args[@]}"
-
-      - uses: oven-sh/setup-bun@v2
-        if: steps.existing.outputs.count == '0' && steps.discover.outputs.count != '0' && inputs.dry_run != true
-
-      - name: Validate marketplace.json
-        if: steps.existing.outputs.count == '0' && steps.discover.outputs.count != '0' && inputs.dry_run != true
-        run: |
-          bun .github/scripts/validate-marketplace.ts .claude-plugin/marketplace.json
-          bun .github/scripts/check-marketplace-sorted.ts
-
-      - name: Push bump branch
-        if: steps.existing.outputs.count == '0' && steps.discover.outputs.count != '0' && inputs.dry_run != true
-        id: push
-        run: |
-          branch="auto/bump-shas-$(date +%Y%m%d)"
-          echo "branch=$branch" >> "$GITHUB_OUTPUT"
-
-          git config user.name "github-actions[bot]"
-          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
-          git checkout -b "$branch"
-          git add .claude-plugin/marketplace.json
-          git commit -m "Bump SHA pins for ${{ steps.discover.outputs.count }} plugin(s)
-
-          Plugins: ${{ steps.discover.outputs.bumped_names }}"
-          git push -u origin "$branch" --force-with-lease
-
-      # GITHUB_TOKEN cannot create PRs (org policy: "Allow GitHub Actions to
-      # create and approve pull requests" is disabled). Use the same GitHub App
-      # that -internal's bump workflow uses.
-      #
-      # Prerequisite: app 2812036 must be installed on this repo. The PEM
-      # secret must exist in this repo's settings (shared with -internal).
-      - name: Generate bot token
-        if: steps.push.outcome == 'success'
-        id: app-token
-        uses: actions/create-github-app-token@v1
-        with:
-          app-id: 2812036
-          private-key: ${{ secrets.CLAUDE_DIRECTORY_BOT_PRIVATE_KEY }}
-          owner: ${{ github.repository_owner }}
-          repositories: ${{ github.event.repository.name }}
-
-      - name: Create pull request
-        if: steps.push.outcome == 'success'
-        env:
-          GH_TOKEN: ${{ steps.app-token.outputs.token }}
-        run: |
-          gh pr create \
-            --base main \
-            --head "${{ steps.push.outputs.branch }}" \
-            --title "Bump SHA pins (${{ steps.discover.outputs.count }} plugins)" \
-            --body-file /tmp/bump-pr-body.md \
-            --label sha-bump
--- a/.github/workflows/validate-frontmatter.yml
+++ b/.github/workflows/validate-frontmatter.yml
@@ -9,10 +9,6 @@ on:

 jobs:
  validate:
-    # Fork PRs are auto-closed by close-external-prs.yml, so skip validation
-    # for them entirely. This also prevents untrusted filenames from forks
-    # from ever reaching the shell steps below.
-    if: github.event.pull_request.head.repo.full_name == github.repository
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
@@ -24,19 +20,16 @@ jobs:

      - name: Get changed frontmatter files
        id: changed
-        env:
-          GH_TOKEN: ${{ github.token }}
-          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          # Use diff-filter=AMRC to exclude deleted files (D) - only Added, Modified, Renamed, Copied
-          FILES=$(gh pr diff "$PR_NUMBER" --name-only --diff-filter=AMRC | grep -E '(agents/.*\.md|skills/.*/SKILL\.md|commands/.*\.md)$' || true)
+          FILES=$(gh pr diff ${{ github.event.pull_request.number }} --name-only --diff-filter=AMRC | grep -E '(agents/.*\.md|skills/.*/SKILL\.md|commands/.*\.md)$' || true)
          echo "files<<EOF" >> "$GITHUB_OUTPUT"
          echo "$FILES" >> "$GITHUB_OUTPUT"
          echo "EOF" >> "$GITHUB_OUTPUT"
+        env:
+          GH_TOKEN: ${{ github.token }}

      - name: Validate frontmatter
        if: steps.changed.outputs.files != ''
-        env:
-          FILES: ${{ steps.changed.outputs.files }}
        run: |
-          printf '%s\n' "$FILES" | xargs bun .github/scripts/validate-frontmatter.ts
+          echo "${{ steps.changed.outputs.files }}" | xargs bun .github/scripts/validate-frontmatter.ts
--- a/external_plugins/discord/server.ts
+++ b/external_plugins/discord/server.ts
@@ -222,8 +222,6 @@ type GateResult =
 const recentSentIds = new Set<string>()
 const RECENT_SENT_CAP = 200

-const dmChannelUsers = new Map<string, string>()
-
 function noteSent(id: string): void {
  recentSentIds.add(id)
  if (recentSentIds.size > RECENT_SENT_CAP) {
@@ -406,8 +404,7 @@ async function fetchAllowedChannel(id: string) {
  const ch = await fetchTextChannel(id)
  const access = loadAccess()
  if (ch.type === ChannelType.DM) {
-    const userId = ch.recipientId ?? dmChannelUsers.get(id)
-    if (userId && access.allowFrom.includes(userId)) return ch
+    if (access.allowFrom.includes(ch.recipientId)) return ch
  } else {
    const key = ch.isThread() ? ch.parentId ?? ch.id : ch.id
    if (key in access.groups) return ch
@@ -826,10 +823,6 @@ async function handleInbound(msg: Message): Promise<void> {

  const chat_id = msg.channelId

-  if (msg.channel.type === ChannelType.DM) {
-    dmChannelUsers.set(chat_id, msg.author.id)
-  }
-
  // Permission-reply intercept: if this looks like "yes xxxxx" for a
  // pending permission request, emit the structured event instead of
  // relaying as chat. The sender is already gate()-approved at this point
--- a/external_plugins/supabase/.claude-plugin/plugin.json
+++ b/external_plugins/supabase/.claude-plugin/plugin.json
@@ -0,0 +1,7 @@
+{
+  "name": "supabase",
+  "description": "Supabase MCP integration for database operations, authentication, storage, and real-time subscriptions. Manage your Supabase projects, run SQL queries, and interact with your backend directly.",
+  "author": {
+    "name": "Supabase"
+  }
+}
--- a/external_plugins/supabase/.mcp.json
+++ b/external_plugins/supabase/.mcp.json
@@ -0,0 +1,6 @@
+{
+  "supabase": {
+    "type": "http",
+    "url": "https://mcp.supabase.com/mcp"
+  }
+}
--- a/external_plugins/telegram/.claude-plugin/plugin.json
+++ b/external_plugins/telegram/.claude-plugin/plugin.json
@@ -1,7 +1,7 @@
 {
  "name": "telegram",
  "description": "Telegram channel for Claude Code \u2014 messaging bridge with built-in access control. Manage pairing, allowlists, and policy via /telegram:access.",
-  "version": "0.0.6",
+  "version": "0.0.5",
  "keywords": [
    "telegram",
    "messaging",
--- a/external_plugins/telegram/server.ts
+++ b/external_plugins/telegram/server.ts
@@ -284,19 +284,6 @@ function gate(ctx: Context): GateResult {
  return { action: 'drop' }
 }

-// Like gate() but for bot commands: no pairing side effects, just allow/drop.
-function dmCommandGate(ctx: Context): { access: Access; senderId: string } | null {
-  if (ctx.chat?.type !== 'private') return null
-  if (!ctx.from) return null
-  const senderId = String(ctx.from.id)
-  const access = loadAccess()
-  const pruned = pruneExpired(access)
-  if (pruned) saveAccess(access)
-  if (access.dmPolicy === 'disabled') return null
-  if (access.dmPolicy === 'allowlist' && !access.allowFrom.includes(senderId)) return null
-  return { access, senderId }
-}
-
 function isMentioned(ctx: Context, extraPatterns?: string[]): boolean {
  const entities = ctx.message?.entities ?? ctx.message?.caption_entities ?? []
  const text = ctx.message?.text ?? ctx.message?.caption ?? ''
@@ -682,7 +669,12 @@ setInterval(() => {
 // the gate's behavior for unrecognized groups.

 bot.command('start', async ctx => {
-  if (!dmCommandGate(ctx)) return
+  if (ctx.chat?.type !== 'private') return
+  const access = loadAccess()
+  if (access.dmPolicy === 'disabled') {
+    await ctx.reply(`This bot isn't accepting new connections.`)
+    return
+  }
  await ctx.reply(
    `This bot bridges Telegram to a Claude Code session.\n\n` +
    `To pair:\n` +
@@ -693,7 +685,7 @@ bot.command('start', async ctx => {
 })

 bot.command('help', async ctx => {
-  if (!dmCommandGate(ctx)) return
+  if (ctx.chat?.type !== 'private') return
  await ctx.reply(
    `Messages you send here route to a paired Claude Code session. ` +
    `Text and photos are forwarded; replies and reactions come back.\n\n` +
@@ -703,12 +695,14 @@ bot.command('help', async ctx => {
 })

 bot.command('status', async ctx => {
-  const gated = dmCommandGate(ctx)
-  if (!gated) return
-  const { access, senderId } = gated
+  if (ctx.chat?.type !== 'private') return
+  const from = ctx.from
+  if (!from) return
+  const senderId = String(from.id)
+  const access = loadAccess()

  if (access.allowFrom.includes(senderId)) {
-    const name = ctx.from!.username ? `@${ctx.from!.username}` : senderId
+    const name = from.username ? `@${from.username}` : senderId
    await ctx.reply(`Paired as ${name}.`)
    return
  }
@@ -991,17 +985,14 @@ bot.catch(err => {
  process.stderr.write(`telegram channel: handler error (polling continues): ${err.error}\n`)
 })

-// Retry polling with backoff on any error. Previously only 409 was retried —
-// a single ETIMEDOUT/ECONNRESET/DNS failure rejected bot.start(), the catch
-// returned, and polling stopped permanently while the process stayed alive
-// (MCP stdin keeps it running). Outbound tools kept working but the bot was
-// deaf to inbound messages until a full restart.
+// 409 Conflict = another getUpdates consumer is still active (zombie from a
+// previous session, or a second Claude Code instance). Retry with backoff
+// until the slot frees up instead of crashing on the first rejection.
 void (async () => {
  for (let attempt = 1; ; attempt++) {
    try {
      await bot.start({
        onStart: info => {
-          attempt = 0
          botUsername = info.username
          process.stderr.write(`telegram channel: polling as @${info.username}\n`)
          void bot.api.setMyCommands(
@@ -1017,22 +1008,28 @@ void (async () => {
      return // bot.stop() was called — clean exit from the loop
    } catch (err) {
      if (shuttingDown) return
+      if (err instanceof GrammyError && err.error_code === 409) {
+        if (attempt >= 8) {
+          process.stderr.write(
+            `telegram channel: 409 Conflict persists after ${attempt} attempts — ` +
+            `another poller is holding the bot token (stray 'bun server.ts' process or a second session). Exiting.\n`,
+          )
+          return
+        }
+        const delay = Math.min(1000 * attempt, 15000)
+        const detail = attempt === 1
+          ? ' — another instance is polling (zombie session, or a second Claude Code running?)'
+          : ''
+        process.stderr.write(
+          `telegram channel: 409 Conflict${detail}, retrying in ${delay / 1000}s\n`,
+        )
+        await new Promise(r => setTimeout(r, delay))
+        continue
+      }
      // bot.stop() mid-setup rejects with grammy's "Aborted delay" — expected, not an error.
      if (err instanceof Error && err.message === 'Aborted delay') return
-      const is409 = err instanceof GrammyError && err.error_code === 409
-      if (is409 && attempt >= 8) {
-        process.stderr.write(
-          `telegram channel: 409 Conflict persists after ${attempt} attempts — ` +
-          `another poller is holding the bot token (stray 'bun server.ts' process or a second session). Exiting.\n`,
-        )
-        return
-      }
-      const delay = Math.min(1000 * attempt, 15000)
-      const detail = is409
-        ? `409 Conflict${attempt === 1 ? ' — another instance is polling (zombie session, or a second Claude Code running?)' : ''}`
-        : `polling error: ${err}`
-      process.stderr.write(`telegram channel: ${detail}, retrying in ${delay / 1000}s\n`)
-      await new Promise(r => setTimeout(r, delay))
+      process.stderr.write(`telegram channel: polling failed: ${err}\n`)
+      return
    }
  }
 })()
--- a/plugins/skill-creator/skills/skill-creator/SKILL.md
+++ b/plugins/skill-creator/skills/skill-creator/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: skill-creator
-description: Create new skills, modify and improve existing skills, and measure skill performance. Use when users want to create a skill from scratch, edit, or optimize an existing skill, run evals to test a skill, benchmark skill performance with variance analysis, or optimize a skill's description for better triggering accuracy.
+description: Create new skills, modify and improve existing skills, and measure skill performance. Use when users want to create a skill from scratch, update or optimize an existing skill, run evals to test a skill, benchmark skill performance with variance analysis, or optimize a skill's description for better triggering accuracy.
 ---

 # Skill Creator
@@ -391,7 +391,7 @@ Use the model ID from your system prompt (the one powering the current session)

 While it runs, periodically tail the output to give the user updates on which iteration it's on and what the scores look like.

-This handles the full optimization loop automatically. It splits the eval set into 60% train and 40% held-out test, evaluates the current description (running each query 3 times to get a reliable trigger rate), then calls Claude to propose improvements based on what failed. It re-evaluates each new description on both train and test, iterating up to 5 times. When it's done, it opens an HTML report in the browser showing the results per iteration and returns JSON with `best_description` — selected by test score rather than train score to avoid overfitting.
+This handles the full optimization loop automatically. It splits the eval set into 60% train and 40% held-out test, evaluates the current description (running each query 3 times to get a reliable trigger rate), then calls Claude with extended thinking to propose improvements based on what failed. It re-evaluates each new description on both train and test, iterating up to 5 times. When it's done, it opens an HTML report in the browser showing the results per iteration and returns JSON with `best_description` — selected by test score rather than train score to avoid overfitting.

 ### How skill triggering works

@@ -435,11 +435,6 @@ In Claude.ai, the core workflow is the same (draft → test → review → impro

 **Packaging**: The `package_skill.py` script works anywhere with Python and a filesystem. On Claude.ai, you can run it and the user can download the resulting `.skill` file.

-**Updating an existing skill**: The user might be asking you to update an existing skill, not create a new one. In this case:
- **Preserve the original name.** Note the skill's directory name and `name` frontmatter field -- use them unchanged. E.g., if the installed skill is `research-helper`, output `research-helper.skill` (not `research-helper-v2`).
- **Copy to a writeable location before editing.** The installed skill path may be read-only. Copy to `/tmp/skill-name/`, edit there, and package from the copy.
- **If packaging manually, stage in `/tmp/` first**, then copy to the output directory -- direct writes may fail due to permissions.
-
 ---

 ## Cowork-Specific Instructions
@@ -452,7 +447,6 @@ If you're in Cowork, the main things to know are:
 - Feedback works differently: since there's no running server, the viewer's "Submit All Reviews" button will download `feedback.json` as a file. You can then read it from there (you may have to request access first).
 - Packaging works — `package_skill.py` just needs Python and a filesystem.
 - Description optimization (`run_loop.py` / `run_eval.py`) should work in Cowork just fine since it uses `claude -p` via subprocess, not a browser, but please save it until you've fully finished making the skill and the user agrees it's in good shape.
- **Updating an existing skill**: The user might be asking you to update an existing skill, not create a new one. Follow the update guidance in the claude.ai section above.

 ---

--- a/plugins/skill-creator/skills/skill-creator/scripts/improve_description.py
+++ b/plugins/skill-creator/skills/skill-creator/scripts/improve_description.py
@@ -2,52 +2,22 @@
 """Improve a skill description based on eval results.

 Takes eval results (from run_eval.py) and generates an improved description
-by calling `claude -p` as a subprocess (same auth pattern as run_eval.py —
-uses the session's Claude Code auth, no separate ANTHROPIC_API_KEY needed).
+using Claude with extended thinking.
 """

 import argparse
 import json
-import os
 import re
-import subprocess
 import sys
 from pathlib import Path

+import anthropic
+
 from scripts.utils import parse_skill_md


-def _call_claude(prompt: str, model: str | None, timeout: int = 300) -> str:
-    """Run `claude -p` with the prompt on stdin and return the text response.
-
-    Prompt goes over stdin (not argv) because it embeds the full SKILL.md
-    body and can easily exceed comfortable argv length.
-    """
-    cmd = ["claude", "-p", "--output-format", "text"]
-    if model:
-        cmd.extend(["--model", model])
-
-    # Remove CLAUDECODE env var to allow nesting claude -p inside a
-    # Claude Code session. The guard is for interactive terminal conflicts;
-    # programmatic subprocess usage is safe. Same pattern as run_eval.py.
-    env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}
-
-    result = subprocess.run(
-        cmd,
-        input=prompt,
-        capture_output=True,
-        text=True,
-        env=env,
-        timeout=timeout,
-    )
-    if result.returncode != 0:
-        raise RuntimeError(
-            f"claude -p exited {result.returncode}\nstderr: {result.stderr}"
-        )
-    return result.stdout
-
-
 def improve_description(
+    client: anthropic.Anthropic,
    skill_name: str,
    skill_content: str,
    current_description: str,
@@ -129,7 +99,7 @@ Based on the failures, write a new and improved description that is more likely
 1. Avoid overfitting
 2. The list might get loooong and it's injected into ALL queries and there might be a lot of skills, so we don't want to blow too much space on any given description.

-Concretely, your description should not be more than about 100-200 words, even if that comes at the cost of accuracy. There is a hard limit of 1024 characters — descriptions over that will be truncated, so stay comfortably under it.
+Concretely, your description should not be more than about 100-200 words, even if that comes at the cost of accuracy.

 Here are some tips that we've found to work well in writing these descriptions:
 - The skill should be phrased in the imperative -- "Use this skill for" rather than "this skill does"
@@ -141,41 +111,70 @@ I'd encourage you to be creative and mix up the style in different iterations si

 Please respond with only the new description text in <new_description> tags, nothing else."""

-    text = _call_claude(prompt, model)
+    response = client.messages.create(
+        model=model,
+        max_tokens=16000,
+        thinking={
+            "type": "enabled",
+            "budget_tokens": 10000,
+        },
+        messages=[{"role": "user", "content": prompt}],
+    )

+    # Extract thinking and text from response
+    thinking_text = ""
+    text = ""
+    for block in response.content:
+        if block.type == "thinking":
+            thinking_text = block.thinking
+        elif block.type == "text":
+            text = block.text
+
+    # Parse out the <new_description> tags
    match = re.search(r"<new_description>(.*?)</new_description>", text, re.DOTALL)
    description = match.group(1).strip().strip('"') if match else text.strip().strip('"')

+    # Log the transcript
    transcript: dict = {
        "iteration": iteration,
        "prompt": prompt,
+        "thinking": thinking_text,
        "response": text,
        "parsed_description": description,
        "char_count": len(description),
        "over_limit": len(description) > 1024,
    }

-    # Safety net: the prompt already states the 1024-char hard limit, but if
-    # the model blew past it anyway, make one fresh single-turn call that
-    # quotes the too-long version and asks for a shorter rewrite. (The old
-    # SDK path did this as a true multi-turn; `claude -p` is one-shot, so we
-    # inline the prior output into the new prompt instead.)
+    # If over 1024 chars, ask the model to shorten it
    if len(description) > 1024:
-        shorten_prompt = (
-            f"{prompt}\n\n"
-            f"---\n\n"
-            f"A previous attempt produced this description, which at "
-            f"{len(description)} characters is over the 1024-character hard limit:\n\n"
-            f'"{description}"\n\n'
-            f"Rewrite it to be under 1024 characters while keeping the most "
-            f"important trigger words and intent coverage. Respond with only "
-            f"the new description in <new_description> tags."
+        shorten_prompt = f"Your description is {len(description)} characters, which exceeds the hard 1024 character limit. Please rewrite it to be under 1024 characters while preserving the most important trigger words and intent coverage. Respond with only the new description in <new_description> tags."
+        shorten_response = client.messages.create(
+            model=model,
+            max_tokens=16000,
+            thinking={
+                "type": "enabled",
+                "budget_tokens": 10000,
+            },
+            messages=[
+                {"role": "user", "content": prompt},
+                {"role": "assistant", "content": text},
+                {"role": "user", "content": shorten_prompt},
+            ],
        )
-        shorten_text = _call_claude(shorten_prompt, model)
+
+        shorten_thinking = ""
+        shorten_text = ""
+        for block in shorten_response.content:
+            if block.type == "thinking":
+                shorten_thinking = block.thinking
+            elif block.type == "text":
+                shorten_text = block.text
+
        match = re.search(r"<new_description>(.*?)</new_description>", shorten_text, re.DOTALL)
        shortened = match.group(1).strip().strip('"') if match else shorten_text.strip().strip('"')

        transcript["rewrite_prompt"] = shorten_prompt
+        transcript["rewrite_thinking"] = shorten_thinking
        transcript["rewrite_response"] = shorten_text
        transcript["rewrite_description"] = shortened
        transcript["rewrite_char_count"] = len(shortened)
@@ -217,7 +216,9 @@ def main():
        print(f"Current: {current_description}", file=sys.stderr)
        print(f"Score: {eval_results['summary']['passed']}/{eval_results['summary']['total']}", file=sys.stderr)

+    client = anthropic.Anthropic()
    new_description = improve_description(
+        client=client,
        skill_name=name,
        skill_content=content,
        current_description=current_description,
--- a/plugins/skill-creator/skills/skill-creator/scripts/run_loop.py
+++ b/plugins/skill-creator/skills/skill-creator/scripts/run_loop.py
@@ -15,6 +15,8 @@ import time
 import webbrowser
 from pathlib import Path

+import anthropic
+
 from scripts.generate_report import generate_html
 from scripts.improve_description import improve_description
 from scripts.run_eval import find_project_root, run_eval
@@ -73,6 +75,7 @@ def run_loop(
        train_set = eval_set
        test_set = []

+    client = anthropic.Anthropic()
    history = []
    exit_reason = "unknown"

@@ -197,6 +200,7 @@ def run_loop(
            for h in history
        ]
        new_description = improve_description(
+            client=client,
            skill_name=name,
            skill_content=content,
            current_description=current_description,