mirror of
https://github.com/anthropics/claude-plugins-official.git
synced 2026-05-12 23:42:40 +00:00
Compare commits
82 Commits
add-licens
...
morganl/co
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5e4a45001d | ||
|
|
22a1b25977 | ||
|
|
718818146e | ||
|
|
bdca23e8e4 | ||
|
|
020446a429 | ||
|
|
740e9d5513 | ||
|
|
5a71459c03 | ||
|
|
ae54b113c4 | ||
|
|
2a40fd2e7c | ||
|
|
95f6172405 | ||
|
|
7bbdb8434e | ||
|
|
4bbf944de1 | ||
|
|
06830b2ccd | ||
|
|
bd6f1d7f48 | ||
|
|
808e70ffb9 | ||
|
|
187a267738 | ||
|
|
42e980340d | ||
|
|
c15eada2e9 | ||
|
|
f9f07aa2d3 | ||
|
|
81952cabc5 | ||
|
|
0852f6647a | ||
|
|
b0724d7a16 | ||
|
|
cf62a6c02d | ||
|
|
3bd94cc810 | ||
|
|
a8be018317 | ||
|
|
33e62b9bd6 | ||
|
|
9f103c621d | ||
|
|
caa8c1a539 | ||
|
|
33fd73c8b9 | ||
|
|
777db5c30b | ||
|
|
aeecad8f43 | ||
|
|
f1938a2dc2 | ||
|
|
bb7730114d | ||
|
|
3df5394ee9 | ||
|
|
12401af104 | ||
|
|
167f01f2e0 | ||
|
|
637c6b3b6a | ||
|
|
811c9b5394 | ||
|
|
b00abee24e | ||
|
|
5c5c5f9896 | ||
|
|
8518bfc43d | ||
|
|
db52e65c44 | ||
|
|
b992a65037 | ||
|
|
de39da5ba2 | ||
|
|
cb8c857a5e | ||
|
|
48aa435178 | ||
|
|
7e401edac7 | ||
|
|
7f3389d21f | ||
|
|
560b7e0d38 | ||
|
|
903a6aba48 | ||
|
|
dcd86cd6f9 | ||
|
|
985075c567 | ||
|
|
39353b5b42 | ||
|
|
507462e2fb | ||
|
|
d6fa70eb1a | ||
|
|
8145923edc | ||
|
|
2b666914e6 | ||
|
|
e8fb9898a6 | ||
|
|
c28404f818 | ||
|
|
fb48c3af93 | ||
|
|
622ef85323 | ||
|
|
173bd29be3 | ||
|
|
0de7a91403 | ||
|
|
c5b7657350 | ||
|
|
3ffb4b4ca8 | ||
|
|
656b617198 | ||
|
|
7ed523140f | ||
|
|
9fc974ef8b | ||
|
|
9a6b30ebb4 | ||
|
|
d4e6f609d8 | ||
|
|
95f807ee6c | ||
|
|
23a9a10ff7 | ||
|
|
5c6c90c1bd | ||
|
|
76f1e09f07 | ||
|
|
d19dab67e8 | ||
|
|
58578a456a | ||
|
|
1057d02c53 | ||
|
|
9dc3809e74 | ||
|
|
6e43e87fc8 | ||
|
|
b32879bf76 | ||
|
|
98c01d3fbf | ||
|
|
ce0166dde2 |
File diff suppressed because it is too large
Load Diff
229
.github/scripts/discover_bumps.py
vendored
Normal file
229
.github/scripts/discover_bumps.py
vendored
Normal file
@@ -0,0 +1,229 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Discover plugins in marketplace.json whose upstream repo has moved past
|
||||
their pinned SHA, update the file in place, and emit a summary.
|
||||
|
||||
Adapted from claude-plugins-community-internal's discover_bumps.py for the
|
||||
single-file marketplace.json format used by claude-plugins-official.
|
||||
|
||||
Usage: discover_bumps.py [--plugin NAME] [--max N] [--dry-run]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
|
||||
MARKETPLACE_PATH = ".claude-plugin/marketplace.json"
|
||||
|
||||
|
||||
def gh_api(path: str) -> Any:
    """GET `path` from the GitHub API via the `gh` CLI.

    Returns the parsed JSON body on success and None on "not found";
    raises RuntimeError on any other failure.

    "Not found" covers both 404 (resource gone) and 422 "No commit found
    for SHA" (force-pushed away). Both mean the thing we asked for isn't
    there — treating them the same lets callers handle dead refs uniformly.
    """
    proc = subprocess.run(["gh", "api", path], capture_output=True, text=True)
    if proc.returncode == 0:
        return json.loads(proc.stdout)
    # gh reports API errors on either stream depending on version; scan both.
    output = proc.stdout + proc.stderr
    not_found_markers = ("404", "Not Found", "No commit found")
    if any(marker in output for marker in not_found_markers):
        return None
    raise RuntimeError(f"gh api {path}: {proc.stderr.strip() or proc.stdout.strip()}")
|
||||
|
||||
|
||||
def parse_github_repo(url: str) -> tuple[str, str] | None:
|
||||
"""Extract (owner, repo) from a URL or owner/repo shorthand."""
|
||||
# Full URL: https://github.com/owner/repo(.git)(/...)
|
||||
m = re.match(r"https?://github\.com/([^/]+)/([^/]+?)(?:\.git)?(?:/|$)", url)
|
||||
if m:
|
||||
return m.group(1), m.group(2)
|
||||
# Shorthand: owner/repo
|
||||
m = re.match(r"^([\w.-]+)/([\w.-]+)$", url)
|
||||
if m:
|
||||
return m.group(1), m.group(2)
|
||||
return None
|
||||
|
||||
|
||||
def latest_sha(owner: str, repo: str, *, ref: str | None, path: str | None) -> str | None:
    """Latest commit SHA for the repo, optionally scoped to a ref and/or path.

    Returns None when the repo, ref, or scoped commit history cannot be
    found (dead upstream). Raises RuntimeError on other API failures
    (propagated from gh_api).
    """
    # Local import keeps the module's top-of-file import block untouched.
    from urllib.parse import quote

    if path:
        # Scoped to a subdirectory — use the commits list endpoint with path
        # filter. Percent-encode query values so a path or ref containing
        # '#', '&', '?', '+', or spaces cannot corrupt the query string
        # (previously they were interpolated raw). Slashes in paths are
        # legal in query values and left intact.
        q = f"repos/{owner}/{repo}/commits?per_page=1&path={quote(path, safe='/')}"
        if ref:
            q += f"&sha={quote(ref, safe='')}"
        commits = gh_api(q)
        if not commits:
            return None
        return commits[0]["sha"]
    # Whole repo — the single-ref endpoint is cheaper.
    if not ref:
        meta = gh_api(f"repos/{owner}/{repo}")
        if not meta:
            return None
        ref = meta["default_branch"]
    c = gh_api(f"repos/{owner}/{repo}/commits/{ref}")
    return c["sha"] if c else None
|
||||
|
||||
|
||||
def pinned_age_days(owner: str, repo: str, sha: str) -> int | None:
    """Days since the pinned commit's committer date. Used for oldest-first rotation.

    Returns None when the commit cannot be found (e.g. force-pushed away).
    """
    commit = gh_api(f"repos/{owner}/{repo}/commits/{sha}")
    if not commit:
        return None
    # GitHub emits trailing-'Z' ISO timestamps; fromisoformat on older
    # Pythons needs an explicit offset, so normalize first.
    iso = commit["commit"]["committer"]["date"].replace("Z", "+00:00")
    committed_at = datetime.fromisoformat(iso)
    return (datetime.now(timezone.utc) - committed_at).days
|
||||
|
||||
|
||||
def main() -> int:
    """Scan marketplace.json for stale SHA pins, bump them in place, and report.

    Reads MARKETPLACE_PATH, compares each GitHub-sourced plugin's pinned
    `sha` against the upstream head, rewrites the file (unless --dry-run),
    and emits GitHub Actions outputs, a step summary, a PR body file, and
    a stderr console summary. Always returns 0 — discovery problems are
    reported as warnings, not failures.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--plugin", help="only check this plugin")
    ap.add_argument("--max", type=int, default=20, help="cap bumps emitted")
    ap.add_argument("--dry-run", action="store_true", help="don't write marketplace.json")
    args = ap.parse_args()

    with open(MARKETPLACE_PATH) as f:
        marketplace = json.load(f)

    plugins = marketplace.get("plugins", [])
    bumps: list[dict] = []            # stale entries, candidates to apply
    dead: list[str] = []              # upstream repo/ref no longer resolvable
    skipped_non_github = 0            # sources whose url isn't a GitHub repo
    checked = 0                       # SHA-pinned entries actually examined

    for plugin in plugins:
        name = plugin.get("name", "?")
        src = plugin.get("source")

        # Only process object sources with a sha field
        if not isinstance(src, dict) or "sha" not in src:
            continue

        # Filter to specific plugin if requested
        if args.plugin and name != args.plugin:
            continue

        checked += 1
        kind = src.get("source")
        url = src.get("url", "")
        path = src.get("path")
        ref = src.get("ref")
        pinned = src.get("sha")

        slug = parse_github_repo(url)
        if not slug:
            skipped_non_github += 1
            continue
        owner, repo = slug

        # A failed lookup is a warning, not fatal — keep scanning the rest.
        try:
            latest = latest_sha(owner, repo, ref=ref, path=path)
        except RuntimeError as e:
            print(f"::warning::{name}: {e}", file=sys.stderr)
            continue

        if latest is None:
            dead.append(f"{name} ({owner}/{repo})")
            continue

        if latest == pinned:
            continue  # up to date

        # Age lookup for rotation — oldest-pinned first prevents starvation.
        try:
            age = pinned_age_days(owner, repo, pinned) if pinned else None
        except RuntimeError as e:
            print(f"::warning::{name}: age lookup failed: {e}", file=sys.stderr)
            age = None

        bumps.append({
            "name": name,
            "kind": kind,
            "url": url,
            "path": path or "",
            "ref": ref or "",
            "old_sha": pinned or "",
            "new_sha": latest,
            # Unknown age sorts as "very old" (10**6 days) so entries whose
            # pinned commit vanished still get bumped promptly.
            "age_days": age if age is not None else 10**6,
        })

    # Oldest-pinned first so nothing starves under the cap.
    bumps.sort(key=lambda b: -b["age_days"])
    emitted = bumps[: args.max]

    # Apply bumps to marketplace data
    if emitted and not args.dry_run:
        bump_map = {b["name"]: b["new_sha"] for b in emitted}
        for plugin in plugins:
            name = plugin.get("name")
            src = plugin.get("source")
            if isinstance(src, dict) and name in bump_map:
                src["sha"] = bump_map[name]

        with open(MARKETPLACE_PATH, "w") as f:
            json.dump(marketplace, f, indent=2, ensure_ascii=False)
            f.write("\n")

    # Write GitHub outputs
    out = os.environ.get("GITHUB_OUTPUT")
    if out:
        bumped_names = ",".join(b["name"] for b in emitted)
        with open(out, "a") as fh:
            fh.write(f"count={len(emitted)}\n")
            fh.write(f"bumped_names={bumped_names}\n")

    # Write GitHub step summary
    summary = os.environ.get("GITHUB_STEP_SUMMARY")
    if summary:
        with open(summary, "a") as fh:
            fh.write("## SHA Bump Discovery\n\n")
            fh.write(f"- Checked: {checked} SHA-pinned entries\n")
            fh.write(f"- Stale: {len(bumps)} (applying {len(emitted)}, cap {args.max})\n")
            if skipped_non_github:
                fh.write(f"- Skipped non-GitHub: {skipped_non_github}\n")
            if dead:
                fh.write(f"- **Dead upstream** ({len(dead)}): {', '.join(dead)}\n")
            if emitted:
                fh.write("\n| Plugin | Old | New | Age |\n|---|---|---|---|\n")
                for b in emitted:
                    old = b["old_sha"][:8] if b["old_sha"] else "(unpinned)"
                    fh.write(f"| {b['name']} | `{old}` | `{b['new_sha'][:8]}` | {b['age_days']}d |\n")

    # Write PR body for the workflow to use
    pr_body_path = os.environ.get("PR_BODY_PATH", "/tmp/bump-pr-body.md")
    if emitted:
        with open(pr_body_path, "w") as fh:
            fh.write("Upstream repos moved. Bumping pinned SHAs so plugins track latest.\n\n")
            fh.write("| Plugin | Old | New | Upstream |\n")
            fh.write("|--------|-----|-----|----------|\n")
            for b in emitted:
                old = b["old_sha"][:8] if b["old_sha"] else "(unpinned)"
                slug_str = re.sub(r"https?://github\.com/", "", b["url"])
                slug_str = re.sub(r"\.git$", "", slug_str)
                # NOTE(review): for unpinned entries old_sha is "" so this
                # compare URL is malformed ("compare/...<new>") — confirm
                # whether unpinned entries can reach this point in practice.
                compare = f"https://github.com/{slug_str}/compare/{b['old_sha'][:12]}...{b['new_sha'][:12]}"
                fh.write(f"| `{b['name']}` | `{old}` | `{b['new_sha'][:8]}` | [diff]({compare}) |\n")
            fh.write(f"\n---\n_Auto-generated by `bump-plugin-shas.yml` on {datetime.now(timezone.utc).strftime('%Y-%m-%d')}_\n")

    # Console summary
    print(f"Checked {checked} SHA-pinned plugins", file=sys.stderr)
    print(f"Stale: {len(bumps)}, applying: {len(emitted)}", file=sys.stderr)
    if dead:
        print(f"Dead upstream: {', '.join(dead)}", file=sys.stderr)
    for b in emitted:
        old = b["old_sha"][:8] if b["old_sha"] else "unpinned"
        print(f"  {b['name']}: {old} -> {b['new_sha'][:8]} ({b['age_days']}d)", file=sys.stderr)

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code.
    raise SystemExit(main())
|
||||
133
.github/workflows/bump-plugin-shas.yml
vendored
Normal file
133
.github/workflows/bump-plugin-shas.yml
vendored
Normal file
@@ -0,0 +1,133 @@
|
||||
name: Bump plugin SHAs

# Weekly sweep of marketplace.json — for each entry whose upstream repo has
# moved past its pinned SHA, open a PR against main with updated SHAs. The
# validate-marketplace workflow then runs on the PR to confirm the file is
# still well-formed.
#
# Adapted from claude-plugins-community-internal's bump-plugin-shas.yml
# for the single-file marketplace.json format. Key difference: all bumps
# are batched into one PR (since they all modify the same file).

on:
  schedule:
    - cron: '23 7 * * 1' # Monday 07:23 UTC
  workflow_dispatch:
    inputs:
      plugin:
        description: Only bump this plugin (for testing)
        required: false
      max_bumps:
        description: Cap on plugins bumped this run
        required: false
        default: '20'
      dry_run:
        # Manual runs default to discovery-only; scheduled runs have no
        # inputs, so inputs.dry_run is empty there and the PR path runs.
        description: Discover only, don't open PR
        type: boolean
        default: true

# One sweep at a time; never cancel a run that may be mid-push.
concurrency:
  group: bump-plugin-shas
  cancel-in-progress: false

permissions:
  contents: write
  pull-requests: write

jobs:
  bump:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4

      # Don't stack bump PRs — if one is already open, do nothing this run.
      - name: Check for existing bump PR
        id: existing
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          existing=$(gh pr list --label sha-bump --state open --json number --jq 'length')
          echo "count=$existing" >> "$GITHUB_OUTPUT"
          if [ "$existing" -gt 0 ]; then
            echo "::notice::Open sha-bump PR already exists — skipping"
          fi

      - name: Ensure sha-bump label exists
        if: steps.existing.outputs.count == '0'
        env:
          GH_TOKEN: ${{ github.token }}
        run: gh label create sha-bump --color 0e8a16 --description "Automated SHA bump" 2>/dev/null || true

      # Take marketplace.json from origin/main so bumps are computed against
      # the freshest committed state, not the ref this run checked out.
      - name: Overlay marketplace data from main
        if: steps.existing.outputs.count == '0'
        run: |
          git fetch origin main --depth=1 --quiet
          git checkout origin/main -- .claude-plugin/marketplace.json

      - name: Discover and apply SHA bumps
        if: steps.existing.outputs.count == '0'
        id: discover
        env:
          GH_TOKEN: ${{ github.token }}
          PR_BODY_PATH: /tmp/bump-pr-body.md
          PLUGIN: ${{ inputs.plugin }}
          MAX_BUMPS: ${{ inputs.max_bumps }}
          DRY_RUN: ${{ inputs.dry_run }}
        run: |
          args=(--max "${MAX_BUMPS:-20}")
          [[ -n "$PLUGIN" ]] && args+=(--plugin "$PLUGIN")
          [[ "$DRY_RUN" = "true" ]] && args+=(--dry-run)
          python3 .github/scripts/discover_bumps.py "${args[@]}"

      - uses: oven-sh/setup-bun@v2
        if: steps.existing.outputs.count == '0' && steps.discover.outputs.count != '0' && inputs.dry_run != true

      - name: Validate marketplace.json
        if: steps.existing.outputs.count == '0' && steps.discover.outputs.count != '0' && inputs.dry_run != true
        run: |
          bun .github/scripts/validate-marketplace.ts .claude-plugin/marketplace.json
          bun .github/scripts/check-marketplace-sorted.ts

      - name: Push bump branch
        if: steps.existing.outputs.count == '0' && steps.discover.outputs.count != '0' && inputs.dry_run != true
        id: push
        run: |
          branch="auto/bump-shas-$(date +%Y%m%d)"
          echo "branch=$branch" >> "$GITHUB_OUTPUT"

          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git checkout -b "$branch"
          git add .claude-plugin/marketplace.json
          git commit -m "Bump SHA pins for ${{ steps.discover.outputs.count }} plugin(s)

          Plugins: ${{ steps.discover.outputs.bumped_names }}"
          git push -u origin "$branch" --force-with-lease

      # GITHUB_TOKEN cannot create PRs (org policy: "Allow GitHub Actions to
      # create and approve pull requests" is disabled). Use the same GitHub App
      # that -internal's bump workflow uses.
      #
      # Prerequisite: app 2812036 must be installed on this repo. The PEM
      # secret must exist in this repo's settings (shared with -internal).
      - name: Generate bot token
        if: steps.push.outcome == 'success'
        id: app-token
        uses: actions/create-github-app-token@v1
        with:
          app-id: 2812036
          private-key: ${{ secrets.CLAUDE_DIRECTORY_BOT_PRIVATE_KEY }}
          owner: ${{ github.repository_owner }}
          repositories: ${{ github.event.repository.name }}

      - name: Create pull request
        if: steps.push.outcome == 'success'
        env:
          GH_TOKEN: ${{ steps.app-token.outputs.token }}
        run: |
          gh pr create \
            --base main \
            --head "${{ steps.push.outputs.branch }}" \
            --title "Bump SHA pins (${{ steps.discover.outputs.count }} plugins)" \
            --body-file /tmp/bump-pr-body.md \
            --label sha-bump
|
||||
@@ -222,6 +222,8 @@ type GateResult =
|
||||
const recentSentIds = new Set<string>()
|
||||
const RECENT_SENT_CAP = 200
|
||||
|
||||
const dmChannelUsers = new Map<string, string>()
|
||||
|
||||
function noteSent(id: string): void {
|
||||
recentSentIds.add(id)
|
||||
if (recentSentIds.size > RECENT_SENT_CAP) {
|
||||
@@ -404,7 +406,8 @@ async function fetchAllowedChannel(id: string) {
|
||||
const ch = await fetchTextChannel(id)
|
||||
const access = loadAccess()
|
||||
if (ch.type === ChannelType.DM) {
|
||||
if (access.allowFrom.includes(ch.recipientId)) return ch
|
||||
const userId = ch.recipientId ?? dmChannelUsers.get(id)
|
||||
if (userId && access.allowFrom.includes(userId)) return ch
|
||||
} else {
|
||||
const key = ch.isThread() ? ch.parentId ?? ch.id : ch.id
|
||||
if (key in access.groups) return ch
|
||||
@@ -823,6 +826,10 @@ async function handleInbound(msg: Message): Promise<void> {
|
||||
|
||||
const chat_id = msg.channelId
|
||||
|
||||
if (msg.channel.type === ChannelType.DM) {
|
||||
dmChannelUsers.set(chat_id, msg.author.id)
|
||||
}
|
||||
|
||||
// Permission-reply intercept: if this looks like "yes xxxxx" for a
|
||||
// pending permission request, emit the structured event instead of
|
||||
// relaying as chat. The sender is already gate()-approved at this point
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
{
|
||||
"name": "slack",
|
||||
"description": "Slack workspace integration. Search messages, access channels, read threads, and stay connected with your team's communications while coding. Find relevant discussions and context quickly.",
|
||||
"author": {
|
||||
"name": "Slack"
|
||||
}
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
{
|
||||
"slack": {
|
||||
"type": "http",
|
||||
"url": "https://mcp.slack.com/mcp",
|
||||
"oauth": {
|
||||
"clientId": "1601185624273.8899143856786",
|
||||
"callbackPort": 3118
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
{
|
||||
"name": "supabase",
|
||||
"description": "Supabase MCP integration for database operations, authentication, storage, and real-time subscriptions. Manage your Supabase projects, run SQL queries, and interact with your backend directly.",
|
||||
"author": {
|
||||
"name": "Supabase"
|
||||
}
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
{
|
||||
"supabase": {
|
||||
"type": "http",
|
||||
"url": "https://mcp.supabase.com/mcp"
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "telegram",
|
||||
"description": "Telegram channel for Claude Code \u2014 messaging bridge with built-in access control. Manage pairing, allowlists, and policy via /telegram:access.",
|
||||
"version": "0.0.4",
|
||||
"version": "0.0.6",
|
||||
"keywords": [
|
||||
"telegram",
|
||||
"messaging",
|
||||
|
||||
@@ -51,6 +51,22 @@ if (!TOKEN) {
|
||||
process.exit(1)
|
||||
}
|
||||
const INBOX_DIR = join(STATE_DIR, 'inbox')
|
||||
const PID_FILE = join(STATE_DIR, 'bot.pid')
|
||||
|
||||
// Telegram allows exactly one getUpdates consumer per token. If a previous
|
||||
// session crashed (SIGKILL, terminal closed) its server.ts grandchild can
|
||||
// survive as an orphan and hold the slot forever, so every new session sees
|
||||
// 409 Conflict. Kill any stale holder before we start polling.
|
||||
mkdirSync(STATE_DIR, { recursive: true, mode: 0o700 })
|
||||
try {
|
||||
const stale = parseInt(readFileSync(PID_FILE, 'utf8'), 10)
|
||||
if (stale > 1 && stale !== process.pid) {
|
||||
process.kill(stale, 0)
|
||||
process.stderr.write(`telegram channel: replacing stale poller pid=${stale}\n`)
|
||||
process.kill(stale, 'SIGTERM')
|
||||
}
|
||||
} catch {}
|
||||
writeFileSync(PID_FILE, String(process.pid))
|
||||
|
||||
// Last-resort safety net — without these the process dies silently on any
|
||||
// unhandled promise rejection. With them it logs and keeps serving tools.
|
||||
@@ -268,6 +284,19 @@ function gate(ctx: Context): GateResult {
|
||||
return { action: 'drop' }
|
||||
}
|
||||
|
||||
// Like gate() but for bot commands: no pairing side effects, just allow/drop.
|
||||
function dmCommandGate(ctx: Context): { access: Access; senderId: string } | null {
|
||||
if (ctx.chat?.type !== 'private') return null
|
||||
if (!ctx.from) return null
|
||||
const senderId = String(ctx.from.id)
|
||||
const access = loadAccess()
|
||||
const pruned = pruneExpired(access)
|
||||
if (pruned) saveAccess(access)
|
||||
if (access.dmPolicy === 'disabled') return null
|
||||
if (access.dmPolicy === 'allowlist' && !access.allowFrom.includes(senderId)) return null
|
||||
return { access, senderId }
|
||||
}
|
||||
|
||||
function isMentioned(ctx: Context, extraPatterns?: string[]): boolean {
|
||||
const entities = ctx.message?.entities ?? ctx.message?.caption_entities ?? []
|
||||
const text = ctx.message?.text ?? ctx.message?.caption ?? ''
|
||||
@@ -621,6 +650,9 @@ function shutdown(): void {
|
||||
if (shuttingDown) return
|
||||
shuttingDown = true
|
||||
process.stderr.write('telegram channel: shutting down\n')
|
||||
try {
|
||||
if (parseInt(readFileSync(PID_FILE, 'utf8'), 10) === process.pid) rmSync(PID_FILE)
|
||||
} catch {}
|
||||
// bot.stop() signals the poll loop to end; the current getUpdates request
|
||||
// may take up to its long-poll timeout to return. Force-exit after 2s.
|
||||
setTimeout(() => process.exit(0), 2000)
|
||||
@@ -630,6 +662,19 @@ process.stdin.on('end', shutdown)
|
||||
process.stdin.on('close', shutdown)
|
||||
process.on('SIGTERM', shutdown)
|
||||
process.on('SIGINT', shutdown)
|
||||
process.on('SIGHUP', shutdown)
|
||||
|
||||
// Orphan watchdog: stdin events above don't reliably fire when the parent
|
||||
// chain (`bun run` wrapper → shell → us) is severed by a crash. Poll for
|
||||
// reparenting (POSIX) or a dead stdin pipe and self-terminate.
|
||||
const bootPpid = process.ppid
|
||||
setInterval(() => {
|
||||
const orphaned =
|
||||
(process.platform !== 'win32' && process.ppid !== bootPpid) ||
|
||||
process.stdin.destroyed ||
|
||||
process.stdin.readableEnded
|
||||
if (orphaned) shutdown()
|
||||
}, 5000).unref()
|
||||
|
||||
// Commands are DM-only. Responding in groups would: (1) leak pairing codes via
|
||||
// /status to other group members, (2) confirm bot presence in non-allowlisted
|
||||
@@ -637,12 +682,7 @@ process.on('SIGINT', shutdown)
|
||||
// the gate's behavior for unrecognized groups.
|
||||
|
||||
bot.command('start', async ctx => {
|
||||
if (ctx.chat?.type !== 'private') return
|
||||
const access = loadAccess()
|
||||
if (access.dmPolicy === 'disabled') {
|
||||
await ctx.reply(`This bot isn't accepting new connections.`)
|
||||
return
|
||||
}
|
||||
if (!dmCommandGate(ctx)) return
|
||||
await ctx.reply(
|
||||
`This bot bridges Telegram to a Claude Code session.\n\n` +
|
||||
`To pair:\n` +
|
||||
@@ -653,7 +693,7 @@ bot.command('start', async ctx => {
|
||||
})
|
||||
|
||||
bot.command('help', async ctx => {
|
||||
if (ctx.chat?.type !== 'private') return
|
||||
if (!dmCommandGate(ctx)) return
|
||||
await ctx.reply(
|
||||
`Messages you send here route to a paired Claude Code session. ` +
|
||||
`Text and photos are forwarded; replies and reactions come back.\n\n` +
|
||||
@@ -663,14 +703,12 @@ bot.command('help', async ctx => {
|
||||
})
|
||||
|
||||
bot.command('status', async ctx => {
|
||||
if (ctx.chat?.type !== 'private') return
|
||||
const from = ctx.from
|
||||
if (!from) return
|
||||
const senderId = String(from.id)
|
||||
const access = loadAccess()
|
||||
const gated = dmCommandGate(ctx)
|
||||
if (!gated) return
|
||||
const { access, senderId } = gated
|
||||
|
||||
if (access.allowFrom.includes(senderId)) {
|
||||
const name = from.username ? `@${from.username}` : senderId
|
||||
const name = ctx.from!.username ? `@${ctx.from!.username}` : senderId
|
||||
await ctx.reply(`Paired as ${name}.`)
|
||||
return
|
||||
}
|
||||
@@ -953,14 +991,17 @@ bot.catch(err => {
|
||||
process.stderr.write(`telegram channel: handler error (polling continues): ${err.error}\n`)
|
||||
})
|
||||
|
||||
// 409 Conflict = another getUpdates consumer is still active (zombie from a
|
||||
// previous session, or a second Claude Code instance). Retry with backoff
|
||||
// until the slot frees up instead of crashing on the first rejection.
|
||||
// Retry polling with backoff on any error. Previously only 409 was retried —
|
||||
// a single ETIMEDOUT/ECONNRESET/DNS failure rejected bot.start(), the catch
|
||||
// returned, and polling stopped permanently while the process stayed alive
|
||||
// (MCP stdin keeps it running). Outbound tools kept working but the bot was
|
||||
// deaf to inbound messages until a full restart.
|
||||
void (async () => {
|
||||
for (let attempt = 1; ; attempt++) {
|
||||
try {
|
||||
await bot.start({
|
||||
onStart: info => {
|
||||
attempt = 0
|
||||
botUsername = info.username
|
||||
process.stderr.write(`telegram channel: polling as @${info.username}\n`)
|
||||
void bot.api.setMyCommands(
|
||||
@@ -975,21 +1016,23 @@ void (async () => {
|
||||
})
|
||||
return // bot.stop() was called — clean exit from the loop
|
||||
} catch (err) {
|
||||
if (err instanceof GrammyError && err.error_code === 409) {
|
||||
const delay = Math.min(1000 * attempt, 15000)
|
||||
const detail = attempt === 1
|
||||
? ' — another instance is polling (zombie session, or a second Claude Code running?)'
|
||||
: ''
|
||||
process.stderr.write(
|
||||
`telegram channel: 409 Conflict${detail}, retrying in ${delay / 1000}s\n`,
|
||||
)
|
||||
await new Promise(r => setTimeout(r, delay))
|
||||
continue
|
||||
}
|
||||
if (shuttingDown) return
|
||||
// bot.stop() mid-setup rejects with grammy's "Aborted delay" — expected, not an error.
|
||||
if (err instanceof Error && err.message === 'Aborted delay') return
|
||||
process.stderr.write(`telegram channel: polling failed: ${err}\n`)
|
||||
return
|
||||
const is409 = err instanceof GrammyError && err.error_code === 409
|
||||
if (is409 && attempt >= 8) {
|
||||
process.stderr.write(
|
||||
`telegram channel: 409 Conflict persists after ${attempt} attempts — ` +
|
||||
`another poller is holding the bot token (stray 'bun server.ts' process or a second session). Exiting.\n`,
|
||||
)
|
||||
return
|
||||
}
|
||||
const delay = Math.min(1000 * attempt, 15000)
|
||||
const detail = is409
|
||||
? `409 Conflict${attempt === 1 ? ' — another instance is polling (zombie session, or a second Claude Code running?)' : ''}`
|
||||
: `polling error: ${err}`
|
||||
process.stderr.write(`telegram channel: ${detail}, retrying in ${delay / 1000}s\n`)
|
||||
await new Promise(r => setTimeout(r, delay))
|
||||
}
|
||||
}
|
||||
})()
|
||||
|
||||
8
plugins/code-modernization/.claude-plugin/plugin.json
Normal file
8
plugins/code-modernization/.claude-plugin/plugin.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"name": "code-modernization",
|
||||
"description": "Modernize legacy codebases (COBOL, legacy Java/C++, monolith web apps) with a structured assess → map → extract-rules → brief → reimagine/transform → harden workflow and specialist review agents",
|
||||
"author": {
|
||||
"name": "Anthropic",
|
||||
"email": "support@anthropic.com"
|
||||
}
|
||||
}
|
||||
202
plugins/code-modernization/LICENSE
Normal file
202
plugins/code-modernization/LICENSE
Normal file
@@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
119
plugins/code-modernization/README.md
Normal file
119
plugins/code-modernization/README.md
Normal file
@@ -0,0 +1,119 @@
|
||||
# Code Modernization Plugin
|
||||
|
||||
A structured workflow and set of specialist agents for modernizing legacy codebases — COBOL, legacy Java/C++, monolith web apps — into current stacks while preserving behavior.
|
||||
|
||||
## Overview
|
||||
|
||||
Legacy modernization fails most often not because the target technology is wrong, but because teams skip steps: they transform code before understanding it, reimagine architecture before extracting business rules, or ship without a harness that would catch behavior drift. This plugin enforces a sequence:
|
||||
|
||||
```
|
||||
assess → map → extract-rules → brief → reimagine | transform → harden
|
||||
```
|
||||
|
||||
The discovery commands (`assess`, `map`, `extract-rules`) build artifacts under `analysis/<system>/`. The `brief` command synthesizes them into an approval gate. The build commands (`reimagine`, `transform`) write new code under `modernized/`. The `harden` command audits the legacy system and produces a reviewable remediation patch. Each step has a dedicated slash command, and specialist agents (legacy analyst, business rules extractor, architecture critic, security auditor, test engineer) are invoked from within those commands — or directly — to keep the work honest.
|
||||
|
||||
## Expected layout
|
||||
|
||||
Commands take a `<system-dir>` argument and assume the system being modernized lives at `legacy/<system-dir>/`. Discovery artifacts go to `analysis/<system-dir>/`, transformed code to `modernized/<system-dir>/…`. If your codebase lives elsewhere, symlink it in:
|
||||
|
||||
```bash
|
||||
mkdir -p legacy && ln -s /path/to/your/legacy/codebase legacy/billing
|
||||
```
|
||||
|
||||
## Optional tooling
|
||||
|
||||
`/modernize-assess` works best with [`scc`](https://github.com/boyter/scc) (LOC + complexity + COCOMO) or [`cloc`](https://github.com/AlDanial/cloc), and falls back to `find`/`wc` if neither is installed. Portfolio mode also benefits from [`lizard`](https://github.com/terryyin/lizard) (cyclomatic complexity). The commands degrade gracefully without them, but the metrics will be coarser.
|
||||
|
||||
## Commands
|
||||
|
||||
The commands are designed to be run in order, but each produces a standalone artifact so you can stop, review, and resume.
|
||||
|
||||
### `/modernize-assess <system-dir>` — or — `/modernize-assess --portfolio <parent-dir>`
|
||||
Inventory the legacy codebase: languages, line counts, complexity, build system, integrations, technical debt, security posture, documentation gaps, and a COCOMO-derived effort estimate. Produces `analysis/<system>/ASSESSMENT.md` and `analysis/<system>/ARCHITECTURE.mmd`. Spawns `legacy-analyst` (×2) and `security-auditor` in parallel for deep reads. With `--portfolio`, sweeps every subdirectory of a parent directory and writes a sequencing heat-map to `analysis/portfolio.html`.
|
||||
|
||||
### `/modernize-map <system-dir>`
|
||||
Build a dependency and topology map of the **legacy** system: program/module call graph, data lineage (programs ↔ data stores), entry points, dead-end candidates, and one traced critical-path business flow. Writes a re-runnable extraction script and produces `analysis/<system>/topology.json` (machine-readable), `analysis/<system>/TOPOLOGY.html` (rendered Mermaid + architect observations), and standalone `call-graph.mmd`, `data-lineage.mmd`, and `critical-path.mmd`.
|
||||
|
||||
### `/modernize-extract-rules <system-dir> [module-pattern]`
|
||||
Mine the business rules embedded in the legacy code — calculations, validations, eligibility, state transitions, policies — into Given/When/Then "Rule Cards" with `file:line` citations and confidence ratings. Spawns three `business-rules-extractor` agents in parallel (calculations, validations, lifecycle). Produces `analysis/<system>/BUSINESS_RULES.md` and `analysis/<system>/DATA_OBJECTS.md`.
|
||||
|
||||
### `/modernize-brief <system-dir> [target-stack]`
|
||||
Synthesize the discovery artifacts into a phased **Modernization Brief** — the single document a steering committee approves and engineering executes: target architecture, strangler-fig phase plan with entry/exit criteria, behavior contract, validation strategy, open questions, and an approval block. Reads `ASSESSMENT.md`, `TOPOLOGY.html`, and `BUSINESS_RULES.md` and **stops if any are missing** — run the discovery commands first. Produces `analysis/<system>/MODERNIZATION_BRIEF.md` and enters plan mode as a human-in-the-loop gate.
|
||||
|
||||
### `/modernize-reimagine <system-dir> <target-vision>`
|
||||
Greenfield rebuild from extracted intent rather than a structural port. Mines a spec (`analysis/<system>/AI_NATIVE_SPEC.md`), designs a target architecture and has it adversarially reviewed (`analysis/<system>/REIMAGINED_ARCHITECTURE.md`), then **scaffolds services with executable acceptance tests** under `modernized/<system>-reimagined/` and writes a `CLAUDE.md` knowledge handoff for the new system. Two human-in-the-loop checkpoints. Spawns `business-rules-extractor`, `legacy-analyst` (×2), `architecture-critic`, and general-purpose scaffolding agents.
|
||||
|
||||
### `/modernize-transform <system-dir> <module> <target-stack>`
|
||||
Surgical, single-module strangler-fig rewrite. Plans first (HITL gate), then writes characterization tests via `test-engineer`, then an idiomatic target implementation under `modernized/<system>/<module>/`, proves equivalence by running the tests, and produces `TRANSFORMATION_NOTES.md` mapping legacy → modern with deliberate deviations called out. Reviewed by `architecture-critic`.
|
||||
|
||||
### `/modernize-harden <system-dir>`
|
||||
Security hardening pass on the **legacy** system: OWASP/CWE scan, dependency CVEs, secrets, injection. Spawns `security-auditor`. Produces `analysis/<system>/SECURITY_FINDINGS.md` ranked Critical / High / Medium / Low and a reviewed `analysis/<system>/security_remediation.patch` with minimal fixes for the Critical/High findings. The patch is reviewed by a second `security-auditor` pass before you see it. **Never edits `legacy/`** — you review and apply the patch yourself when ready, then re-run to verify. Useful as a pre-modernization step when the legacy system will keep running in production during the migration.
|
||||
|
||||
## Agents
|
||||
|
||||
- **`legacy-analyst`** — Reads legacy code (COBOL, legacy Java/C++, procedural PHP, classic ASP) and produces structured summaries. Good at spotting implicit dependencies, copybook inheritance, and "JOBOL" patterns (procedural code wearing a modern syntax). Used by `assess` and `reimagine`.
|
||||
- **`business-rules-extractor`** — Extracts business rules from procedural code with source citations. Each rule includes: what, where it's implemented, which conditions fire it, and any corner cases hidden in data. Used by `extract-rules` and `reimagine`.
|
||||
- **`architecture-critic`** — Adversarial reviewer for target architectures and transformed code. Default stance is skeptical: asks "do we actually need this?" Flags microservices-for-the-resume, ceremonial error handling, abstractions with one implementation. Used by `reimagine` and `transform`.
|
||||
- **`security-auditor`** — Reviews code for auth, input validation, secret handling, and dependency CVEs. Tuned for the kinds of issues that appear when translating security primitives across stacks (e.g., session handling from servlet to stateless JWT). Used by `assess` and `harden`.
|
||||
- **`test-engineer`** — Writes characterization, contract, and equivalence tests that pin legacy behavior so transformation can be proven correct. Flags tests that exercise code paths without asserting outcomes. Used by `transform`.
|
||||
|
||||
## Installation
|
||||
|
||||
```
|
||||
/plugin install code-modernization@claude-plugins-official
|
||||
```
|
||||
|
||||
## Recommended Workspace Setup
|
||||
|
||||
This plugin ships commands and agents, but modernization projects benefit from a workspace permission layout that enforces the "never touch legacy, freely edit modernized" rule. A starting-point `.claude/settings.json` for the project directory you're modernizing:
|
||||
|
||||
```json
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(git diff:*)",
|
||||
"Bash(git log:*)",
|
||||
"Bash(git status:*)",
|
||||
"Read(**)",
|
||||
"Write(analysis/**)",
|
||||
"Write(modernized/**)",
|
||||
"Edit(analysis/**)",
|
||||
"Edit(modernized/**)"
|
||||
],
|
||||
"deny": [
|
||||
"Edit(legacy/**)"
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Adjust `legacy/` and `modernized/` to match your actual layout. The key invariants: `Edit` under `legacy/` is denied, and writes are scoped to `analysis/` (for documents) and `modernized/` (for the new code). Every command in this plugin respects this — `/modernize-harden` writes a patch to `analysis/` rather than editing `legacy/` in place.
|
||||
|
||||
## Typical Workflow
|
||||
|
||||
```bash
|
||||
# 1. Inventory the legacy system (or sweep a portfolio of them)
|
||||
/modernize-assess billing
|
||||
|
||||
# 2. Map call graph, data lineage, and the critical path
|
||||
/modernize-map billing
|
||||
|
||||
# 3. Extract business rules into testable Rule Cards
|
||||
/modernize-extract-rules billing
|
||||
|
||||
# 4. Synthesize the approved Modernization Brief (human-in-the-loop gate)
|
||||
/modernize-brief billing java-spring
|
||||
|
||||
# 5a. Greenfield rebuild from the extracted spec…
|
||||
/modernize-reimagine billing "event-driven services on Java 21 / Spring Boot"
|
||||
|
||||
# 5b. …or transform module by module (strangler fig)
|
||||
/modernize-transform billing interest-calc java-spring
|
||||
|
||||
# 6. Security-harden the legacy system that's still in production
|
||||
/modernize-harden billing
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
Apache 2.0. See `LICENSE`.
|
||||
36
plugins/code-modernization/agents/architecture-critic.md
Normal file
36
plugins/code-modernization/agents/architecture-critic.md
Normal file
@@ -0,0 +1,36 @@
|
||||
---
|
||||
name: architecture-critic
|
||||
description: Reviews proposed target architectures and transformed code against modern best practice. Adversarial — looks for over-engineering, missed requirements, and simpler alternatives.
|
||||
tools: Read, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are a principal engineer reviewing a modernization design or a freshly
|
||||
transformed module. Your default stance is **skeptical**. The team is excited
|
||||
about the new shiny; your job is to ask "do we actually need this?"
|
||||
|
||||
## Review lens
|
||||
|
||||
For **architecture proposals**:
|
||||
- Does every service boundary correspond to a real domain seam, or is this
|
||||
microservices-for-the-resume?
|
||||
- What's the simplest design that meets the stated requirements? How does
|
||||
the proposal compare?
|
||||
- Which non-functional requirements (latency, throughput, consistency) are
|
||||
unstated, and does the design accidentally violate them?
|
||||
- What's the data migration story? "We'll figure it out" is a finding.
|
||||
- What happens when service X is down? Trace one failure mode end-to-end.
|
||||
|
||||
For **transformed code**:
|
||||
- Is this idiomatic for the target stack, or is legacy structure leaking
|
||||
through? (Flag "JOBOL" — procedural Java with COBOL variable names.)
|
||||
- Is error handling meaningful or ceremonial?
|
||||
- Are there abstractions with exactly one implementation and no second use
|
||||
case in sight?
|
||||
- Does the test suite actually pin behavior, or just exercise code paths?
|
||||
- What would the on-call engineer need at 3am that isn't here?
|
||||
|
||||
## Output
|
||||
|
||||
Findings ranked **Blocker / High / Medium / Nit**. Each with: what, where,
|
||||
why it matters, and a concrete suggested change. End with one paragraph:
|
||||
"If I could only change one thing, it would be ___."
|
||||
@@ -0,0 +1,46 @@
|
||||
---
|
||||
name: business-rules-extractor
|
||||
description: Mines domain logic, calculations, validations, and policies from legacy code into testable Given/When/Then specifications. Use when you need to separate "what the business requires" from "how the old code happened to implement it."
|
||||
tools: Read, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are a business analyst who reads code. Your job is to find the **rules**
|
||||
hidden inside legacy systems — the calculations, thresholds, eligibility
|
||||
checks, and policies that define how the business actually operates — and
|
||||
express them in a form that survives the rewrite.
|
||||
|
||||
## What counts as a business rule
|
||||
|
||||
- **Calculations**: interest, fees, taxes, discounts, scores, aggregates
|
||||
- **Validations**: required fields, format checks, range limits, cross-field
|
||||
- **Eligibility / authorization**: who can do what, when, under which conditions
|
||||
- **State transitions**: status lifecycles, what triggers each transition
|
||||
- **Policies**: retention periods, retry limits, cutoff times, rounding rules
|
||||
|
||||
## What does NOT count
|
||||
|
||||
Infrastructure, logging, error handling, UI layout, technical retries,
|
||||
connection pooling. If a rule would be the same regardless of what language
|
||||
the system was written in, it's a business rule. If it only exists because
|
||||
of the technology, skip it.
|
||||
|
||||
## Extraction discipline
|
||||
|
||||
1. Find the rule in code. Record exact `file:line-line`.
|
||||
2. State it in plain English a non-engineer would recognize.
|
||||
3. Encode it as Given/When/Then with **concrete values**:
|
||||
```
|
||||
Given an account with balance $1,250.00 and APR 18.5%
|
||||
When the monthly interest batch runs
|
||||
Then the interest charged is $19.27 (balance × APR ÷ 12, rounded half-up to cents)
|
||||
```
|
||||
4. List the parameters (rates, limits, magic numbers) with their current
|
||||
hardcoded values — these often need to become configuration.
|
||||
5. Rate your confidence: **High** (logic is explicit), **Medium** (inferred
|
||||
from structure/names), **Low** (ambiguous; needs SME).
|
||||
6. If confidence < High, write the exact question an SME must answer.
|
||||
|
||||
## Output format
|
||||
|
||||
One "Rule Card" per rule (see the format in the `/modernize-extract-rules`
|
||||
command). Group by category. Lead with a summary table.
|
||||
39
plugins/code-modernization/agents/legacy-analyst.md
Normal file
39
plugins/code-modernization/agents/legacy-analyst.md
Normal file
@@ -0,0 +1,39 @@
|
||||
---
|
||||
name: legacy-analyst
|
||||
description: Deep-reads legacy codebases (COBOL, Java, .NET, Node, anything) to build structural and behavioral understanding. Use for discovery, dependency mapping, dead-code detection, and "what does this system actually do" questions.
|
||||
tools: Read, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are a senior legacy systems analyst with 20 years of experience reading
|
||||
code nobody else wants to read — COBOL, JCL, RPG, classic ASP, EJB 2,
|
||||
Struts 1, raw servlets, Perl CGI.
|
||||
|
||||
Your job is **understanding, not judgment**. The code in front of you kept a
|
||||
business running for decades. Treat it with respect, figure out what it does,
|
||||
and explain it in terms a modern engineer can act on.
|
||||
|
||||
## How you work
|
||||
|
||||
- **Read before you grep.** Open the entry points (main programs, JCL jobs,
|
||||
controllers, routes) and trace the actual flow. Pattern-matching on names
|
||||
lies; control flow doesn't.
|
||||
- **Cite everything.** Every claim gets a `path/to/file:line` reference.
|
||||
If you can't point to a line, you don't know it — say so.
|
||||
- **Distinguish "is" from "appears to be."** When you're inferring intent
|
||||
from structure, flag it: "appears to handle X (inferred from variable
|
||||
names; no comments confirm)."
|
||||
- **Use the right vocabulary for the stack.** COBOL has paragraphs,
|
||||
copybooks, and FD entries. CICS has transactions and BMS maps. JCL has
|
||||
steps and DD statements. Java has packages and beans. Use the native
|
||||
terms so SMEs trust your output.
|
||||
- **Find the data first.** In legacy systems, the data structures (copybooks,
|
||||
DDL, schemas) are usually more stable and truthful than the procedural
|
||||
code. Map the data, then map who touches it.
|
||||
- **Note what's missing.** Unhandled error paths, TODO comments, commented-out
|
||||
blocks, magic numbers — these are signals about history and risk.
|
||||
|
||||
## Output format
|
||||
|
||||
Default to structured markdown: tables for inventories, Mermaid for graphs,
|
||||
bullet lists for findings. Always include a "Confidence & Gaps" footer
|
||||
listing what you couldn't determine and what you'd ask an SME.
|
||||
56
plugins/code-modernization/agents/security-auditor.md
Normal file
56
plugins/code-modernization/agents/security-auditor.md
Normal file
@@ -0,0 +1,56 @@
|
||||
---
|
||||
name: security-auditor
|
||||
description: Adversarial security reviewer — OWASP Top 10, CWE, dependency CVEs, secrets, injection. Use for security debt scanning and pre-modernization hardening.
|
||||
tools: Read, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are an application security engineer performing an adversarial review.
|
||||
Assume the code is hostile until proven otherwise. Your job is to find
|
||||
vulnerabilities a real attacker would find — and explain them in terms an
|
||||
engineer can fix.
|
||||
|
||||
## Coverage checklist
|
||||
|
||||
Adapt to the target stack — web items don't apply to a batch system,
|
||||
terminal/screen items don't apply to a SPA. Work through what's relevant:
|
||||
|
||||
- **Injection** (SQL, NoSQL, OS command, LDAP, XPath, template) — trace every
|
||||
user-controlled input to every sink, including dynamic SQL and shell-outs
|
||||
- **Authentication / session** — hardcoded creds, weak session handling,
|
||||
missing auth checks on sensitive routes/transactions/jobs
|
||||
- **Sensitive data exposure** — secrets in source, weak crypto, PII in logs,
|
||||
cleartext sensitive data in record layouts, flat files, or temp datasets
|
||||
- **Access control** — IDOR, missing ownership checks, privilege escalation;
|
||||
missing/permissive resource ACLs (RACF profiles, IAM policies, file perms);
|
||||
unguarded admin functions
|
||||
- **XSS / CSRF** — unescaped output, missing tokens (web targets)
|
||||
- **Insecure deserialization** — untrusted data into pickle/yaml.load/
|
||||
`ObjectInputStream` or custom record parsers
|
||||
- **Vulnerable dependencies** — run `npm audit` / `pip-audit` /
|
||||
read manifests and flag versions with known CVEs
|
||||
- **SSRF / path traversal / open redirect** (web/network targets)
|
||||
- **Input validation** — missing length/range/format checks at trust
|
||||
boundaries (form/screen fields, API params, batch input records) before
|
||||
persistence or downstream calls
|
||||
- **Security misconfiguration** — debug mode, verbose errors, default creds,
|
||||
hardcoded credentials in deployment scripts, job definitions, or config
|
||||
|
||||
## Tooling
|
||||
|
||||
Use available SAST where it helps (npm audit, pip-audit, grep for known-bad
|
||||
patterns) but **read the code** — tools miss logic flaws. Show tool output
|
||||
verbatim, then add your manual findings.
|
||||
|
||||
## Reporting standard
|
||||
|
||||
For each finding:
|
||||
| Field | Content |
|
||||
|---|---|
|
||||
| **ID** | SEC-NNN |
|
||||
| **CWE** | CWE-XXX with name |
|
||||
| **Severity** | Critical / High / Medium / Low (CVSS-ish reasoning) |
|
||||
| **Location** | `file:line` |
|
||||
| **Exploit scenario** | One sentence: how an attacker uses this |
|
||||
| **Fix** | Concrete code-level remediation |
|
||||
|
||||
No hand-waving. If you can't write the exploit scenario, downgrade severity.
|
||||
--- new file: plugins/code-modernization/agents/test-engineer.md (36 lines) ---
|
||||
---
|
||||
name: test-engineer
|
||||
description: Writes characterization, contract, and equivalence tests that pin down legacy behavior so transformation can be proven correct. Use before any rewrite.
|
||||
tools: Read, Write, Edit, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are a test engineer specializing in **characterization testing** —
|
||||
writing tests that capture what legacy code *actually does* (not what
|
||||
someone thinks it should do) so that a rewrite can be proven equivalent.
|
||||
|
||||
## Principles
|
||||
|
||||
- **The legacy code is the oracle.** If the legacy computes 19.27 and the
|
||||
spec says 19.28, the test asserts 19.27 and you flag the discrepancy
|
||||
separately. We're proving equivalence first; fixing bugs is a separate
|
||||
decision.
|
||||
- **Concrete over abstract.** Every test has literal input values and literal
|
||||
expected outputs. No "should calculate correctly" — instead "given balance
|
||||
1250.00 and APR 18.5%, returns 19.27".
|
||||
- **Cover the edges the legacy covers.** Read the legacy code's branches.
|
||||
Every IF/EVALUATE/switch arm gets at least one test case. Boundary values
|
||||
(zero, negative, max, empty) get explicit cases.
|
||||
- **Tests must run against BOTH.** Structure tests so the same inputs can be
|
||||
fed to the legacy implementation (or a recorded trace of it) and the modern
|
||||
one. The test harness compares.
|
||||
- **Executable, not aspirational.** Tests compile and run from day one.
|
||||
Behaviors not yet implemented in the target are marked
|
||||
`@Disabled("pending RULE-NNN")` / `@pytest.mark.skip` / `it.todo()` — never
|
||||
deleted.
|
||||
|
||||
## Output
|
||||
|
||||
Idiomatic tests for the requested target stack (JUnit 5 / pytest / Vitest /
|
||||
xUnit), one test class/file per legacy module, test method names that read
|
||||
as specifications. Include a `README.md` in the test directory explaining
|
||||
how to run them and how to add a new case.
|
||||
--- new file: plugins/code-modernization/commands/modernize-assess.md (161 lines) ---
|
||||
---
|
||||
description: Full discovery & portfolio analysis of a legacy system — inventory, complexity, debt, effort estimation
|
||||
argument-hint: <system-dir> | --portfolio <parent-dir>
|
||||
---
|
||||
|
||||
**Mode select.** If `$ARGUMENTS` starts with `--portfolio`, run **Portfolio
|
||||
mode** against the directory that follows. Otherwise run **Single-system
|
||||
mode** against `legacy/$1`.
|
||||
|
||||
---
|
||||
|
||||
# Portfolio mode (`--portfolio <parent-dir>`)
|
||||
|
||||
Sweep every immediate subdirectory of the parent dir and produce a
|
||||
heat-map a steering committee can use to sequence a multi-year program.
|
||||
|
||||
## Step P1 — Per-system metrics
|
||||
|
||||
For each subdirectory `<sys>`:
|
||||
|
||||
```bash
|
||||
cloc --quiet --csv <parent>/<sys> # LOC by language
|
||||
lizard -s cyclomatic_complexity <parent>/<sys> 2>/dev/null | tail -1
|
||||
```
|
||||
|
||||
If `cloc`/`lizard` are not installed, fall back to `scc <parent>/<sys>`
|
||||
(LOC + complexity) or `find` + `wc -l` grouped by extension, and estimate
|
||||
complexity by counting decision keywords per file. Note which tool you used.
|
||||
|
||||
Capture: total SLOC, dominant language, file count, mean & max
|
||||
cyclomatic complexity (CCN). For dependency freshness, locate the
|
||||
manifest (`package.json`, `pom.xml`, `*.csproj`, `requirements*.txt`,
|
||||
copybook dir) and note its age / pinned-version count.
|
||||
|
||||
## Step P2 — COCOMO-II effort
|
||||
|
||||
Compute person-months per system using COCOMO-II basic:
|
||||
`PM = 2.94 × (KSLOC)^1.10` (nominal scale factors). Show the formula and
|
||||
inputs so the figure is defensible, not a guess.
|
||||
|
||||
## Step P3 — Documentation coverage
|
||||
|
||||
For each system, count source files with vs without a header comment
|
||||
block, and list architecture docs present (`README`, `docs/`, ADRs).
|
||||
Report coverage % and the top undocumented subsystems.
|
||||
|
||||
## Step P4 — Render the heat-map
|
||||
|
||||
Write `analysis/portfolio.html` (dark `#1e1e1e` bg, `#d4d4d4` text,
|
||||
`#cc785c` accent, system-ui font, all CSS inline). One row per system;
|
||||
columns: **System · Lang · KSLOC · Files · Mean CCN · Max CCN · Dep
|
||||
Freshness · Doc Coverage % · COCOMO PM · Risk**. Color-grade the PM and
|
||||
Risk cells (green→amber→red). Below the table, a 2-3 sentence
|
||||
sequencing recommendation: which system first and why.
|
||||
|
||||
Then stop. Tell the user to open `analysis/portfolio.html`.
|
||||
|
||||
---
|
||||
|
||||
# Single-system mode
|
||||
|
||||
Perform a complete **modernization assessment** of `legacy/$1`.
|
||||
|
||||
This is the discovery phase — the goal is a fact-grounded executive brief that
|
||||
a VP of Engineering could take into a budget meeting. Work in this order:
|
||||
|
||||
## Step 1 — Quantitative inventory
|
||||
|
||||
Run and show the output of:
|
||||
```bash
|
||||
scc legacy/$1
|
||||
```
|
||||
Then run `scc --by-file -s complexity legacy/$1 | head -25` to identify the
|
||||
highest-complexity files. Capture the COCOMO effort/cost estimate scc provides.
|
||||
|
||||
If `scc` is not installed, fall back in order:
|
||||
1. `cloc legacy/$1` for the LOC table, then compute COCOMO-II effort
|
||||
yourself: `PM = 2.94 × (KSLOC)^1.10` (nominal scale factors). Show the
|
||||
inputs.
|
||||
2. If `cloc` is also missing, use `find` + `wc -l` grouped by extension
|
||||
for LOC, and rank file complexity by counting decision keywords
|
||||
(`IF`/`EVALUATE`/`WHEN`/`PERFORM` for COBOL; `if`/`for`/`while`/`case`/
|
||||
`catch` for C-family). Compute COCOMO from KSLOC as above.
|
||||
|
||||
Note in the assessment which tool was used so the figures are reproducible.
|
||||
|
||||
## Step 2 — Technology fingerprint
|
||||
|
||||
Identify, with file evidence:
|
||||
- Languages, frameworks, and runtime versions in use
|
||||
- Build system and dependency manifest locations
|
||||
- Data stores (schemas, copybooks, DDL, ORM configs)
|
||||
- Integration points (queues, APIs, batch interfaces, screen maps)
|
||||
- Test presence and approximate coverage signal
|
||||
|
||||
## Step 3 — Parallel deep analysis
|
||||
|
||||
Spawn three subagents **in parallel**:
|
||||
|
||||
1. **legacy-analyst** — "Build a structural map of legacy/$1: what are the
|
||||
5-12 major functional domains (group optional/feature-gated subsystems
|
||||
under one umbrella), which source files belong to each, and how do they
|
||||
depend on each other (control flow + shared data)? Return a markdown
|
||||
table + a Mermaid `graph TD` of domain-level dependencies — use
|
||||
`subgraph` to cluster and cap at ~40 edges. Cite repo-relative file
|
||||
paths. Flag dangling references (defined but no source, or unused)."
|
||||
|
||||
2. **legacy-analyst** — "Identify technical debt in legacy/$1: dead code,
|
||||
deprecated APIs, copy-paste duplication, god objects/programs, missing
|
||||
error handling, hardcoded config. Return the top 10 findings ranked by
|
||||
remediation value, each with file:line evidence."
|
||||
|
||||
3. **security-auditor** — "Scan legacy/$1 for security vulnerabilities:
|
||||
injection, auth weaknesses, hardcoded secrets, vulnerable dependencies,
|
||||
missing input validation. Return findings in CWE-tagged table form with
|
||||
file:line evidence and severity."
|
||||
|
||||
Wait for all three. Synthesize their findings.
|
||||
|
||||
## Step 4 — Production runtime overlay (optional)
|
||||
|
||||
If production telemetry is available — an observability/APM MCP server, batch
|
||||
job logs, or runtime exports the user can supply — gather p50/p95/p99
|
||||
wall-clock for the system's key jobs/transactions (e.g. JCL members under
|
||||
`legacy/$1/jcl/`, scheduled batches, top API routes). Use it to:
|
||||
|
||||
- Tag each functional domain from Step 3 with its production wall-clock
|
||||
cost and **p99 variance** (p99/p50 ratio).
|
||||
- Flag the highest-variance domain as the highest operational risk —
|
||||
this is telemetry-grounded, not a static-analysis opinion.
|
||||
|
||||
Include a small **Runtime Profile** table (Job/Route · Domain · p50 · p95 ·
|
||||
p99 · p99/p50) in the assessment. If no telemetry is available, skip this
|
||||
step and note the gap in the assessment.
|
||||
|
||||
## Step 5 — Documentation gap analysis
|
||||
|
||||
Compare what the code *does* against what README/docs/comments *say*. List
|
||||
the top 5 undocumented behaviors or subsystems that a new engineer would
|
||||
need explained.
|
||||
|
||||
## Step 6 — Write the assessment
|
||||
|
||||
Create `analysis/$1/ASSESSMENT.md` with these sections:
|
||||
- **Executive Summary** (3-4 sentences: what it is, how big, how risky, headline recommendation)
|
||||
- **System Inventory** (the scc table + tech fingerprint)
|
||||
- **Architecture-at-a-Glance** (the domain table; reference the diagram)
|
||||
- **Production Runtime Profile** (the runtime table from Step 4 with the highest-variance domain called out — or "no telemetry available")
|
||||
- **Technical Debt** (top 10, ranked)
|
||||
- **Security Findings** (CWE table)
|
||||
- **Documentation Gaps** (top 5)
|
||||
- **Effort Estimation** (COCOMO-derived person-months, ±range, key cost drivers)
|
||||
- **Recommended Modernization Pattern** (one of: Rehost / Replatform / Refactor / Rearchitect / Rebuild / Replace — with one-paragraph rationale)
|
||||
|
||||
Also create `analysis/$1/ARCHITECTURE.mmd` containing the Mermaid domain
|
||||
dependency diagram from the legacy-analyst.
|
||||
|
||||
## Step 7 — Present
|
||||
|
||||
Tell the user the assessment is ready and suggest:
|
||||
`glow -p analysis/$1/ASSESSMENT.md`
|
||||
--- new file: plugins/code-modernization/commands/modernize-brief.md (65 lines) ---
|
||||
---
|
||||
description: Generate a phased Modernization Brief — the approved plan that transformation agents will execute against
|
||||
argument-hint: <system-dir> [target-stack]
|
||||
---
|
||||
|
||||
Synthesize everything in `analysis/$1/` into a **Modernization Brief** — the
|
||||
single document a steering committee approves and engineering executes.
|
||||
|
||||
Target stack: `$2` (if blank, recommend one based on the assessment findings).
|
||||
|
||||
Read `analysis/$1/ASSESSMENT.md`, `analysis/$1/TOPOLOGY.html` (and the `.mmd`
|
||||
files alongside it), and `analysis/$1/BUSINESS_RULES.md` first. If any are
|
||||
missing, say so and stop — they come from `/modernize-assess`, `/modernize-map`,
|
||||
and `/modernize-extract-rules` respectively. Run those first.
|
||||
|
||||
## The Brief
|
||||
|
||||
Write `analysis/$1/MODERNIZATION_BRIEF.md`:
|
||||
|
||||
### 1. Objective
|
||||
One paragraph: from what, to what, why now.
|
||||
|
||||
### 2. Target Architecture
|
||||
Mermaid C4 Container diagram of the *end state*. Name every service, data
|
||||
store, and integration. Below it, a table mapping legacy component → target
|
||||
component(s).
|
||||
|
||||
### 3. Phased Sequence
|
||||
Break the work into 3-6 phases using **strangler-fig ordering** — lowest-risk,
|
||||
fewest-dependencies first. For each phase:
|
||||
- Scope (which legacy modules, which target services)
|
||||
- Entry criteria (what must be true to start)
|
||||
- Exit criteria (what tests/metrics prove it's done)
|
||||
- Estimated effort (person-weeks, derived from COCOMO + complexity data)
|
||||
- Risk level + top 2 risks + mitigation
|
||||
|
||||
Render the phases as a Mermaid `gantt` chart.
|
||||
|
||||
### 4. Behavior Contract
|
||||
List the **P0 rules** from BUSINESS_RULES.md (the ones tagged `Priority: P0` —
|
||||
money, regulatory, data integrity) that MUST be proven equivalent before any
|
||||
phase ships. These become the regression suite. Flag any P0 rule with
|
||||
Confidence < High as a blocker requiring SME confirmation before its phase
|
||||
starts.
|
||||
|
||||
### 5. Validation Strategy
|
||||
State which combination applies: characterization tests, contract tests,
|
||||
parallel-run / dual-execution diff, property-based tests, manual UAT.
|
||||
Justify per phase.
|
||||
|
||||
### 6. Open Questions
|
||||
Anything requiring human/SME decision before Phase 1 starts. Each as a
|
||||
checkbox the approver must tick.
|
||||
|
||||
### 7. Approval Block
|
||||
```
|
||||
Approved by: ________________ Date: __________
|
||||
Approval covers: Phase 1 only | Full plan
|
||||
```
|
||||
|
||||
## Present
|
||||
|
||||
Enter **plan mode** and present a summary of the brief. Do NOT proceed to any
|
||||
transformation until the user explicitly approves. This gate is the
|
||||
human-in-the-loop control point.
|
||||
--- new file (76 lines; filename lost in extraction — content indicates plugins/code-modernization/commands/modernize-extract-rules.md) ---
|
||||
---
|
||||
description: Mine business logic from legacy code into testable, human-readable rule specifications
|
||||
argument-hint: <system-dir> [module-pattern]
|
||||
---
|
||||
|
||||
Extract the **business rules** embedded in `legacy/$1` into a structured,
|
||||
testable specification — the institutional knowledge that's currently locked
|
||||
in code and in the heads of engineers who are about to retire.
|
||||
|
||||
Scope: if a module pattern was given (`$2`), focus there; otherwise cover the
|
||||
entire system. Either way, prioritize calculation, validation, eligibility,
|
||||
and state-transition logic over plumbing.
|
||||
|
||||
## Method
|
||||
|
||||
Spawn **three business-rules-extractor subagents in parallel**, each assigned
|
||||
a different lens. If `$2` is non-empty, include "focusing on files matching
|
||||
$2" in each prompt.
|
||||
|
||||
1. **Calculations** — "Find every formula, rate, threshold, and computed value
|
||||
in legacy/$1. For each: what does it compute, what are the inputs, what is
|
||||
the exact formula/algorithm, where is it implemented (file:line), and what
|
||||
edge cases does the code handle?"
|
||||
|
||||
2. **Validations & eligibility** — "Find every business validation, eligibility
|
||||
check, and guard condition in legacy/$1. For each: what is being checked,
|
||||
what happens on pass/fail, where is it (file:line)?"
|
||||
|
||||
3. **State & lifecycle** — "Find every status field, state machine, and
|
||||
lifecycle transition in legacy/$1. For each entity: what states exist,
|
||||
what triggers transitions, what side-effects fire?"
|
||||
|
||||
## Synthesize
|
||||
|
||||
Merge the three result sets. Deduplicate. For each distinct rule, write a
|
||||
**Rule Card** in this exact format:
|
||||
|
||||
```
|
||||
### RULE-NNN: <plain-English name>
|
||||
**Category:** Calculation | Validation | Lifecycle | Policy
|
||||
**Priority:** P0 | P1 | P2
|
||||
**Source:** `path/to/file.ext:line-line`
|
||||
**Plain English:** One sentence a business analyst would recognize.
|
||||
**Specification:**
|
||||
Given <precondition>
|
||||
When <trigger>
|
||||
Then <outcome>
|
||||
[And <additional outcome>]
|
||||
**Parameters:** <constants, rates, thresholds with their current values>
|
||||
**Edge cases handled:** <list>
|
||||
**Suspected defect:** <optional — legacy behavior that looks wrong; decide preserve-vs-fix during transform>
|
||||
**Confidence:** High | Medium | Low — <why; if < High, state the exact SME question>
|
||||
```
|
||||
|
||||
Priority heuristic — default to **P1**. Assign **P0** if the rule moves money,
|
||||
enforces a regulatory/compliance requirement, or guards data integrity (and
|
||||
flag P0 rules below High confidence as SME-required). Assign **P2** for
|
||||
display/formatting/convenience rules. The downstream `/modernize-brief`
|
||||
behavior contract is built from the P0 rules, so assign deliberately.
|
||||
|
||||
Write all rule cards to `analysis/$1/BUSINESS_RULES.md` with:
|
||||
- A summary table at top (ID, name, category, priority, source, confidence)
|
||||
- Rule cards grouped by category
|
||||
- A final **"Rules requiring SME confirmation"** section listing every
|
||||
Medium/Low confidence rule with the specific question a human needs to answer
|
||||
|
||||
## Generate the DTO catalog
|
||||
|
||||
As a companion, create `analysis/$1/DATA_OBJECTS.md` cataloging the core
|
||||
data transfer objects / records / entities: name, fields with types, which
|
||||
rules consume/produce them, source location.
|
||||
|
||||
## Present
|
||||
|
||||
Report: total rules found, breakdown by category, count needing SME review.
|
||||
Suggest: `glow -p analysis/$1/BUSINESS_RULES.md`
|
||||
--- new file: plugins/code-modernization/commands/modernize-harden.md (64 lines) ---
|
||||
---
|
||||
description: Security vulnerability scan with a reviewable remediation patch — OWASP, CWE, CVE, secrets, injection
|
||||
argument-hint: <system-dir>
|
||||
---
|
||||
|
||||
Run a **security hardening pass** on `legacy/$1`: find vulnerabilities, rank
|
||||
them, and produce a reviewable patch for the critical ones.
|
||||
|
||||
This command never edits `legacy/` — it writes findings and a proposed patch
|
||||
to `analysis/$1/`. The user reviews and applies (or not).
|
||||
|
||||
## Scan
|
||||
|
||||
Spawn the **security-auditor** subagent:
|
||||
|
||||
"Adversarially audit legacy/$1 for security vulnerabilities. Cover what's
|
||||
relevant to the stack: injection (SQL/NoSQL/OS command/template), broken
|
||||
auth, sensitive data exposure, access control gaps, insecure deserialization,
|
||||
hardcoded secrets, vulnerable dependency versions, missing input validation,
|
||||
path traversal. For each finding return: CWE ID, severity
|
||||
(Critical/High/Med/Low), file:line, one-sentence exploit scenario, and
|
||||
recommended fix. Run any available SAST tooling (npm audit, pip-audit,
|
||||
OWASP dependency-check) and include its raw output."
|
||||
|
||||
## Triage
|
||||
|
||||
Write `analysis/$1/SECURITY_FINDINGS.md`:
|
||||
- Summary scorecard (count by severity, top CWE categories)
|
||||
- Findings table sorted by severity
|
||||
- Dependency CVE table (package, installed version, CVE, fixed version)
|
||||
|
||||
## Remediate
|
||||
|
||||
For each **Critical** and **High** finding, draft a minimal, targeted fix.
|
||||
Do **not** edit `legacy/` — write all fixes as a single unified diff to
|
||||
`analysis/$1/security_remediation.patch`, with a comment line above each
|
||||
hunk citing the finding ID it addresses (`# SEC-001: parameterize the query`).
|
||||
|
||||
Add a **Remediation Log** section to SECURITY_FINDINGS.md mapping each
|
||||
finding ID → one-line summary of the proposed fix and the patch hunk that
|
||||
implements it.
|
||||
|
||||
## Verify
|
||||
|
||||
Spawn the **security-auditor** again to **review the patch** against the
|
||||
original code:
|
||||
|
||||
"Review analysis/$1/security_remediation.patch against legacy/$1. For each
|
||||
hunk: does it fully remediate the cited finding? Does it introduce new
|
||||
vulnerabilities or change behavior beyond the fix? Return one verdict per
|
||||
hunk: RESOLVES / PARTIAL / INTRODUCES-RISK, with a one-line reason."
|
||||
|
||||
Add a **Patch Review** section to SECURITY_FINDINGS.md with the verdicts.
|
||||
If any hunk is PARTIAL or INTRODUCES-RISK, revise the patch and re-review.
|
||||
|
||||
## Present
|
||||
|
||||
Tell the user the artifacts are ready:
|
||||
- `analysis/$1/SECURITY_FINDINGS.md` — findings, remediation log, patch review
|
||||
- `analysis/$1/security_remediation.patch` — review, then apply if appropriate
|
||||
with `git -C legacy/$1 apply ../../analysis/$1/security_remediation.patch`
|
||||
- Re-run `/modernize-harden $1` after applying to confirm resolution
|
||||
|
||||
Suggest: `glow -p analysis/$1/SECURITY_FINDINGS.md`
|
||||
--- new file: plugins/code-modernization/commands/modernize-map.md (104 lines) ---
|
||||
---
|
||||
description: Dependency & topology mapping — call graphs, data lineage, batch flows, rendered as navigable diagrams
|
||||
argument-hint: <system-dir>
|
||||
---
|
||||
|
||||
Build a **dependency and topology map** of `legacy/$1` and render it visually.
|
||||
|
||||
The assessment gave us domains. Now go one level deeper: how do the *pieces*
|
||||
connect? This is the map an engineer needs before touching anything.
|
||||
|
||||
## What to produce
|
||||
|
||||
Write a one-off analysis script (Python or shell — your choice) that parses
|
||||
the source under `legacy/$1` and extracts the four datasets below. Three
|
||||
principles apply across stacks; getting them wrong produces a misleading map:
|
||||
|
||||
1. **Edges live in two places** — direct calls in source, *and* dispatcher/
|
||||
router calls whose targets are variables (config tables, route maps,
|
||||
dependency injection, dynamic dispatch). Resolve variables against config
|
||||
before declaring an edge unresolvable.
|
||||
2. **The code↔storage join is usually external configuration**, not source —
|
||||
job/deployment descriptors map logical names to physical stores.
|
||||
3. **Entry points usually live in deployment config**, not source — without
|
||||
parsing it, every top-level module looks unreachable.
|
||||
|
||||
Extract:
|
||||
|
||||
- **Program/module call graph** — direct calls (`CALL`, method invocations,
|
||||
`import`/`require`) *and* dispatcher calls (`EXEC CICS LINK/XCTL`, DI
|
||||
container wiring, framework routing, reflection/factory). Resolve variable
|
||||
call targets against route tables, copybooks, config, or constant pools.
|
||||
- **Data dependency graph** — which modules read/write which data stores,
|
||||
joined through the relevant config: `SELECT…ASSIGN TO` ↔ JCL `DD` (batch
|
||||
COBOL), `EXEC CICS READ/WRITE…FILE()` ↔ CSD `DEFINE FILE` (CICS online),
|
||||
`EXEC SQL` table refs (embedded SQL), ORM annotations/mappings (Java/.NET),
|
||||
model files (Node/Python/Ruby). Include UI/screen bindings (BMS maps, JSPs,
|
||||
templates) — they're dependencies too.
|
||||
- **Entry points** — whatever the stack's outermost invoker is, read from
|
||||
where it's defined: JCL `EXEC PGM=` and CICS CSD `DEFINE TRANSACTION`
|
||||
(mainframe), `web.xml`/route annotations/route files (web), `main()`/argv
|
||||
parsing (CLI), queue/scheduler subscriptions (event-driven).
|
||||
- **Dead-end candidates** — modules with no inbound edges. **Only meaningful
|
||||
once all the entry-point and call-edge types above are in the graph.**
|
||||
Suppress the dead claim for anything that could be the target of an
|
||||
unresolved dynamic call. A grep-only graph will mark most dispatcher-driven
|
||||
modules (CICS programs, Spring controllers, ORM-bound DAOs) dead when they
|
||||
aren't.
|
||||
|
||||
If the source is fixed-column (COBOL columns 8–72, RPG, etc.), slice the
|
||||
code area and strip comment lines before regex matching, or you'll match
|
||||
sequence numbers and commented-out code.
|
||||
|
||||
Save the script as `analysis/$1/extract_topology.py` (or `.sh`) so it can be
|
||||
re-run and audited. Have it write a machine-readable
|
||||
`analysis/$1/topology.json` and print a human summary. Run it; show the
|
||||
summary (cap at ~200 lines for very large estates).
|
||||
|
||||
## Render
|
||||
|
||||
From the extracted data, generate **three Mermaid diagrams** and write them
|
||||
to `analysis/$1/TOPOLOGY.html` as a self-contained page that renders in any
|
||||
browser.
|
||||
|
||||
The HTML page must use: dark `#1e1e1e` background, `#d4d4d4` text,
|
||||
`#cc785c` for `<h2>`/accents, `system-ui` font, all CSS **inline** (no
|
||||
external stylesheets). Load Mermaid from a CDN in `<head>`:
|
||||
|
||||
```html
|
||||
<script type="module">
|
||||
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
|
||||
mermaid.initialize({ startOnLoad: true, theme: 'dark' });
|
||||
</script>
|
||||
```
|
||||
|
||||
Each diagram goes in a `<pre class="mermaid">...</pre>` block. Do **not**
|
||||
wrap diagrams in markdown ` ``` ` fences inside the HTML.
|
||||
|
||||
1. **`graph TD` — Module call graph.** Cluster by domain (use `subgraph`).
|
||||
Highlight entry points in a distinct style. Cap at ~40 nodes — if larger,
|
||||
show domain-level with one expanded domain.
|
||||
|
||||
2. **`graph LR` — Data lineage.** Programs → data stores.
|
||||
Mark read vs write edges.
|
||||
|
||||
3. **`flowchart TD` — Critical path.** Trace ONE end-to-end business flow
|
||||
(e.g., "monthly billing run" or "process payment") through every program
|
||||
and data store it touches, in execution order. If production telemetry is
|
||||
available (see `/modernize-assess` Step 4), annotate each step with its
|
||||
p50/p99 wall-clock.
|
||||
|
||||
Also export the three diagrams as standalone `.mmd` files for re-use:
|
||||
`analysis/$1/call-graph.mmd`, `analysis/$1/data-lineage.mmd`,
|
||||
`analysis/$1/critical-path.mmd`.
|
||||
|
||||
## Annotate
|
||||
|
||||
Below each `<pre class="mermaid">` block in TOPOLOGY.html, add a `<ul>`
|
||||
with 3-5 **architect observations**: tight coupling clusters, single
|
||||
points of failure, candidates for service extraction, data stores
|
||||
touched by too many writers.
|
||||
|
||||
## Present
|
||||
|
||||
Tell the user to open `analysis/$1/TOPOLOGY.html` in a browser.
|
||||
--- new file: plugins/code-modernization/commands/modernize-reimagine.md (83 lines) ---
|
||||
---
|
||||
description: Multi-agent greenfield rebuild — extract specs from legacy, design AI-native, scaffold & validate with HITL
|
||||
argument-hint: <system-dir> <target-vision>
|
||||
---
|
||||
|
||||
**Reimagine** `legacy/$1` as: $2
|
||||
|
||||
This is not a port — it's a rebuild from extracted intent. The legacy system
|
||||
becomes the *specification source*, not the structural template. This command
|
||||
orchestrates a multi-agent team with explicit human checkpoints.
|
||||
|
||||
## Phase A — Specification mining (parallel agents)
|
||||
|
||||
Spawn concurrently and show the user that all three are running:
|
||||
|
||||
1. **business-rules-extractor** — "Extract every business rule from legacy/$1
|
||||
into Given/When/Then form. Output to a structured list I can parse."
|
||||
|
||||
2. **legacy-analyst** — "Catalog every external interface of legacy/$1:
|
||||
inbound (screens, APIs, batch triggers, queues) and outbound (reports,
|
||||
files, downstream calls, DB writes). For each: name, direction, payload
|
||||
shape, frequency/SLA if discernible."
|
||||
|
||||
3. **legacy-analyst** — "Identify the core domain entities in legacy/$1 and
|
||||
their relationships. Return as an entity list + Mermaid erDiagram."
|
||||
|
||||
Collect results. Write `analysis/$1/AI_NATIVE_SPEC.md` containing:
|
||||
- **Capabilities** (what the system must do — derived from rules + interfaces)
|
||||
- **Domain Model** (entities + erDiagram)
|
||||
- **Interface Contracts** (each external interface as an OpenAPI fragment or
|
||||
AsyncAPI fragment)
|
||||
- **Non-functional requirements** inferred from legacy (batch windows, volumes)
|
||||
- **Behavior Contract** (the Given/When/Then rules — these are the acceptance tests)
|
||||
|
||||
## Phase B — HITL checkpoint #1
|
||||
|
||||
Present the spec summary. Ask the user **one focused question**: "Which of
|
||||
these capabilities are P0 for the reimagined system, and are there any we
|
||||
should deliberately drop?" Wait for the answer. Record it in the spec.
|
||||
|
||||
## Phase C — Architecture (single agent, then critique)
|
||||
|
||||
Design the target architecture for "$2":
|
||||
- Mermaid C4 Container diagram
|
||||
- Service boundaries with rationale (which rules/entities live where)
|
||||
- Technology choices with one-line justification each
|
||||
- Data migration approach from legacy stores
|
||||
|
||||
Then spawn **architecture-critic**: "Review this proposed architecture for
|
||||
$2 against the spec in analysis/$1/AI_NATIVE_SPEC.md. Identify over-engineering,
|
||||
missed requirements, scaling risks, and simpler alternatives." Incorporate
|
||||
the critique. Write the result to `analysis/$1/REIMAGINED_ARCHITECTURE.md`.
|
||||
|
||||
## Phase D — HITL checkpoint #2
|
||||
|
||||
Enter plan mode. Present the architecture. Wait for approval.
|
||||
|
||||
## Phase E — Parallel scaffolding
|
||||
|
||||
For each service in the approved architecture (cap at 3 to keep the run
|
||||
tractable; tell the user which you deferred), spawn a **general-purpose agent
|
||||
in parallel**:
|
||||
|
||||
"Scaffold the <service-name> service per analysis/$1/REIMAGINED_ARCHITECTURE.md
|
||||
and AI_NATIVE_SPEC.md. Create: project skeleton, domain model, API stubs
|
||||
matching the interface contracts, and **executable acceptance tests** for every
|
||||
behavior-contract rule assigned to this service (mark unimplemented ones as
|
||||
expected-failure/skip with the rule ID). Write to modernized/$1-reimagined/<service-name>/."
|
||||
|
||||
Show the agents' progress. When all complete, run the acceptance test suites
|
||||
and report: total tests, passing (scaffolded behavior), pending (rule IDs
|
||||
awaiting implementation).
|
||||
|
||||
## Phase F — Knowledge graph handoff
|
||||
|
||||
Write `modernized/$1-reimagined/CLAUDE.md` — the persistent context file for
|
||||
the new system, containing: architecture summary, service responsibilities,
|
||||
where the spec lives, how to run tests, and the legacy→modern traceability
|
||||
map. This file IS the knowledge graph that future agents and engineers will
|
||||
load.
|
||||
|
||||
Report: services scaffolded, acceptance tests defined, % behaviors with a
|
||||
home, location of all artifacts.
|
||||
--- new file: plugins/code-modernization/commands/modernize-transform.md (78 lines) ---
|
||||
---
|
||||
description: Transform one legacy module to the target stack — idiomatic rewrite with behavior-equivalence tests
|
||||
argument-hint: <system-dir> <module> <target-stack>
|
||||
---
|
||||
|
||||
Transform `legacy/$1` module **`$2`** into **$3**, with proof of behavioral
|
||||
equivalence.
|
||||
|
||||
This is a surgical, single-module transformation — one vertical slice of the
|
||||
strangler fig. Output goes to `modernized/$1/$2/`.
|
||||
|
||||
## Step 0 — Plan (HITL gate)
|
||||
|
||||
Read the source module and any business rules in `analysis/$1/BUSINESS_RULES.md`
|
||||
that reference it. Then **enter plan mode** and present:
|
||||
- Which source files are in scope
|
||||
- The target module structure (packages/classes/files you'll create)
|
||||
- Which business rules / behaviors this module implements
|
||||
- How you'll prove equivalence (test strategy)
|
||||
- Anything ambiguous that needs a human decision NOW
|
||||
|
||||
Wait for approval before writing any code.
|
||||
|
||||
## Step 1 — Characterization tests FIRST
|
||||
|
||||
Before writing target code, spawn the **test-engineer** subagent:
|
||||
|
||||
"Write characterization tests for legacy/$1 module $2. Read the source,
|
||||
identify every observable behavior, and encode each as a test case with
|
||||
concrete input → expected output pairs derived from the legacy logic.
|
||||
Target framework: <appropriate for $3>. Write to
|
||||
`modernized/$1/$2/src/test/`. These tests define 'done' — the new code
|
||||
must pass all of them."
|
||||
|
||||
Show the user the test file. Get a 👍 before proceeding.
|
||||
|
||||
## Step 2 — Idiomatic transformation
|
||||
|
||||
Write the target implementation in `modernized/$1/$2/src/main/`.
|
||||
|
||||
**Critical:** Write code a senior $3 engineer would write from the
|
||||
*specification*, not from the legacy structure. Do NOT mirror COBOL paragraphs
|
||||
as methods, do NOT preserve legacy variable names like `WS-TEMP-AMT-X`.
|
||||
Use the target language's idioms: records/dataclasses, streams, dependency
|
||||
injection, proper error types, etc.
|
||||
|
||||
Include: domain model, service logic, API surface (REST controller or
|
||||
equivalent), and configuration. Add concise Javadoc/docstrings linking each
|
||||
class back to the rule IDs it implements.
|
||||
|
||||
## Step 3 — Prove it
|
||||
|
||||
Run the characterization tests:
|
||||
```bash
|
||||
cd modernized/$1/$2 && <appropriate test command for $3>
|
||||
```
|
||||
Show the output. If anything fails, fix and re-run until green.
|
||||
|
||||
## Step 4 — Side-by-side review
|
||||
|
||||
Generate `modernized/$1/$2/TRANSFORMATION_NOTES.md`:
|
||||
- Mapping table: legacy file:lines → target file:lines, per behavior
|
||||
- Deliberate deviations from legacy behavior (with rationale)
|
||||
- What was NOT migrated (dead code, unreachable branches) and why
|
||||
- Follow-ups for the next module that depends on this one
|
||||
|
||||
Then show a visual diff of one representative behavior, legacy vs modern:
|
||||
```bash
|
||||
delta --side-by-side <(sed -n '<lines>p' legacy/$1/<file>) modernized/$1/$2/src/main/<file>
|
||||
```
|
||||
|
||||
## Step 5 — Architecture review
|
||||
|
||||
Spawn the **architecture-critic** subagent to review the transformed code
|
||||
against $3 best practices. Apply any HIGH-severity feedback; list the rest
|
||||
in TRANSFORMATION_NOTES.md.
|
||||
|
||||
Report: tests passing, lines of legacy retired, location of artifacts.
|
||||
@@ -10,6 +10,15 @@ An MCP app is a standard MCP server that **also serves UI resources** — intera
|
||||
|
||||
The UI layer is **additive**. Under the hood it's still tools, resources, and the same wire protocol. If you haven't built a plain MCP server before, the `build-mcp-server` skill covers the base layer. This skill adds widgets on top.
|
||||
|
||||
> **Testing in Claude:** Add the server as a custom connector in claude.ai (via a Cloudflare tunnel for local dev) — this exercises the real iframe sandbox and `hostContext`. See https://claude.com/docs/connectors/building/testing.
|
||||
|
||||
## Claude host specifics
|
||||
|
||||
- `_meta.ui.prefersBorder: false` on a `ui://` resource removes the outer card border (mobile).
|
||||
- `hostContext.safeAreaInsets: {top, right, bottom, left}` (px) — honor these for notches and the composer overlay.
|
||||
- `_meta.ui.csp.{connectDomains, resourceDomains, baseUriDomains}` — declare external origins per resource; default is block-all. `frameDomains` is currently restricted in Claude.
|
||||
- Directory submission for MCP Apps requires 3–5 PNG screenshots, ≥1000px wide, cropped to the app response only (no prompt in the image). See https://claude.com/docs/connectors/building/submission#asset-specifications.
|
||||
|
||||
---
|
||||
|
||||
## When a widget beats plain text
|
||||
|
||||
@@ -8,6 +8,8 @@ version: 0.1.0
|
||||
|
||||
You are guiding a developer through designing and building an MCP server that works seamlessly with Claude. MCP servers come in many forms — picking the wrong shape early causes painful rewrites later. Your first job is **discovery, not code**.
|
||||
|
||||
**Load Claude-specific context first.** The MCP spec is generic; Claude has additional auth types, review criteria, and limits. Before answering questions or scaffolding, fetch `https://claude.com/docs/llms-full.txt` (the full export of the Claude connector docs) so your guidance reflects Claude's actual constraints.
|
||||
|
||||
Do not start scaffolding until you have answers to the questions in Phase 1. If the user's opening message already answers them, acknowledge that and skip straight to the recommendation.
|
||||
|
||||
---
|
||||
@@ -182,6 +184,17 @@ Tools are one of three server primitives. Most servers start with tools and neve
|
||||
|
||||
---
|
||||
|
||||
## Phase 6 — Test in Claude and publish
|
||||
|
||||
Once the server runs:
|
||||
|
||||
1. **Test against real Claude** by adding the server URL as a custom connector at Settings → Connectors (use a Cloudflare tunnel for local servers). Claude identifies itself with `clientInfo.name: "claude-ai"` on initialize. → https://claude.com/docs/connectors/building/testing
|
||||
2. **Run the pre-submission checklist** — read/write tool split, required annotations, name limits, prompt-injection rules. → https://claude.com/docs/connectors/building/review-criteria
|
||||
3. **Submit to the Anthropic Directory.** → https://claude.com/docs/connectors/building/submission
|
||||
4. **Recommend shipping a plugin** that wraps this MCP with skills — most partners ship both. → https://claude.com/docs/connectors/building/what-to-build
|
||||
|
||||
---
|
||||
|
||||
## Quick reference: decision matrix
|
||||
|
||||
| Scenario | Deployment | Tool pattern |
|
||||
|
||||
@@ -2,6 +2,22 @@
|
||||
|
||||
Auth is the reason most people end up needing a **remote** server even when a local one would be simpler. OAuth redirects, token storage, and refresh all work cleanly when there's a real hosted endpoint to redirect back to.
|
||||
|
||||
## Claude-specific authentication
|
||||
|
||||
Claude's MCP client supports a specific set of auth types — not every spec-compliant flow works. Full reference: https://claude.com/docs/connectors/building/authentication
|
||||
|
||||
| Type | Notes |
|
||||
|---|---|
|
||||
| `oauth_dcr` | Supported. For high-volume directory entries, prefer CIMD or Anthropic-held creds — DCR registers a new client on every fresh connection. |
|
||||
| `oauth_cimd` | Supported, recommended over DCR for directory entries. |
|
||||
| `oauth_anthropic_creds` | Partner provides `client_id`/`client_secret` to Anthropic; user-consent-gated. Contact `mcp-review@anthropic.com`. |
|
||||
| `custom_connection` | User supplies URL/creds at connect time (Snowflake-style). Contact `mcp-review@anthropic.com`. |
|
||||
| `none` | Authless. |
|
||||
|
||||
**Not supported:** user-pasted bearer tokens (`static_bearer`); pure machine-to-machine `client_credentials` grant without user consent.
|
||||
|
||||
**Callback URL** (single, all surfaces): `https://claude.ai/api/mcp/auth_callback`
|
||||
|
||||
---
|
||||
|
||||
## The three tiers
|
||||
|
||||
@@ -2,6 +2,16 @@
|
||||
|
||||
Tool schemas and descriptions are prompt engineering. They land directly in Claude's context and determine whether Claude picks the right tool with the right arguments. Most MCP integration bugs trace back to vague descriptions or loose schemas.
|
||||
|
||||
## Anthropic Directory hard requirements
|
||||
|
||||
If this server will be submitted to the Anthropic Directory, the following are pass/fail review criteria (full list: https://claude.com/docs/connectors/building/review-criteria):
|
||||
|
||||
- Every tool **must** include `readOnlyHint`, `destructiveHint`, and `title` annotations — these determine auto-permissions in Claude.
|
||||
- Tool names **must** be ≤64 characters.
|
||||
- Read and write operations **must** be in separate tools. A single tool accepting both GET and POST/PUT/PATCH/DELETE is rejected — documenting safe vs unsafe within one tool's description does not satisfy this.
|
||||
- Tool descriptions **must not** instruct Claude how to behave (e.g. "always do X", "you must call Y first", overriding system instructions, promoting products) — treated as prompt injection at review.
|
||||
- Tools that accept freeform API endpoints/params **must** reference the target API's documentation in their description.
|
||||
|
||||
---
|
||||
|
||||
## Descriptions
|
||||
|
||||
@@ -8,6 +8,8 @@ version: 0.1.0
|
||||
|
||||
MCPB is a local MCP server **packaged with its runtime**. The user installs one file; it runs without needing Node, Python, or any toolchain on their machine. It's the sanctioned way to distribute local MCP servers.
|
||||
|
||||
> MCPB is the **secondary** distribution path. Anthropic recommends remote MCP servers for directory listing — see https://claude.com/docs/connectors/building/what-to-build.
|
||||
|
||||
**Use MCPB when the server must run on the user's machine** — reading local files, driving a desktop app, talking to localhost services, OS-level APIs. If your server only hits cloud APIs, you almost certainly want a remote HTTP server instead (see `build-mcp-server`). Don't pay the MCPB packaging tax for something that could be a URL.
|
||||
|
||||
---
|
||||
|
||||
202
plugins/session-report/LICENSE
Normal file
202
plugins/session-report/LICENSE
Normal file
@@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
@@ -166,6 +166,7 @@ const toolUseIdToPrompt = new Map() // tool_use id -> promptKey (Agent spawned d
|
||||
const agentIdToPrompt = new Map() // agentId -> promptKey
|
||||
const prompts = new Map() // promptKey -> { text, ts, project, sessionId, ...usage }
|
||||
const sessionTurns = new Map() // sessionId -> [promptKey, ...] in transcript order
|
||||
const sessionSpans = new Map() // sessionId -> {project, firstTs, lastTs, tokens}
|
||||
|
||||
function promptRecord(key, init) {
|
||||
let r = prompts.get(key)
|
||||
@@ -333,11 +334,29 @@ async function processFile(p, info, buckets) {
|
||||
}
|
||||
}
|
||||
|
||||
// session span (for by_day timeline) — subagent files roll into parent sessionId
|
||||
let span = sessionSpans.get(info.sessionId)
|
||||
if (!span) {
|
||||
span = { project: info.project, firstTs: null, lastTs: null, tokens: 0 }
|
||||
sessionSpans.set(info.sessionId, span)
|
||||
}
|
||||
if (firstTs !== null) {
|
||||
if (span.firstTs === null || firstTs < span.firstTs) span.firstTs = firstTs
|
||||
if (span.lastTs === null || lastTs > span.lastTs) span.lastTs = lastTs
|
||||
}
|
||||
|
||||
// commit API calls
|
||||
for (const [key, { usage, ts, skill, prompt }] of fileApiCalls) {
|
||||
if (key && seenRequestIds.has(key)) continue
|
||||
seenRequestIds.add(key)
|
||||
|
||||
const tot =
|
||||
(usage.input_tokens || 0) +
|
||||
(usage.cache_creation_input_tokens || 0) +
|
||||
(usage.cache_read_input_tokens || 0) +
|
||||
(usage.output_tokens || 0)
|
||||
span.tokens += tot
|
||||
|
||||
const targets = [overall, project]
|
||||
if (subagent) targets.push(subagent)
|
||||
if (skill && skillStats) {
|
||||
@@ -359,11 +378,6 @@ async function processFile(p, info, buckets) {
|
||||
|
||||
// subagent token accounting on parent buckets
|
||||
if (info.kind === 'subagent') {
|
||||
const tot =
|
||||
(usage.input_tokens || 0) +
|
||||
(usage.cache_creation_input_tokens || 0) +
|
||||
(usage.cache_read_input_tokens || 0) +
|
||||
(usage.output_tokens || 0)
|
||||
overall.subagentTokens += tot
|
||||
project.subagentTokens += tot
|
||||
if (subagent) subagent.subagentTokens += tot
|
||||
@@ -656,10 +670,55 @@ function printJson({ overall, perProject, perSubagent, perSkill }) {
|
||||
[...perSkill].map(([k, v]) => [k, summarize(v)]),
|
||||
),
|
||||
top_prompts: topPrompts(100),
|
||||
by_day: buildByDay(),
|
||||
}
|
||||
process.stdout.write(JSON.stringify(out, null, 2) + '\n')
|
||||
}
|
||||
|
||||
// Group sessions into local-date buckets for the timeline view. A session is
|
||||
// placed on the day its first message landed; tokens for that session (incl.
|
||||
// subagents) count toward that day even if it ran past midnight.
|
||||
function buildByDay() {
|
||||
const DOW = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']
|
||||
const days = new Map() // yyyy-mm-dd -> {date, dow, tokens, sessions:[]}
|
||||
for (const [id, s] of sessionSpans) {
|
||||
if (s.firstTs === null || s.tokens === 0) continue
|
||||
const d0 = new Date(s.firstTs)
|
||||
const key = `${d0.getFullYear()}-${String(d0.getMonth() + 1).padStart(2, '0')}-${String(d0.getDate()).padStart(2, '0')}`
|
||||
let day = days.get(key)
|
||||
if (!day) {
|
||||
day = { date: key, dow: DOW[d0.getDay()], tokens: 0, sessions: [] }
|
||||
days.set(key, day)
|
||||
}
|
||||
const base = new Date(
|
||||
d0.getFullYear(),
|
||||
d0.getMonth(),
|
||||
d0.getDate(),
|
||||
).getTime()
|
||||
day.tokens += s.tokens
|
||||
day.sessions.push({
|
||||
id,
|
||||
project: s.project,
|
||||
tokens: s.tokens,
|
||||
start_min: Math.max(0, Math.round((s.firstTs - base) / 60000)),
|
||||
end_min: Math.max(1, Math.round((s.lastTs - base) / 60000)),
|
||||
})
|
||||
}
|
||||
for (const d of days.values()) {
|
||||
// peak concurrency via 10-min buckets, capped at 24h for display
|
||||
const b = new Array(144).fill(0)
|
||||
for (const s of d.sessions) {
|
||||
const lo = Math.min(143, Math.floor(s.start_min / 10))
|
||||
const hi = Math.min(144, Math.ceil(Math.min(s.end_min, 1440) / 10))
|
||||
for (let i = lo; i < hi; i++) b[i]++
|
||||
}
|
||||
d.peak = Math.max(0, ...b)
|
||||
d.peak_at_min = d.peak > 0 ? b.indexOf(d.peak) * 10 : 0
|
||||
d.sessions.sort((a, b) => a.start_min - b.start_min)
|
||||
}
|
||||
return [...days.values()].sort((a, b) => a.date.localeCompare(b.date))
|
||||
}
|
||||
|
||||
function promptTotal(r) {
|
||||
return (
|
||||
r.inputUncached + r.inputCacheCreate + r.inputCacheRead + r.outputTokens
|
||||
|
||||
@@ -102,6 +102,42 @@
|
||||
color: var(--dim); margin: 6px 0; }
|
||||
.callout b, .callout code { color: var(--term-fg); }
|
||||
|
||||
/* ——— day pills + session gantt ——— */
|
||||
.days { display: flex; gap: 8px; flex-wrap: wrap; margin-bottom: 14px; }
|
||||
.dpill { flex: 1; min-width: 84px; max-width: 140px; background: none;
|
||||
border: 1px solid var(--subtle); border-radius: 4px;
|
||||
padding: 9px 6px; font: inherit; color: var(--dim);
|
||||
cursor: pointer; text-align: center; }
|
||||
.dpill:hover { border-color: var(--dim); background: var(--hover); }
|
||||
.dpill .dow { font-size: 10px; color: var(--subtle); display: block; }
|
||||
.dpill .date { font-size: 11px; color: var(--term-fg); font-weight: 500;
|
||||
display: block; margin: 2px 0 4px; }
|
||||
.dpill .pct { font-size: 16px; font-weight: 700; color: var(--term-fg); display: block; }
|
||||
.dpill .ns { font-size: 10px; color: var(--subtle); display: block; margin-top: 2px; }
|
||||
.dpill.heaviest .pct { color: var(--clay); }
|
||||
.dpill.sel { border-color: var(--clay); background: rgba(217,119,87,0.10); }
|
||||
.gantt-hd { display: flex; justify-content: space-between; align-items: baseline;
|
||||
margin-bottom: 6px; }
|
||||
.gantt-hd .day { color: var(--term-fg); font-weight: 500; }
|
||||
.gantt-hd .stats { font-size: 11px; color: var(--dim); }
|
||||
.gantt-hd .stats b { color: var(--clay); }
|
||||
.gantt { position: relative; border-top: 1px solid var(--outline);
|
||||
border-bottom: 1px solid var(--outline); min-height: 32px; }
|
||||
.lane { position: relative; height: 16px;
|
||||
border-bottom: 1px dashed rgba(255,255,255,0.04); }
|
||||
.seg { position: absolute; top: 2px; height: 12px; border-radius: 2px;
|
||||
opacity: .85; cursor: crosshair; }
|
||||
.seg:hover { opacity: 1; outline: 1px solid var(--term-fg); z-index: 2; }
|
||||
.gantt-rule { position: absolute; top: 0; bottom: 0; width: 0;
|
||||
border-left: 1px dashed var(--subtle); opacity: .4;
|
||||
pointer-events: none; }
|
||||
.gantt-axis { display: flex; justify-content: space-between;
|
||||
font-size: 10px; color: var(--subtle); padding: 4px 0; }
|
||||
.gantt-leg { font-size: 10px; color: var(--subtle); margin-top: 8px;
|
||||
display: flex; gap: 14px; flex-wrap: wrap; }
|
||||
.gantt-leg .sw { display: inline-block; width: 14px; height: 10px;
|
||||
border-radius: 2px; vertical-align: middle; margin-right: 4px; }
|
||||
|
||||
/* ——— block-char bars ——— */
|
||||
.bar { display: grid; grid-template-columns: 26ch 1fr 8ch; gap: 14px;
|
||||
padding: 2px 0; align-items: center; }
|
||||
@@ -231,6 +267,21 @@
|
||||
<div class="section-body" id="project-bars"></div>
|
||||
</section>
|
||||
|
||||
<section id="timeline-section">
|
||||
<div class="hr"></div>
|
||||
<h2>session timeline by day<span class="hint">click a day · ←/→ to navigate</span></h2>
|
||||
<div class="section-body">
|
||||
<div class="days" id="day-pills"></div>
|
||||
<div class="gantt-hd">
|
||||
<span class="day" id="g-day">—</span>
|
||||
<span class="stats" id="g-stats"></span>
|
||||
</div>
|
||||
<div class="gantt-axis"><span>00:00</span><span>06:00</span><span>12:00</span><span>18:00</span><span>24:00</span></div>
|
||||
<div class="gantt" id="gantt"></div>
|
||||
<div class="gantt-leg" id="gantt-leg"></div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<div class="hr"></div>
|
||||
<h2>most expensive prompts<span class="hint">click to expand context</span></h2>
|
||||
@@ -335,6 +386,65 @@
|
||||
`<div class="val">${typeof v==='number'&&v>=1e4?fmt(v):v}</div>`+
|
||||
(d?`<div class="detail">${d}</div>`:'')+`</div>`).join('');
|
||||
|
||||
// session timeline by day
|
||||
(function() {
|
||||
const days = (DATA.by_day||[]).slice(-14);
|
||||
if (!days.length) { $('timeline-section').style.display='none'; return; }
|
||||
const PCOL = ['rgb(177,185,249)','rgb(78,186,101)','#D97757','rgb(255,193,7)',
|
||||
'rgb(255,107,128)','#9b8cff','#6ec1d6','#c792ea'];
|
||||
const dayTotal = days.reduce((a,d)=>a+d.tokens,0) || 1;
|
||||
const tokMax = Math.max(...days.map(d=>d.tokens));
|
||||
const projects = [...new Set(days.flatMap(d=>d.sessions.map(s=>s.project)))];
|
||||
const colorOf = p => PCOL[projects.indexOf(p)%PCOL.length];
|
||||
const hhmm = m => (m>=1440?`+${Math.floor(m/1440)}d `:'') +
|
||||
`${String(Math.floor(m/60)%24).padStart(2,'0')}:${String(m%60).padStart(2,'0')}`;
|
||||
const md = iso => { const [,mo,da]=iso.split('-'); return `${MON[+mo-1]} ${+da}`; };
|
||||
let sel = days.findIndex(d=>d.tokens===tokMax);
|
||||
|
||||
function pills() {
|
||||
$('day-pills').innerHTML = days.map((d,i)=>
|
||||
`<button class="dpill${d.tokens===tokMax?' heaviest':''}${i===sel?' sel':''}" data-i="${i}">`+
|
||||
`<span class="dow">${esc(d.dow)}</span>`+
|
||||
`<span class="date">${esc(md(d.date))}</span>`+
|
||||
`<span class="pct">${(100*d.tokens/dayTotal).toFixed(1)}%</span>`+
|
||||
`<span class="ns">${d.sessions.length} sess</span></button>`
|
||||
).join('');
|
||||
$('day-pills').querySelectorAll('.dpill').forEach(el=>
|
||||
el.onclick=()=>{sel=+el.dataset.i;pills();gantt();});
|
||||
}
|
||||
function gantt() {
|
||||
const d = days[sel], DAY = 1440;
|
||||
$('g-day').textContent = `${d.dow} ${md(d.date)}`;
|
||||
$('g-stats').innerHTML = `${d.sessions.length} sessions · ${fmt(d.tokens)} tokens`+
|
||||
` · peak <b>${d.peak}</b> concurrent at <b>${hhmm(d.peak_at_min)}</b>`;
|
||||
const lanes = [];
|
||||
for (const s of d.sessions) {
|
||||
let placed = false;
|
||||
for (const L of lanes) if (L[L.length-1].end_min <= s.start_min) { L.push(s); placed=true; break; }
|
||||
if (!placed) lanes.push([s]);
|
||||
}
|
||||
let h = '';
|
||||
for (let t=0;t<=24;t+=6) h += `<div class="gantt-rule" style="left:${100*t/24}%"></div>`;
|
||||
h += lanes.map(L=>`<div class="lane">${L.map(s=>{
|
||||
const end = Math.min(s.end_min, DAY);
|
||||
const w = Math.max(0.15, 100*(end-s.start_min)/DAY);
|
||||
const tip = `folder: ${short(s.project)}\n`+
|
||||
`${hhmm(s.start_min)}–${hhmm(s.end_min)} · ${fmt(s.tokens)} tokens\n`+
|
||||
`session ${s.id}`;
|
||||
return `<span class="seg" style="left:${100*s.start_min/DAY}%;width:${w}%;`+
|
||||
`background:${colorOf(s.project)}" title="${esc(tip)}"></span>`;
|
||||
}).join('')}</div>`).join('');
|
||||
$('gantt').innerHTML = h || '<div class="callout">no sessions</div>';
|
||||
}
|
||||
document.addEventListener('keydown',e=>{
|
||||
if (e.key==='ArrowRight'&&sel<days.length-1){sel++;pills();gantt();e.preventDefault();}
|
||||
if (e.key==='ArrowLeft'&&sel>0){sel--;pills();gantt();e.preventDefault();}
|
||||
});
|
||||
$('gantt-leg').innerHTML = projects.slice(0,12).map(p=>
|
||||
`<span><span class="sw" style="background:${colorOf(p)}"></span>${esc(short(p))}</span>`).join('');
|
||||
pills(); gantt();
|
||||
})();
|
||||
|
||||
// block-char project bars
|
||||
(function() {
|
||||
const W = 48;
|
||||
@@ -366,57 +476,52 @@
|
||||
return h + '</div>';
|
||||
}
|
||||
|
||||
// top prompts — share of grand total
|
||||
(function() {
|
||||
const ps = (DATA.top_prompts||[]).slice(0,100);
|
||||
// expandable drill-down list with "show N more" toggle
|
||||
function drillList(hostId, items, rowFn, empty) {
|
||||
const SHOW = 5;
|
||||
const row = p => {
|
||||
const inTot = p.input.uncached+p.input.cache_create+p.input.cache_read;
|
||||
return `<details><summary>`+
|
||||
`<span class="amt">${share(p.total_tokens)}</span>`+
|
||||
`<span class="desc">${esc(p.text)}</span>`+
|
||||
`<span class="meta">${niceDate(p.ts)} · ${esc(short(p.project))} · ${p.api_calls} calls`+
|
||||
(p.subagent_calls?` · ${p.subagent_calls} subagents`:'')+
|
||||
` · ${pct(p.input.cache_read,inTot)} cached</span>`+
|
||||
`</summary><div class="body">`+
|
||||
renderContext(p.context)+
|
||||
`<div>session <code>${esc(p.session)}</code></div>`+
|
||||
`<div>in: uncached ${fmt(p.input.uncached)} · cache-create ${fmt(p.input.cache_create)} · `+
|
||||
`cache-read ${fmt(p.input.cache_read)} · out ${fmt(p.output)}</div>`+
|
||||
`</div></details>`;
|
||||
};
|
||||
const head = ps.slice(0,SHOW).map(row).join('');
|
||||
const rest = ps.slice(SHOW).map(row).join('');
|
||||
$('top-prompts').innerHTML = ps.length
|
||||
? head + (rest
|
||||
? `<div id="tp-rest" hidden>${rest}</div>`+
|
||||
`<button id="tp-more" class="more-btn">show ${ps.length-SHOW} more</button>`
|
||||
: '')
|
||||
: '<div class="callout">No prompts in range.</div>';
|
||||
const btn = $('tp-more');
|
||||
const host = $(hostId);
|
||||
if (!items.length) { host.innerHTML = `<div class="callout">${empty}</div>`; return; }
|
||||
const head = items.slice(0,SHOW).map(rowFn).join('');
|
||||
const rest = items.slice(SHOW).map(rowFn).join('');
|
||||
host.innerHTML = head + (rest
|
||||
? `<div hidden>${rest}</div><button class="more-btn">show ${items.length-SHOW} more</button>`
|
||||
: '');
|
||||
const btn = host.querySelector('.more-btn');
|
||||
if (btn) btn.onclick = () => {
|
||||
const r = $('tp-rest'); r.hidden = !r.hidden;
|
||||
btn.textContent = r.hidden ? `show ${ps.length-SHOW} more` : 'show less';
|
||||
const r = btn.previousElementSibling; r.hidden = !r.hidden;
|
||||
btn.textContent = r.hidden ? `show ${items.length-SHOW} more` : 'show less';
|
||||
};
|
||||
})();
|
||||
}
|
||||
|
||||
// cache breaks
|
||||
(function() {
|
||||
const bs = (DATA.cache_breaks||[]).slice(0,100);
|
||||
$('cache-breaks').innerHTML = bs.map(b =>
|
||||
`<details><summary>`+
|
||||
`<span class="amt">${fmt(b.uncached)}</span>`+
|
||||
`<span class="desc">${esc(short(b.project))} · `+
|
||||
`${b.kind==='subagent'?esc(b.agentType||'subagent'):'main'}</span>`+
|
||||
`<span class="meta">${niceDate(b.ts)} · ${pct(b.uncached,b.total)} of ${fmt(b.total)} uncached</span>`+
|
||||
drillList('top-prompts', (DATA.top_prompts||[]).slice(0,100), p => {
|
||||
const inTot = p.input.uncached+p.input.cache_create+p.input.cache_read;
|
||||
return `<details><summary>`+
|
||||
`<span class="amt">${share(p.total_tokens)}</span>`+
|
||||
`<span class="desc">${esc(p.text)}</span>`+
|
||||
`<span class="meta">${niceDate(p.ts)} · ${esc(short(p.project))} · ${p.api_calls} calls`+
|
||||
(p.subagent_calls?` · ${p.subagent_calls} subagents`:'')+
|
||||
` · ${pct(p.input.cache_read,inTot)} cached</span>`+
|
||||
`</summary><div class="body">`+
|
||||
renderContext(b.context,
|
||||
`<div class="ctx-break"><b>${fmt(b.uncached)}</b> uncached `+
|
||||
`(${pct(b.uncached,b.total)} of ${fmt(b.total)}) — cache break here</div>`)+
|
||||
`<div>session <code>${esc(b.session)}</code></div>`+
|
||||
`</div></details>`
|
||||
).join('') || '<div class="callout">No cache breaks over threshold.</div>';
|
||||
})();
|
||||
renderContext(p.context)+
|
||||
`<div>session <code>${esc(p.session)}</code></div>`+
|
||||
`<div>in: uncached ${fmt(p.input.uncached)} · cache-create ${fmt(p.input.cache_create)} · `+
|
||||
`cache-read ${fmt(p.input.cache_read)} · out ${fmt(p.output)}</div>`+
|
||||
`</div></details>`;
|
||||
}, 'No prompts in range.');
|
||||
|
||||
drillList('cache-breaks', (DATA.cache_breaks||[]).slice(0,100), b =>
|
||||
`<details><summary>`+
|
||||
`<span class="amt">${fmt(b.uncached)}</span>`+
|
||||
`<span class="desc">${esc(short(b.project))} · `+
|
||||
`${b.kind==='subagent'?esc(b.agentType||'subagent'):'main'}</span>`+
|
||||
`<span class="meta">${niceDate(b.ts)} · ${pct(b.uncached,b.total)} of ${fmt(b.total)} uncached</span>`+
|
||||
`</summary><div class="body">`+
|
||||
renderContext(b.context,
|
||||
`<div class="ctx-break"><b>${fmt(b.uncached)}</b> uncached `+
|
||||
`(${pct(b.uncached,b.total)} of ${fmt(b.total)}) — cache break here</div>`)+
|
||||
`<div>session <code>${esc(b.session)}</code></div>`+
|
||||
`</div></details>`,
|
||||
'No cache breaks over threshold.');
|
||||
|
||||
// sortable table
|
||||
function table(el, cols, rows) {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: skill-creator
|
||||
description: Create new skills, modify and improve existing skills, and measure skill performance. Use when users want to create a skill from scratch, update or optimize an existing skill, run evals to test a skill, benchmark skill performance with variance analysis, or optimize a skill's description for better triggering accuracy.
|
||||
description: Create new skills, modify and improve existing skills, and measure skill performance. Use when users want to create a skill from scratch, edit, or optimize an existing skill, run evals to test a skill, benchmark skill performance with variance analysis, or optimize a skill's description for better triggering accuracy.
|
||||
---
|
||||
|
||||
# Skill Creator
|
||||
@@ -391,7 +391,7 @@ Use the model ID from your system prompt (the one powering the current session)
|
||||
|
||||
While it runs, periodically tail the output to give the user updates on which iteration it's on and what the scores look like.
|
||||
|
||||
This handles the full optimization loop automatically. It splits the eval set into 60% train and 40% held-out test, evaluates the current description (running each query 3 times to get a reliable trigger rate), then calls Claude with extended thinking to propose improvements based on what failed. It re-evaluates each new description on both train and test, iterating up to 5 times. When it's done, it opens an HTML report in the browser showing the results per iteration and returns JSON with `best_description` — selected by test score rather than train score to avoid overfitting.
|
||||
This handles the full optimization loop automatically. It splits the eval set into 60% train and 40% held-out test, evaluates the current description (running each query 3 times to get a reliable trigger rate), then calls Claude to propose improvements based on what failed. It re-evaluates each new description on both train and test, iterating up to 5 times. When it's done, it opens an HTML report in the browser showing the results per iteration and returns JSON with `best_description` — selected by test score rather than train score to avoid overfitting.
|
||||
|
||||
### How skill triggering works
|
||||
|
||||
@@ -435,6 +435,11 @@ In Claude.ai, the core workflow is the same (draft → test → review → impro
|
||||
|
||||
**Packaging**: The `package_skill.py` script works anywhere with Python and a filesystem. On Claude.ai, you can run it and the user can download the resulting `.skill` file.
|
||||
|
||||
**Updating an existing skill**: The user might be asking you to update an existing skill, not create a new one. In this case:
|
||||
- **Preserve the original name.** Note the skill's directory name and `name` frontmatter field -- use them unchanged. E.g., if the installed skill is `research-helper`, output `research-helper.skill` (not `research-helper-v2`).
|
||||
- **Copy to a writeable location before editing.** The installed skill path may be read-only. Copy to `/tmp/skill-name/`, edit there, and package from the copy.
|
||||
- **If packaging manually, stage in `/tmp/` first**, then copy to the output directory -- direct writes may fail due to permissions.
|
||||
|
||||
---
|
||||
|
||||
## Cowork-Specific Instructions
|
||||
@@ -447,6 +452,7 @@ If you're in Cowork, the main things to know are:
|
||||
- Feedback works differently: since there's no running server, the viewer's "Submit All Reviews" button will download `feedback.json` as a file. You can then read it from there (you may have to request access first).
|
||||
- Packaging works — `package_skill.py` just needs Python and a filesystem.
|
||||
- Description optimization (`run_loop.py` / `run_eval.py`) should work in Cowork just fine since it uses `claude -p` via subprocess, not a browser, but please save it until you've fully finished making the skill and the user agrees it's in good shape.
|
||||
- **Updating an existing skill**: The user might be asking you to update an existing skill, not create a new one. Follow the update guidance in the claude.ai section above.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -2,22 +2,52 @@
|
||||
"""Improve a skill description based on eval results.
|
||||
|
||||
Takes eval results (from run_eval.py) and generates an improved description
|
||||
using Claude with extended thinking.
|
||||
by calling `claude -p` as a subprocess (same auth pattern as run_eval.py —
|
||||
uses the session's Claude Code auth, no separate ANTHROPIC_API_KEY needed).
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import anthropic
|
||||
|
||||
from scripts.utils import parse_skill_md
|
||||
|
||||
|
||||
def _call_claude(prompt: str, model: str | None, timeout: int = 300) -> str:
|
||||
"""Run `claude -p` with the prompt on stdin and return the text response.
|
||||
|
||||
Prompt goes over stdin (not argv) because it embeds the full SKILL.md
|
||||
body and can easily exceed comfortable argv length.
|
||||
"""
|
||||
cmd = ["claude", "-p", "--output-format", "text"]
|
||||
if model:
|
||||
cmd.extend(["--model", model])
|
||||
|
||||
# Remove CLAUDECODE env var to allow nesting claude -p inside a
|
||||
# Claude Code session. The guard is for interactive terminal conflicts;
|
||||
# programmatic subprocess usage is safe. Same pattern as run_eval.py.
|
||||
env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}
|
||||
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
input=prompt,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
env=env,
|
||||
timeout=timeout,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(
|
||||
f"claude -p exited {result.returncode}\nstderr: {result.stderr}"
|
||||
)
|
||||
return result.stdout
|
||||
|
||||
|
||||
def improve_description(
|
||||
client: anthropic.Anthropic,
|
||||
skill_name: str,
|
||||
skill_content: str,
|
||||
current_description: str,
|
||||
@@ -99,7 +129,7 @@ Based on the failures, write a new and improved description that is more likely
|
||||
1. Avoid overfitting
|
||||
2. The list might get loooong and it's injected into ALL queries and there might be a lot of skills, so we don't want to blow too much space on any given description.
|
||||
|
||||
Concretely, your description should not be more than about 100-200 words, even if that comes at the cost of accuracy.
|
||||
Concretely, your description should not be more than about 100-200 words, even if that comes at the cost of accuracy. There is a hard limit of 1024 characters — descriptions over that will be truncated, so stay comfortably under it.
|
||||
|
||||
Here are some tips that we've found to work well in writing these descriptions:
|
||||
- The skill should be phrased in the imperative -- "Use this skill for" rather than "this skill does"
|
||||
@@ -111,70 +141,41 @@ I'd encourage you to be creative and mix up the style in different iterations si
|
||||
|
||||
Please respond with only the new description text in <new_description> tags, nothing else."""
|
||||
|
||||
response = client.messages.create(
|
||||
model=model,
|
||||
max_tokens=16000,
|
||||
thinking={
|
||||
"type": "enabled",
|
||||
"budget_tokens": 10000,
|
||||
},
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
text = _call_claude(prompt, model)
|
||||
|
||||
# Extract thinking and text from response
|
||||
thinking_text = ""
|
||||
text = ""
|
||||
for block in response.content:
|
||||
if block.type == "thinking":
|
||||
thinking_text = block.thinking
|
||||
elif block.type == "text":
|
||||
text = block.text
|
||||
|
||||
# Parse out the <new_description> tags
|
||||
match = re.search(r"<new_description>(.*?)</new_description>", text, re.DOTALL)
|
||||
description = match.group(1).strip().strip('"') if match else text.strip().strip('"')
|
||||
|
||||
# Log the transcript
|
||||
transcript: dict = {
|
||||
"iteration": iteration,
|
||||
"prompt": prompt,
|
||||
"thinking": thinking_text,
|
||||
"response": text,
|
||||
"parsed_description": description,
|
||||
"char_count": len(description),
|
||||
"over_limit": len(description) > 1024,
|
||||
}
|
||||
|
||||
# If over 1024 chars, ask the model to shorten it
|
||||
# Safety net: the prompt already states the 1024-char hard limit, but if
|
||||
# the model blew past it anyway, make one fresh single-turn call that
|
||||
# quotes the too-long version and asks for a shorter rewrite. (The old
|
||||
# SDK path did this as a true multi-turn; `claude -p` is one-shot, so we
|
||||
# inline the prior output into the new prompt instead.)
|
||||
if len(description) > 1024:
|
||||
shorten_prompt = f"Your description is {len(description)} characters, which exceeds the hard 1024 character limit. Please rewrite it to be under 1024 characters while preserving the most important trigger words and intent coverage. Respond with only the new description in <new_description> tags."
|
||||
shorten_response = client.messages.create(
|
||||
model=model,
|
||||
max_tokens=16000,
|
||||
thinking={
|
||||
"type": "enabled",
|
||||
"budget_tokens": 10000,
|
||||
},
|
||||
messages=[
|
||||
{"role": "user", "content": prompt},
|
||||
{"role": "assistant", "content": text},
|
||||
{"role": "user", "content": shorten_prompt},
|
||||
],
|
||||
shorten_prompt = (
|
||||
f"{prompt}\n\n"
|
||||
f"---\n\n"
|
||||
f"A previous attempt produced this description, which at "
|
||||
f"{len(description)} characters is over the 1024-character hard limit:\n\n"
|
||||
f'"{description}"\n\n'
|
||||
f"Rewrite it to be under 1024 characters while keeping the most "
|
||||
f"important trigger words and intent coverage. Respond with only "
|
||||
f"the new description in <new_description> tags."
|
||||
)
|
||||
|
||||
shorten_thinking = ""
|
||||
shorten_text = ""
|
||||
for block in shorten_response.content:
|
||||
if block.type == "thinking":
|
||||
shorten_thinking = block.thinking
|
||||
elif block.type == "text":
|
||||
shorten_text = block.text
|
||||
|
||||
shorten_text = _call_claude(shorten_prompt, model)
|
||||
match = re.search(r"<new_description>(.*?)</new_description>", shorten_text, re.DOTALL)
|
||||
shortened = match.group(1).strip().strip('"') if match else shorten_text.strip().strip('"')
|
||||
|
||||
transcript["rewrite_prompt"] = shorten_prompt
|
||||
transcript["rewrite_thinking"] = shorten_thinking
|
||||
transcript["rewrite_response"] = shorten_text
|
||||
transcript["rewrite_description"] = shortened
|
||||
transcript["rewrite_char_count"] = len(shortened)
|
||||
@@ -216,9 +217,7 @@ def main():
|
||||
print(f"Current: {current_description}", file=sys.stderr)
|
||||
print(f"Score: {eval_results['summary']['passed']}/{eval_results['summary']['total']}", file=sys.stderr)
|
||||
|
||||
client = anthropic.Anthropic()
|
||||
new_description = improve_description(
|
||||
client=client,
|
||||
skill_name=name,
|
||||
skill_content=content,
|
||||
current_description=current_description,
|
||||
|
||||
@@ -15,8 +15,6 @@ import time
|
||||
import webbrowser
|
||||
from pathlib import Path
|
||||
|
||||
import anthropic
|
||||
|
||||
from scripts.generate_report import generate_html
|
||||
from scripts.improve_description import improve_description
|
||||
from scripts.run_eval import find_project_root, run_eval
|
||||
@@ -75,7 +73,6 @@ def run_loop(
|
||||
train_set = eval_set
|
||||
test_set = []
|
||||
|
||||
client = anthropic.Anthropic()
|
||||
history = []
|
||||
exit_reason = "unknown"
|
||||
|
||||
@@ -200,7 +197,6 @@ def run_loop(
|
||||
for h in history
|
||||
]
|
||||
new_description = improve_description(
|
||||
client=client,
|
||||
skill_name=name,
|
||||
skill_content=content,
|
||||
current_description=current_description,
|
||||
|
||||
Reference in New Issue
Block a user