bump(qdrant-skills): 82337ccd → 0814a087

bump(outputai): 65cd0871 → 83742db5 (#2560)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
2026-06-10 10:13:36 +00:00 · 2026-06-10 08:40:59 +00:00 · 2026-06-09 20:28:45 -05:00 · 2026-06-09 20:28:39 -05:00 · 2026-06-09 20:28:18 -05:00 · 2026-06-09 20:27:56 -05:00
28 changed files with 1826 additions and 292 deletions
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
--- a/.github/workflows/bump-plugin-shas.yml
+++ b/.github/workflows/bump-plugin-shas.yml
@@ -2,25 +2,24 @@ name: Bump Plugin SHAs

 # Nightly sweep: for each external entry whose upstream HEAD has moved past
 # its pinned SHA, validate at the new SHA with `claude plugin validate`
-# inline, then open one PR with all passing bumps. Each run force-resets the
-# bump/plugin-shas branch, so a previous night's unmerged PR is replaced (and
-# its review state discarded) — review and merge same-day to avoid churn.
+# inline, then open one PR per bumped plugin on branch `bump/<slug>`.
+# Failing entries stay isolated in their own PR; passing bumps merge
+# independently.
 #
 # Bot-free — uses the default GITHUB_TOKEN. PRs opened with GITHUB_TOKEN don't
-# trigger on:pull_request workflows, so the policy scan (`Scan Plugins`, a
-# required status check on main) would never run and the bump PR could never
-# merge. workflow_dispatch is exempt from that recursion guard, so we dispatch
-# the scan ourselves on the bump branch after the PR is opened. The check run
-# lands on the branch HEAD — the same SHA as the PR head — and satisfies the
-# required check.
+# trigger on:pull_request workflows, so the required status checks on main
+# (`scan` from Scan Plugins, `check` from Check MCP URLs, `validate` from
+# Validate Plugins) would never run and the bump PR could never merge.
+# workflow_dispatch is exempt from that recursion guard, so we dispatch all
+# three ourselves against each per-entry bump branch after its PR is opened.
+# Each check run lands on the branch HEAD — the same SHA as the PR head — and
+# satisfies the corresponding required check. (Each of those workflows runs
+# its job unconditionally on workflow_dispatch, so a dispatch always reports.)
 #
-# max-bumps is set above the external-entry count so a single run can clear
-# any backlog. The cost-control mechanisms are downstream:
-#   - scan-plugins.yml caches verdicts by (plugin, sha) so an unchanged SHA
-#     is never re-scanned across nightly force-resets.
-#   - revert-failed-bumps.yml drops policy-failing entries from the bump PR
-#     so one bad upstream can't block the rest.
-# See those files for details.
+# max-bumps caps the per-night work for cost control. Per-entry scans are
+# more expensive than a single batched scan, so the cap is conservative.
+# The composite action skips entries that already have an open bump PR, so
+# re-dispatches don't pile up duplicate work.

 on:
  schedule:
@@ -30,12 +29,12 @@ on:
      max_bumps:
        description: Cap on plugins bumped this run
        required: false
-        default: '130'
+        default: '30'

 permissions:
  contents: write
  pull-requests: write
-  actions: write  # gh workflow run scan-plugins.yml on the bump branch
+  actions: write  # gh workflow run {scan-plugins,check-mcp-urls,validate-plugins}.yml per bump branch

 concurrency:
  group: bump-plugin-shas
@@ -43,8 +42,8 @@ concurrency:
 jobs:
  bump:
    runs-on: ubuntu-latest
-    # Per-bump cost is ~2s (ls-remote + shallow clone + validate); 130 entries
-    # is ~5 min. The 60 min ceiling absorbs slow upstreams without letting a
+    # Per-bump cost is ~2s (ls-remote + shallow clone + validate); 30 entries
+    # is ~1-2 min. The 60 min ceiling absorbs slow upstreams without letting a
    # pathological run consume the default 360 min budget.
    timeout-minutes: 60
    steps:
@@ -52,18 +51,44 @@ jobs:

      # createCommitOnBranch-based bump so commits are signed by GitHub and
      # satisfy the org-level required_signatures ruleset on main.
-      - uses: anthropics/claude-plugins-community/.github/actions/bump-plugin-shas@c41c6911de0afffd2bc5cd8b21fb1e06444ee13b
+      - uses: anthropics/claude-plugins-community/.github/actions/bump-plugin-shas@e2019b2a01f11aa1484c53540b1cfab5eebbc299
        id: bump
        with:
          marketplace-path: .claude-plugin/marketplace.json
-          max-bumps: ${{ inputs.max_bumps || '130' }}
+          max-bumps: ${{ inputs.max_bumps || '30' }}
+          pr-mode: per-entry
          claude-cli-version: latest

-      # `bump/plugin-shas` is the action's default `pr-branch`. The scan diffs
-      # the branch against origin/main (the action's base-ref fallback when
-      # there's no pull_request event) and scans only the bumped entries.
-      - name: Dispatch policy scan on bump branch
-        if: steps.bump.outputs.pr-url != ''
+      # Per-entry fan-out: dispatch the three required checks against each bump
+      # branch. `pr-urls` is a JSON array of {name, old_sha, new_sha, branch,
+      # pr_url} entries emitted by the composite action when pr-mode is
+      # per-entry. All three (scan / check / validate) are required on main and
+      # none fire on the GITHUB_TOKEN-opened PR, so each must be dispatched.
+      # A single failed dispatch (transient API error / rate limit) must not
+      # strand the remaining branches, so we attempt every dispatch, then fail
+      # the step if any failed: a missing required check would otherwise leave
+      # its bump PR silently blocked behind a green run, and the composite
+      # action skips slugs with an open PR so it would never be retried.
+      - name: Dispatch required checks per per-entry PR
+        if: steps.bump.outputs.pr-urls != '' && steps.bump.outputs.pr-urls != '[]'
        env:
          GH_TOKEN: ${{ github.token }}
-        run: gh workflow run scan-plugins.yml --ref bump/plugin-shas
+          PR_URLS: ${{ steps.bump.outputs.pr-urls }}
+        run: |
+          set -euo pipefail
+          dispatch_failures="$(mktemp)"
+          jq -c '.[]' <<<"$PR_URLS" | while read -r entry; do
+            branch=$(jq -r '.branch' <<<"$entry")
+            name=$(jq -r '.name' <<<"$entry")
+            for wf in scan-plugins check-mcp-urls validate-plugins; do
+              echo "Dispatching ${wf}.yml against $branch ($name)"
+              if ! gh workflow run "${wf}.yml" --ref "$branch"; then
+                echo "::error::Failed to dispatch ${wf}.yml against $branch ($name) — required check will be missing; re-dispatch with: gh workflow run ${wf}.yml --ref $branch"
+                echo "${wf} ${branch}" >> "$dispatch_failures"
+              fi
+            done
+          done
+          if [ -s "$dispatch_failures" ]; then
+            echo "::error::$(wc -l < "$dispatch_failures" | tr -d ' ') required-check dispatch(es) failed; the affected bump PR(s) are blocked until re-dispatched (see annotations above)."
+            exit 1
+          fi
--- a/.github/workflows/validate-plugins.yml
+++ b/.github/workflows/validate-plugins.yml
@@ -12,6 +12,14 @@ on:
    branches: [main]
    paths:
      - '.claude-plugin/**'
+  # `validate` is a required status check on main. Bump PRs are opened with
+  # GITHUB_TOKEN, which doesn't fire on:pull_request (recursion guard), so the
+  # path-filtered trigger above never reports on them and the PR would be
+  # blocked forever. The bump workflow dispatches this against each per-entry
+  # bump branch instead; the check run lands on the branch HEAD (= PR head)
+  # and satisfies the required check. The validate job runs unconditionally,
+  # so a dispatch always reports.
+  workflow_dispatch:

 permissions:
  contents: read
--- a/plugins/code-modernization/.claude-plugin/plugin.json
+++ b/plugins/code-modernization/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
  "name": "code-modernization",
-  "description": "Modernize legacy codebases (COBOL, legacy Java/C++, monolith web apps) with a structured assess → map → extract-rules → brief → reimagine/transform → harden workflow and specialist review agents",
+  "description": "Modernize legacy codebases (COBOL, legacy Java/C++, monolith web apps) with a structured preflight / assess / map / extract-rules / brief / reimagine / transform / harden workflow, an interactive topology viewer, and specialist review agents",
  "author": {
    "name": "Anthropic",
    "email": "support@anthropic.com"
--- a/plugins/code-modernization/README.md
+++ b/plugins/code-modernization/README.md
@@ -7,7 +7,7 @@ A structured workflow and set of specialist agents for modernizing legacy codeba
 Legacy modernization fails most often not because the target technology is wrong, but because teams skip steps: they transform code before understanding it, reimagine architecture before extracting business rules, or ship without a harness that would catch behavior drift. This plugin enforces a sequence:

 ```
-assess → map → extract-rules → brief → reimagine | transform → harden
+preflight → assess → map → extract-rules → brief → reimagine | transform → harden
 ```

 The discovery commands (`assess`, `map`, `extract-rules`) build artifacts under `analysis/<system>/`. The `brief` command synthesizes them into an approval gate. The build commands (`reimagine`, `transform`) write new code under `modernized/`. The `harden` command audits the legacy system and produces a reviewable remediation patch. Each step has a dedicated slash command, and specialist agents (legacy analyst, business rules extractor, architecture critic, security auditor, test engineer) are invoked from within those commands — or directly — to keep the work honest.
@@ -20,25 +20,40 @@ Commands take a `<system-dir>` argument and assume the system being modernized l
 mkdir -p legacy && ln -s /path/to/your/legacy/codebase legacy/billing
 ```

-## Optional tooling
+## What to give Claude

-`/modernize-assess` works best with [`scc`](https://github.com/boyter/scc) (LOC + complexity + COCOMO) or [`cloc`](https://github.com/AlDanial/cloc), and falls back to `find`/`wc` if neither is installed. Portfolio mode also benefits from [`lizard`](https://github.com/terryyin/lizard) (cyclomatic complexity). The commands degrade gracefully without them, but the metrics will be coarser.
+The commands degrade gracefully, but each of these makes the output meaningfully better — run `/modernize-preflight <system-dir>` to check all of them at once and get a readiness report:
+
+- **Analysis tools**: [`scc`](https://github.com/boyter/scc) (LOC + complexity + COCOMO) or [`cloc`](https://github.com/AlDanial/cloc); [`lizard`](https://github.com/terryyin/lizard) for portfolio mode. Without them, metrics fall back to `find`/`wc` and get coarser.
+- **A working build toolchain** for the legacy stack (e.g. GnuCOBOL for COBOL) — required before `/modernize-transform` can prove behavioral equivalence, and verified by preflight with a real smoke compile against your code.
+- **The whole system in the tree**: deployment descriptors (JCL, CICS definitions, route configs), copybooks/includes, and DDL/schemas. Entry-point detection and data lineage in `/modernize-map` are guesswork without them.
+- **Production telemetry** (optional): an observability MCP server or batch job logs enable the runtime overlay in `/modernize-assess` and timing annotations on critical paths.
+
+## Secret handling
+
+Legacy systems routinely contain live credentials, and assessment artifacts get committed and shared. **Every agent in this plugin masks credential values** — findings, rule-card parameters, architecture notes, and test fixtures cite `file:line` with a masked preview (`AKIA****`), never the value. When credentials are found, a per-credential inventory (type, location, blast radius, rotation recommendation) is written to `analysis/<system>/SECRETS.local.md`, which the commands gitignore before writing; on non-git projects the quarantine file goes to `~/.modernize/<system>/` instead. `/modernize-harden` splits its remediation diff so credential-removal hunks (which necessarily contain the raw value) land in a gitignored `security_remediation.local.patch`, never the shareable patch. Pass `--show-secrets` to include raw values in the quarantine file (and only there). If you ran an earlier version of this plugin on a real system, check whether `analysis/` artifacts containing credentials were committed or shared, and rotate anything that was.

 ## Commands

 The commands are designed to be run in order, but each produces a standalone artifact so you can stop, review, and resume.

+### `/modernize-preflight <system-dir> [target-stack]`
+Environment readiness check, meant to run first: detects the legacy stack, checks analysis tooling, **smoke-compiles a real source file** with the legacy toolchain (the errors this surfaces — missing copybooks, wrong dialect flags — are the ones that otherwise appear mid-transform), inventories missing includes / deployment descriptors / binary-only artifacts, and probes for telemetry. Produces `analysis/<system>/PREFLIGHT.md` with a per-command Ready / Ready-with-gaps / Not-ready verdict.
+
 ### `/modernize-assess <system-dir>`  — or — `/modernize-assess --portfolio <parent-dir>`
 Inventory the legacy codebase: languages, line counts, complexity, build system, integrations, technical debt, security posture, documentation gaps, and a COCOMO-derived effort estimate. Produces `analysis/<system>/ASSESSMENT.md` and `analysis/<system>/ARCHITECTURE.mmd`. Spawns `legacy-analyst` (×2) and `security-auditor` in parallel for deep reads. With `--portfolio`, sweeps every subdirectory of a parent directory and writes a sequencing heat-map to `analysis/portfolio.html`.

 ### `/modernize-map <system-dir>`
-Build a dependency and topology map of the **legacy** system: program/module call graph, data lineage (programs ↔ data stores), entry points, dead-end candidates, and one traced critical-path business flow. Writes a re-runnable extraction script and produces `analysis/<system>/topology.json` (machine-readable), `analysis/<system>/TOPOLOGY.html` (rendered Mermaid + architect observations), and standalone `call-graph.mmd`, `data-lineage.mmd`, and `critical-path.mmd`.
+
+![Interactive topology map of AWS CardDemo — domains as containers, modules sized by lines of code, dependency edges colored by kind, entry points ringed](assets/topology-viewer-screenshot.jpg)
+
+Build a dependency and topology map of the **legacy** system: program/module call graph, data lineage (programs ↔ data stores), entry points, dead-end candidates, and 2–4 traced business flows each anchored to a persona (the claimant, the operator, the auditor — not the maintainer). Writes a re-runnable extraction script and produces `analysis/<system>/topology.json` plus `analysis/<system>/TOPOLOGY.html` — an **interactive zoomable map** (circle-pack of domains/modules sized by LOC, dependency edges with per-kind toggles, search, click-for-details sidebar, and a walkthrough mode that plays each persona flow as a numbered path with a plain-language narrative). Built from a template shipped with the plugin, so it works on systems far too dense for a static diagram. Small domain-level `call-graph.mmd`, `data-lineage.mmd`, and `critical-path.mmd` are still exported for docs and PRs.

 ### `/modernize-extract-rules <system-dir> [module-pattern]`
 Mine the business rules embedded in the legacy code — calculations, validations, eligibility, state transitions, policies — into Given/When/Then "Rule Cards" with `file:line` citations and confidence ratings. Spawns three `business-rules-extractor` agents in parallel (calculations, validations, lifecycle). Produces `analysis/<system>/BUSINESS_RULES.md` and `analysis/<system>/DATA_OBJECTS.md`.

 ### `/modernize-brief <system-dir> [target-stack]`
-Synthesize the discovery artifacts into a phased **Modernization Brief** — the single document a steering committee approves and engineering executes: target architecture, strangler-fig phase plan with entry/exit criteria, behavior contract, validation strategy, open questions, and an approval block. Reads `ASSESSMENT.md`, `TOPOLOGY.html`, and `BUSINESS_RULES.md` and **stops if any are missing** — run the discovery commands first. Produces `analysis/<system>/MODERNIZATION_BRIEF.md` and enters plan mode as a human-in-the-loop gate.
+Synthesize the discovery artifacts into a phased **Modernization Brief** — the single document a steering committee approves and engineering executes: target architecture, strangler-fig phase plan with entry/exit criteria, persona-based business walkthroughs (the section non-technical approvers actually read), behavior contract, validation strategy, open questions, and an approval block. Reads `ASSESSMENT.md`, `topology.json`, and `BUSINESS_RULES.md` and **stops if any are missing** — run the discovery commands first. Produces `analysis/<system>/MODERNIZATION_BRIEF.md` and enters plan mode as a human-in-the-loop gate.

 ### `/modernize-reimagine <system-dir> <target-vision>`
 Greenfield rebuild from extracted intent rather than a structural port. Mines a spec (`analysis/<system>/AI_NATIVE_SPEC.md`), designs a target architecture and has it adversarially reviewed (`analysis/<system>/REIMAGINED_ARCHITECTURE.md`), then **scaffolds services with executable acceptance tests** under `modernized/<system>-reimagined/` and writes a `CLAUDE.md` knowledge handoff for the new system. Two human-in-the-loop checkpoints. Spawns `business-rules-extractor`, `legacy-analyst` (×2), `architecture-critic`, and general-purpose scaffolding agents.
@@ -46,6 +61,9 @@ Greenfield rebuild from extracted intent rather than a structural port. Mines a
 ### `/modernize-transform <system-dir> <module> <target-stack>`
 Surgical, single-module strangler-fig rewrite. Plans first (HITL gate), then writes characterization tests via `test-engineer`, then an idiomatic target implementation under `modernized/<system>/<module>/`, proves equivalence by running the tests, and produces `TRANSFORMATION_NOTES.md` mapping legacy → modern with deliberate deviations called out. Reviewed by `architecture-critic`.

+### `/modernize-status <system-dir>`
+Read-only progress report: artifact inventory with timestamps per workflow stage, staleness flags (e.g. a brief older than the assessment it was built from), secrets-hygiene checks (quarantine file gitignored and never committed), and the single most useful next command. Run it anytime you come back to a modernization after a break.
+
 ### `/modernize-harden <system-dir>`
 Security hardening pass on the **legacy** system: OWASP/CWE scan, dependency CVEs, secrets, injection. Spawns `security-auditor`. Produces `analysis/<system>/SECURITY_FINDINGS.md` ranked Critical / High / Medium / Low and a reviewed `analysis/<system>/security_remediation.patch` with minimal fixes for the Critical/High findings. The patch is reviewed by a second `security-auditor` pass before you see it. **Never edits `legacy/`** — you review and apply the patch yourself when ready, then re-run to verify. Useful as a pre-modernization step when the legacy system will keep running in production during the migration.

@@ -81,17 +99,21 @@ This plugin ships commands and agents, but modernization projects benefit from a
      "Edit(modernized/**)"
    ],
    "deny": [
-      "Edit(legacy/**)"
+      "Edit(legacy/**)",
+      "Write(legacy/**)"
    ]
  }
 }
 ```

-Adjust `legacy/` and `modernized/` to match your actual layout. The key invariants: `Edit` under `legacy/` is denied, and writes are scoped to `analysis/` (for documents) and `modernized/` (for the new code). Every command in this plugin respects this — `/modernize-harden` writes a patch to `analysis/` rather than editing `legacy/` in place.
+Adjust `legacy/` and `modernized/` to match your actual layout. The key invariants: `Edit`/`Write` under `legacy/` are denied, and writes are scoped to `analysis/` (for documents) and `modernized/` (for the new code). Note this guards the file tools — shell commands that mutate files (`sed -i`, `git apply`) still go through the normal Bash permission prompt, so review those prompts with the same invariant in mind. Every command in this plugin respects this — `/modernize-harden` writes a patch to `analysis/` rather than editing `legacy/` in place.

 ## Typical Workflow

 ```bash
+# 0. Check the environment is ready (tools, toolchain, source completeness)
+/modernize-preflight billing
+
 # 1. Inventory the legacy system (or sweep a portfolio of them)
 /modernize-assess billing

@@ -112,6 +134,9 @@ Adjust `legacy/` and `modernized/` to match your actual layout. The key invarian

 # 6. Security-harden the legacy system that's still in production
 /modernize-harden billing
+
+# Anytime: where am I, what's stale, what's next
+/modernize-status billing
 ```

 ## License
--- a/plugins/code-modernization/agents/architecture-critic.md
+++ b/plugins/code-modernization/agents/architecture-critic.md
@@ -29,6 +29,12 @@ For **transformed code**:
 - Does the test suite actually pin behavior, or just exercise code paths?
 - What would the on-call engineer need at 3am that isn't here?

+## Secret handling (mandatory)
+
+When a finding quotes code containing a credential, key, token, or
+connection string, mask the value (`'Pr0d****'`) and cite `file:line` —
+findings get appended verbatim to committed notes files.
+
 ## Output

 Findings ranked **Blocker / High / Medium / Nit**. Each with: what, where,
--- a/plugins/code-modernization/agents/business-rules-extractor.md
+++ b/plugins/code-modernization/agents/business-rules-extractor.md
@@ -40,6 +40,15 @@ of the technology, skip it.
   from structure/names), **Low** (ambiguous; needs SME).
 6. If confidence < High, write the exact question an SME must answer.

+## Secret handling (mandatory)
+
+Rule parameters sometimes *are* credentials — hardcoded passwords in auth
+checks, API keys in partner-service calls, connection strings in batch
+routines. Record the **rule**, never the **value**: write the parameter as
+`<credential — masked, see file:line>` with at most a 2–4 character
+preview. Rule cards flow into briefs and steering decks; a raw credential
+in a parameter list is a leak.
+
 ## Output format

 One "Rule Card" per rule (see the format in the `/modernize-extract-rules`
--- a/plugins/code-modernization/agents/legacy-analyst.md
+++ b/plugins/code-modernization/agents/legacy-analyst.md
@@ -32,6 +32,15 @@ and explain it in terms a modern engineer can act on.
 - **Note what's missing.** Unhandled error paths, TODO comments, commented-out
  blocks, magic numbers — these are signals about history and risk.

+## Secret handling (mandatory)
+
+Legacy code is full of live credentials, and your findings get copied into
+shareable reports. When the evidence for a finding — hardcoded config,
+dead code, debt, an interface payload — includes a credential, API key,
+token, connection string, or private key, **never reproduce the value**.
+Cite `file:line` with a masked preview (`VALUE 'Pr0d****'`,
+`password=****`). The finding is the practice, not the value.
+
 ## Output format

 Default to structured markdown: tables for inventories, Mermaid for graphs,
--- a/plugins/code-modernization/agents/security-auditor.md
+++ b/plugins/code-modernization/agents/security-auditor.md
@@ -39,7 +39,30 @@ terminal/screen items don't apply to a SPA. Work through what's relevant:

 Use available SAST where it helps (npm audit, pip-audit, grep for known-bad
 patterns) but **read the code** — tools miss logic flaws. Show tool output
-verbatim, then add your manual findings.
+verbatim — except secret values, which you redact (see below) — then add
+your manual findings.
+
+## Secret handling (mandatory)
+
+Legacy codebases routinely contain live production credentials, and your
+findings get pasted into decks, tickets, and committed markdown. Copying a
+secret into a report multiplies the exposure you were hired to find.
+
+When you discover a hardcoded credential, API key, token, connection
+string, or private key:
+
+- **Never write the secret's value into any output** — no finding table,
+  no report, no quoted code excerpt, no echoed tool output. Mask it to the
+  first 2–4 identifying characters plus `****` (`AKIA****`,
+  `postgres://app_user:****@db-prod…`). If a scanner prints a secret,
+  redact it before including the excerpt.
+- Cite `file:line`. The source file is the canonical location — anyone who
+  legitimately needs the value can open it there.
+- State what the credential appears to grant access to (database, queue,
+  cloud account, third-party API) and whether it looks like a production
+  or test credential.
+- Recommend rotation for anything that looks live — exposure in source
+  means it is already compromised, independent of any modernization plan.

 ## Reporting standard

--- a/plugins/code-modernization/agents/test-engineer.md
+++ b/plugins/code-modernization/agents/test-engineer.md
@@ -28,6 +28,15 @@ someone thinks it should do) so that a rewrite can be proven equivalent.
  `@Disabled("pending RULE-NNN")` / `@pytest.mark.skip` / `it.todo()` — never
  deleted.

+## Secret handling (mandatory)
+
+Never copy credential-like literals — passwords, API keys, tokens,
+connection strings — from legacy code into test fixtures. Tests live in
+the deliverable codebase and get committed. Substitute clearly-fake values
+of the same shape and length and note the substitution in a comment.
+Anything a test genuinely needs live (e.g. a real database connection for
+a dual-run harness) is read from an environment variable, never inlined.
+
 ## Output

 Idiomatic tests for the requested target stack (JUnit 5 / pytest / Vitest /
--- a/plugins/code-modernization/assets/topology-viewer-screenshot.jpg
+++ b/plugins/code-modernization/assets/topology-viewer-screenshot.jpg
--- a/plugins/code-modernization/assets/topology-viewer.html
+++ b/plugins/code-modernization/assets/topology-viewer.html
--- a/plugins/code-modernization/commands/modernize-assess.md
+++ b/plugins/code-modernization/commands/modernize-assess.md
@@ -1,11 +1,13 @@
 ---
 description: Full discovery & portfolio analysis of a legacy system — inventory, complexity, debt, effort estimation
-argument-hint: <system-dir> | --portfolio <parent-dir>
+argument-hint: <system-dir> [--show-secrets] | --portfolio <parent-dir>
 ---

 **Mode select.** If `$ARGUMENTS` starts with `--portfolio`, run **Portfolio
 mode** against the directory that follows. Otherwise run **Single-system
-mode** against `legacy/$1`.
+mode** against the system dir. Parse flags position-independently:
+`--show-secrets` may appear before or after the system dir — the system
+dir is the first non-flag token.

 ---

@@ -108,12 +110,16 @@ Spawn three subagents **in parallel**:
 2. **legacy-analyst** — "Identify technical debt in legacy/$1: dead code,
   deprecated APIs, copy-paste duplication, god objects/programs, missing
   error handling, hardcoded config. Return the top 10 findings ranked by
-   remediation value, each with file:line evidence."
+   remediation value, each with file:line evidence. If evidence contains a
+   credential value, mask it per your secret-handling rules — never quote
+   it."

 3. **security-auditor** — "Scan legacy/$1 for security vulnerabilities:
   injection, auth weaknesses, hardcoded secrets, vulnerable dependencies,
   missing input validation. Return findings in CWE-tagged table form with
-   file:line evidence and severity."
+   file:line evidence and severity. Mask every discovered credential value
+   per your secret-handling rules — file:line plus a 2–4 character masked
+   preview, never the value itself."

 Wait for all three. Synthesize their findings.

@@ -141,6 +147,31 @@ need explained.

 ## Step 6 — Write the assessment

+**Secrets quarantine first.** The assessment gets shared and committed —
+discovered credential values must never appear in it. If the
+security-auditor found any hardcoded credentials:
+
+1. Ensure `analysis/.gitignore` exists and contains the lines
+   `SECRETS.local.md` and `*.local.patch` (create or append as needed —
+   the patch pattern is used by `/modernize-harden`; writing both now
+   means the ignore set is complete from first contact). If the project is a
+   git repo, verify with `git check-ignore -q analysis/$1/SECRETS.local.md`
+   — do not write any findings until the check passes. If there is **no
+   git repo** (check for `.svn`/`.hg`/`CVS` too — a `.gitignore` protects
+   nothing under another VCS): refuse `--show-secrets` and write
+   `SECRETS.local.md` to `~/.modernize/$1/` instead of the project tree,
+   telling the user where it went and why.
+2. Write `SECRETS.local.md`: one row per credential — masked preview,
+   `file:line`, credential type, what it grants access to,
+   production/test guess, rotation recommendation. Only if the user passed
+   `--show-secrets`, add the raw value column here — this file only, never
+   ASSESSMENT.md.
+3. Masking applies to **every section of ASSESSMENT.md**, whichever agent
+   produced the finding — the Technical Debt section quotes hardcoded
+   config; those quotes follow the same masking rule as Security Findings.
+   The Security Findings section adds a one-line pointer:
+   "Credential inventory in SECRETS.local.md (gitignored; not for sharing)."
+
 Create `analysis/$1/ASSESSMENT.md` with these sections:
 - **Executive Summary** (3-4 sentences: what it is, how big, how risky, headline recommendation)
 - **System Inventory** (the scc table + tech fingerprint)
--- a/plugins/code-modernization/commands/modernize-brief.md
+++ b/plugins/code-modernization/commands/modernize-brief.md
@@ -8,10 +8,19 @@ single document a steering committee approves and engineering executes.

 Target stack: `$2` (if blank, recommend one based on the assessment findings).

-Read `analysis/$1/ASSESSMENT.md`, `analysis/$1/TOPOLOGY.html` (and the `.mmd`
-files alongside it), and `analysis/$1/BUSINESS_RULES.md` first. If any are
-missing, say so and stop — they come from `/modernize-assess`, `/modernize-map`,
-and `/modernize-extract-rules` respectively. Run those first.
+Read `analysis/$1/ASSESSMENT.md`, `analysis/$1/topology.json` (plus the
+`.mmd` files alongside it — do NOT read `TOPOLOGY.html`, it's an
+interactive viewer with the data minified inside), and
+`analysis/$1/BUSINESS_RULES.md` first. If any are missing, say so and
+stop — they come from `/modernize-assess`, `/modernize-map`, and
+`/modernize-extract-rules` respectively. Run those first.
+
+**Staleness check:** compare modification times. If any input is newer
+than an existing `MODERNIZATION_BRIEF.md`, the brief is being justifiably
+regenerated; but if an existing brief is newer than all inputs and the
+user re-ran this command anyway, ask what changed. Either way, note the
+input timestamps in the brief's header so reviewers can see what it was
+built from.

 ## The Brief

@@ -31,28 +40,38 @@ fewest-dependencies first. For each phase:
 - Scope (which legacy modules, which target services)
 - Entry criteria (what must be true to start)
 - Exit criteria (what tests/metrics prove it's done)
- Estimated effort (person-weeks, derived from COCOMO + complexity data)
+- Estimated effort (person-months, same unit as the assessment's COCOMO
+  figure — convert deliberately if you present weeks)
 - Risk level + top 2 risks + mitigation

 Render the phases as a Mermaid `gantt` chart.

-### 4. Behavior Contract
+### 4. Business Walkthroughs
+For each persona flow in `analysis/$1/topology.json` (`flows` — produced
+by `/modernize-map`), a short narrative table: persona, what happens in
+business language, which legacy modules implement it today, and which
+phase from §3 replaces each. This is the section non-technical approvers
+actually read — it connects "Phase 2" to "what happens when a customer
+files a claim". If topology.json has no flows, derive 2–3 walkthroughs
+from the entry points and say they need SME confirmation.
+
+### 5. Behavior Contract
 List the **P0 rules** from BUSINESS_RULES.md (the ones tagged `Priority: P0` —
 money, regulatory, data integrity) that MUST be proven equivalent before any
 phase ships. These become the regression suite. Flag any P0 rule with
 Confidence < High as a blocker requiring SME confirmation before its phase
 starts.

-### 5. Validation Strategy
+### 6. Validation Strategy
 State which combination applies: characterization tests, contract tests,
 parallel-run / dual-execution diff, property-based tests, manual UAT.
 Justify per phase.

-### 6. Open Questions
+### 7. Open Questions
 Anything requiring human/SME decision before Phase 1 starts. Each as a
 checkbox the approver must tick.

-### 7. Approval Block
+### 8. Approval Block
 ```
 Approved by: ________________  Date: __________
 Approval covers: Phase 1 only | Full plan
@@ -60,6 +79,7 @@ Approval covers: Phase 1 only | Full plan

 ## Present

-Enter **plan mode** and present a summary of the brief. Do NOT proceed to any
-transformation until the user explicitly approves. This gate is the
-human-in-the-loop control point.
+Present a summary of the brief and **stop — write nothing further until
+the user explicitly approves** (use plan mode if the session supports
+it). This gate is the human-in-the-loop control point; "no objection" is
+not approval.
--- a/plugins/code-modernization/commands/modernize-extract-rules.md
+++ b/plugins/code-modernization/commands/modernize-extract-rules.md
@@ -46,7 +46,7 @@ Merge the three result sets. Deduplicate. For each distinct rule, write a
  When  <trigger>
  Then  <outcome>
  [And  <additional outcome>]
-**Parameters:** <constants, rates, thresholds with their current values>
+**Parameters:** <constants, rates, thresholds with their current values — credentials masked: `<credential — masked, see file:line>`>
 **Edge cases handled:** <list>
 **Suspected defect:** <optional — legacy behavior that looks wrong; decide preserve-vs-fix during transform>
 **Confidence:** High | Medium | Low — <why; if < High, state the exact SME question>
--- a/plugins/code-modernization/commands/modernize-harden.md
+++ b/plugins/code-modernization/commands/modernize-harden.md
@@ -1,14 +1,42 @@
 ---
 description: Security vulnerability scan with a reviewable remediation patch — OWASP, CWE, CVE, secrets, injection
-argument-hint: <system-dir>
+argument-hint: <system-dir> [--show-secrets]
 ---

-Run a **security hardening pass** on `legacy/$1`: find vulnerabilities, rank
-them, and produce a reviewable patch for the critical ones.
+Run a **security hardening pass** on the legacy system: find
+vulnerabilities, rank them, and produce a reviewable patch for the
+critical ones. Parse arguments flag-independently: the system dir
+(referred to as `$1` below) is the first non-flag token in `$ARGUMENTS`;
+`--show-secrets` may appear anywhere.

 This command never edits `legacy/` — it writes findings and a proposed patch
 to `analysis/$1/`. The user reviews and applies (or not).

+## Step 0 — Secrets quarantine setup
+
+Findings files get shared, committed, and pasted into decks — discovered
+credential values must never land in them. Before any scanning:
+
+1. Ensure `analysis/.gitignore` exists and contains the lines
+   `SECRETS.local.md` and `*.local.patch`. Create the file or append the
+   missing lines.
+2. If the project is a git repo, verify with
+   `git check-ignore -q analysis/$1/SECRETS.local.md` — if that exits
+   non-zero, fix the ignore rule before proceeding. Do not write any
+   findings until this check passes.
+3. **If there is no git repo** (check for `.svn`/`.hg`/`CVS` too — a
+   `.gitignore` protects nothing under another VCS): refuse
+   `--show-secrets`, and write `SECRETS.local.md` and any `.local.patch`
+   file to `~/.modernize/$1/` instead of the project tree, telling the
+   user where they went and why.
+
+All secret values in every shareable artifact this command produces are
+**masked** (`AKIA****`, `password=****`) and cited by `file:line`. Raw
+values may appear in exactly two places, both gitignored: the
+`*.local.patch` remediation hunks (unavoidably — see Remediate) and, only
+with `--show-secrets`, `SECRETS.local.md`. Never in SECURITY_FINDINGS.md
+or patch commentary.
+
 ## Scan

 Spawn the **security-auditor** subagent:
@@ -20,7 +48,9 @@ hardcoded secrets, vulnerable dependency versions, missing input validation,
 path traversal. For each finding return: CWE ID, severity
 (Critical/High/Med/Low), file:line, one-sentence exploit scenario, and
 recommended fix. Run any available SAST tooling (npm audit, pip-audit,
-OWASP dependency-check) and include its raw output."
+OWASP dependency-check) and include its raw output. Mask every discovered
+credential value per your secret-handling rules — file:line plus a 2–4
+character masked preview, never the value itself."

 ## Triage

@@ -29,26 +59,50 @@ Write `analysis/$1/SECURITY_FINDINGS.md`:
 - Findings table sorted by severity
 - Dependency CVE table (package, installed version, CVE, fixed version)

+If any hardcoded credentials were found, also write
+`analysis/$1/SECRETS.local.md` (the gitignored quarantine file from Step 0;
+under `~/.modernize/$1/` instead when Step 0 found no git repo):
+one row per credential — masked preview, `file:line`, credential type, what
+it appears to grant access to, production/test guess, and a rotation
+recommendation. With `--show-secrets`, append the raw value column here —
+this file only. SECURITY_FINDINGS.md gets a one-line pointer:
+"N hardcoded credentials found — inventory in SECRETS.local.md (gitignored;
+not for sharing)."
+
 ## Remediate

 For each **Critical** and **High** finding, draft a minimal, targeted fix.
-Do **not** edit `legacy/` — write all fixes as a single unified diff to
-`analysis/$1/security_remediation.patch`, with a comment line above each
-hunk citing the finding ID it addresses (`# SEC-001: parameterize the query`).
+Do **not** edit `legacy/` — write fixes as unified diffs with **paths
+relative to the project root** (`legacy/$1/...`), applied from the project
+root, with a comment line above each hunk citing the finding ID it
+addresses (`# SEC-001: parameterize the query`).
+
+**Credential findings split into two files.** A diff that removes a
+hardcoded secret necessarily contains the raw value on its `-` and
+context lines — that cannot go in the shareable patch:
+
+- `analysis/$1/security_remediation.patch` (shareable) — every
+  non-credential hunk, plus for each credential finding a comment-only
+  placeholder: `# SEC-NNN: credential remediation — hunk in
+  security_remediation.local.patch (gitignored; not for sharing)`.
+- `analysis/$1/security_remediation.local.patch` (gitignored in Step 0) —
+  the real, applyable hunks for credential findings only.

 Add a **Remediation Log** section to SECURITY_FINDINGS.md mapping each
-finding ID → one-line summary of the proposed fix and the patch hunk that
-implements it.
+finding ID → one-line summary of the proposed fix and which patch file
+carries the hunk.

 ## Verify

-Spawn the **security-auditor** again to **review the patch** against the
-original code:
+Spawn the **security-auditor** again to **review both patches** against
+the original code:

-"Review analysis/$1/security_remediation.patch against legacy/$1. For each
+"Review analysis/$1/security_remediation.patch and
+analysis/$1/security_remediation.local.patch against legacy/$1. For each
 hunk: does it fully remediate the cited finding? Does it introduce new
-vulnerabilities or change behavior beyond the fix? Return one verdict per
-hunk: RESOLVES / PARTIAL / INTRODUCES-RISK, with a one-line reason."
+vulnerabilities or change behavior beyond the fix? Confirm no raw
+credential values appear anywhere in the shareable patch. Return one
+verdict per hunk: RESOLVES / PARTIAL / INTRODUCES-RISK, with a one-line
+reason."

 Add a **Patch Review** section to SECURITY_FINDINGS.md with the verdicts.
 If any hunk is PARTIAL or INTRODUCES-RISK, revise the patch and re-review.
@@ -57,8 +111,12 @@ If any hunk is PARTIAL or INTRODUCES-RISK, revise the patch and re-review.

 Tell the user the artifacts are ready:
 - `analysis/$1/SECURITY_FINDINGS.md` — findings, remediation log, patch review
- `analysis/$1/security_remediation.patch` — review, then apply if appropriate
-  with `git -C legacy/$1 apply ../../analysis/$1/security_remediation.patch`
+- `analysis/$1/security_remediation.patch` — review, then apply **from the
+  project root**: `git apply analysis/$1/security_remediation.patch`
+  (if `legacy/$1` is a symlink, use `git apply --unsafe-paths` or apply
+  with `patch -p0` from the project root)
+- `analysis/$1/security_remediation.local.patch` — the credential fixes;
+  apply the same way, and rotate the affected credentials regardless
 - Re-run `/modernize-harden $1` after applying to confirm resolution

 Suggest: `glow -p analysis/$1/SECURITY_FINDINGS.md`
--- a/plugins/code-modernization/commands/modernize-map.md
+++ b/plugins/code-modernization/commands/modernize-map.md
@@ -55,50 +55,124 @@ re-run and audited. Have it write a machine-readable
 `analysis/$1/topology.json` and print a human summary. Run it; show the
 summary (cap at ~200 lines for very large estates).

-## Render
+`topology.json` must follow this schema — it feeds the interactive viewer:

-From the extracted data, generate **three Mermaid diagrams** and write them
-to `analysis/$1/TOPOLOGY.html` as a self-contained page that renders in any
-browser.
-
-The HTML page must use: dark `#1e1e1e` background, `#d4d4d4` text,
-`#cc785c` for `<h2>`/accents, `system-ui` font, all CSS **inline** (no
-external stylesheets). Load Mermaid from a CDN in `<head>`:
-
-```html
-<script type="module">
-  import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
-  mermaid.initialize({ startOnLoad: true, theme: 'dark' });
-</script>
+```json
+{
+  "system": "<display name>",
+  "root": {
+    "id": "sys", "name": "<system>", "kind": "system",
+    "children": [
+      { "id": "dom:<domain>", "name": "<Domain>", "kind": "domain",
+        "children": [
+          { "id": "<MODULE>", "name": "<MODULE>", "kind": "module",
+            "language": "cobol", "loc": 1234, "file": "src/MODULE.cbl" }
+        ] },
+      { "id": "dom:data", "name": "Data stores", "kind": "domain",
+        "children": [
+          { "id": "ds:<NAME>", "name": "<NAME>", "kind": "datastore" }
+        ] }
+    ]
+  },
+  "edges": [
+    { "source": "<id>", "target": "<id>", "kind": "call" }
+  ],
+  "entryPoints": ["<id>", "..."],
+  "deadEnds": ["<id>", "..."],
+  "observations": ["<architect observation>", "..."],
+  "flows": [
+    { "name": "<business flow>", "persona": "<who experiences it>",
+      "description": "<one sentence, plain language>",
+      "steps": [
+        { "label": "<business-language step>", "nodes": ["<id>", "<id>"] }
+      ] }
+  ]
+}
 ```

-Each diagram goes in a `<pre class="mermaid">...</pre>` block. Do **not**
-wrap diagrams in markdown ` ``` ` fences inside the HTML.
+- Group leaf modules under `domain` containers (use the domains from
+  `/modernize-assess` if available). Leaf kinds: `module`, `datastore`,
+  `job`, `screen`. `loc` drives circle size — include it for modules.
+- Edge kinds: `call` (direct), `dispatch` (dynamic/router), `read`,
+  `write`. Every edge endpoint must be a leaf id that exists in the tree.
+- `deadEnds`: the dead-end candidates from the extraction, rendered with
+  a dashed outline in the viewer. Apply the suppression rules above —
+  anything that could be the target of an unresolved dynamic call does
+  NOT belong here; record that uncertainty in `observations` instead.
+- **Datastore ids and names must be logical identifiers** — DD name,
+  dataset name, table/schema name, at most host:port. If the resolved
+  config value is a URL or DSN, strip userinfo and credential query
+  params before it goes anywhere in topology.json: the file gets
+  committed and the viewer displays names verbatim. Never copy raw
+  config values into `observations`.
+- `observations`: 3–7 architect observations — tight coupling clusters,
+  single points of failure, service-extraction candidates, data stores
+  with too many writers, dispatch targets the extraction could not
+  resolve.
+- `flows` is the **persona walkthrough** section — see below.

-1. **`graph TD` — Module call graph.** Cluster by domain (use `subgraph`).
-   Highlight entry points in a distinct style. Cap at ~40 nodes — if larger,
-   show domain-level with one expanded domain.
+## Persona flows

-2. **`graph LR` — Data lineage.** Programs → data stores.
-   Mark read vs write edges.
+Trace **2–4 end-to-end business flows**, each anchored to a persona —
+the people who experience the system, not the people who maintain it
+(e.g. for a benefits system: the claimant, the caseworker, the auditor;
+for billing: the customer, the billing operator). For each flow:

-3. **`flowchart TD` — Critical path.** Trace ONE end-to-end business flow
-   (e.g., "monthly billing run" or "process payment") through every program
-   and data store it touches, in execution order. If production telemetry is
-   available (see `/modernize-assess` Step 4), annotate each step with its
-   p50/p99 wall-clock.
+- `name` + one-sentence `description` in plain business language —
+  something a steering committee member relates to ("a claimant files a
+  weekly claim"), not a data-flow label ("CLM batch ingest").
+- `steps`: 3–8 steps, each with a business-language `label` and the
+  `nodes` (programs + data stores) that implement that step, in
+  execution order.

-Also export the three diagrams as standalone `.mmd` files for re-use:
-`analysis/$1/call-graph.mmd`, `analysis/$1/data-lineage.mmd`,
-`analysis/$1/critical-path.mmd`.
+This is the bridge between the technical map and non-technical
+stakeholders: the same diagram answers "which program does X" for
+engineers and "what happens when someone files a claim" for everyone else.

-## Annotate
+## Render

-Below each `<pre class="mermaid">` block in TOPOLOGY.html, add a `<ul>`
-with 3-5 **architect observations**: tight coupling clusters, single
-points of failure, candidates for service extraction, data stores
-touched by too many writers.
+`analysis/$1/TOPOLOGY.html` is an **interactive map**: a zoomable
+circle-pack of the whole system (domains as containers, modules sized by
+LOC) with dependency edges, search, per-node detail sidebar, edge-kind
+toggles, and a flow-walkthrough mode that plays each persona flow as a
+numbered path. Build it from the template that ships with this plugin —
+do not hand-write the viewer:
+
+```bash
+python3 - "${CLAUDE_PLUGIN_ROOT}/assets/topology-viewer.html" analysis/$1 <<'EOF'
+import json, sys
+tpl_path, out_dir = sys.argv[1], sys.argv[2]
+tpl = open(tpl_path).read()
+marker = "/*__TOPOLOGY_DATA__*/ null"
+assert marker in tpl, f"injection marker not found in {tpl_path}"
+data = json.dumps(json.load(open(f"{out_dir}/topology.json")))
+open(f"{out_dir}/TOPOLOGY.html", "w").write(
+    tpl.replace(marker, "/*__TOPOLOGY_DATA__*/ " + data))
+print(f"wrote {out_dir}/TOPOLOGY.html")
+EOF
+```
+
+The viewer is fully self-contained (the d3 subset it needs is inlined in
+the template) — it works offline and on air-gapped networks. If the
+`python3` invocation fails to find the template,
+`${CLAUDE_PLUGIN_ROOT}` was not substituted — report that rather than
+hand-writing a viewer.
+
+Mermaid stays for **small, exportable** diagrams. Generate standalone
+`.mmd` files for reuse in docs and PRs — but keep each under ~40 edges;
+collapse to domain level if the full graph is bigger (dense Mermaid
+becomes unreadable, which is exactly what the interactive map is for):
+
+- `analysis/$1/call-graph.mmd` — domain-level `graph TD`, entry points
+  highlighted
+- `analysis/$1/data-lineage.mmd` — `graph LR`, programs → data stores,
+  read vs write marked
+- `analysis/$1/critical-path.mmd` — `flowchart TD` of the primary flow
+  from `flows`, annotated with p50/p99 wall-clock if telemetry is
+  available (see `/modernize-assess` Step 4)

 ## Present

-Tell the user to open `analysis/$1/TOPOLOGY.html` in a browser.
+Tell the user to open `analysis/$1/TOPOLOGY.html` in a browser, and to
+try: search for a module, click it to see its connections, and pick a
+persona flow from the walkthrough dropdown.
--- a/plugins/code-modernization/commands/modernize-preflight.md
+++ b/plugins/code-modernization/commands/modernize-preflight.md
@@ -0,0 +1,98 @@
+---
+description: Environment readiness check — analysis tools, build toolchain, source completeness, telemetry access
+argument-hint: <system-dir> [target-stack]
+---
+
+Check whether this environment is ready to analyze — and eventually
+transform — `legacy/$1`, and tell the user exactly what to fix before the
+other commands run into it. Modernization sessions fail late and
+confusingly when this isn't done: assessment metrics silently degrade
+without analysis tools, characterization tests can't run without a build
+toolchain, and dependency maps come out wrong when half the source isn't
+in the tree.
+
+Run every check even when an early one fails — the point is one complete
+readiness report, not the first error.
+
+## Check 1 — Detect the stack
+
+Fingerprint `legacy/$1` from file extensions and manifests: languages,
+build system, deployment/config descriptors. This drives which checks
+below apply. Report what was detected and the rough file split.
+
+## Check 2 — Analysis tooling
+
+For each, check availability (`command -v`) and report version, what it's
+used for, and what degrades without it:
+
+| Tool | Used by | Without it |
+|---|---|---|
+| `scc` (or `cloc`) | assess | LOC/complexity fall back to `find`+`wc`; COCOMO estimate gets coarser |
+| `lizard` | assess --portfolio | complexity estimated from decision-keyword counts |
+| `glow` | all | markdown artifacts render as plain text |
+| `delta` | transform | side-by-side diffs fall back to `diff -y` |
+
+Include the platform's install one-liner for anything missing
+(`brew install scc`, `apt install cloc`, `pip install lizard`, …).
+
+## Check 3 — Build toolchain (smoke test, not just presence)
+
+Identify the compiler/interpreter for the detected legacy stack — e.g.
+GnuCOBOL (`cobc`) for COBOL, JDK + Maven/Gradle for Java, `cc`/`make` for
+C, `dotnet` for .NET. Then **prove it works on this codebase**: pick one
+representative source file and run a syntax-only compile
+(`cobc -fsyntax-only`, `javac`, `gcc -fsyntax-only`, …).
+
+A failed smoke test is the most valuable output of this command — report
+the actual error and diagnose it: missing copybook/include path, missing
+dialect flag (`-std=ibm` etc.), fixed vs free format, missing dependency
+jar. These are the errors that otherwise surface mid-`/modernize-transform`
+with much less context.
+
+If the user passed a `[target-stack]`, do the same for it: runtime,
+package manager, test framework (`mvn -v`, `npm -v`, `pytest --version`, …).
+
+## Check 4 — Source completeness
+
+The dependency map is only as good as what's in the tree. Check for the
+detected stack's equivalents of:
+
+- **Referenced-but-missing includes** — copybooks (`COPY X` with no
+  `X.cpy`), headers, imports that resolve nowhere. Count and list the top
+  missing names.
+- **Deployment/config descriptors** — JCL for batch COBOL, CICS CSD
+  definitions, `web.xml`/route configs, cron/scheduler definitions.
+  Without these, entry-point detection and the code↔storage join in
+  `/modernize-map` are guesswork.
+- **Data definitions** — DDL, schemas, copybook record layouts, ORM
+  mappings.
+- **Binary-only artifacts** — load modules, jars, DLLs with no matching
+  source. These become unmappable black boxes; flag them now.
+
+## Check 5 — Optional context
+
+- **Production telemetry** — is an observability/APM MCP server connected,
+  or are batch job logs / runtime exports available? (Enables the runtime
+  overlay in `/modernize-assess` Step 4 and timing annotations in
+  `/modernize-map`.)
+- **Version control history** — is `legacy/$1` under git with meaningful
+  history? (Change-frequency data sharpens risk ranking.)
+
+## Report
+
+Write `analysis/$1/PREFLIGHT.md`: a status table — one row per check,
+status ✅ / ⚠️ / ❌, what was found, and the fix for anything not green —
+followed by a **Ready / Ready-with-gaps / Not ready** verdict per command:
+
+- `assess` + `map` + `extract-rules` — need Checks 1–2 at ✅ or ⚠️ (no ❌)
+  and Check 4's missing-include count low
+- `brief` — needs only the three discovery artifacts; no tooling
+- `transform` + `reimagine` — additionally need Check 3 green for the
+  **target** stack. A red legacy toolchain downgrades these to
+  Ready-with-gaps, not Not-ready: equivalence testing falls back to
+  recorded traces / golden-master fixtures instead of dual execution
+  (common and expected for CICS/IMS code that has no local runtime)
+- `harden` — needs Check 2 plus any stack-specific SAST tooling found
+
+Print the table in the session too, and end with the single most
+important fix if anything is red.
--- a/plugins/code-modernization/commands/modernize-reimagine.md
+++ b/plugins/code-modernization/commands/modernize-reimagine.md
@@ -3,7 +3,11 @@ description: Multi-agent greenfield rebuild — extract specs from legacy, desig
 argument-hint: <system-dir> <target-vision>
 ---

-**Reimagine** `legacy/$1` as: $2
+The first token of `$ARGUMENTS` is the system dir (`$1`); **everything
+after it is the target vision** — it is usually multiple words, so do not
+truncate it to one token. Below, `<vision>` means that full remainder.
+
+**Reimagine** `legacy/$1` as: <vision>

 This is not a port — it's a rebuild from extracted intent. The legacy system
 becomes the *specification source*, not the structural template. This command
@@ -19,7 +23,8 @@ Spawn concurrently and show the user that all three are running:
 2. **legacy-analyst** — "Catalog every external interface of legacy/$1:
   inbound (screens, APIs, batch triggers, queues) and outbound (reports,
   files, downstream calls, DB writes). For each: name, direction, payload
-   shape, frequency/SLA if discernible."
+   shape, frequency/SLA if discernible. Mask any credential embedded in
+   endpoints or payload examples per your secret-handling rules."

 3. **legacy-analyst** — "Identify the core domain entities in legacy/$1 and
   their relationships. Return as an entity list + Mermaid erDiagram."
@@ -32,6 +37,9 @@ Collect results. Write `analysis/$1/AI_NATIVE_SPEC.md` containing:
 - **Non-functional requirements** inferred from legacy (batch windows, volumes)
 - **Behavior Contract** (the Given/When/Then rules — these are the acceptance tests)

+Credential values are masked everywhere in the spec; connection details
+appear as env-var placeholders (`${DATABASE_URL}`), never literals.
+
 ## Phase B — HITL checkpoint #1

 Present the spec summary. Ask the user **one focused question**: "Which of
@@ -40,20 +48,21 @@ should deliberately drop?" Wait for the answer. Record it in the spec.

 ## Phase C — Architecture (single agent, then critique)

-Design the target architecture for "$2":
+Design the target architecture for "<vision>":
 - Mermaid C4 Container diagram
 - Service boundaries with rationale (which rules/entities live where)
 - Technology choices with one-line justification each
 - Data migration approach from legacy stores

 Then spawn **architecture-critic**: "Review this proposed architecture for
-$2 against the spec in analysis/$1/AI_NATIVE_SPEC.md. Identify over-engineering,
+<vision> against the spec in analysis/$1/AI_NATIVE_SPEC.md. Identify over-engineering,
 missed requirements, scaling risks, and simpler alternatives." Incorporate
 the critique. Write the result to `analysis/$1/REIMAGINED_ARCHITECTURE.md`.

 ## Phase D — HITL checkpoint #2

-Enter plan mode. Present the architecture. Wait for approval.
+Present the architecture and **stop — scaffold nothing until the user
+explicitly approves** (use plan mode if the session supports it).

 ## Phase E — Parallel scaffolding

@@ -65,7 +74,9 @@ in parallel**:
 and AI_NATIVE_SPEC.md. Create: project skeleton, domain model, API stubs
 matching the interface contracts, and **executable acceptance tests** for every
 behavior-contract rule assigned to this service (mark unimplemented ones as
-expected-failure/skip with the rule ID). Write to modernized/$1-reimagined/<service-name>/."
+expected-failure/skip with the rule ID). No credential literal from legacy
+code becomes a test fixture or config default — use fake same-shape values
+and env-var placeholders. Write to modernized/$1-reimagined/<service-name>/."

 Show the agents' progress. When all complete, run the acceptance test suites
 and report: total tests, passing (scaffolded behavior), pending (rule IDs
@@ -77,7 +88,9 @@ Write `modernized/$1-reimagined/CLAUDE.md` — the persistent context file for
 the new system, containing: architecture summary, service responsibilities,
 where the spec lives, how to run tests, and the legacy→modern traceability
 map. This file IS the knowledge graph that future agents and engineers will
-load.
+load — and it gets committed: connection details and credentials appear
+only as env-var names with a pointer to where they're provisioned, never
+as values.

 Report: services scaffolded, acceptance tests defined, % behaviors with a
 home, location of all artifacts.
--- a/plugins/code-modernization/commands/modernize-status.md
+++ b/plugins/code-modernization/commands/modernize-status.md
@@ -0,0 +1,54 @@
+---
+description: Where am I in the modernization workflow — artifact inventory, staleness, secrets hygiene, next step
+argument-hint: <system-dir>
+---
+
+Report where the modernization of `$1` stands, in one screen. This is a
+read-only command — inspect, never modify.
+
+## 1 — Artifact inventory
+
+Check `analysis/$1/` and `modernized/$1*/` and build a table — one row per
+workflow stage, with the artifact's presence and modification time:
+
+| Stage | Artifacts |
+|---|---|
+| preflight | `PREFLIGHT.md` |
+| assess | `ASSESSMENT.md`, `ARCHITECTURE.mmd` |
+| map | `topology.json`, `TOPOLOGY.html`, `*.mmd`, `extract_topology.*` |
+| extract-rules | `BUSINESS_RULES.md`, `DATA_OBJECTS.md` |
+| brief | `MODERNIZATION_BRIEF.md` (note whether the approval block is signed) |
+| harden | `SECURITY_FINDINGS.md`, `security_remediation.patch` |
+| transform / reimagine | each `modernized/$1*/<module>/` dir — note test presence and whether `TRANSFORMATION_NOTES.md` exists |
+
+## 2 — Staleness
+
+Flag any artifact older than an upstream artifact it derives from:
+
+- `MODERNIZATION_BRIEF.md` older than `ASSESSMENT.md`, `topology.json`,
+  or `BUSINESS_RULES.md` → the brief no longer reflects discovery;
+  recommend re-running `/modernize-brief`.
+- `TOPOLOGY.html` older than `topology.json` → re-run the injection step
+  from `/modernize-map`.
+- Any `TRANSFORMATION_NOTES.md` older than `BUSINESS_RULES.md` → the
+  module may not implement the latest rule set; list which.
+
+## 3 — Secrets hygiene
+
+- Does `analysis/.gitignore` exist and cover `SECRETS.local.md` /
+  `*.local.patch`? (`git check-ignore` when in a git repo.)
+- If `SECRETS.local.md` exists: confirm it is NOT tracked
+  (`git ls-files --error-unmatch <path>`, expect a non-zero exit) and has
+  never been committed (`git log --all --oneline -- <path>`, expect empty
+  output). If the file is tracked or appears in history, say so
+  prominently and recommend rotation plus history scrubbing.
+
+## 4 — Verdict
+
+End with three lines:
+- **Where you are** — the furthest completed stage and roughly how much
+  of the system it covers (e.g. "mapped 100%, 2 of 14 modules
+  transformed").
+- **What's stale** — or "nothing".
+- **Next command** — the single most useful next step, with a one-line
+  reason.
--- a/plugins/code-modernization/commands/modernize-transform.md
+++ b/plugins/code-modernization/commands/modernize-transform.md
@@ -9,10 +9,37 @@ equivalence.
 This is a surgical, single-module transformation — one vertical slice of the
 strangler fig. Output goes to `modernized/$1/$2/`.

-## Step 0 — Plan (HITL gate)
+## Step 0a — Toolchain check (fail fast on target, adapt on legacy)
+
+Verify the build environment **before** planning, not when the tests
+first run:
+
+- **Target stack ($3) — required.** Runtime, package manager, and test
+  framework all respond (`java -version` + `mvn -v`, `node -v` + `npm -v`,
+  `python3 -V` + `pytest --version`, …). If any are missing, stop and
+  report what to install — the new code and its tests cannot run without
+  them, so a plan gate now would just defer the failure an hour. Suggest
+  `/modernize-preflight $1 $3` for the full readiness report.
+- **Legacy stack — advisory, never a blocker.** Try a syntax-only compile
+  of the module being transformed (e.g. `cobc -fsyntax-only`). Legacy
+  code often *cannot* build locally by nature, not by misconfiguration —
+  CICS/IMS programs have no local translator, and the real runtime may be
+  a mainframe you don't have. A failed or impossible legacy compile does
+  **not** stop the transform; it changes the equivalence strategy:
+  - dual-execution proof is off the table — characterization tests
+    assert against **recorded traces / golden-master fixtures** (real
+    production outputs, captured reports/screens, SME-confirmed
+    examples) instead of live legacy runs
+  - say so explicitly in the Step 0b plan and later in
+    TRANSFORMATION_NOTES.md ("equivalence is trace-based; legacy was not
+    executable in this environment"), so reviewers know the strength of
+    the proof they're approving
+
+## Step 0b — Plan (HITL gate)

 Read the source module and any business rules in `analysis/$1/BUSINESS_RULES.md`
-that reference it. Then **enter plan mode** and present:
+that reference it. Then present the plan and **stop — write no code until
+the user explicitly approves** (use plan mode if the session supports it):
 - Which source files are in scope
 - The target module structure (packages/classes/files you'll create)
 - Which business rules / behaviors this module implements
@@ -30,7 +57,9 @@ identify every observable behavior, and encode each as a test case with
 concrete input → expected output pairs derived from the legacy logic.
 Target framework: <appropriate for $3>. Write to
 `modernized/$1/$2/src/test/`. These tests define 'done' — the new code
-must pass all of them."
+must pass all of them. Follow your secret-handling rules: no credential
+literal from legacy code becomes a fixture; substitute fake same-shape
+values and read anything genuinely live from environment variables."

 Show the user the test file. Get a 👍 before proceeding.

@@ -68,6 +97,10 @@ Then show a visual diff of one representative behavior, legacy vs modern:
 ```bash
 delta --side-by-side <(sed -n '<lines>p' legacy/$1/<file>) modernized/$1/$2/src/main/<file>
 ```
+(Fall back to `diff -y --width=160` if `delta` isn't installed.) Never
+pick a credential-bearing line range for this diff, and mask any
+credential-like literal quoted in TRANSFORMATION_NOTES.md — the notes
+live in `modernized/` and get committed.

 ## Step 5 — Architecture review

--- a/plugins/security-guidance/.claude-plugin/plugin.json
+++ b/plugins/security-guidance/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
  "name": "security-guidance",
-  "version": "2.0.0",
+  "version": "2.0.3",
  "description": "Security review for Claude-generated code. Pattern-based warnings on edits, LLM-powered diff review on Stop, and an agentic commit reviewer that catches injection, XSS, SSRF, hardcoded secrets, and 25+ other vulnerability classes.",
  "author": {
    "name": "David Dworken",
--- a/plugins/security-guidance/hooks/_base.py
+++ b/plugins/security-guidance/hooks/_base.py
@@ -116,7 +116,18 @@ _PV = _read_plugin_version_int()
 # Emitted via _usage_metrics() into the existing emit_metrics() channel so
 # hook metrics rows carry per-invocation token/cost totals
 # alongside the existing skip_reason / vulns_found fields.
-_USAGE = {"in": 0, "out": 0, "cr": 0, "cw": 0, "cost": 0.0, "n": 0}
+_USAGE = {
+    "in": 0, "out": 0, "cr": 0, "cw": 0, "cost": 0.0, "n": 0,
+    # HTTP error visibility (#2098 visibility gap — see emit comment in
+    # _usage_metrics). Without this, API failures from `_call_claude` left
+    # zero fingerprint in telemetry: the call returns None, the caller's
+    # emit_metrics carries no api_calls field, and the failure is
+    # indistinguishable from "no review needed". The deprecation outage
+    # that broke every commit-review LLM call was invisible until users
+    # reported it manually.
+    "http_err_last": 0,    # most recent HTTP error code this invocation
+    "http_err_count": 0,   # total HTTP errors (4xx + 5xx + network)
+}
 _USAGE_LOCK = threading.Lock()

 # $/Mtok (input, output). Used only for the raw-HTTP path; the SDK path
@@ -166,19 +177,55 @@ def _record_usage(usage, model, cost_usd=None):
        _USAGE["n"] += 1


+def _record_http_error(status):
+    """Record an HTTP error from an LLM API call. `status` is the HTTP
+    status code (integer 400–599) or -1 for network/timeout errors. Stored
+    in `_USAGE["http_err_last"]` (most recent) and counted in
+    `_USAGE["http_err_count"]`. Snapshot via `_usage_metrics()` so every
+    subsequent `emit_metrics` includes the failure fingerprint.
+
+    Background: the motivating example was the #2098 deprecation 400.
+    Every hook fire's LLM call returned HTTP 400; the plugin caught it
+    and returned None; the emit_metrics carried no api_calls field;
+    aggregate dashboards looked normal. The failure only became visible
+    when a user manually reported errors out of their debug log. With
+    this field, a category-of-failure spike (4xx, 5xx, or -1 network)
+    is queryable from BQ in real time.
+    """
+    try:
+        s = int(status)
+    except (TypeError, ValueError):
+        return  # status not coercible to int: silently ignored, nothing recorded
+    with _USAGE_LOCK:
+        _USAGE["http_err_last"] = s
+        _USAGE["http_err_count"] += 1
+
+
 def _usage_metrics():
    """Snapshot the accumulator as metric keys. Returns {} when no API calls
-    were made so skip-path emits don't burn key budget. cost_usd rounded to
-    1e-6 to keep the float finite/short for the zod schema."""
-    with _USAGE_LOCK:
-        if _USAGE["n"] == 0:
-            return {}
-        return {
-            "tok_in": _USAGE["in"],
-            "tok_out": _USAGE["out"],
-            "tok_cache_r": _USAGE["cr"],
-            "tok_cache_w": _USAGE["cw"],
-            "cost_usd": round(_USAGE["cost"], 6),
-            "api_calls": _USAGE["n"],
-        }
+    were made AND no HTTP errors were recorded, so skip-path emits don't
+    burn key budget. cost_usd is rounded to 1e-6 to keep the float
+    finite/short for the zod schema.
+
+    HTTP errors (`http_err_last`, `http_err_count`) emitted ONLY when
+    `http_err_count > 0` so successful calls don't pad every metrics row
+    with two zero fields.
+    """
+    with _USAGE_LOCK:
+        if _USAGE["n"] == 0 and _USAGE["http_err_count"] == 0:
+            return {}
+        out = {}
+        if _USAGE["n"] > 0:
+            out.update({
+                "tok_in": _USAGE["in"],
+                "tok_out": _USAGE["out"],
+                "tok_cache_r": _USAGE["cr"],
+                "tok_cache_w": _USAGE["cw"],
+                "cost_usd": round(_USAGE["cost"], 6),
+                "api_calls": _USAGE["n"],
+            })
+        if _USAGE["http_err_count"] > 0:
+            out["http_err_last"] = _USAGE["http_err_last"]
+            out["http_err_count"] = _USAGE["http_err_count"]
+        return out

--- a/plugins/security-guidance/hooks/diffstate.py
+++ b/plugins/security-guidance/hooks/diffstate.py
@@ -355,9 +355,9 @@ def _list_untracked(cwd):
    the holdouts."""
    try:
        repo = _git_toplevel(cwd) or cwd
+        # core.quotePath=false comes from GIT_CMD globally (see gitutil.py).
        r = subprocess.run(
-            [*GIT_CMD, "-c", "core.quotePath=false", "ls-files",
-             "--others", "--exclude-standard", "-z"],
+            [*GIT_CMD, "ls-files", "--others", "--exclude-standard", "-z"],
            cwd=repo, capture_output=True, timeout=15,
        )
        if r.returncode != 0:
--- a/plugins/security-guidance/hooks/ensure_agent_sdk.py
+++ b/plugins/security-guidance/hooks/ensure_agent_sdk.py
@@ -42,6 +42,122 @@ HOOK_PY_INCOMPATIBLE = 6  # hook interpreter is <3.10 — SDK syntax can't load
                          # here no matter how the venv was built. See #2071.


+# Phase + err-kind integer encoding for sdk_bootstrap_phase / sdk_bootstrap_err.
+#
+# Earlier versions emitted these as STRINGS (e.g. "pip", "dns_fail"). CC's
+# plugin-metrics pipeline silently drops plugin-emitted string values —
+# only `bool|finite-number` plugin metrics reach BigQuery. (CC-core
+# metrics like `subscription_type` are exempt because they're injected
+# downstream of plugin validation.) Confirmed empirically: 185K
+# BUILD_FAILED rows in BQ had `sdk_bootstrap_phase`/`sdk_bootstrap_err`
+# = NULL despite the Python code emitting them. This left ~28K
+# BUILD_FAILED sessions/day with no diagnostic split — flying blind on
+# the real failure modes (pip-no-match vs dns-fail vs ssl-verify etc.).
+#
+# Fix: encode as small integers per the maps below. Values are
+# APPEND-ONLY for telemetry stability. Reserve 99 as the "unknown /
+# uncategorized" bucket so an unmapped err_kind (e.g., a new exception
+# type) still emits a non-zero signal.
+SDK_BOOTSTRAP_PHASE_CODES = {
+    "pre":  1,  # pre-venv (state_dir.mkdir, sentinel open)
+    "venv": 2,  # python -m venv --clear
+    "pip":  3,  # pip install
+    "main": 4,  # uncaught exception above main()
+}
+SDK_BOOTSTRAP_ERR_CODES = {
+    "pip_no_match":         1,
+    "dns_fail":             2,
+    "conn_refused":         3,
+    "ssl_verify":           4,
+    "perm_denied":          5,
+    "no_pip":               6,
+    "disk_full":            7,
+    "proxy_auth":           8,
+    "stderr_timeout":       9,   # pip stderr containing "timeout"/"timed out"
+    "subprocess_timeout":   10,  # subprocess.TimeoutExpired (>120s)
+    # Venv-stage specific categories added after PR #2112 telemetry surfaced
+    # 2,406 phase=2/err=99 sessions in the first 3h of v2.0.1 — venv phase
+    # failing in ways the original pip-flavored patterns didn't catch. These
+    # all split out of what was previously collapsing to _uncategorized.
+    "venv_ensurepip_fail":  11,  # Debian/Ubuntu missing python3-venv;
+                                 # stderr mentions ensurepip non-zero exit
+                                 # or "ensurepip is not available"
+    "venv_path_too_long":   12,  # Windows MAX_PATH (260) or POSIX
+                                 # ENAMETOOLONG — venv writes deep paths
+                                 # under state_dir/agent-sdk-venv/Lib/...
+    "venv_no_module":       13,  # `python3 -m venv` itself missing — "No
+                                 # module named 'venv'" / "No module named venv"
+    "venv_already_exists":  14,  # Errno 17 / "file exists" — sentinel race
+                                 # past O_EXCL or stale dir survived --clear
+    "venv_setup_failed":    15,  # Generic "virtual environment was not
+                                 # created successfully" — catches the long
+                                 # tail of venv setup failures that don't
+                                 # match a more specific category above
+    # 16–98 reserved for future categories; APPEND-ONLY.
+    # 99 catches everything else (including "exc:<TypeName>" and "other:<tail>"
+    # — the original string is debug-loggable but the integer is what makes
+    # it to telemetry). For the "other:" tail, `sdk_bootstrap_stderr_sig`
+    # carries a bounded integer hash so we can still distinguish patterns
+    # in BQ aggregation.
+    "_uncategorized":       99,
+}
+
+
+def _encode_phase(s):
+    """Map err_phase string to its telemetry integer code. Empty/None and
+    any name missing from SDK_BOOTSTRAP_PHASE_CODES both map to 0, so a
+    caller can skip emission with `if encoded:`. Valid codes are 1-4."""
+    return SDK_BOOTSTRAP_PHASE_CODES.get((s or "").strip(), 0)
+
+
+def _encode_err_kind(s):
+    """Translate an err_kind string into its telemetry integer code.
+    Empty/None yields 0 (nothing to report). Exact names come from
+    SDK_BOOTSTRAP_ERR_CODES; anything else — "exc:<X>", "other:<tail>",
+    bare "other", or a string nobody has mapped yet — lands in
+    _uncategorized (99). The raw string survives in debug logs; only
+    the integer reaches BQ."""
+    kind = (s or "").strip()
+    if not kind:
+        return 0
+    # A non-empty but unmapped kind still emits a non-zero code rather
+    # than being dropped, so new failure types stay visible.
+    return SDK_BOOTSTRAP_ERR_CODES.get(
+        kind, SDK_BOOTSTRAP_ERR_CODES["_uncategorized"]
+    )
+
+
+def _encode_stderr_sig(err_kind):
+    """Bounded integer hash of the stderr tail captured in "other:<tail>"
+    err_kinds. Lets us distinguish patterns INSIDE the _uncategorized
+    (code 99) bucket without unbounded cardinality.
+
+    Returns 0 for non-"other:" err_kinds (so the field auto-omits from
+    emit_metrics on categorized failures — see the emit block in main()).
+
+    Strategy: take the tail's first 30 chars (post-trim, post-lowercase),
+    SHA-1, fold the first 2 bytes to 1–1000. A raw fold of 0 is remapped
+    to 1000 so a real signature never collides with the 0 = "no signal"
+    sentinel (the caller omits falsy values); every other bucket keeps
+    its number. Same stderr always maps to the same bucket. Cardinality
+    is bounded at 1000 — and a real failure mode typically produces
+    near-identical stderr across thousands of machines.
+
+    Why first 30 chars: stderr like "ERROR: Command failed: <full
+    path>" varies the tail wildly (paths) but the categorization signal
+    is in the leading words. Dropping the suffix focuses the hash on
+    the discriminative part.
+    """
+    if not err_kind or not err_kind.startswith("other:"):
+        return 0
+    import hashlib
+    tail = err_kind[len("other:"):].strip().lower()[:30]
+    if not tail:
+        return 0
+    h = hashlib.sha1(tail.encode("utf-8", errors="replace")).digest()
+    return (int.from_bytes(h[:2], "big") % 1000) or 1000
+
+
 def _sdk_on_syspath() -> bool:
    # find_spec is ~10ms; actually importing the SDK pulls in
    # transitive deps and costs ~800ms — too heavy for a
@@ -180,7 +296,34 @@ def main() -> tuple[int, str, str]:
        else:
            stderr_str = str(stderr_b)
        s = stderr_str.lower()
-        if "no matching distribution" in s or "could not find a version" in s:
+        # Venv-specific patterns checked FIRST — they overlap with some pip
+        # patterns (e.g. "no module named ensurepip" could match no_pip OR
+        # venv_ensurepip_fail; the venv-stage interpretation is the right
+        # one when err_phase=="venv"). Order is venv-most-specific →
+        # pip-historical → generic.
+        if err_phase == "venv" and (
+            "ensurepip is not available" in s
+            or ("ensurepip" in s and "returned non-zero" in s)
+            or "the virtual environment was not created" in s and "ensurepip" in s
+        ):
+            err_kind = "venv_ensurepip_fail"
+        elif err_phase == "venv" and (
+            "[errno 36]" in s
+            or "file name too long" in s
+            or "path too long" in s
+        ):
+            err_kind = "venv_path_too_long"
+        elif err_phase == "venv" and (
+            "no module named venv" in s
+            or "no module named 'venv'" in s
+        ):
+            err_kind = "venv_no_module"
+        elif err_phase == "venv" and (
+            "[errno 17]" in s
+            or ("file exists" in s and "venv" in s)
+        ):
+            err_kind = "venv_already_exists"
+        elif "no matching distribution" in s or "could not find a version" in s:
            err_kind = "pip_no_match"
        elif "name or service not known" in s or "name resolution" in s \
                or "nodename nor servname" in s or "temporary failure in name" in s:
@@ -199,6 +342,15 @@ def main() -> tuple[int, str, str]:
            err_kind = "proxy_auth"
        elif "timeout" in s or "timed out" in s:
            err_kind = "stderr_timeout"
+        elif err_phase == "venv" and (
+            "virtual environment was not created" in s
+            or "error: command" in s and "venv" in s
+        ):
+            # Generic venv-setup catch-all — matched AFTER the more specific
+            # venv patterns above so we don't shadow them, but BEFORE the
+            # other: fallback so generic venv setup failures get their own
+            # bucket instead of polluting the long-tail signature space.
+            err_kind = "venv_setup_failed"
        else:
            # First 60 chars of the last non-empty stderr line — bounded to
            # stay inside CC's metric value-length budget. Real failure modes
@@ -288,21 +440,33 @@ if __name__ == "__main__":
    # and takes the FIRST non-{"async":...} JSON line as the hook response;
    # its `metrics` key is forwarded to the hook metrics event on the
    # next attachments pass. Must be a single line — the registry splits on
-    # \n and json-parses each independently. Values must be bool|number OR
-    # short strings (CC accepts string metric values if they're not
-    # null). Stay inside the 10-key emit cap.
+    # \n and json-parses each independently.
+    #
+    # IMPORTANT — values must be bool|finite-number. The validation comment
+    # has historically said "or short strings" but that was wrong: CC's
+    # plugin-metrics pipeline silently drops plugin-emitted string values.
+    # Stay inside the 10-key emit cap.
    metrics: dict[str, object] = {
        "sdk_bootstrap": outcome,
        "sdk_bootstrap_ms": round((time.perf_counter() - t0) * 1000),
    }
    if err_kind:
-        # Truncate defensively; categorized values are <40 chars but the
-        # `other:<tail>` mode could be longer. err_phase may be empty for
-        # pre-venv failures (state_dir.mkdir perm-denied, sentinel O_EXCL
-        # raising a non-FileExistsError OSError) — emit as "pre" so the
-        # err_kind isn't silently dropped.
-        metrics["sdk_bootstrap_phase"] = (err_phase or "pre")[:16]
-        metrics["sdk_bootstrap_err"] = err_kind[:96]
+        # Encode phase + err_kind as integer codes (see
+        # SDK_BOOTSTRAP_PHASE_CODES / SDK_BOOTSTRAP_ERR_CODES). Earlier
+        # versions emitted these as strings and CC dropped them — restoring
+        # the diagnostic split that 28K BUILD_FAILED/day need to triage by
+        # root cause. err_phase defaults to "pre" when empty (pre-venv
+        # failure path, e.g. state_dir.mkdir perm-denied).
+        metrics["sdk_bootstrap_phase"] = _encode_phase(err_phase or "pre")
+        metrics["sdk_bootstrap_err"] = _encode_err_kind(err_kind)
+        # For "other:<tail>" (encoded err==99), emit a bounded integer
+        # hash of the stderr tail so BQ can distinguish patterns inside
+        # the _uncategorized bucket without unbounded cardinality. Zero
+        # when err_kind is categorized — the schema reader treats 0 as
+        # "no signal", matching the absence convention.
+        sig = _encode_stderr_sig(err_kind)
+        if sig:
+            metrics["sdk_bootstrap_stderr_sig"] = sig
    pv = _plugin_version_int()
    if pv:
        metrics["pv"] = pv
--- a/plugins/security-guidance/hooks/gitutil.py
+++ b/plugins/security-guidance/hooks/gitutil.py
@@ -26,6 +26,17 @@ GIT_CMD = [
    "git",
    "-c", "core.fsmonitor=false",
    "-c", "core.hooksPath=/dev/null",
+    # core.quotePath=false: emit raw UTF-8 in path-emitting commands instead
+    # of C-quoting non-ASCII bytes (default `"\\303\\201vila/..."` vs
+    # `Ávila/...`). Downstream parsers — both ours (parse_diff_into_files,
+    # extract_file_paths_from_diff) and Python stdlib (os.path.isabs,
+    # os.path.join) — expect raw paths and silently drop / mishandle the
+    # quoted form. Adding the flag globally to GIT_CMD covers every
+    # subprocess.run site that uses the splat — diff feeders, rev-parse
+    # path queries (--show-toplevel, --git-dir, --git-common-dir),
+    # reflog %gs subjects, ls-files, status, etc. — without per-site
+    # flag duplication. See #2082, #2099.
+    "-c", "core.quotePath=false",
 ]


@@ -222,15 +233,12 @@ def _git_diff_range(repo_root, base, head="HEAD"):
    them reviewed — otherwise unreviewed commits get permanently silenced.
    """
    try:
-        # core.quotePath=false makes git emit raw UTF-8 in `diff --git a/... b/...`
-        # headers instead of C-quoting non-ASCII path bytes (`"a/\303\201vila/..."`
-        # vs `a/Ávila/...`). The downstream `re.match(r'^a/(.+?) b/(.+)$', ...)`
-        # in parse_diff_into_files / extract_file_paths_from_diff matches the
-        # raw form only — quoted headers slip past and the entire file is
-        # silently dropped from review. See #2082 (sibling of #2056 / #2075).
+        # GIT_CMD globally passes core.quotePath=false (see definition) so
+        # non-ASCII paths in `diff --git a/... b/...` headers come through as
+        # raw UTF-8, not C-quoted. Required by the downstream
+        # parse_diff_into_files / extract_file_paths_from_diff regex.
        r = subprocess.run(
-            [*GIT_CMD, "-c", "core.quotePath=false",
-             "diff", "-p", "--no-color", "--no-ext-diff", base, head],
+            [*GIT_CMD, "diff", "-p", "--no-color", "--no-ext-diff", base, head],
            cwd=repo_root, capture_output=True, timeout=30,
        )
        if r.returncode != 0:
@@ -355,8 +363,9 @@ def _git_name_only(cwd, base, include_untracked=False):
    # result.stdout=None, and propagate AttributeError out of the helper.
    # Same fix shape as diffstate._list_untracked. See #2056.
    def _run(env):
+        # core.quotePath=false comes from GIT_CMD globally (see definition).
        result = subprocess.run(
-            [*GIT_CMD, "-c", "core.quotePath=false", "diff", "--name-only", "-z", base],
+            [*GIT_CMD, "diff", "--name-only", "-z", base],
            cwd=cwd, capture_output=True, timeout=30,
            env=env,
        )
@@ -393,9 +402,9 @@ def _git_status_porcelain(cwd):
    # sibling helpers — a non-ASCII path in the worktree would otherwise
    # crash the cp1252 reader thread on Windows. See #2056.
    try:
+        # core.quotePath=false comes from GIT_CMD globally (see definition).
        r = subprocess.run(
-            [*GIT_CMD, "-c", "core.quotePath=false", "status",
-             "--porcelain=v1", "-uall", "-z"],
+            [*GIT_CMD, "status", "--porcelain=v1", "-uall", "-z"],
            cwd=cwd, capture_output=True, timeout=30,
        )
        if r.returncode != 0:
@@ -471,11 +480,8 @@ def get_git_diff(cwd, baseline_sha, full_context=False, paths=None, untracked_pa
        # change exists to fix.
        return ""

-    # core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...` headers
-    # so non-ASCII paths aren't C-quoted past the downstream parse_diff_into_files
-    # regex. See #2082 (sibling of #2056 / #2075).
-    cmd = [*GIT_CMD, "-c", "core.quotePath=false",
-           "diff", "--no-color", "--no-ext-diff", baseline_sha] + (["--unified=99999"] if full_context else []) + pathspec
+    # core.quotePath=false comes from GIT_CMD globally (see definition).
+    cmd = [*GIT_CMD, "diff", "--no-color", "--no-ext-diff", baseline_sha] + (["--unified=99999"] if full_context else []) + pathspec
    try:
        with _temp_index(cwd, untracked_paths) as env:
            # env is None when no index could be found (bare repo / not a
--- a/plugins/security-guidance/hooks/llm.py
+++ b/plugins/security-guidance/hooks/llm.py
@@ -27,7 +27,7 @@ from typing import Optional, Tuple, Dict, Any, List

 import extensibility
 import review_api
-from _base import debug_log, _record_usage, _PV, PROVENANCE_TAG, state_dir as _resolve_state_dir  # noqa: F401
+from _base import debug_log, _record_usage, _record_http_error, _PV, PROVENANCE_TAG, state_dir as _resolve_state_dir  # noqa: F401
 from session_state import with_locked_state


@@ -368,6 +368,7 @@ def _call_claude_via_sdk(prompt, output_schema, *, max_tokens=16000, model=None)
        except Exception as e:
            debug_log(f"3P sdk-single-turn: SDK unavailable ({e})")
            _last_call_claude_http_error = -1
+            _record_http_error(-1)
            return None

    cli_path = os.environ.get("SG_AGENTIC_CLI_PATH") or None
@@ -425,6 +426,7 @@ def _call_claude_via_sdk(prompt, output_schema, *, max_tokens=16000, model=None)
    except _asyncio.TimeoutError:
        debug_log("3P sdk-single-turn: timeout after 60s")
        _last_call_claude_http_error = -1
+        _record_http_error(-1)
        return None
    except Exception as e:
        debug_log(f"3P sdk-single-turn: query failed ({e})")
@@ -433,6 +435,7 @@ def _call_claude_via_sdk(prompt, output_schema, *, max_tokens=16000, model=None)
            for _l in _captured_stderr[:20]:
                debug_log(f"  | {_l.rstrip()}")
        _last_call_claude_http_error = -1
+        _record_http_error(-1)
        return None


@@ -479,10 +482,21 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
        "max_tokens": max_tokens,
        "system": CLAUDE_CODE_SYSTEM_PROMPT,
        "messages": [{"role": "user", "content": prompt}],
-        "output_format": {
-            "type": "json_schema",
-            "schema": output_schema
-        }
+        # API moved the structured-output schema from top-level `output_format`
+        # to `output_config.format` per
+        # https://platform.claude.com/docs/en/build-with-claude/structured-outputs.
+        # The old form "continues to work for a transition period" for some
+        # auth modes (API key + non-streaming), but is rejected with
+        # `invalid_request_error: output_format: This field is deprecated.
+        # Use 'output_config.format' instead.` for others (OAuth Bearer +
+        # newer CLI versions hit it consistently — reporter saw 462 errors
+        # in one day). See #2098.
+        "output_config": {
+            "format": {
+                "type": "json_schema",
+                "schema": output_schema,
+            },
+        },
    }
    if thinking_budget > 0:
        # Models trained on adaptive thinking (4.6+) reject the budget_tokens
@@ -490,7 +504,10 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
        # models (4.5 and earlier, all 3.x) reject adaptive. Pick by model.
        if _model_supports_adaptive_thinking(payload["model"]):
            payload["thinking"] = {"type": "adaptive"}
-            payload["output_config"] = {"effort": "high"}
+            # Merge `effort` into the existing output_config dict (which
+            # now carries the `format` schema) rather than reassigning —
+            # otherwise the schema is silently overwritten. See #2098.
+            payload["output_config"]["effort"] = "high"
        else:
            payload["thinking"] = {
                "type": "enabled",
@@ -528,6 +545,7 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
                error_body = e.read().decode("utf-8") if e.fp else ""
                debug_log(f"API error: {e.code} - {error_body[:200]}")
                _last_call_claude_http_error = e.code
+                _record_http_error(e.code)
                return None
        except (urllib.error.URLError, TimeoutError) as e:
            if attempt < 2:
@@ -537,6 +555,7 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
            else:
                debug_log(f"Request failed after retries: {e}")
                _last_call_claude_http_error = -1
+                _record_http_error(-1)
                return None

    if not response_data:
@@ -545,6 +564,7 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
        # call uses the token; record the 401 so callers don't see error=None.
        if _last_call_claude_http_error is None:
            _last_call_claude_http_error = 401
+            _record_http_error(401)
        return None

    # Find the text block (skip thinking blocks)
--- a/plugins/security-guidance/hooks/security_reminder_hook.py
+++ b/plugins/security-guidance/hooks/security_reminder_hook.py
@@ -221,15 +221,34 @@ def emit_metrics(
    task-notification one-liner. Must be in the same JSON line as the metrics
    because CC stops scanning stdout after the first {-prefixed line.

-    `additional_context` (asyncRewake findings): model-visible guidance text
-    that CC surfaces via the modern hook-output protocol
-    (hookSpecificOutput.additionalContext) instead of the legacy stderr +
-    exit(2) pair. The caller passes the finding-explanation text it would
-    have written to stderr; the JSON channel carries it cleanly so CC's UI
-    shows the reason properly instead of "Permission denied with no reason".
-    See anthropics/claude-plugins-official#1375 and #1783. Empty/None
-    means no hookSpecificOutput field is emitted (preserves backward compat
-    for legacy emit-sites that only want metrics).
+    `additional_context` (asyncRewake findings): model-visible guidance text.
+    Delivery channel depends on `hook_event_name` because CC's hook-output
+    contract is NOT symmetric across events:
+
+      - PostToolUse (commit-review, push-sweep): surfaced via the modern
+        hookSpecificOutput.additionalContext protocol. `PostToolUse` is a
+        member of CC's hookSpecificOutput discriminated union
+        (coreSchemas.ts), so the JSON validates and metrics/rewakeSummary
+        are consumed. See #1375 / #1783 for why this replaced the legacy
+        stderr + exit(2) shape for PostToolUse.
+
+      - Stop / SubagentStop: there is NO `Stop` member in that union, so
+        emitting hookSpecificOutput{hookEventName:"Stop"} makes the whole
+        line fail isSyncHookJSONOutput validation — which on the asyncRewake
+        path silently drops metrics AND rewakeSummary, and (because the
+        legacy stderr write was removed) leaks the raw JSON to the model as
+        the rewake body. CC's asyncRewake delivery actually reads
+        `stderr || stdout` for the model-visible body and only scans stdout
+        JSON for metrics+rewakeSummary — it never reads additionalContext
+        on this path. So for Stop we use the documented clean pattern:
+        guidance on stderr, valid JSON (metrics + rewakeSummary +
+        top-level decision/reason) on stdout. The top-level decision:"block"
+        + reason also covers the sync-fallback path (single-shot `claude -p`,
+        where asyncRewake degrades to a sync Stop hook that reads
+        decision/reason). See #2159.
+
+    Empty/None additional_context emits neither channel (back-compat for
+    metrics-only callers).

    `system_message` (optional, asyncRewake only): user-visible TUI message,
    distinct from rewakeSummary which is the task-notification one-liner.
@@ -237,10 +256,9 @@ def emit_metrics(
    surface; systemMessage adds a per-fire override when the static
    rewakeMessage isn't specific enough for the finding being shown.

-    `hook_event_name` (used only when additional_context is set): which event
-    the hookSpecificOutput attaches to. Defaults to "PostToolUse" since the
-    commit-review and push-sweep handlers are the most common callers;
-    handle_stop_hook explicitly passes "Stop".
+    `hook_event_name` (used only when additional_context is set): selects the
+    delivery channel above. Defaults to "PostToolUse" (commit-review and
+    push-sweep are the most common callers); handle_stop_hook passes "Stop".
    """
    head = {}
    if _PV and "pv" not in metrics:
@@ -252,14 +270,23 @@ def emit_metrics(
    if rewake_summary:
        out["rewakeSummary"] = rewake_summary
    if additional_context:
-        # Wrap in hookSpecificOutput per CC's modern hook-output contract.
-        # Drops the legacy `sys.stderr.write(...) + sys.exit(2)` shape that
-        # left CC's UI showing "denied with no reason" (#1783) and triggered
-        # "json output validation failed" on older CC versions (#1375).
-        out["hookSpecificOutput"] = {
-            "hookEventName": hook_event_name,
-            "additionalContext": additional_context,
-        }
+        if hook_event_name in ("Stop", "SubagentStop"):
+            # Stop is NOT in CC's hookSpecificOutput union — emitting it there
+            # fails schema validation and drops metrics+rewakeSummary (#2159).
+            # Clean pattern: guidance on stderr (the asyncRewake body channel,
+            # delivered via `stderr || stdout`), top-level decision/reason for
+            # the sync-fallback path. stdout JSON stays valid so metrics +
+            # rewakeSummary survive.
+            sys.stderr.write(additional_context)
+            sys.stderr.flush()
+            out["decision"] = "block"
+            out["reason"] = additional_context
+        else:
+            # PostToolUse et al. — valid union member; modern protocol.
+            out["hookSpecificOutput"] = {
+                "hookEventName": hook_event_name,
+                "additionalContext": additional_context,
+            }
    if system_message:
        out["systemMessage"] = system_message
    print(json.dumps(out), flush=True)
@@ -1197,18 +1224,18 @@ def handle_commit_review_posttooluse(input_data):
            # core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...`
            # headers so non-ASCII paths aren't C-quoted past the downstream
            # parse_diff_into_files regex (sibling of #2056 / #2075). See #2082.
+            # core.quotePath=false comes from GIT_CMD globally (see gitutil.py).
            if pre_amend_sha:
                # Delta review: pre-amend → post-amend. `git diff` (not show)
                # so the output is a pure unified diff with no commit header.
                result = subprocess.run(
-                    [*GIT_CMD, "-c", "core.quotePath=false",
-                     "diff", "--no-color", "--no-ext-diff", pre_amend_sha, sha, "--"],
+                    [*GIT_CMD, "diff", "--no-color", "--no-ext-diff",
+                     pre_amend_sha, sha, "--"],
                    cwd=repo_root, capture_output=True, timeout=15
                )
            else:
                result = subprocess.run(
-                    [*GIT_CMD, "-c", "core.quotePath=false",
-                     "show", "-p", "--no-color", "--no-ext-diff", sha, "--"],
+                    [*GIT_CMD, "show", "-p", "--no-color", "--no-ext-diff", sha, "--"],
                    cwd=repo_root, capture_output=True, timeout=15
                )
        except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e: