Bump 31 plugin SHA pin(s) to upstream HEAD

2026-06-16 22:33:35 +00:00 · 2026-05-31 08:26:20 +00:00
28 changed files with 301 additions and 2396 deletions
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
--- a/.github/workflows/bump-plugin-shas.yml
+++ b/.github/workflows/bump-plugin-shas.yml
@@ -2,24 +2,25 @@ name: Bump Plugin SHAs

 # Nightly sweep: for each external entry whose upstream HEAD has moved past
 # its pinned SHA, validate at the new SHA with `claude plugin validate`
-# inline, then open one PR per bumped plugin on branch `bump/<slug>`.
-# Failing entries stay isolated in their own PR; passing bumps merge
-# independently.
+# inline, then open one PR with all passing bumps. Each run force-resets the
+# `bump/plugin-shas` branch, so a previous night's unmerged PR is replaced (and
+# its review state discarded) — review and merge same-day to avoid churn.
 #
 # Bot-free — uses the default GITHUB_TOKEN. PRs opened with GITHUB_TOKEN don't
-# trigger on:pull_request workflows, so the required status checks on main
-# (`scan` from Scan Plugins, `check` from Check MCP URLs, `validate` from
-# Validate Plugins) would never run and the bump PR could never merge.
-# workflow_dispatch is exempt from that recursion guard, so we dispatch all
-# three ourselves against each per-entry bump branch after its PR is opened.
-# Each check run lands on the branch HEAD — the same SHA as the PR head — and
-# satisfies the corresponding required check. (Each of those workflows runs
-# its job unconditionally on workflow_dispatch, so a dispatch always reports.)
+# trigger on:pull_request workflows, so the policy scan (`Scan Plugins`, a
+# required status check on main) would never run and the bump PR could never
+# merge. workflow_dispatch is exempt from that recursion guard, so we dispatch
+# the scan ourselves on the bump branch after the PR is opened. The check run
+# lands on the branch HEAD — the same SHA as the PR head — and satisfies the
+# required check.
 #
-# max-bumps caps the per-night work for cost control. Per-entry scans are
-# more expensive than a single batched scan, so the cap is conservative.
-# The composite action skips entries that already have an open bump PR, so
-# re-dispatches don't pile up duplicate work.
+# max-bumps is set above the external-entry count so a single run can clear
+# any backlog. The cost-control mechanisms are downstream:
+#   - scan-plugins.yml caches verdicts by (plugin, sha) so an unchanged SHA
+#     is never re-scanned across nightly force-resets.
+#   - revert-failed-bumps.yml drops policy-failing entries from the bump PR
+#     so one bad upstream can't block the rest.
+# See those files for details.

 on:
  schedule:
@@ -29,12 +30,12 @@ on:
      max_bumps:
        description: Cap on plugins bumped this run
        required: false
-        default: '30'
+        default: '130'

 permissions:
  contents: write
  pull-requests: write
-  actions: write  # gh workflow run {scan-plugins,check-mcp-urls,validate-plugins}.yml per bump branch
+  actions: write  # gh workflow run scan-plugins.yml on the bump branch

 concurrency:
  group: bump-plugin-shas
@@ -42,8 +43,8 @@ concurrency:
 jobs:
  bump:
    runs-on: ubuntu-latest
-    # Per-bump cost is ~2s (ls-remote + shallow clone + validate); 30 entries
-    # is ~1-2 min. The 60 min ceiling absorbs slow upstreams without letting a
+    # Per-bump cost is ~2s (ls-remote + shallow clone + validate); 130 entries
+    # is ~5 min. The 60 min ceiling absorbs slow upstreams without letting a
    # pathological run consume the default 360 min budget.
    timeout-minutes: 60
    steps:
@@ -51,44 +52,18 @@ jobs:

      # createCommitOnBranch-based bump so commits are signed by GitHub and
      # satisfy the org-level required_signatures ruleset on main.
-      - uses: anthropics/claude-plugins-community/.github/actions/bump-plugin-shas@e2019b2a01f11aa1484c53540b1cfab5eebbc299
+      - uses: anthropics/claude-plugins-community/.github/actions/bump-plugin-shas@c41c6911de0afffd2bc5cd8b21fb1e06444ee13b
        id: bump
        with:
          marketplace-path: .claude-plugin/marketplace.json
-          max-bumps: ${{ inputs.max_bumps || '30' }}
-          pr-mode: per-entry
+          max-bumps: ${{ inputs.max_bumps || '130' }}
          claude-cli-version: latest

-      # Per-entry fan-out: dispatch the three required checks against each bump
-      # branch. `pr-urls` is a JSON array of {name, old_sha, new_sha, branch,
-      # pr_url} entries emitted by the composite action when pr-mode is
-      # per-entry. All three (scan / check / validate) are required on main and
-      # none fire on the GITHUB_TOKEN-opened PR, so each must be dispatched.
-      # A single failed dispatch (transient API error / rate limit) must not
-      # strand the remaining branches, so we attempt every dispatch, then fail
-      # the step if any failed: a missing required check would otherwise leave
-      # its bump PR silently blocked behind a green run, and the composite
-      # action skips slugs with an open PR so it would never be retried.
-      - name: Dispatch required checks per per-entry PR
-        if: steps.bump.outputs.pr-urls != '' && steps.bump.outputs.pr-urls != '[]'
+      # `bump/plugin-shas` is the bump action's default `pr-branch`. The scan
+      # diffs the branch against origin/main (the scan's base-ref fallback when
+      # there's no pull_request event) and scans only the bumped entries.
+      - name: Dispatch policy scan on bump branch
+        if: steps.bump.outputs.pr-url != ''
        env:
          GH_TOKEN: ${{ github.token }}
-          PR_URLS: ${{ steps.bump.outputs.pr-urls }}
-        run: |
-          set -euo pipefail
-          dispatch_failures="$(mktemp)"
-          jq -c '.[]' <<<"$PR_URLS" | while read -r entry; do
-            branch=$(jq -r '.branch' <<<"$entry")
-            name=$(jq -r '.name' <<<"$entry")
-            for wf in scan-plugins check-mcp-urls validate-plugins; do
-              echo "Dispatching ${wf}.yml against $branch ($name)"
-              if ! gh workflow run "${wf}.yml" --ref "$branch"; then
-                echo "::error::Failed to dispatch ${wf}.yml against $branch ($name) — required check will be missing; re-dispatch with: gh workflow run ${wf}.yml --ref $branch"
-                echo "${wf} ${branch}" >> "$dispatch_failures"
-              fi
-            done
-          done
-          if [ -s "$dispatch_failures" ]; then
-            echo "::error::$(wc -l < "$dispatch_failures" | tr -d ' ') required-check dispatch(es) failed; the affected bump PR(s) are blocked until re-dispatched (see annotations above)."
-            exit 1
-          fi
+        run: gh workflow run scan-plugins.yml --ref bump/plugin-shas
--- a/.github/workflows/validate-plugins.yml
+++ b/.github/workflows/validate-plugins.yml
@@ -12,14 +12,6 @@ on:
    branches: [main]
    paths:
      - '.claude-plugin/**'
-  # `validate` is a required status check on main. Bump PRs are opened with
-  # GITHUB_TOKEN, which doesn't fire on:pull_request (recursion guard), so the
-  # path-filtered trigger above never reports on them and the PR would be
-  # blocked forever. The bump workflow dispatches this against each per-entry
-  # bump branch instead; the check run lands on the branch HEAD (= PR head)
-  # and satisfies the required check. The validate job runs unconditionally,
-  # so a dispatch always reports.
-  workflow_dispatch:

 permissions:
  contents: read
--- a/plugins/code-modernization/.claude-plugin/plugin.json
+++ b/plugins/code-modernization/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
  "name": "code-modernization",
-  "description": "Modernize legacy codebases (COBOL, legacy Java/C++, monolith web apps) with a structured preflight / assess / map / extract-rules / brief / reimagine / transform / harden workflow, an interactive topology viewer, and specialist review agents",
+  "description": "Modernize legacy codebases (COBOL, legacy Java/C++, monolith web apps) with a structured assess → map → extract-rules → brief → reimagine/transform → harden workflow and specialist review agents",
  "author": {
    "name": "Anthropic",
    "email": "support@anthropic.com"
--- a/plugins/code-modernization/README.md
+++ b/plugins/code-modernization/README.md
@@ -7,7 +7,7 @@ A structured workflow and set of specialist agents for modernizing legacy codeba
 Legacy modernization fails most often not because the target technology is wrong, but because teams skip steps: they transform code before understanding it, reimagine architecture before extracting business rules, or ship without a harness that would catch behavior drift. This plugin enforces a sequence:

 ```
-preflight → assess → map → extract-rules → brief → reimagine | transform → harden
+assess → map → extract-rules → brief → reimagine | transform → harden
 ```

 The discovery commands (`assess`, `map`, `extract-rules`) build artifacts under `analysis/<system>/`. The `brief` command synthesizes them into an approval gate. The build commands (`reimagine`, `transform`) write new code under `modernized/`. The `harden` command audits the legacy system and produces a reviewable remediation patch. Each step has a dedicated slash command, and specialist agents (legacy analyst, business rules extractor, architecture critic, security auditor, test engineer) are invoked from within those commands — or directly — to keep the work honest.
@@ -20,40 +20,25 @@ Commands take a `<system-dir>` argument and assume the system being modernized l
 mkdir -p legacy && ln -s /path/to/your/legacy/codebase legacy/billing
 ```

-## What to give Claude
+## Optional tooling

-The commands degrade gracefully, but each of these makes the output meaningfully better — run `/modernize-preflight <system-dir>` to check all of them at once and get a readiness report:
-
- **Analysis tools**: [`scc`](https://github.com/boyter/scc) (LOC + complexity + COCOMO) or [`cloc`](https://github.com/AlDanial/cloc); [`lizard`](https://github.com/terryyin/lizard) for portfolio mode. Without them, metrics fall back to `find`/`wc` and get coarser.
- **A working build toolchain** for the legacy stack (e.g. GnuCOBOL for COBOL) — required before `/modernize-transform` can prove behavioral equivalence, and verified by preflight with a real smoke compile against your code.
- **The whole system in the tree**: deployment descriptors (JCL, CICS definitions, route configs), copybooks/includes, and DDL/schemas. Entry-point detection and data lineage in `/modernize-map` are guesswork without them.
- **Production telemetry** (optional): an observability MCP server or batch job logs enable the runtime overlay in `/modernize-assess` and timing annotations on critical paths.
-
-## Secret handling
-
-Legacy systems routinely contain live credentials, and assessment artifacts get committed and shared. **Every agent in this plugin masks credential values** — findings, rule-card parameters, architecture notes, and test fixtures cite `file:line` with a masked preview (`AKIA****`), never the value. When credentials are found, a per-credential inventory (type, location, blast radius, rotation recommendation) is written to `analysis/<system>/SECRETS.local.md`, which the commands gitignore before writing; on non-git projects the quarantine file goes to `~/.modernize/<system>/` instead. `/modernize-harden` splits its remediation diff so credential-removal hunks (which necessarily contain the raw value) land in a gitignored `security_remediation.local.patch`, never the shareable patch. Pass `--show-secrets` to include raw values in the quarantine file (and only there). If you ran an earlier version of this plugin on a real system, check whether `analysis/` artifacts containing credentials were committed or shared, and rotate anything that was.
+`/modernize-assess` works best with [`scc`](https://github.com/boyter/scc) (LOC + complexity + COCOMO) or [`cloc`](https://github.com/AlDanial/cloc), and falls back to `find`/`wc` if neither is installed. Portfolio mode also benefits from [`lizard`](https://github.com/terryyin/lizard) (cyclomatic complexity). The commands degrade gracefully without them, but the metrics will be coarser.

 ## Commands

 The commands are designed to be run in order, but each produces a standalone artifact so you can stop, review, and resume.

-### `/modernize-preflight <system-dir> [target-stack]`
-Environment readiness check, meant to run first: detects the legacy stack, checks analysis tooling, **smoke-compiles a real source file** with the legacy toolchain (the errors this surfaces — missing copybooks, wrong dialect flags — are the ones that otherwise appear mid-transform), inventories missing includes / deployment descriptors / binary-only artifacts, and probes for telemetry. Produces `analysis/<system>/PREFLIGHT.md` with a per-command Ready / Ready-with-gaps / Not-ready verdict.
-
 ### `/modernize-assess <system-dir>`  — or — `/modernize-assess --portfolio <parent-dir>`
 Inventory the legacy codebase: languages, line counts, complexity, build system, integrations, technical debt, security posture, documentation gaps, and a COCOMO-derived effort estimate. Produces `analysis/<system>/ASSESSMENT.md` and `analysis/<system>/ARCHITECTURE.mmd`. Spawns `legacy-analyst` (×2) and `security-auditor` in parallel for deep reads. With `--portfolio`, sweeps every subdirectory of a parent directory and writes a sequencing heat-map to `analysis/portfolio.html`.

 ### `/modernize-map <system-dir>`
-
-![Interactive topology map of AWS CardDemo — domains as containers, modules sized by lines of code, dependency edges colored by kind, entry points ringed](assets/topology-viewer-screenshot.jpg)
-
-Build a dependency and topology map of the **legacy** system: program/module call graph, data lineage (programs ↔ data stores), entry points, dead-end candidates, and 2–4 traced business flows each anchored to a persona (the claimant, the operator, the auditor — not the maintainer). Writes a re-runnable extraction script and produces `analysis/<system>/topology.json` plus `analysis/<system>/TOPOLOGY.html` — an **interactive zoomable map** (circle-pack of domains/modules sized by LOC, dependency edges with per-kind toggles, search, click-for-details sidebar, and a walkthrough mode that plays each persona flow as a numbered path with a plain-language narrative). Built from a template shipped with the plugin, so it works on systems far too dense for a static diagram. Small domain-level `call-graph.mmd`, `data-lineage.mmd`, and `critical-path.mmd` are still exported for docs and PRs.
+Build a dependency and topology map of the **legacy** system: program/module call graph, data lineage (programs ↔ data stores), entry points, dead-end candidates, and one traced critical-path business flow. Writes a re-runnable extraction script and produces `analysis/<system>/topology.json` (machine-readable), `analysis/<system>/TOPOLOGY.html` (rendered Mermaid + architect observations), and standalone `call-graph.mmd`, `data-lineage.mmd`, and `critical-path.mmd`.

 ### `/modernize-extract-rules <system-dir> [module-pattern]`
 Mine the business rules embedded in the legacy code — calculations, validations, eligibility, state transitions, policies — into Given/When/Then "Rule Cards" with `file:line` citations and confidence ratings. Spawns three `business-rules-extractor` agents in parallel (calculations, validations, lifecycle). Produces `analysis/<system>/BUSINESS_RULES.md` and `analysis/<system>/DATA_OBJECTS.md`.

 ### `/modernize-brief <system-dir> [target-stack]`
-Synthesize the discovery artifacts into a phased **Modernization Brief** — the single document a steering committee approves and engineering executes: target architecture, strangler-fig phase plan with entry/exit criteria, persona-based business walkthroughs (the section non-technical approvers actually read), behavior contract, validation strategy, open questions, and an approval block. Reads `ASSESSMENT.md`, `TOPOLOGY.html`, and `BUSINESS_RULES.md` and **stops if any are missing** — run the discovery commands first. Produces `analysis/<system>/MODERNIZATION_BRIEF.md` and enters plan mode as a human-in-the-loop gate.
+Synthesize the discovery artifacts into a phased **Modernization Brief** — the single document a steering committee approves and engineering executes: target architecture, strangler-fig phase plan with entry/exit criteria, behavior contract, validation strategy, open questions, and an approval block. Reads `ASSESSMENT.md`, `TOPOLOGY.html`, and `BUSINESS_RULES.md` and **stops if any are missing** — run the discovery commands first. Produces `analysis/<system>/MODERNIZATION_BRIEF.md` and enters plan mode as a human-in-the-loop gate.

 ### `/modernize-reimagine <system-dir> <target-vision>`
 Greenfield rebuild from extracted intent rather than a structural port. Mines a spec (`analysis/<system>/AI_NATIVE_SPEC.md`), designs a target architecture and has it adversarially reviewed (`analysis/<system>/REIMAGINED_ARCHITECTURE.md`), then **scaffolds services with executable acceptance tests** under `modernized/<system>-reimagined/` and writes a `CLAUDE.md` knowledge handoff for the new system. Two human-in-the-loop checkpoints. Spawns `business-rules-extractor`, `legacy-analyst` (×2), `architecture-critic`, and general-purpose scaffolding agents.
@@ -61,9 +46,6 @@ Greenfield rebuild from extracted intent rather than a structural port. Mines a
 ### `/modernize-transform <system-dir> <module> <target-stack>`
 Surgical, single-module strangler-fig rewrite. Plans first (HITL gate), then writes characterization tests via `test-engineer`, then an idiomatic target implementation under `modernized/<system>/<module>/`, proves equivalence by running the tests, and produces `TRANSFORMATION_NOTES.md` mapping legacy → modern with deliberate deviations called out. Reviewed by `architecture-critic`.

-### `/modernize-status <system-dir>`
-Read-only progress report: artifact inventory with timestamps per workflow stage, staleness flags (e.g. a brief older than the assessment it was built from), secrets-hygiene checks (quarantine file gitignored and never committed), and the single most useful next command. Run it anytime you come back to a modernization after a break.
-
 ### `/modernize-harden <system-dir>`
 Security hardening pass on the **legacy** system: OWASP/CWE scan, dependency CVEs, secrets, injection. Spawns `security-auditor`. Produces `analysis/<system>/SECURITY_FINDINGS.md` ranked Critical / High / Medium / Low and a reviewed `analysis/<system>/security_remediation.patch` with minimal fixes for the Critical/High findings. The patch is reviewed by a second `security-auditor` pass before you see it. **Never edits `legacy/`** — you review and apply the patch yourself when ready, then re-run to verify. Useful as a pre-modernization step when the legacy system will keep running in production during the migration.

@@ -99,21 +81,17 @@ This plugin ships commands and agents, but modernization projects benefit from a
      "Edit(modernized/**)"
    ],
    "deny": [
-      "Edit(legacy/**)",
-      "Write(legacy/**)"
+      "Edit(legacy/**)"
    ]
  }
 }
 ```

-Adjust `legacy/` and `modernized/` to match your actual layout. The key invariants: `Edit`/`Write` under `legacy/` are denied, and writes are scoped to `analysis/` (for documents) and `modernized/` (for the new code). Note this guards the file tools — shell commands that mutate files (`sed -i`, `git apply`) still go through the normal Bash permission prompt, so review those prompts with the same invariant in mind. Every command in this plugin respects this — `/modernize-harden` writes a patch to `analysis/` rather than editing `legacy/` in place.
+Adjust `legacy/` and `modernized/` to match your actual layout. The key invariants: `Edit` under `legacy/` is denied, and writes are scoped to `analysis/` (for documents) and `modernized/` (for the new code). Every command in this plugin respects this — `/modernize-harden` writes a patch to `analysis/` rather than editing `legacy/` in place.

 ## Typical Workflow

 ```bash
-# 0. Check the environment is ready (tools, toolchain, source completeness)
-/modernize-preflight billing
-
 # 1. Inventory the legacy system (or sweep a portfolio of them)
 /modernize-assess billing

@@ -134,9 +112,6 @@ Adjust `legacy/` and `modernized/` to match your actual layout. The key invarian

 # 6. Security-harden the legacy system that's still in production
 /modernize-harden billing
-
-# Anytime: where am I, what's stale, what's next
-/modernize-status billing
 ```

 ## License
--- a/plugins/code-modernization/agents/architecture-critic.md
+++ b/plugins/code-modernization/agents/architecture-critic.md
@@ -29,12 +29,6 @@ For **transformed code**:
 - Does the test suite actually pin behavior, or just exercise code paths?
 - What would the on-call engineer need at 3am that isn't here?

-## Secret handling (mandatory)
-
-When a finding quotes code containing a credential, key, token, or
-connection string, mask the value (`'Pr0d****'`) and cite `file:line` —
-findings get appended verbatim to committed notes files.
-
 ## Output

 Findings ranked **Blocker / High / Medium / Nit**. Each with: what, where,
--- a/plugins/code-modernization/agents/business-rules-extractor.md
+++ b/plugins/code-modernization/agents/business-rules-extractor.md
@@ -40,15 +40,6 @@ of the technology, skip it.
   from structure/names), **Low** (ambiguous; needs SME).
 6. If confidence < High, write the exact question an SME must answer.

-## Secret handling (mandatory)
-
-Rule parameters sometimes *are* credentials — hardcoded passwords in auth
-checks, API keys in partner-service calls, connection strings in batch
-routines. Record the **rule**, never the **value**: write the parameter as
-`<credential — masked, see file:line>` with at most a 2–4 character
-preview. Rule cards flow into briefs and steering decks; a raw credential
-in a parameter list is a leak.
-
 ## Output format

 One "Rule Card" per rule (see the format in the `/modernize-extract-rules`
--- a/plugins/code-modernization/agents/legacy-analyst.md
+++ b/plugins/code-modernization/agents/legacy-analyst.md
@@ -32,15 +32,6 @@ and explain it in terms a modern engineer can act on.
 - **Note what's missing.** Unhandled error paths, TODO comments, commented-out
  blocks, magic numbers — these are signals about history and risk.

-## Secret handling (mandatory)
-
-Legacy code is full of live credentials, and your findings get copied into
-shareable reports. When the evidence for a finding — hardcoded config,
-dead code, debt, an interface payload — includes a credential, API key,
-token, connection string, or private key, **never reproduce the value**.
-Cite `file:line` with a masked preview (`VALUE 'Pr0d****'`,
-`password=****`). The finding is the practice, not the value.
-
 ## Output format

 Default to structured markdown: tables for inventories, Mermaid for graphs,
--- a/plugins/code-modernization/agents/security-auditor.md
+++ b/plugins/code-modernization/agents/security-auditor.md
@@ -39,30 +39,7 @@ terminal/screen items don't apply to a SPA. Work through what's relevant:

 Use available SAST where it helps (npm audit, pip-audit, grep for known-bad
 patterns) but **read the code** — tools miss logic flaws. Show tool output
-verbatim — except secret values, which you redact (see below) — then add
-your manual findings.
-
-## Secret handling (mandatory)
-
-Legacy codebases routinely contain live production credentials, and your
-findings get pasted into decks, tickets, and committed markdown. Copying a
-secret into a report multiplies the exposure you were hired to find.
-
-When you discover a hardcoded credential, API key, token, connection
-string, or private key:
-
- **Never write the secret's value into any output** — no finding table,
-  no report, no quoted code excerpt, no echoed tool output. Mask it to the
-  first 2–4 identifying characters plus `****` (`AKIA****`,
-  `postgres://app_user:****@db-prod…`). If a scanner prints a secret,
-  redact it before including the excerpt.
- Cite `file:line`. The source file is the canonical location — anyone who
-  legitimately needs the value can open it there.
- State what the credential appears to grant access to (database, queue,
-  cloud account, third-party API) and whether it looks like a production
-  or test credential.
- Recommend rotation for anything that looks live — exposure in source
-  means it is already compromised, independent of any modernization plan.
+verbatim, then add your manual findings.

 ## Reporting standard

--- a/plugins/code-modernization/agents/test-engineer.md
+++ b/plugins/code-modernization/agents/test-engineer.md
@@ -28,15 +28,6 @@ someone thinks it should do) so that a rewrite can be proven equivalent.
  `@Disabled("pending RULE-NNN")` / `@pytest.mark.skip` / `it.todo()` — never
  deleted.

-## Secret handling (mandatory)
-
-Never copy credential-like literals — passwords, API keys, tokens,
-connection strings — from legacy code into test fixtures. Tests live in
-the deliverable codebase and get committed. Substitute clearly-fake values
-of the same shape and length and note the substitution in a comment.
-Anything a test genuinely needs live (e.g. a real database connection for
-a dual-run harness) is read from an environment variable, never inlined.
-
 ## Output

 Idiomatic tests for the requested target stack (JUnit 5 / pytest / Vitest /
--- a/plugins/code-modernization/assets/topology-viewer-screenshot.jpg
+++ b/plugins/code-modernization/assets/topology-viewer-screenshot.jpg
--- a/plugins/code-modernization/assets/topology-viewer.html
+++ b/plugins/code-modernization/assets/topology-viewer.html
--- a/plugins/code-modernization/commands/modernize-assess.md
+++ b/plugins/code-modernization/commands/modernize-assess.md
@@ -1,13 +1,11 @@
 ---
 description: Full discovery & portfolio analysis of a legacy system — inventory, complexity, debt, effort estimation
-argument-hint: <system-dir> [--show-secrets] | --portfolio <parent-dir>
+argument-hint: <system-dir> | --portfolio <parent-dir>
 ---

 **Mode select.** If `$ARGUMENTS` starts with `--portfolio`, run **Portfolio
 mode** against the directory that follows. Otherwise run **Single-system
-mode** against the system dir. Parse flags positionally-independently:
-`--show-secrets` may appear before or after the system dir — the system
-dir is the first non-flag token.
+mode** against `legacy/$1`.

 ---

@@ -110,16 +108,12 @@ Spawn three subagents **in parallel**:
 2. **legacy-analyst** — "Identify technical debt in legacy/$1: dead code,
   deprecated APIs, copy-paste duplication, god objects/programs, missing
   error handling, hardcoded config. Return the top 10 findings ranked by
-   remediation value, each with file:line evidence. If evidence contains a
-   credential value, mask it per your secret-handling rules — never quote
-   it."
+   remediation value, each with file:line evidence."

 3. **security-auditor** — "Scan legacy/$1 for security vulnerabilities:
   injection, auth weaknesses, hardcoded secrets, vulnerable dependencies,
   missing input validation. Return findings in CWE-tagged table form with
-   file:line evidence and severity. Mask every discovered credential value
-   per your secret-handling rules — file:line plus a 2–4 character masked
-   preview, never the value itself."
+   file:line evidence and severity."

 Wait for all three. Synthesize their findings.

@@ -147,31 +141,6 @@ need explained.

 ## Step 6 — Write the assessment

-**Secrets quarantine first.** The assessment gets shared and committed —
-discovered credential values must never appear in it. If the
-security-auditor found any hardcoded credentials:
-
-1. Ensure `analysis/.gitignore` exists and contains the lines
-   `SECRETS.local.md` and `*.local.patch` (create or append as needed —
-   the patch pattern is used by `/modernize-harden`; writing both now
-   means the ignore set is complete from first contact). If the project is a
-   git repo, verify with `git check-ignore -q analysis/$1/SECRETS.local.md`
-   — do not write any findings until the check passes. If there is **no
-   git repo** (check for `.svn`/`.hg`/`CVS` too — a `.gitignore` protects
-   nothing under another VCS): refuse `--show-secrets` and write
-   `SECRETS.local.md` to `~/.modernize/$1/` instead of the project tree,
-   telling the user where it went and why.
-2. Write `SECRETS.local.md`: one row per credential — masked preview,
-   `file:line`, credential type, what it grants access to,
-   production/test guess, rotation recommendation. Only if the user passed
-   `--show-secrets`, add the raw value column here — this file only, never
-   ASSESSMENT.md.
-3. Masking applies to **every section of ASSESSMENT.md**, whichever agent
-   produced the finding — the Technical Debt section quotes hardcoded
-   config; those quotes follow the same masking rule as Security Findings.
-   The Security Findings section adds a one-line pointer:
-   "Credential inventory in SECRETS.local.md (gitignored; not for sharing)."
-
 Create `analysis/$1/ASSESSMENT.md` with these sections:
 - **Executive Summary** (3-4 sentences: what it is, how big, how risky, headline recommendation)
 - **System Inventory** (the scc table + tech fingerprint)
--- a/plugins/code-modernization/commands/modernize-brief.md
+++ b/plugins/code-modernization/commands/modernize-brief.md
@@ -8,19 +8,10 @@ single document a steering committee approves and engineering executes.

 Target stack: `$2` (if blank, recommend one based on the assessment findings).

-Read `analysis/$1/ASSESSMENT.md`, `analysis/$1/topology.json` (plus the
-`.mmd` files alongside it — do NOT read `TOPOLOGY.html`, it's an
-interactive viewer with the data minified inside), and
-`analysis/$1/BUSINESS_RULES.md` first. If any are missing, say so and
-stop — they come from `/modernize-assess`, `/modernize-map`, and
-`/modernize-extract-rules` respectively. Run those first.
-
-**Staleness check:** compare modification times. If any input is newer
-than an existing `MODERNIZATION_BRIEF.md`, the brief is being justifiably
-regenerated; but if an existing brief is newer than all inputs and the
-user re-ran this command anyway, ask what changed. Either way, note the
-input timestamps in the brief's header so reviewers can see what it was
-built from.
+Read `analysis/$1/ASSESSMENT.md`, `analysis/$1/TOPOLOGY.html` (and the `.mmd`
+files alongside it), and `analysis/$1/BUSINESS_RULES.md` first. If any are
+missing, say so and stop — they come from `/modernize-assess`, `/modernize-map`,
+and `/modernize-extract-rules` respectively. Run those first.

 ## The Brief

@@ -40,38 +31,28 @@ fewest-dependencies first. For each phase:
 - Scope (which legacy modules, which target services)
 - Entry criteria (what must be true to start)
 - Exit criteria (what tests/metrics prove it's done)
- Estimated effort (person-months, same unit as the assessment's COCOMO
-  figure — convert deliberately if you present weeks)
+- Estimated effort (person-weeks, derived from COCOMO + complexity data; the assessment's COCOMO figure is in person-months, so convert explicitly)
 - Risk level + top 2 risks + mitigation

 Render the phases as a Mermaid `gantt` chart.

-### 4. Business Walkthroughs
-For each persona flow in `analysis/$1/topology.json` (`flows` — produced
-by `/modernize-map`), a short narrative table: persona, what happens in
-business language, which legacy modules implement it today, and which
-phase from §3 replaces each. This is the section non-technical approvers
-actually read — it connects "Phase 2" to "what happens when a customer
-files a claim". If topology.json has no flows, derive 2–3 walkthroughs
-from the entry points and say they need SME confirmation.
-
-### 5. Behavior Contract
+### 4. Behavior Contract
 List the **P0 rules** from BUSINESS_RULES.md (the ones tagged `Priority: P0` —
 money, regulatory, data integrity) that MUST be proven equivalent before any
 phase ships. These become the regression suite. Flag any P0 rule with
 Confidence < High as a blocker requiring SME confirmation before its phase
 starts.

-### 6. Validation Strategy
+### 5. Validation Strategy
 State which combination applies: characterization tests, contract tests,
 parallel-run / dual-execution diff, property-based tests, manual UAT.
 Justify per phase.

-### 7. Open Questions
+### 6. Open Questions
 Anything requiring human/SME decision before Phase 1 starts. Each as a
 checkbox the approver must tick.

-### 8. Approval Block
+### 7. Approval Block
 ```
 Approved by: ________________  Date: __________
 Approval covers: Phase 1 only | Full plan
@@ -79,7 +60,6 @@ Approval covers: Phase 1 only | Full plan

 ## Present

-Present a summary of the brief and **stop — write nothing further until
-the user explicitly approves** (use plan mode if the session supports
-it). This gate is the human-in-the-loop control point; "no objection" is
-not approval.
+Enter **plan mode** and present a summary of the brief. Do NOT proceed to any
+transformation until the user explicitly approves. This gate is the
+human-in-the-loop control point.
--- a/plugins/code-modernization/commands/modernize-extract-rules.md
+++ b/plugins/code-modernization/commands/modernize-extract-rules.md
@@ -46,7 +46,7 @@ Merge the three result sets. Deduplicate. For each distinct rule, write a
  When  <trigger>
  Then  <outcome>
  [And  <additional outcome>]
-**Parameters:** <constants, rates, thresholds with their current values — credentials masked: `<credential — masked, see file:line>`>
+**Parameters:** <constants, rates, thresholds with their current values>
 **Edge cases handled:** <list>
 **Suspected defect:** <optional — legacy behavior that looks wrong; decide preserve-vs-fix during transform>
 **Confidence:** High | Medium | Low — <why; if < High, state the exact SME question>
--- a/plugins/code-modernization/commands/modernize-harden.md
+++ b/plugins/code-modernization/commands/modernize-harden.md
@@ -1,42 +1,14 @@
 ---
 description: Security vulnerability scan with a reviewable remediation patch — OWASP, CWE, CVE, secrets, injection
-argument-hint: <system-dir> [--show-secrets]
+argument-hint: <system-dir>
 ---

-Run a **security hardening pass** on the legacy system: find
-vulnerabilities, rank them, and produce a reviewable patch for the
-critical ones. Parse arguments flag-independently: the system dir
-(referred to as `$1` below) is the first non-flag token in `$ARGUMENTS`;
-`--show-secrets` may appear anywhere.
+Run a **security hardening pass** on `legacy/$1`: find vulnerabilities, rank
+them, and produce a reviewable patch for the critical ones.

 This command never edits `legacy/` — it writes findings and a proposed patch
 to `analysis/$1/`. The user reviews and applies (or not).

-## Step 0 — Secrets quarantine setup
-
-Findings files get shared, committed, and pasted into decks — discovered
-credential values must never land in them. Before any scanning:
-
-1. Ensure `analysis/.gitignore` exists and contains the lines
-   `SECRETS.local.md` and `*.local.patch`. Create the file or append the
-   missing lines.
-2. If the project is a git repo, verify with
-   `git check-ignore -q analysis/$1/SECRETS.local.md` — if that exits
-   non-zero, fix the ignore rule before proceeding. Do not write any
-   findings until this check passes.
-3. **If there is no git repo** (check for `.svn`/`.hg`/`CVS` too — a
-   `.gitignore` protects nothing under another VCS): refuse
-   `--show-secrets`, and write `SECRETS.local.md` and any `.local.patch`
-   file to `~/.modernize/$1/` instead of the project tree, telling the
-   user where they went and why.
-
-All secret values in every shareable artifact this command produces are
-**masked** (`AKIA****`, `password=****`) and cited by `file:line`. Raw
-values may appear in exactly two places, both gitignored: the
-`*.local.patch` remediation hunks (unavoidably — see Remediate) and, only
-with `--show-secrets`, `SECRETS.local.md`. Never in SECURITY_FINDINGS.md
-or patch commentary.
-
 ## Scan

 Spawn the **security-auditor** subagent:
@@ -48,9 +20,7 @@ hardcoded secrets, vulnerable dependency versions, missing input validation,
 path traversal. For each finding return: CWE ID, severity
 (Critical/High/Med/Low), file:line, one-sentence exploit scenario, and
 recommended fix. Run any available SAST tooling (npm audit, pip-audit,
-OWASP dependency-check) and include its raw output. Mask every discovered
-credential value per your secret-handling rules — file:line plus a 2–4
-character masked preview, never the value itself."
+OWASP dependency-check) and include its raw output."

 ## Triage

@@ -59,50 +29,26 @@ Write `analysis/$1/SECURITY_FINDINGS.md`:
 - Findings table sorted by severity
 - Dependency CVE table (package, installed version, CVE, fixed version)

-If any hardcoded credentials were found, also write
-`analysis/$1/SECRETS.local.md` (the gitignored quarantine file from Step 0):
-one row per credential — masked preview, `file:line`, credential type, what
-it appears to grant access to, production/test guess, and a rotation
-recommendation. With `--show-secrets`, append the raw value column here —
-this file only. SECURITY_FINDINGS.md gets a one-line pointer:
-"N hardcoded credentials found — inventory in SECRETS.local.md (gitignored;
-not for sharing)."
-
 ## Remediate

 For each **Critical** and **High** finding, draft a minimal, targeted fix.
-Do **not** edit `legacy/` — write fixes as unified diffs with **paths
-relative to the project root** (`legacy/$1/...`), applied from the project
-root, with a comment line above each hunk citing the finding ID it
-addresses (`# SEC-001: parameterize the query`).
-
-**Credential findings split into two files.** A diff that removes a
-hardcoded secret necessarily contains the raw value on its `-` and
-context lines — that cannot go in the shareable patch:
-
- `analysis/$1/security_remediation.patch` (shareable) — every
-  non-credential hunk, plus for each credential finding a comment-only
-  placeholder: `# SEC-NNN: credential remediation — hunk in
-  security_remediation.local.patch (gitignored; not for sharing)`.
- `analysis/$1/security_remediation.local.patch` (gitignored in Step 0) —
-  the real, applyable hunks for credential findings only.
+Do **not** edit `legacy/` — write all fixes as a single unified diff (paths
+relative to `legacy/$1`) to `analysis/$1/security_remediation.patch`, with a
+comment line above each hunk citing the finding ID it addresses (`# SEC-001: parameterize the query`).

 Add a **Remediation Log** section to SECURITY_FINDINGS.md mapping each
-finding ID → one-line summary of the proposed fix and which patch file
-carries the hunk.
+finding ID → one-line summary of the proposed fix and the patch hunk that
+implements it.

 ## Verify

-Spawn the **security-auditor** again to **review both patches** against
-the original code:
+Spawn the **security-auditor** again to **review the patch** against the
+original code:

-"Review analysis/$1/security_remediation.patch and
-analysis/$1/security_remediation.local.patch against legacy/$1. For each
+"Review analysis/$1/security_remediation.patch against legacy/$1. For each
 hunk: does it fully remediate the cited finding? Does it introduce new
-vulnerabilities or change behavior beyond the fix? Confirm no raw
-credential values appear anywhere in the shareable patch. Return one
-verdict per hunk: RESOLVES / PARTIAL / INTRODUCES-RISK, with a one-line
-reason."
+vulnerabilities or change behavior beyond the fix? Return one verdict per
+hunk: RESOLVES / PARTIAL / INTRODUCES-RISK, with a one-line reason."

 Add a **Patch Review** section to SECURITY_FINDINGS.md with the verdicts.
 If any hunk is PARTIAL or INTRODUCES-RISK, revise the patch and re-review.
@@ -111,12 +57,8 @@ If any hunk is PARTIAL or INTRODUCES-RISK, revise the patch and re-review.

 Tell the user the artifacts are ready:
 - `analysis/$1/SECURITY_FINDINGS.md` — findings, remediation log, patch review
- `analysis/$1/security_remediation.patch` — review, then apply **from the
-  project root**: `git apply analysis/$1/security_remediation.patch`
-  (if `legacy/$1` is a symlink, use `git apply --unsafe-paths` or apply
-  with `patch -p0` from the project root)
- `analysis/$1/security_remediation.local.patch` — the credential fixes;
-  apply the same way, and rotate the affected credentials regardless
+- `analysis/$1/security_remediation.patch` — review, then apply if appropriate
+  with `git -C legacy/$1 apply ../../analysis/$1/security_remediation.patch`
 - Re-run `/modernize-harden $1` after applying to confirm resolution

 Suggest: `glow -p analysis/$1/SECURITY_FINDINGS.md`
--- a/plugins/code-modernization/commands/modernize-map.md
+++ b/plugins/code-modernization/commands/modernize-map.md
@@ -55,124 +55,50 @@ re-run and audited. Have it write a machine-readable
 `analysis/$1/topology.json` and print a human summary. Run it; show the
 summary (cap at ~200 lines for very large estates).

-`topology.json` must follow this schema — it feeds the interactive viewer:
-
-```json
-{
-  "system": "<display name>",
-  "root": {
-    "id": "sys", "name": "<system>", "kind": "system",
-    "children": [
-      { "id": "dom:<domain>", "name": "<Domain>", "kind": "domain",
-        "children": [
-          { "id": "<MODULE>", "name": "<MODULE>", "kind": "module",
-            "language": "cobol", "loc": 1234, "file": "src/MODULE.cbl" }
-        ] },
-      { "id": "dom:data", "name": "Data stores", "kind": "domain",
-        "children": [
-          { "id": "ds:<NAME>", "name": "<NAME>", "kind": "datastore" }
-        ] }
-    ]
-  },
-  "edges": [
-    { "source": "<id>", "target": "<id>", "kind": "call" }
-  ],
-  "entryPoints": ["<id>", "..."],
-  "deadEnds": ["<id>", "..."],
-  "observations": ["<architect observation>", "..."],
-  "flows": [
-    { "name": "<business flow>", "persona": "<who experiences it>",
-      "description": "<one sentence, plain language>",
-      "steps": [
-        { "label": "<business-language step>", "nodes": ["<id>", "<id>"] }
-      ] }
-  ]
-}
-```
-
- Group leaf modules under `domain` containers (use the domains from
-  `/modernize-assess` if available). Leaf kinds: `module`, `datastore`,
-  `job`, `screen`. `loc` drives circle size — include it for modules.
- Edge kinds: `call` (direct), `dispatch` (dynamic/router), `read`,
-  `write`. Every edge endpoint must be a leaf id that exists in the tree.
- `deadEnds`: the dead-end candidates from the extraction, rendered with
-  a dashed outline in the viewer. Apply the suppression rules above —
-  anything that could be the target of an unresolved dynamic call does
-  NOT belong here; record that uncertainty in `observations` instead.
- **Datastore ids and names must be logical identifiers** — DD name,
-  dataset name, table/schema name, at most host:port. If the resolved
-  config value is a URL or DSN, strip userinfo and credential query
-  params before it goes anywhere in topology.json: the file gets
-  committed and the viewer displays names verbatim. Never copy raw
-  config values into `observations`.
- `observations`: 3–7 architect observations — tight coupling clusters,
-  single points of failure, service-extraction candidates, data stores
-  with too many writers, dispatch targets the extraction could not
-  resolve.
- `flows` is the **persona walkthrough** section — see below.
-
-## Persona flows
-
-Trace **2–4 end-to-end business flows**, each anchored to a persona —
-the people who experience the system, not the people who maintain it
-(e.g. for a benefits system: the claimant, the caseworker, the auditor;
-for billing: the customer, the billing operator). For each flow:
-
- `name` + one-sentence `description` in plain business language —
-  something a steering committee member relates to ("a claimant files a
-  weekly claim"), not a data-flow label ("CLM batch ingest").
- `steps`: 3–8 steps, each with a business-language `label` and the
-  `nodes` (programs + data stores) that implement that step, in
-  execution order.
-
-This is the bridge between the technical map and non-technical
-stakeholders: the same diagram answers "which program does X" for
-engineers and "what happens when someone files a claim" for everyone else.
-
 ## Render

-`analysis/$1/TOPOLOGY.html` is an **interactive map**: a zoomable
-circle-pack of the whole system (domains as containers, modules sized by
-LOC) with dependency edges, search, per-node detail sidebar, edge-kind
-toggles, and a flow-walkthrough mode that plays each persona flow as a
-numbered path. Build it from the template that ships with this plugin —
-do not hand-write the viewer:
+From the extracted data, generate **three Mermaid diagrams** and write them
+to `analysis/$1/TOPOLOGY.html` as a single-file page that renders in any
+browser with network access (Mermaid itself is loaded from a CDN).

-```bash
-python3 - "${CLAUDE_PLUGIN_ROOT}/assets/topology-viewer.html" analysis/$1 <<'EOF'
-import json, sys
-tpl_path, out_dir = sys.argv[1], sys.argv[2]
-tpl = open(tpl_path).read()
-marker = "/*__TOPOLOGY_DATA__*/ null"
-assert marker in tpl, f"injection marker not found in {tpl_path}"
-data = json.dumps(json.load(open(f"{out_dir}/topology.json")))
-open(f"{out_dir}/TOPOLOGY.html", "w").write(
-    tpl.replace(marker, "/*__TOPOLOGY_DATA__*/ " + data))
-print(f"wrote {out_dir}/TOPOLOGY.html")
-EOF
+The HTML page must use: dark `#1e1e1e` background, `#d4d4d4` text,
+`#cc785c` for `<h2>`/accents, `system-ui` font, all CSS **inline** (no
+external stylesheets). Load Mermaid from a CDN in `<head>`:
+
+```html
+<script type="module">
+  import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
+  mermaid.initialize({ startOnLoad: true, theme: 'dark' });
+</script>
 ```

-The viewer is fully self-contained (the d3 subset it needs is inlined in
-the template) — it works offline and on air-gapped networks. If the
-`python3` invocation fails to find the template,
-`${CLAUDE_PLUGIN_ROOT}` was not substituted — report that rather than
-hand-writing a viewer.
+Each diagram goes in a `<pre class="mermaid">...</pre>` block. Do **not**
+wrap diagrams in markdown ` ``` ` fences inside the HTML.

-Mermaid stays for **small, exportable** diagrams. Generate standalone
-`.mmd` files for reuse in docs and PRs — but keep each under ~40 edges;
-collapse to domain level if the full graph is bigger (dense Mermaid
-becomes unreadable, which is exactly what the interactive map is for):
+1. **`graph TD` — Module call graph.** Cluster by domain (use `subgraph`).
+   Highlight entry points in a distinct style. Cap at ~40 nodes — if larger,
+   show domain-level with one expanded domain.

- `analysis/$1/call-graph.mmd` — domain-level `graph TD`, entry points
-  highlighted
- `analysis/$1/data-lineage.mmd` — `graph LR`, programs → data stores,
-  read vs write marked
- `analysis/$1/critical-path.mmd` — `flowchart TD` of the primary flow
-  from `flows`, annotated with p50/p99 wall-clock if telemetry is
-  available (see `/modernize-assess` Step 4)
+2. **`graph LR` — Data lineage.** Programs → data stores.
+   Mark read vs write edges.
+
+3. **`flowchart TD` — Critical path.** Trace ONE end-to-end business flow
+   (e.g., "monthly billing run" or "process payment") through every program
+   and data store it touches, in execution order. If production telemetry is
+   available (see `/modernize-assess` Step 4), annotate each step with its
+   p50/p99 wall-clock.
+
+Also export the three diagrams as standalone `.mmd` files for re-use:
+`analysis/$1/call-graph.mmd`, `analysis/$1/data-lineage.mmd`,
+`analysis/$1/critical-path.mmd`.
+
+## Annotate
+
+Below each `<pre class="mermaid">` block in TOPOLOGY.html, add a `<ul>`
+with 3-5 **architect observations**: tight coupling clusters, single
+points of failure, candidates for service extraction, data stores
+touched by too many writers.

 ## Present

-Tell the user to open `analysis/$1/TOPOLOGY.html` in a browser, and to
-try: search for a module, click it to see its connections, and pick a
-persona flow from the walkthrough dropdown.
+Tell the user to open `analysis/$1/TOPOLOGY.html` in a browser.
--- a/plugins/code-modernization/commands/modernize-preflight.md
+++ b/plugins/code-modernization/commands/modernize-preflight.md
@@ -1,98 +0,0 @@
---
-description: Environment readiness check — analysis tools, build toolchain, source completeness, telemetry access
-argument-hint: <system-dir> [target-stack]
---
-
-Check whether this environment is ready to analyze — and eventually
-transform — `legacy/$1`, and tell the user exactly what to fix before the
-other commands run into it. Modernization sessions fail late and
-confusingly when this isn't done: assessment metrics silently degrade
-without analysis tools, characterization tests can't run without a build
-toolchain, and dependency maps come out wrong when half the source isn't
-in the tree.
-
-Run every check even when an early one fails — the point is one complete
-readiness report, not the first error.
-
-## Check 1 — Detect the stack
-
-Fingerprint `legacy/$1` from file extensions and manifests: languages,
-build system, deployment/config descriptors. This drives which checks
-below apply. Report what was detected and the rough file split.
-
-## Check 2 — Analysis tooling
-
-For each, check availability (`command -v`) and report version, what it's
-used for, and what degrades without it:
-
-| Tool | Used by | Without it |
-|---|---|---|
-| `scc` (or `cloc`) | assess | LOC/complexity fall back to `find`+`wc`; COCOMO estimate gets coarser |
-| `lizard` | assess --portfolio | complexity estimated from decision-keyword counts |
-| `glow` | all | markdown artifacts render as plain text |
-| `delta` | transform | side-by-side diffs fall back to `diff -y` |
-
-Include the platform's install one-liner for anything missing
-(`brew install scc`, `apt install cloc`, `pip install lizard`, …).
-
-## Check 3 — Build toolchain (smoke test, not just presence)
-
-Identify the compiler/interpreter for the detected legacy stack — e.g.
-GnuCOBOL (`cobc`) for COBOL, JDK + Maven/Gradle for Java, `cc`/`make` for
-C, `dotnet` for .NET. Then **prove it works on this codebase**: pick one
-representative source file and run a syntax-only compile
-(`cobc -fsyntax-only`, `javac`, `gcc -fsyntax-only`, …).
-
-A failed smoke test is the most valuable output of this command — report
-the actual error and diagnose it: missing copybook/include path, missing
-dialect flag (`-std=ibm` etc.), fixed vs free format, missing dependency
-jar. These are the errors that otherwise surface mid-`/modernize-transform`
-with much less context.
-
-If the user passed a `[target-stack]`, do the same for it: runtime,
-package manager, test framework (`mvn -v`, `npm -v`, `pytest --version`, …).
-
-## Check 4 — Source completeness
-
-The dependency map is only as good as what's in the tree. Check for the
-detected stack's equivalents of:
-
- **Referenced-but-missing includes** — copybooks (`COPY X` with no
-  `X.cpy`), headers, imports that resolve nowhere. Count and list the top
-  missing names.
- **Deployment/config descriptors** — JCL for batch COBOL, CICS CSD
-  definitions, `web.xml`/route configs, cron/scheduler definitions.
-  Without these, entry-point detection and the code↔storage join in
-  `/modernize-map` are guesswork.
- **Data definitions** — DDL, schemas, copybook record layouts, ORM
-  mappings.
- **Binary-only artifacts** — load modules, jars, DLLs with no matching
-  source. These become unmappable black boxes; flag them now.
-
-## Check 5 — Optional context
-
- **Production telemetry** — is an observability/APM MCP server connected,
-  or are batch job logs / runtime exports available? (Enables the runtime
-  overlay in `/modernize-assess` Step 4 and timing annotations in
-  `/modernize-map`.)
- **Version control history** — is `legacy/$1` under git with meaningful
-  history? (Change-frequency data sharpens risk ranking.)
-
-## Report
-
-Write `analysis/$1/PREFLIGHT.md`: a status table — one row per check,
-status ✅ / ⚠️ / ❌, what was found, and the fix for anything not green —
-followed by a **Ready / Ready-with-gaps / Not ready** verdict per command:
-
- `assess` + `map` + `extract-rules` — need Checks 1–2 green-ish and
-  Check 4's missing-include count low
- `brief` — needs only the three discovery artifacts; no tooling
- `transform` + `reimagine` — additionally need Check 3 green for the
-  **target** stack. A red legacy toolchain downgrades these to
-  Ready-with-gaps, not Not-ready: equivalence testing falls back to
-  recorded traces / golden-master fixtures instead of dual execution
-  (common and expected for CICS/IMS code that has no local runtime)
- `harden` — needs Check 2 plus any stack-specific SAST tooling found
-
-Print the table in the session too, and end with the single most
-important fix if anything is red.
--- a/plugins/code-modernization/commands/modernize-reimagine.md
+++ b/plugins/code-modernization/commands/modernize-reimagine.md
@@ -3,11 +3,7 @@ description: Multi-agent greenfield rebuild — extract specs from legacy, desig
 argument-hint: <system-dir> <target-vision>
 ---

-The first token of `$ARGUMENTS` is the system dir (`$1`); **everything
-after it is the target vision** — it is usually multiple words, so do not
-truncate it to one token. Below, `<vision>` means that full remainder.
-
-**Reimagine** `legacy/$1` as: <vision>
+**Reimagine** `legacy/$1` as: $2

 This is not a port — it's a rebuild from extracted intent. The legacy system
 becomes the *specification source*, not the structural template. This command
@@ -23,8 +19,7 @@ Spawn concurrently and show the user that all three are running:
 2. **legacy-analyst** — "Catalog every external interface of legacy/$1:
   inbound (screens, APIs, batch triggers, queues) and outbound (reports,
   files, downstream calls, DB writes). For each: name, direction, payload
-   shape, frequency/SLA if discernible. Mask any credential embedded in
-   endpoints or payload examples per your secret-handling rules."
+   shape, frequency/SLA if discernible."

 3. **legacy-analyst** — "Identify the core domain entities in legacy/$1 and
   their relationships. Return as an entity list + Mermaid erDiagram."
@@ -37,9 +32,6 @@ Collect results. Write `analysis/$1/AI_NATIVE_SPEC.md` containing:
 - **Non-functional requirements** inferred from legacy (batch windows, volumes)
 - **Behavior Contract** (the Given/When/Then rules — these are the acceptance tests)

-Credential values are masked everywhere in the spec; connection details
-appear as env-var placeholders (`${DATABASE_URL}`), never literals.
-
 ## Phase B — HITL checkpoint #1

 Present the spec summary. Ask the user **one focused question**: "Which of
@@ -48,21 +40,20 @@ should deliberately drop?" Wait for the answer. Record it in the spec.

 ## Phase C — Architecture (single agent, then critique)

-Design the target architecture for "<vision>":
+Design the target architecture for "$2":
 - Mermaid C4 Container diagram
 - Service boundaries with rationale (which rules/entities live where)
 - Technology choices with one-line justification each
 - Data migration approach from legacy stores

 Then spawn **architecture-critic**: "Review this proposed architecture for
-<vision> against the spec in analysis/$1/AI_NATIVE_SPEC.md. Identify over-engineering,
+$2 against the spec in analysis/$1/AI_NATIVE_SPEC.md. Identify over-engineering,
 missed requirements, scaling risks, and simpler alternatives." Incorporate
 the critique. Write the result to `analysis/$1/REIMAGINED_ARCHITECTURE.md`.

 ## Phase D — HITL checkpoint #2

-Present the architecture and **stop — scaffold nothing until the user
-explicitly approves** (use plan mode if the session supports it).
+Enter plan mode. Present the architecture. Do not scaffold anything until the user explicitly approves.

 ## Phase E — Parallel scaffolding

@@ -74,9 +65,7 @@ in parallel**:
 and AI_NATIVE_SPEC.md. Create: project skeleton, domain model, API stubs
 matching the interface contracts, and **executable acceptance tests** for every
 behavior-contract rule assigned to this service (mark unimplemented ones as
-expected-failure/skip with the rule ID). No credential literal from legacy
-code becomes a test fixture or config default — use fake same-shape values
-and env-var placeholders. Write to modernized/$1-reimagined/<service-name>/."
+expected-failure/skip with the rule ID). Write to modernized/$1-reimagined/<service-name>/."

 Show the agents' progress. When all complete, run the acceptance test suites
 and report: total tests, passing (scaffolded behavior), pending (rule IDs
@@ -88,9 +77,7 @@ Write `modernized/$1-reimagined/CLAUDE.md` — the persistent context file for
 the new system, containing: architecture summary, service responsibilities,
 where the spec lives, how to run tests, and the legacy→modern traceability
 map. This file IS the knowledge graph that future agents and engineers will
-load — and it gets committed: connection details and credentials appear
-only as env-var names with a pointer to where they're provisioned, never
-as values.
+load.

 Report: services scaffolded, acceptance tests defined, % behaviors with a
 home, location of all artifacts.
--- a/plugins/code-modernization/commands/modernize-status.md
+++ b/plugins/code-modernization/commands/modernize-status.md
@@ -1,54 +0,0 @@
---
-description: Where am I in the modernization workflow — artifact inventory, staleness, secrets hygiene, next step
-argument-hint: <system-dir>
---
-
-Report where the modernization of `$1` stands, in one screen. This is a
-read-only command — inspect, never modify.
-
-## 1 — Artifact inventory
-
-Check `analysis/$1/` and `modernized/$1*/` and build a table — one row per
-workflow stage, with the artifact's presence and modification time:
-
-| Stage | Artifacts |
-|---|---|
-| preflight | `PREFLIGHT.md` |
-| assess | `ASSESSMENT.md`, `ARCHITECTURE.mmd` |
-| map | `topology.json`, `TOPOLOGY.html`, `*.mmd`, `extract_topology.*` |
-| extract-rules | `BUSINESS_RULES.md`, `DATA_OBJECTS.md` |
-| brief | `MODERNIZATION_BRIEF.md` (note whether the approval block is signed) |
-| harden | `SECURITY_FINDINGS.md`, `security_remediation.patch` |
-| transform / reimagine | each `modernized/$1*/<module>/` dir — note test presence and whether `TRANSFORMATION_NOTES.md` exists |
-
-## 2 — Staleness
-
-Flag any artifact older than an upstream artifact it derives from:
-
- `MODERNIZATION_BRIEF.md` older than `ASSESSMENT.md`, `topology.json`,
-  or `BUSINESS_RULES.md` → the brief no longer reflects discovery;
-  recommend re-running `/modernize-brief`.
- `TOPOLOGY.html` older than `topology.json` → re-run the injection step
-  from `/modernize-map`.
- Any `TRANSFORMATION_NOTES.md` older than `BUSINESS_RULES.md` → the
-  module may not implement the latest rule set; list which.
-
-## 3 — Secrets hygiene
-
- Does `analysis/.gitignore` exist and cover `SECRETS.local.md` /
-  `*.local.patch`? (`git check-ignore` when in a git repo.)
- If `SECRETS.local.md` exists: confirm it is NOT tracked
-  (`git ls-files --error-unmatch`, expect failure) and has never been
-  committed (`git log --all --oneline -- <path>`, expect empty). If
-  either check fails, say so prominently and recommend rotation plus
-  history scrubbing.
-
-## 4 — Verdict
-
-End with three lines:
- **Where you are** — the furthest completed stage and roughly how much
-  of the system it covers (e.g. "mapped 100%, 2 of 14 modules
-  transformed").
- **What's stale** — or "nothing".
- **Next command** — the single most useful next step, with a one-line
-  reason.
--- a/plugins/code-modernization/commands/modernize-transform.md
+++ b/plugins/code-modernization/commands/modernize-transform.md
@@ -9,37 +9,10 @@ equivalence.
 This is a surgical, single-module transformation — one vertical slice of the
 strangler fig. Output goes to `modernized/$1/$2/`.

-## Step 0a — Toolchain check (fail fast on target, adapt on legacy)
-
-Verify the build environment **before** planning, not when the tests
-first run:
-
- **Target stack ($3) — required.** Runtime, package manager, and test
-  framework all respond (`java -version` + `mvn -v`, `node -v` + `npm -v`,
-  `python3 -V` + `pytest --version`, …). If any are missing, stop and
-  report what to install — the new code and its tests cannot run without
-  them, so a plan gate now would just defer the failure an hour. Suggest
-  `/modernize-preflight $1 $3` for the full readiness report.
- **Legacy stack — advisory, never a blocker.** Try a syntax-only compile
-  of the module being transformed (e.g. `cobc -fsyntax-only`). Legacy
-  code often *cannot* build locally by nature, not by misconfiguration —
-  CICS/IMS programs have no local translator, and the real runtime may be
-  a mainframe you don't have. A failed or impossible legacy compile does
-  **not** stop the transform; it changes the equivalence strategy:
-  - dual-execution proof is off the table — characterization tests
-    assert against **recorded traces / golden-master fixtures** (real
-    production outputs, captured reports/screens, SME-confirmed
-    examples) instead of live legacy runs
-  - say so explicitly in the Step 0b plan and later in
-    TRANSFORMATION_NOTES.md ("equivalence is trace-based; legacy was not
-    executable in this environment"), so reviewers know the strength of
-    the proof they're approving
-
-## Step 0b — Plan (HITL gate)
+## Step 0 — Plan (HITL gate)

 Read the source module and any business rules in `analysis/$1/BUSINESS_RULES.md`
-that reference it. Then present the plan and **stop — write no code until
-the user explicitly approves** (use plan mode if the session supports it):
+that reference it. Then **enter plan mode** and present:
 - Which source files are in scope
 - The target module structure (packages/classes/files you'll create)
 - Which business rules / behaviors this module implements
@@ -57,9 +30,7 @@ identify every observable behavior, and encode each as a test case with
 concrete input → expected output pairs derived from the legacy logic.
 Target framework: <appropriate for $3>. Write to
 `modernized/$1/$2/src/test/`. These tests define 'done' — the new code
-must pass all of them. Follow your secret-handling rules: no credential
-literal from legacy code becomes a fixture; substitute fake same-shape
-values and read anything genuinely live from environment variables."
+must pass all of them."

 Show the user the test file. Get a 👍 before proceeding.

@@ -97,10 +68,6 @@ Then show a visual diff of one representative behavior, legacy vs modern:
 ```bash
 delta --side-by-side <(sed -n '<lines>p' legacy/$1/<file>) modernized/$1/$2/src/main/<file>
 ```
-(Fall back to `diff -y --width=160` if `delta` isn't installed.) Never
-pick a credential-bearing line range for this diff, and mask any
-credential-like literal quoted in TRANSFORMATION_NOTES.md — the notes
-live in `modernized/` and get committed.

 ## Step 5 — Architecture review

--- a/plugins/frontend-design/skills/frontend-design/LICENSE.txt
+++ b/plugins/frontend-design/skills/frontend-design/LICENSE.txt
@@ -1,177 +0,0 @@
-
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
--- a/plugins/frontend-design/skills/frontend-design/SKILL.md
+++ b/plugins/frontend-design/skills/frontend-design/SKILL.md
@@ -1,55 +1,42 @@
 ---
 name: frontend-design
-description: Guidance for distinctive, intentional visual design when building new UI or reshaping an existing one. Helps with aesthetic direction, typography, and making choices that don't read as templated defaults.
+description: Create distinctive, production-grade frontend interfaces with high design quality. Use this skill when the user asks to build web components, pages, or applications. Generates creative, polished code that avoids generic AI aesthetics.
 license: Complete terms in LICENSE.txt
 ---

-# Frontend Design
+This skill guides creation of distinctive, production-grade frontend interfaces that avoid generic "AI slop" aesthetics. Implement real working code with exceptional attention to aesthetic details and creative choices.

-Approach this as the design lead at a small studio known for giving every client a visual identity that could not be mistaken for anyone else's. This client has already rejected proposals that felt templated, and is paying for a distinctive point of view: make deliberate, opinionated choices about palette, typography, and layout that are specific to this brief, and take one real aesthetic risk you can justify.
+The user provides frontend requirements: a component, page, application, or interface to build. They may include context about the purpose, audience, or technical constraints.

-## Ground it in the subject
+## Design Thinking

-If the brief does not pin down what the product or subject is, pin it yourself before designing: name one concrete subject, its audience, and the page's single job, and state your choice. If there's any information in your memory about the human's preferences, context about what they're building, or designs you've made before – use that as a hint. The subject's own world, its materials, instruments, artifacts, and vernacular, is where distinctive choices come from. Build with the brief's real content and subject matter throughout.
+Before coding, understand the context and commit to a BOLD aesthetic direction:
+- **Purpose**: What problem does this interface solve? Who uses it?
+- **Tone**: Pick an extreme: brutally minimal, maximalist chaos, retro-futuristic, organic/natural, luxury/refined, playful/toy-like, editorial/magazine, brutalist/raw, art deco/geometric, soft/pastel, industrial/utilitarian, etc. There are so many flavors to choose from. Use these for inspiration but design one that is true to the aesthetic direction.
+- **Constraints**: Technical requirements (framework, performance, accessibility).
+- **Differentiation**: What makes this UNFORGETTABLE? What's the one thing someone will remember?

-## Design principles
+**CRITICAL**: Choose a clear conceptual direction and execute it with precision. Bold maximalism and refined minimalism both work - the key is intentionality, not intensity.

-For web designs, the hero is a thesis. Open with the most characteristic thing in the subject's world, in whatever form makes sense for it: a headline, an image, an animation, a live demo, an interactive moment. Be deliberate with your choice: a big number with a small label, supporting stats, and a gradient accent is the template answer, only use if that's truly the best option.
+Then implement working code (HTML/CSS/JS, React, Vue, etc.) that is:
+- Production-grade and functional
+- Visually striking and memorable
+- Cohesive with a clear aesthetic point-of-view
+- Meticulously refined in every detail

-Typography carries the personality of the page. Pair the display and body faces deliberately, not the same families you would reach for on any other project, and set a clear type scale with intentional weights, widths, and spacing. Make the type treatment itself a memorable part of the design, not a neutral delivery vehicle for the content.
+## Frontend Aesthetics Guidelines

-Structure is information. Structural devices, numbering, eyebrows, dividers, labels, should encode something true about the content, not decorate it. Many generic designs use numbered markers (01 / 02 / 03), but that's only appropriate if the content actually is a sequence - like a real process or a typed timeline where order carries information the reader needs. Question if choices like numbered markers actually make sense before incorporating them.
+Focus on:
+- **Typography**: Choose fonts that are beautiful, unique, and interesting. Avoid generic fonts like Arial and Inter; opt instead for distinctive, unexpected, characterful choices that elevate the frontend's aesthetics. Pair a distinctive display font with a refined body font.
+- **Color & Theme**: Commit to a cohesive aesthetic. Use CSS variables for consistency. Dominant colors with sharp accents outperform timid, evenly-distributed palettes.
+- **Motion**: Use animations for effects and micro-interactions. Prioritize CSS-only solutions for HTML. Use the Motion library for React when available. Focus on high-impact moments: one well-orchestrated page load with staggered reveals (animation-delay) creates more delight than scattered micro-interactions. Use scroll-triggered effects and hover states that surprise.
+- **Spatial Composition**: Unexpected layouts. Asymmetry. Overlap. Diagonal flow. Grid-breaking elements. Generous negative space OR controlled density.
+- **Backgrounds & Visual Details**: Create atmosphere and depth rather than defaulting to solid colors. Add contextual effects and textures that match the overall aesthetic. Apply creative forms like gradient meshes, noise textures, geometric patterns, layered transparencies, dramatic shadows, decorative borders, custom cursors, and grain overlays.

-Leverage motion deliberately. Think about where and if animation can serve the subject: a page-load sequence, a scroll-triggered reveal, hover micro-interactions, ambient atmosphere. An orchestrated moment usually lands harder than scattered effects; choose what the direction calls for. However, sometimes less is more, and extra animation contributes to the feeling that the design is AI-generated.
+NEVER use generic AI-generated aesthetics like overused font families (Inter, Roboto, Arial, system fonts), cliched color schemes (particularly purple gradients on white backgrounds), predictable layouts and component patterns, and cookie-cutter design that lacks context-specific character.

-Match complexity to the vision. Maximalist directions need elaborate execution; minimal directions need precision in spacing, type, and detail. Elegance is executing the chosen vision well.
+Interpret creatively and make unexpected choices that feel genuinely designed for the context. No two designs should be the same. Vary between light and dark themes, different fonts, different aesthetics. NEVER converge on common choices (Space Grotesk, for example) across generations.

-Consider written content carefully. Often a design brief may not contain real content, and it's up to you to come up with copy. Copy can make a design feel as templated as the design itself. See the below section on writing for more guidance.
+**IMPORTANT**: Match implementation complexity to the aesthetic vision. Maximalist designs need elaborate code with extensive animations and effects. Minimalist or refined designs need restraint, precision, and careful attention to spacing, typography, and subtle details. Elegance comes from executing the vision well.

-## Process: brainstorm, explore, plan, critique, build, critique again
-
-For calibration: AI-generated design right now clusters around three looks: (1) a warm cream background (near #F4F1EA) with a high-contrast serif display and a terracotta accent; (2) a near-black background with a single bright acid-green or vermilion accent; (3) a broadsheet-style layout with hairline rules, zero border-radius, and dense newspaper-like columns. All three are legitimate for some briefs, but they are defaults rather than choices, and they appear regardless of subject. Where the brief pins down a visual direction, follow it exactly — the brief's own words always win, including when it asks for one of these looks. Where it leaves an axis free, don't spend that freedom on one of these defaults. Just like a human designer who's hired, there's often a careful balance between doing what you're good at and taking each project as a chance to experiment and learn.
-
-Work in two passes. First, brainstorm a short design plan based on the human's design brief: create a compact token system with color, type, layout, and signature. Color: describe the palette as 4–6 named hex values. Type: the typefaces for 2+ roles (a characterful display face that's used with restraint, a complementary body face, and a utility face for captions or data if needed). Layout: a layout concept, using one-sentence prose descriptions and ASCII wireframes to ideate and compare. Signature: the single unique element this page will be remembered by that embodies the brief in an appropriate way.
-
-Then review that plan against the brief before building: if any part of it reads like the generic default you would produce for any similar page (work through a similar prompt to see if you arrive somewhere similar) rather than a choice made for this specific brief — revise that part, say what you changed and why. Only after you've confirmed the relative uniqueness of your design plan should you start to write the code, following the revised plan exactly and deriving every color and type decision from it.
-
-When writing the code, be careful of structuring your CSS selector specificities. It's easy to generate CSS classes that cancel each other out (especially with a type-based selector like .section and a element-based selector like .cta). This can happen often with paddings/margins between sections.
-
-Try to do a lot of this planning and iteration in your thinking, and only show ideas to the user when you have higher confidence it'll delight them.
-
-## Restraint and self-critique
-
-Spend your boldness in one place. Let the signature element be the one memorable thing, keep everything around it quiet and disciplined, and cut any decoration that does not serve the brief. Not taking a risk can be a risk itself! Build to a quality floor without announcing it: responsive down to mobile, visible keyboard focus, reduced motion respected. Critique your own work as you build, taking screenshots if your environment supports it – a picture is worth 1000 tokens. Consider Chanel's advice: before leaving the house, take a look in the mirror and remove one accessory. Human creators have memory and always try to do something new, so if you have a space to quickly jot down notes about what you've tried, it can help you in future passes.
-
-## More on writing in design
-
-Words appear in a design for one reason: to make it easier to understand, and therefore easier to use. They are design material, not decoration. Bring the same intentionality to copy that you would bring to spacing and color. Before writing anything, ask what the design needs to say, and how it can best be said to help the person navigate the experience.
-
-Write from the end user's side of the screen. Name things by what people control and recognize, never by how the system is built. A person manages notifications, not webhook config. Describe what something does in plain terms rather than selling it. Being specific is always better than being clever.
-
-Use active voice as default. A control should say exactly what happens when it's used: "Save changes," not "Submit." An action keeps the same name through the whole flow, so the button that says "Publish" produces a toast that says "Published." The vocabulary of an interface is the signposting for someone navigating the product. Cohesion and consistency are how people learn their way around.
-
-Treat failure and emptiness as moments for direction, not mood. Explain what went wrong and how to fix it, in the interface's voice rather than a person's. Errors don't apologize, and they are never vague about what happened. An empty screen is an invitation to act.
-
-Keep the register conversational and tuned: plain verbs, sentence case, no filler, with tone matched to the brand and the audience. Let each element do exactly one job. A label labels, an example demonstrates, and nothing quietly does double duty.
+Remember: Claude is capable of extraordinary creative work. Don't hold back; show what can truly be created when thinking outside the box and committing fully to a distinctive vision.
--- a/plugins/security-guidance/.claude-plugin/plugin.json
+++ b/plugins/security-guidance/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
  "name": "security-guidance",
-  "version": "2.0.6",
+  "version": "2.0.0",
  "description": "Security review for Claude-generated code. Pattern-based warnings on edits, LLM-powered diff review on Stop, and an agentic commit reviewer that catches injection, XSS, SSRF, hardcoded secrets, and 25+ other vulnerability classes.",
  "author": {
    "name": "David Dworken",
--- a/plugins/security-guidance/hooks/_base.py
+++ b/plugins/security-guidance/hooks/_base.py
@@ -116,18 +116,7 @@ _PV = _read_plugin_version_int()
 # Emitted via _usage_metrics() into the existing emit_metrics() channel so
 # hook metrics rows carry per-invocation token/cost totals
 # alongside the existing skip_reason / vulns_found fields.
-_USAGE = {
-    "in": 0, "out": 0, "cr": 0, "cw": 0, "cost": 0.0, "n": 0,
-    # HTTP error visibility (#2098 visibility gap — see emit comment in
-    # _usage_metrics). Without this, API failures from `_call_claude` left
-    # zero fingerprint in telemetry: the call returns None, the caller's
-    # emit_metrics carries no api_calls field, and the failure is
-    # indistinguishable from "no review needed". The deprecation outage
-    # that broke every commit-review LLM call was invisible until users
-    # reported it manually.
-    "http_err_last": 0,    # most recent HTTP error code this invocation
-    "http_err_count": 0,   # total HTTP errors (4xx + 5xx + network)
-}
+_USAGE = {"in": 0, "out": 0, "cr": 0, "cw": 0, "cost": 0.0, "n": 0}
 _USAGE_LOCK = threading.Lock()

 # $/Mtok (input, output). Used only for the raw-HTTP path; the SDK path
@@ -177,55 +166,19 @@ def _record_usage(usage, model, cost_usd=None):
        _USAGE["n"] += 1


-def _record_http_error(status):
-    """Record an HTTP error from an LLM API call. `status` is the HTTP
-    status code (integer 400–599) or -1 for network/timeout errors. Stored
-    in `_USAGE["http_err_last"]` (most recent) and counted in
-    `_USAGE["http_err_count"]`. Snapshot via `_usage_metrics()` so every
-    subsequent `emit_metrics` includes the failure fingerprint.
-
-    Background: without this, the most recent example was the #2098
-    deprecation 400. Every hook fire's LLM call returned HTTP 400; the
-    plugin caught it and returned None; the emit_metrics carried no
-    api_calls field; aggregate dashboards looked normal. The failure
-    only became visible when a user manually reported errors out of
-    their debug log. With this field, a category-of-failure spike (4xx,
-    5xx, or -1 network) is queryable from BQ in real time.
-    """
-    try:
-        s = int(status)
-    except (TypeError, ValueError):
-        return
-    with _USAGE_LOCK:
-        _USAGE["http_err_last"] = s
-        _USAGE["http_err_count"] += 1
-
-
 def _usage_metrics():
    """Snapshot the accumulator as metric keys. Returns {} when no API calls
-    AND no HTTP errors were made so skip-path emits don't burn key budget.
-    cost_usd rounded to 1e-6 to keep the float finite/short for the zod
-    schema.
-
-    HTTP errors (`http_err_last`, `http_err_count`) emitted ONLY when
-    `http_err_count > 0` so successful calls don't pad every metrics row
-    with two zero fields.
-    """
+    were made so skip-path emits don't burn key budget. cost_usd rounded to
+    1e-6 to keep the float finite/short for the zod schema."""
    with _USAGE_LOCK:
-        if _USAGE["n"] == 0 and _USAGE["http_err_count"] == 0:
+        if _USAGE["n"] == 0:
            return {}
-        out = {}
-        if _USAGE["n"] > 0:
-            out.update({
-                "tok_in": _USAGE["in"],
-                "tok_out": _USAGE["out"],
-                "tok_cache_r": _USAGE["cr"],
-                "tok_cache_w": _USAGE["cw"],
-                "cost_usd": round(_USAGE["cost"], 6),
-                "api_calls": _USAGE["n"],
-            })
-        if _USAGE["http_err_count"] > 0:
-            out["http_err_last"] = _USAGE["http_err_last"]
-            out["http_err_count"] = _USAGE["http_err_count"]
-        return out
+        return {
+            "tok_in": _USAGE["in"],
+            "tok_out": _USAGE["out"],
+            "tok_cache_r": _USAGE["cr"],
+            "tok_cache_w": _USAGE["cw"],
+            "cost_usd": round(_USAGE["cost"], 6),
+            "api_calls": _USAGE["n"],
+        }

--- a/plugins/security-guidance/hooks/ensure_agent_sdk.py
+++ b/plugins/security-guidance/hooks/ensure_agent_sdk.py
@@ -40,377 +40,6 @@ BUILD_FAILED = 3     # venv create or pip install raised/timed out
 SKIP_SENTINEL = 5    # another SessionStart is currently building
 HOOK_PY_INCOMPATIBLE = 6  # hook interpreter is <3.10 — SDK syntax can't load
                          # here no matter how the venv was built. See #2071.
-# --target fallback: when `python -m venv` can't bootstrap pip (ensurepip
-# missing — Debian python3-venv not installed, or a python.org/pyenv build
-# without ensurepip), fall back to `pip install --target <dir>` which needs
-# only the system pip, not venv/ensurepip. Telemetry (v2.0.4 sdk_has_pip
-# probe) confirmed ~95% of venv_ensurepip_fail users HAVE pip, so this
-# recovers the agentic reviewer for them instead of degrading to pattern +
-# single-shot review. See #2154 follow-up.
-BUILT_TARGET = 7     # venv ensurepip failed → SDK pip-installed via --target
-NOOP_TARGET = 8      # --target libs already present and importable
-SKIP_COOLDOWN = 9    # a recent build was signal-killed (memory pressure) — not
-                     # retrying this session to avoid burning the user's
-                     # memory/CPU on a build that keeps getting killed. CCR
-                     # repro confirmed the dominant Linux BUILD_FAILED is a
-                     # SIGKILL/SIGSEGV of the memory-heavy venv+pip subprocess
-                     # (rc<0, empty streams). See #2154 follow-up.
-
-# How long to skip rebuilds after a signal kill. Retries at most once per
-# window so a machine whose memory frees up still recovers (just not every
-# session). Keyed by marker mtime.
-SIGNAL_KILL_COOLDOWN_SEC = 24 * 3600
-
-
-# Phase + err-kind integer encoding for sdk_bootstrap_phase / sdk_bootstrap_err.
-#
-# Earlier versions emitted these as STRINGS (e.g. "pip", "dns_fail"). CC's
-# plugin-metrics pipeline silently drops plugin-emitted string values —
-# only `bool|finite-number` plugin metrics reach BigQuery. (CC-core
-# metrics like `subscription_type` are exempt because they're injected
-# downstream of plugin validation.) Confirmed empirically: 185K
-# BUILD_FAILED rows in BQ had `sdk_bootstrap_phase`/`sdk_bootstrap_err`
-# = NULL despite the Python code emitting them. This left ~28K
-# BUILD_FAILED sessions/day with no diagnostic split — flying blind on
-# the real failure modes (pip-no-match vs dns-fail vs ssl-verify etc.).
-#
-# Fix: encode as small integers per the maps below. Values are
-# APPEND-ONLY for telemetry stability. Reserve 99 as the "unknown /
-# uncategorized" bucket so an unmapped err_kind (e.g., a new exception
-# type) still emits a non-zero signal.
-SDK_BOOTSTRAP_PHASE_CODES = {
-    "pre":  1,  # pre-venv (state_dir.mkdir, sentinel open)
-    "venv": 2,  # python -m venv --clear
-    "pip":  3,  # pip install
-    "main": 4,  # uncaught exception above main()
-    "pip_target": 5,  # `pip install --target` fallback (venv ensurepip failed)
-}
-SDK_BOOTSTRAP_ERR_CODES = {
-    "pip_no_match":         1,
-    "dns_fail":             2,
-    "conn_refused":         3,
-    "ssl_verify":           4,
-    "perm_denied":          5,
-    "no_pip":               6,
-    "disk_full":            7,
-    "proxy_auth":           8,
-    "stderr_timeout":       9,   # pip stderr containing "timeout"/"timed out"
-    "subprocess_timeout":   10,  # subprocess.TimeoutExpired (>120s)
-    "signal_killed":        16,  # venv/pip subprocess killed by a signal
-                                 # (rc<0 or 128+sig) — OOM-killer SIGKILL /
-                                 # RLIMIT_AS SIGSEGV, empty streams. The
-                                 # actual rc rides in sdk_bootstrap_rc. This
-                                 # is the dominant Linux failure (CCR repro).
-    # Venv-stage specific categories added after PR #2112 telemetry surfaced
-    # 2,406 phase=2/err=99 sessions in the first 3h of v2.0.1 — venv phase
-    # failing in ways the original pip-flavored patterns didn't catch. These
-    # all split out of what was previously collapsing to _uncategorized.
-    "venv_ensurepip_fail":  11,  # Debian/Ubuntu missing python3-venv;
-                                 # stderr mentions ensurepip non-zero exit
-                                 # or "ensurepip is not available"
-    "venv_path_too_long":   12,  # Windows MAX_PATH (260) or POSIX
-                                 # ENAMETOOLONG — venv writes deep paths
-                                 # under state_dir/agent-sdk-venv/Lib/...
-    "venv_no_module":       13,  # `python3 -m venv` itself missing — "No
-                                 # module named 'venv'" / "No module named venv"
-    "venv_already_exists":  14,  # Errno 17 / "file exists" — sentinel race
-                                 # past O_EXCL or stale dir survived --clear
-    "venv_setup_failed":    15,  # Generic "virtual environment was not
-                                 # created successfully" — catches the long
-                                 # tail of venv setup failures that don't
-                                 # match a more specific category above
-    # 16–98 reserved for future categories; APPEND-ONLY.
-    # 99 catches everything else (including "exc:<TypeName>" and "other:<tail>"
-    # — the original string is debug-loggable but the integer is what makes
-    # it to telemetry). For the "other:" tail, `sdk_bootstrap_stderr_sig`
-    # carries a bounded integer hash so we can still distinguish patterns
-    # in BQ aggregation.
-    "_uncategorized":       99,
-}
-
-# Exception-type encoding for the "exc:<TypeName>" err_kinds (the generic
-# `except Exception` path — venv/pip raised a Python exception rather than
-# a CalledProcessError with categorizable stderr).
-#
-# #2154 telemetry surfaced that the dominant remaining venv BUILD_FAILED
-# bucket (phase=venv, err=99) is ~99% `exc:` with stderr_sig=NULL — i.e.
-# exceptions, not stderr-bearing subprocess failures — so the stderr_sig
-# hash couldn't distinguish them. This maps the exception TYPE to a stable
-# code so BQ can tell FileNotFoundError (python/venv binary missing) from
-# PermissionError (read-only home) from a bare OSError, etc.
-#
-# All the FileNotFoundError/PermissionError/etc. entries are OSError
-# subclasses, so they ALSO carry an errno (see _encode_errno) — the type
-# code gives the Python class, errno gives the OS-level cause. APPEND-ONLY.
-SDK_BOOTSTRAP_EXC_CODES = {
-    "FileNotFoundError":  1,   # interpreter/venv path component missing
-    "PermissionError":    2,   # read-only home, sandboxed FS
-    "NotADirectoryError": 3,
-    "IsADirectoryError":  4,
-    "FileExistsError":    5,   # (sentinel race is handled separately; this
-                               # is FileExistsError from elsewhere in venv)
-    "OSError":            6,   # bare OSError — errno carries the real cause
-    "BlockingIOError":    7,
-    "BrokenPipeError":    8,
-    "ConnectionError":    9,
-    "TimeoutError":       10,  # distinct from subprocess.TimeoutExpired
-    "InterruptedError":   11,
-    "MemoryError":        12,
-    "UnicodeDecodeError": 13,
-    "ValueError":         14,
-    "RuntimeError":       15,
-    # 16–98 reserved; APPEND-ONLY.
-    "_other_exc":         99,  # an exception type not in this map
-}
-
-
-def _encode_phase(s):
-    """Map err_phase string to its telemetry integer code, or 0 if unset.
-    Empty/None → 0 lets `if encoded:` cleanly skip emission. Per
-    SDK_BOOTSTRAP_PHASE_CODES, valid codes are 1-4."""
-    return SDK_BOOTSTRAP_PHASE_CODES.get((s or "").strip(), 0)
-
-
-def _encode_err_kind(s):
-    """Map err_kind string to its telemetry integer code, or 0 if unset.
-    Direct hits use the static map; "exc:<X>" and "other:<tail>" both
-    collapse to _uncategorized (99) — the raw string survives in debug
-    logs, only the integer reaches BQ."""
-    s = (s or "").strip()
-    if not s:
-        return 0
-    if s in SDK_BOOTSTRAP_ERR_CODES:
-        return SDK_BOOTSTRAP_ERR_CODES[s]
-    # "signal_killed:<rc>" carries the returncode in sdk_bootstrap_rc; the
-    # category maps to the signal_killed code.
-    if s.startswith("signal_killed"):
-        return SDK_BOOTSTRAP_ERR_CODES["signal_killed"]
-    # Prefix matches for the catch-all categories
-    if s.startswith("exc:") or s.startswith("other:") or s == "other":
-        return SDK_BOOTSTRAP_ERR_CODES["_uncategorized"]
-    # Unknown string — still emit as uncategorized rather than dropping
-    return SDK_BOOTSTRAP_ERR_CODES["_uncategorized"]
-
-
-def _encode_rc(err_kind):
-    """Extract the subprocess returncode embedded in a 'signal_killed:<rc>'
-    err_kind (e.g. -11 SIGSEGV / -9 SIGKILL / 139 shell-wrapped). Emitted as
-    sdk_bootstrap_rc so BQ can tell OOM-killer (-9) from RLIMIT_AS (-11).
-    Returns 0 when absent/non-numeric."""
-    if not err_kind or not err_kind.startswith("signal_killed:"):
-        return 0
-    try:
-        return int(err_kind.split(":", 1)[1])
-    except (ValueError, IndexError):
-        return 0
-
-
-def _is_signal_kill(returncode) -> bool:
-    """A subprocess killed by a signal rather than a clean non-zero exit.
-    subprocess.run (no shell, as used here) reports negative rc = -signum
-    (SIGKILL→-9 OOM-killer, SIGSEGV→-11 RLIMIT_AS, SIGABRT→-6). The 128+sig
-    forms (134/137/139) are defensive for any shell-wrapped path. Paired with
-    empty stdout+stderr this is the memory-kill signature (CCR repro)."""
-    if returncode is None:
-        return False
-    return returncode < 0 or returncode in (134, 137, 139)
-
-
-def _cooldown_remaining(state_dir) -> float:
-    """Seconds left in the signal-kill cooldown (0 if none/expired). Reads the
-    marker's mtime; a missing/unreadable marker means not in cooldown."""
-    marker = Path(state_dir) / "agent-sdk-venv.cooldown"
-    try:
-        age = time.time() - marker.stat().st_mtime
-    except OSError:
-        return 0.0
-    return max(0.0, SIGNAL_KILL_COOLDOWN_SEC - age)
-
-
-def _write_cooldown(state_dir) -> None:
-    """Start/refresh the signal-kill cooldown so we stop re-attempting a build
-    that keeps getting killed every session. Best-effort."""
-    try:
-        Path(state_dir).mkdir(parents=True, exist_ok=True)
-        (Path(state_dir) / "agent-sdk-venv.cooldown").write_text(
-            time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()))
-    except OSError:
-        pass
-
-
-def _encode_stderr_sig(err_kind):
-    """Bounded integer hash of the stderr tail captured in "other:<tail>"
-    err_kinds. Lets us distinguish patterns INSIDE the _uncategorized
-    (code 99) bucket without unbounded cardinality.
-
-    Returns 0 for non-"other:" err_kinds (so the field auto-omits from
-    emit_metrics on categorized failures — see the emit block in main()).
-
-    Strategy: take the tail's first ~30 chars (post-lowercase, post-trim),
-    SHA-1, fold the first 2 bytes to 0–999. Different stderr messages
-    cluster into different buckets; same stderr always maps to the same
-    bucket. Cardinality is bounded at 1000, well below any "high
-    cardinality" alarm — and a real failure mode typically produces
-    near-identical stderr across thousands of machines, so 1000 buckets
-    is comfortably wide.
-
-    Why first ~30 chars: stderr like "ERROR: Command failed: <full
-    path>" varies the tail wildly (paths) but the categorization signal
-    is in the leading words. Dropping the suffix focuses the hash on
-    the discriminative part.
-    """
-    if not err_kind or not err_kind.startswith("other:"):
-        return 0
-    import hashlib
-    tail = err_kind[len("other:"):].strip().lower()[:30]
-    if not tail:
-        return 0
-    h = hashlib.sha1(tail.encode("utf-8", errors="replace")).digest()
-    return int.from_bytes(h[:2], "big") % 1000
-
-
-def _encode_exc_kind(err_kind):
-    """Map an "exc:<TypeName>[:errno]" err_kind to its exception-type code
-    (SDK_BOOTSTRAP_EXC_CODES). Returns 0 for non-exc err_kinds (so the
-    sdk_bootstrap_exc field auto-omits on stderr/categorized failures).
-    Unmapped exception types → 99 (_other_exc)."""
-    if not err_kind or not err_kind.startswith("exc:"):
-        return 0
-    # "exc:OSError:28" → "OSError"; "exc:RuntimeError" → "RuntimeError"
-    name = err_kind[len("exc:"):].split(":", 1)[0].strip()
-    if not name:
-        return 0
-    return SDK_BOOTSTRAP_EXC_CODES.get(name, SDK_BOOTSTRAP_EXC_CODES["_other_exc"])
-
-
-def _encode_errno(err_kind):
-    """Extract the OS errno from an "exc:<TypeName>:<errno>" err_kind.
-    OSError-family exceptions embed their errno (ENOENT=2, EACCES=13,
-    ENOSPC=28, …) — the OS-level cause is far more actionable than the
-    Python class alone. Returns 0 when absent/non-numeric (field omitted)."""
-    if not err_kind or not err_kind.startswith("exc:"):
-        return 0
-    parts = err_kind.split(":")
-    if len(parts) < 3:
-        return 0
-    try:
-        return int(parts[2])
-    except (ValueError, IndexError):
-        return 0
-
-
-def _probe_has_pip() -> bool:
-    """True iff the current interpreter can run pip (`-m pip --version`).
-
-    Probed only on the venv_ensurepip_fail path (see __main__), NOT on the
-    happy path — it's an extra subprocess we only want when diagnosing a
-    failure. The result decides whether a `pip install --target` fallback
-    (Option A) is even viable for this machine: ensurepip/venv missing but
-    pip present → --target would work; pip also missing → it wouldn't, and
-    the user needs a system package (python3-venv / a complete Python)."""
-    try:
-        r = subprocess.run(
-            [sys.executable, "-m", "pip", "--version"],
-            capture_output=True, timeout=10,
-        )
-        return r.returncode == 0
-    except Exception:
-        return False
-
-
-def _pip_err_from_stderr(stderr_b):
-    """Categorize a pip-install stderr into a known err_kind (the pip subset
-    of SDK_BOOTSTRAP_ERR_CODES). Used by the --target fallback; mirrors the
-    pip branches of main()'s inline categorizer. Kept as a sibling rather
-    than extracting main()'s chain (which also has venv-phase branches) to
-    avoid disturbing the working venv categorization."""
-    if isinstance(stderr_b, bytes):
-        s = stderr_b.decode("utf-8", errors="replace")
-    else:
-        s = str(stderr_b or "")
-    low = s.lower()
-    if "no matching distribution" in low or "could not find a version" in low:
-        return "pip_no_match"
-    if ("name or service not known" in low or "name resolution" in low
-            or "nodename nor servname" in low or "temporary failure in name" in low):
-        return "dns_fail"
-    if "connection refused" in low or "connection reset" in low:
-        return "conn_refused"
-    if "ssl" in low and ("verify" in low or "certificate" in low):
-        return "ssl_verify"
-    if "permission denied" in low or "read-only file system" in low:
-        return "perm_denied"
-    if "no module named pip" in low or "no module named ensurepip" in low:
-        return "no_pip"
-    if "no space left" in low or "disk quota" in low:
-        return "disk_full"
-    if "proxy" in low and ("authent" in low or "tunnel" in low or "407" in low):
-        return "proxy_auth"
-    if "timeout" in low or "timed out" in low:
-        return "stderr_timeout"
-    tail = next((ln.strip() for ln in reversed(s.splitlines()) if ln.strip()), "")[:60]
-    return f"other:{tail}" if tail else "other"
-
-
-def _target_dir(state_dir) -> Path:
-    return Path(state_dir) / "agent-sdk-libs"
-
-
-def _target_sdk_importable(state_dir) -> bool:
-    """True iff the --target libs dir has an importable claude_agent_sdk,
-    probed with THIS interpreter (the one llm.py will import it from) and the
-    target dir prepended to sys.path. Cheap dir-check first to avoid a
-    subprocess on the common no-target path."""
-    target = _target_dir(state_dir)
-    if not (target / "claude_agent_sdk").is_dir():
-        return False
-    try:
-        r = subprocess.run(
-            [sys.executable, "-c",
-             "import sys; sys.path.insert(0, sys.argv[1]); import claude_agent_sdk",
-             str(target)],
-            capture_output=True, timeout=10,
-        )
-        return r.returncode == 0
-    except Exception:
-        return False
-
-
-def _build_via_target(state_dir) -> tuple[int, str, str]:
-    """Fallback install when `python -m venv` can't bootstrap pip (ensurepip
-    missing — Debian python3-venv absent, or a python.org/pyenv build without
-    ensurepip). `pip install --target <dir>` needs only the system pip, not
-    venv/ensurepip. v2.0.4 telemetry (sdk_has_pip) confirmed ~95% of
-    venv_ensurepip_fail users have pip. The consumer (llm.py) adds this flat
-    dir to sys.path. Returns (outcome, err_phase, err_kind).
-
-    --upgrade so a stale/partial target dir from a prior failed attempt
-    doesn't make pip refuse; --prefer-binary mirrors the venv path's wheel
-    preference (ARM64 Windows cryptography)."""
-    target = _target_dir(state_dir)
-    try:
-        subprocess.run(
-            [sys.executable, "-m", "pip", "install",
-             "--target", str(target), "--upgrade",
-             "--disable-pip-version-check", "--prefer-binary", "--no-cache-dir",
-             "claude-agent-sdk"],
-            capture_output=True, timeout=120, check=True,
-        )
-        return BUILT_TARGET, "", ""
-    except subprocess.CalledProcessError as e:
-        # A --target pip install is also memory-heavy, so it too can be
-        # signal-killed under memory pressure — cool down, same as the venv path.
-        if _is_signal_kill(e.returncode):
-            _write_cooldown(state_dir)
-            return BUILD_FAILED, "pip_target", f"signal_killed:{e.returncode}"
-        return BUILD_FAILED, "pip_target", _pip_err_from_stderr(e.stderr)
-    except subprocess.TimeoutExpired:
-        return BUILD_FAILED, "pip_target", "subprocess_timeout"
-    except Exception as e:
-        errno = getattr(e, "errno", None)
-        if isinstance(errno, int):
-            return BUILD_FAILED, "pip_target", f"exc:{type(e).__name__}:{errno}"
-        return BUILD_FAILED, "pip_target", f"exc:{type(e).__name__}"


 def _sdk_on_syspath() -> bool:
@@ -501,20 +130,6 @@ def main() -> tuple[int, str, str]:
        except Exception:
            pass  # broken venv; rebuild below

-    # If a prior run installed the SDK via the --target fallback (ensurepip
-    # path), reuse it. Only reached when there's no working venv, so healthy
-    # NOOP_VENV users never pay for this probe.
-    if _target_sdk_importable(state_dir):
-        return NOOP_TARGET, "", ""
-
-    # If a recent build was signal-killed (memory pressure), don't re-attempt
-    # this session — the memory-heavy venv+pip just gets killed again, burning
-    # the user's resources. Retry at most once per cooldown window. Reached
-    # only after all no-op probes, so a machine that later gets the SDK via
-    # system/venv/target still short-circuits above.
-    if _cooldown_remaining(state_dir) > 0:
-        return SKIP_COOLDOWN, "", ""
-
    err_phase = ""
    err_kind = ""
    we_own_sentinel = False
@@ -547,25 +162,14 @@ def main() -> tuple[int, str, str]:
        # --prefer-binary tells pip to pick it. Cross-platform safe: no-op
        # on platforms where the latest version already has a wheel.
        err_phase = "pip"
-        # --no-cache-dir trims pip's peak memory (no cache read/write/unpack
-        # buffering) — helps marginal low-memory machines get under the OOM
-        # threshold that kills the dominant Linux builds (CCR repro).
        subprocess.run(
            [str(venv_py), "-m", "pip", "install", "--quiet",
-             "--disable-pip-version-check", "--prefer-binary", "--no-cache-dir",
+             "--disable-pip-version-check", "--prefer-binary",
             "claude-agent-sdk"],
            capture_output=True, timeout=120, check=True,
        )
        return BUILT, "", ""
    except subprocess.CalledProcessError as e:
-        # Signal kill (OOM-killer SIGKILL / RLIMIT_AS SIGSEGV) — rc<0, empty
-        # streams. The dominant Linux failure. Record the rc, start a cooldown
-        # so we stop retry-storming a build that keeps getting killed, and
-        # skip the stderr categorization (there's nothing in stderr). err_phase
-        # says whether it died creating the venv or installing via pip.
-        if _is_signal_kill(e.returncode):
-            _write_cooldown(state_dir)
-            return BUILD_FAILED, err_phase, f"signal_killed:{e.returncode}"
        # Capture a stderr fingerprint so telemetry can split BUILD_FAILED by
        # root cause (no-network, package-not-found, dns-fail, etc.).
        # Categorize first, then keep a short raw tail for the long tail of
@@ -576,34 +180,7 @@ def main() -> tuple[int, str, str]:
        else:
            stderr_str = str(stderr_b)
        s = stderr_str.lower()
-        # Venv-specific patterns checked FIRST — they overlap with some pip
-        # patterns (e.g. "no module named ensurepip" could match no_pip OR
-        # venv_ensurepip_fail; the venv-stage interpretation is the right
-        # one when err_phase=="venv"). Order is venv-most-specific →
-        # pip-historical → generic.
-        if err_phase == "venv" and (
-            "ensurepip is not available" in s
-            or ("ensurepip" in s and "returned non-zero" in s)
-            or "the virtual environment was not created" in s and "ensurepip" in s
-        ):
-            err_kind = "venv_ensurepip_fail"
-        elif err_phase == "venv" and (
-            "[errno 36]" in s
-            or "file name too long" in s
-            or "path too long" in s
-        ):
-            err_kind = "venv_path_too_long"
-        elif err_phase == "venv" and (
-            "no module named venv" in s
-            or "no module named 'venv'" in s
-        ):
-            err_kind = "venv_no_module"
-        elif err_phase == "venv" and (
-            "[errno 17]" in s
-            or ("file exists" in s and "venv" in s)
-        ):
-            err_kind = "venv_already_exists"
-        elif "no matching distribution" in s or "could not find a version" in s:
+        if "no matching distribution" in s or "could not find a version" in s:
            err_kind = "pip_no_match"
        elif "name or service not known" in s or "name resolution" in s \
                or "nodename nor servname" in s or "temporary failure in name" in s:
@@ -622,15 +199,6 @@ def main() -> tuple[int, str, str]:
            err_kind = "proxy_auth"
        elif "timeout" in s or "timed out" in s:
            err_kind = "stderr_timeout"
-        elif err_phase == "venv" and (
-            "virtual environment was not created" in s
-            or "error: command" in s and "venv" in s
-        ):
-            # Generic venv-setup catch-all — matched AFTER the more specific
-            # venv patterns above so we don't shadow them, but BEFORE the
-            # other: fallback so generic venv setup failures get their own
-            # bucket instead of polluting the long-tail signature space.
-            err_kind = "venv_setup_failed"
        else:
            # First 60 chars of the last non-empty stderr line — bounded to
            # stay inside CC's metric value-length budget. Real failure modes
@@ -640,27 +208,10 @@ def main() -> tuple[int, str, str]:
                "",
            )[:60]
            err_kind = f"other:{tail}" if tail else "other"
-        # venv couldn't bootstrap pip (ensurepip missing) but pip itself may
-        # work — fall back to a flat `pip install --target`. Only this one
-        # category falls through; every other venv/pip failure is terminal.
-        # The finally block unlinks our sentinel first (so the target build
-        # isn't blocked by it); _build_via_target does the target install.
-        if err_kind == "venv_ensurepip_fail":
-            if we_own_sentinel:
-                sentinel.unlink(missing_ok=True)
-                we_own_sentinel = False
-            return _build_via_target(state_dir)
        return BUILD_FAILED, err_phase, err_kind
    except subprocess.TimeoutExpired:
        return BUILD_FAILED, err_phase, "subprocess_timeout"
    except Exception as e:
-        # Embed errno for OSError-family exceptions ("exc:OSError:28") so
-        # telemetry can decode the OS-level cause (ENOENT/EACCES/ENOSPC/…),
-        # not just the Python class. #2154 follow-up: this is the dominant
-        # remaining venv BUILD_FAILED bucket. See _encode_exc_kind/_encode_errno.
-        errno = getattr(e, "errno", None)
-        if isinstance(errno, int):
-            return BUILD_FAILED, err_phase, f"exc:{type(e).__name__}:{errno}"
        return BUILD_FAILED, err_phase, f"exc:{type(e).__name__}"
    finally:
        # Only remove the sentinel if THIS process created it. The
@@ -737,63 +288,21 @@ if __name__ == "__main__":
    # and takes the FIRST non-{"async":...} JSON line as the hook response;
    # its `metrics` key is forwarded to the hook metrics event on the
    # next attachments pass. Must be a single line — the registry splits on
-    # \n and json-parses each independently.
-    #
-    # IMPORTANT — values must be bool|finite-number. The validation comment
-    # has historically said "or short strings" but that was wrong: CC's
-    # plugin-metrics pipeline silently drops plugin-emitted string values.
-    # Stay inside the 10-key emit cap.
+    # \n and json-parses each independently. Values must be bool, number,
+    # or short non-null strings (CC accepts string metric values). Stay
+    # inside the 10-key emit cap.
    metrics: dict[str, object] = {
        "sdk_bootstrap": outcome,
        "sdk_bootstrap_ms": round((time.perf_counter() - t0) * 1000),
    }
    if err_kind:
-        # Encode phase + err_kind as integer codes (see
-        # SDK_BOOTSTRAP_PHASE_CODES / SDK_BOOTSTRAP_ERR_CODES). Earlier
-        # versions emitted these as strings and CC dropped them — restoring
-        # the diagnostic split that 28K BUILD_FAILED/day need to triage by
-        # root cause. err_phase defaults to "pre" when empty (pre-venv
-        # failure path, e.g. state_dir.mkdir perm-denied).
-        metrics["sdk_bootstrap_phase"] = _encode_phase(err_phase or "pre")
-        metrics["sdk_bootstrap_err"] = _encode_err_kind(err_kind)
-        # For "other:<tail>" (encoded err==99), emit a bounded integer
-        # hash of the stderr tail so BQ can distinguish patterns inside
-        # the _uncategorized bucket without unbounded cardinality. Zero
-        # when err_kind is categorized — the schema reader treats 0 as
-        # "no signal", matching the absence convention.
-        sig = _encode_stderr_sig(err_kind)
-        if sig:
-            metrics["sdk_bootstrap_stderr_sig"] = sig
-        # Exception-type + errno for the "exc:" bucket (the dominant
-        # remaining venv BUILD_FAILED mode per #2154 telemetry). Both
-        # auto-omit (0) on stderr/categorized failures.
-        exc = _encode_exc_kind(err_kind)
-        if exc:
-            metrics["sdk_bootstrap_exc"] = exc
-        exc_errno = _encode_errno(err_kind)
-        if exc_errno:
-            metrics["sdk_bootstrap_errno"] = exc_errno
-        # Subprocess returncode for signal kills (-9 OOM-killer / -11
-        # RLIMIT_AS / -6 abort). Confirms in prod which signal dominates the
-        # Linux memory-kill bucket. 0 (omitted) for non-signal failures.
-        rc = _encode_rc(err_kind)
-        if rc:
-            metrics["sdk_bootstrap_rc"] = rc
-        # venv_ensurepip_fail (code 11) is the top categorizable venv
-        # failure, and telemetry shows it's NOT just Debian — macOS has the
-        # most distinct affected users. Probe whether this interpreter has
-        # pip so we know if a `pip install --target` fallback (Option A)
-        # would actually help, vs the user needing a system package. Probed
-        # only here (not on the happy path) to avoid an extra subprocess
-        # per healthy session.
-        if _encode_err_kind(err_kind) == 11:
-            metrics["sdk_has_pip"] = _probe_has_pip()
-    # Interpreter version (major*100 + minor, e.g. 309 / 312), emitted on
-    # every bootstrap. Disambiguates the macOS cohort (Apple 3.9 vs a 3.10+
-    # with broken ensurepip) for both venv_ensurepip_fail AND
-    # HOOK_PY_INCOMPATIBLE (whose "py_3.9" err_kind otherwise collapses to
-    # err=99, losing the version). Cheap — no subprocess, just sys.version_info.
-    metrics["sdk_hook_py"] = sys.version_info[0] * 100 + sys.version_info[1]
+        # Truncate defensively; categorized values are <40 chars but
+        # `other:<tail>` runs to 66 and `exc:<TypeName>` is unbounded.
+        # err_phase may be empty for pre-venv failures (state_dir.mkdir
+        # perm-denied, sentinel O_EXCL raising a non-FileExistsError
+        # OSError) — emit as "pre" so the err_kind isn't silently dropped.
+        metrics["sdk_bootstrap_phase"] = (err_phase or "pre")[:16]
+        metrics["sdk_bootstrap_err"] = err_kind[:96]
    pv = _plugin_version_int()
    if pv:
        metrics["pv"] = pv
--- a/plugins/security-guidance/hooks/llm.py
+++ b/plugins/security-guidance/hooks/llm.py
@@ -27,7 +27,7 @@ from typing import Optional, Tuple, Dict, Any, List

 import extensibility
 import review_api
-from _base import debug_log, _record_usage, _record_http_error, _PV, PROVENANCE_TAG, state_dir as _resolve_state_dir  # noqa: F401
+from _base import debug_log, _record_usage, _PV, PROVENANCE_TAG, state_dir as _resolve_state_dir  # noqa: F401
 from session_state import with_locked_state


@@ -55,12 +55,6 @@ def _inject_agent_sdk_venv_into_syspath(state_dir):
    candidates = (
        glob.glob(os.path.join(venv_root, "lib", "python*", "site-packages"))
        + glob.glob(os.path.join(venv_root, "Lib", "site-packages"))
-        # `pip install --target` fallback (ensure_agent_sdk BUILT_TARGET, used
-        # when venv can't bootstrap pip): a FLAT layout — packages sit directly
-        # in agent-sdk-libs/, not under a site-packages subdir. See #2154
-        # follow-up. The pywin32 .pth bootstrap below applies here too (target
-        # installs don't process .pth at runtime, same as a manual venv insert).
-        + [os.path.join(state_dir, "agent-sdk-libs")]
    )
    added = False
    for sp in candidates:
@@ -374,7 +368,6 @@ def _call_claude_via_sdk(prompt, output_schema, *, max_tokens=16000, model=None)
        except Exception as e:
            debug_log(f"3P sdk-single-turn: SDK unavailable ({e})")
            _last_call_claude_http_error = -1
-            _record_http_error(-1)
            return None

    cli_path = os.environ.get("SG_AGENTIC_CLI_PATH") or None
@@ -432,7 +425,6 @@ def _call_claude_via_sdk(prompt, output_schema, *, max_tokens=16000, model=None)
    except _asyncio.TimeoutError:
        debug_log("3P sdk-single-turn: timeout after 60s")
        _last_call_claude_http_error = -1
-        _record_http_error(-1)
        return None
    except Exception as e:
        debug_log(f"3P sdk-single-turn: query failed ({e})")
@@ -441,7 +433,6 @@ def _call_claude_via_sdk(prompt, output_schema, *, max_tokens=16000, model=None)
            for _l in _captured_stderr[:20]:
                debug_log(f"  | {_l.rstrip()}")
        _last_call_claude_http_error = -1
-        _record_http_error(-1)
        return None


@@ -551,7 +542,6 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
                error_body = e.read().decode("utf-8") if e.fp else ""
                debug_log(f"API error: {e.code} - {error_body[:200]}")
                _last_call_claude_http_error = e.code
-                _record_http_error(e.code)
                return None
        except (urllib.error.URLError, TimeoutError) as e:
            if attempt < 2:
@@ -561,7 +551,6 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
            else:
                debug_log(f"Request failed after retries: {e}")
                _last_call_claude_http_error = -1
-                _record_http_error(-1)
                return None

    if not response_data:
@@ -570,7 +559,6 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
        # call uses the token; record the 401 so callers don't see error=None.
        if _last_call_claude_http_error is None:
            _last_call_claude_http_error = 401
-            _record_http_error(401)
        return None

    # Find the text block (skip thinking blocks)
--- a/plugins/security-guidance/hooks/security_reminder_hook.py
+++ b/plugins/security-guidance/hooks/security_reminder_hook.py
@@ -221,34 +221,15 @@ def emit_metrics(
    task-notification one-liner. Must be in the same JSON line as the metrics
    because CC stops scanning stdout after the first {-prefixed line.

-    `additional_context` (asyncRewake findings): model-visible guidance text.
-    Delivery channel depends on `hook_event_name` because CC's hook-output
-    contract is NOT symmetric across events:
-
-      - PostToolUse (commit-review, push-sweep): surfaced via the modern
-        hookSpecificOutput.additionalContext protocol. `PostToolUse` is a
-        member of CC's hookSpecificOutput discriminated union
-        (coreSchemas.ts), so the JSON validates and metrics/rewakeSummary
-        are consumed. See #1375 / #1783 for why this replaced the legacy
-        stderr + exit(2) shape for PostToolUse.
-
-      - Stop / SubagentStop: there is NO `Stop` member in that union, so
-        emitting hookSpecificOutput{hookEventName:"Stop"} makes the whole
-        line fail isSyncHookJSONOutput validation — which on the asyncRewake
-        path silently drops metrics AND rewakeSummary, and (because the
-        legacy stderr write was removed) leaks the raw JSON to the model as
-        the rewake body. CC's asyncRewake delivery actually reads
-        `stderr || stdout` for the model-visible body and only scans stdout
-        JSON for metrics+rewakeSummary — it never reads additionalContext
-        on this path. So for Stop we use the documented clean pattern:
-        guidance on stderr, valid JSON (metrics + rewakeSummary +
-        top-level decision/reason) on stdout. The top-level decision:"block"
-        + reason also covers the sync-fallback path (single-shot `claude -p`,
-        where asyncRewake degrades to a sync Stop hook that reads
-        decision/reason). See #2159.
-
-    Empty/None additional_context emits neither channel (back-compat for
-    metrics-only callers).
+    `additional_context` (asyncRewake findings): model-visible guidance text
+    that CC surfaces via the modern hook-output protocol
+    (hookSpecificOutput.additionalContext) instead of the legacy stderr +
+    exit(2) pair. The caller passes the finding-explanation text it would
+    have written to stderr; the JSON channel carries it cleanly so CC's UI
+    shows the reason properly instead of "Permission denied with no reason".
+    See anthropics/claude-plugins-official#1375 and #1783. Empty/None
+    means no hookSpecificOutput field is emitted (preserves backward compat
+    for legacy emit-sites that only want metrics).

    `system_message` (optional, asyncRewake only): user-visible TUI message,
    distinct from rewakeSummary which is the task-notification one-liner.
@@ -256,9 +237,10 @@ def emit_metrics(
    surface; systemMessage adds a per-fire override when the static
    rewakeMessage isn't specific enough for the finding being shown.

-    `hook_event_name` (used only when additional_context is set): selects the
-    delivery channel above. Defaults to "PostToolUse" (commit-review and
-    push-sweep are the most common callers); handle_stop_hook passes "Stop".
+    `hook_event_name` (used only when additional_context is set): which event
+    the hookSpecificOutput attaches to. Defaults to "PostToolUse" since the
+    commit-review and push-sweep handlers are the most common callers;
+    handle_stop_hook explicitly passes "Stop".
    """
    head = {}
    if _PV and "pv" not in metrics:
@@ -270,23 +252,14 @@ def emit_metrics(
    if rewake_summary:
        out["rewakeSummary"] = rewake_summary
    if additional_context:
-        if hook_event_name in ("Stop", "SubagentStop"):
-            # Stop is NOT in CC's hookSpecificOutput union — emitting it there
-            # fails schema validation and drops metrics+rewakeSummary (#2159).
-            # Clean pattern: guidance on stderr (the asyncRewake body channel,
-            # delivered via `stderr || stdout`), top-level decision/reason for
-            # the sync-fallback path. stdout JSON stays valid so metrics +
-            # rewakeSummary survive.
-            sys.stderr.write(additional_context)
-            sys.stderr.flush()
-            out["decision"] = "block"
-            out["reason"] = additional_context
-        else:
-            # PostToolUse et al. — valid union member; modern protocol.
-            out["hookSpecificOutput"] = {
-                "hookEventName": hook_event_name,
-                "additionalContext": additional_context,
-            }
+        # Wrap in hookSpecificOutput per CC's modern hook-output contract. This
+        # replaces the legacy `sys.stderr.write(...) + sys.exit(2)` shape that
+        # left CC's UI showing "denied with no reason" (#1783) and triggered
+        # "json output validation failed" on older CC versions (#1375).
+        out["hookSpecificOutput"] = {
+            "hookEventName": hook_event_name,
+            "additionalContext": additional_context,
+        }
    if system_message:
        out["systemMessage"] = system_message
    print(json.dumps(out), flush=True)