Bump 31 plugin SHA pin(s) to upstream HEAD

2026-06-11 18:53:33 +00:00 · 2026-05-31 08:26:20 +00:00
8 changed files with 164 additions and 702 deletions
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
--- a/.github/workflows/bump-plugin-shas.yml
+++ b/.github/workflows/bump-plugin-shas.yml
@@ -2,24 +2,25 @@ name: Bump Plugin SHAs

 # Nightly sweep: for each external entry whose upstream HEAD has moved past
 # its pinned SHA, validate at the new SHA with `claude plugin validate`
-# inline, then open one PR per bumped plugin on branch `bump/<slug>`.
-# Failing entries stay isolated in their own PR; passing bumps merge
-# independently.
+# inline, then open one PR with all passing bumps. Each run force-resets the
+# bump/plugin-shas branch, so a previous night's unmerged PR is replaced (and
+# its review state discarded) — review and merge same-day to avoid churn.
 #
 # Bot-free — uses the default GITHUB_TOKEN. PRs opened with GITHUB_TOKEN don't
-# trigger on:pull_request workflows, so the required status checks on main
-# (`scan` from Scan Plugins, `check` from Check MCP URLs, `validate` from
-# Validate Plugins) would never run and the bump PR could never merge.
-# workflow_dispatch is exempt from that recursion guard, so we dispatch all
-# three ourselves against each per-entry bump branch after its PR is opened.
-# Each check run lands on the branch HEAD — the same SHA as the PR head — and
-# satisfies the corresponding required check. (Each of those workflows runs
-# its job unconditionally on workflow_dispatch, so a dispatch always reports.)
+# trigger on:pull_request workflows, so the policy scan (`Scan Plugins`, a
+# required status check on main) would never run and the bump PR could never
+# merge. workflow_dispatch is exempt from that recursion guard, so we dispatch
+# the scan ourselves on the bump branch after the PR is opened. The check run
+# lands on the branch HEAD — the same SHA as the PR head — and satisfies the
+# required check.
 #
-# max-bumps caps the per-night work for cost control. Per-entry scans are
-# more expensive than a single batched scan, so the cap is conservative.
-# The composite action skips entries that already have an open bump PR, so
-# re-dispatches don't pile up duplicate work.
+# max-bumps is set above the external-entry count so a single run can clear
+# any backlog. The cost-control mechanisms are downstream:
+#   - scan-plugins.yml caches verdicts by (plugin, sha) so an unchanged SHA
+#     is never re-scanned across nightly force-resets.
+#   - revert-failed-bumps.yml drops policy-failing entries from the bump PR
+#     so one bad upstream can't block the rest.
+# See those files for details.

 on:
  schedule:
@@ -29,12 +30,12 @@ on:
      max_bumps:
        description: Cap on plugins bumped this run
        required: false
-        default: '30'
+        default: '130'

 permissions:
  contents: write
  pull-requests: write
-  actions: write  # gh workflow run {scan-plugins,check-mcp-urls,validate-plugins}.yml per bump branch
+  actions: write  # gh workflow run scan-plugins.yml on the bump branch

 concurrency:
  group: bump-plugin-shas
@@ -42,8 +43,8 @@ concurrency:
 jobs:
  bump:
    runs-on: ubuntu-latest
-    # Per-bump cost is ~2s (ls-remote + shallow clone + validate); 30 entries
-    # is ~1-2 min. The 60 min ceiling absorbs slow upstreams without letting a
+    # Per-bump cost is ~2s (ls-remote + shallow clone + validate); 130 entries
+    # is ~5 min. The 60 min ceiling absorbs slow upstreams without letting a
    # pathological run consume the default 360 min budget.
    timeout-minutes: 60
    steps:
@@ -51,44 +52,18 @@ jobs:

      # createCommitOnBranch-based bump so commits are signed by GitHub and
      # satisfy the org-level required_signatures ruleset on main.
-      - uses: anthropics/claude-plugins-community/.github/actions/bump-plugin-shas@e2019b2a01f11aa1484c53540b1cfab5eebbc299
+      - uses: anthropics/claude-plugins-community/.github/actions/bump-plugin-shas@c41c6911de0afffd2bc5cd8b21fb1e06444ee13b
        id: bump
        with:
          marketplace-path: .claude-plugin/marketplace.json
-          max-bumps: ${{ inputs.max_bumps || '30' }}
-          pr-mode: per-entry
+          max-bumps: ${{ inputs.max_bumps || '130' }}
          claude-cli-version: latest

-      # Per-entry fan-out: dispatch the three required checks against each bump
-      # branch. `pr-urls` is a JSON array of {name, old_sha, new_sha, branch,
-      # pr_url} entries emitted by the composite action when pr-mode is
-      # per-entry. All three (scan / check / validate) are required on main and
-      # none fire on the GITHUB_TOKEN-opened PR, so each must be dispatched.
-      # A single failed dispatch (transient API error / rate limit) must not
-      # strand the remaining branches, so we attempt every dispatch, then fail
-      # the step if any failed: a missing required check would otherwise leave
-      # its bump PR silently blocked behind a green run, and the composite
-      # action skips slugs with an open PR so it would never be retried.
-      - name: Dispatch required checks per per-entry PR
-        if: steps.bump.outputs.pr-urls != '' && steps.bump.outputs.pr-urls != '[]'
+      # `bump/plugin-shas` is the action's default `pr-branch`. The scan diffs
+      # the branch against origin/main (the action's base-ref fallback when
+      # there's no pull_request event) and scans only the bumped entries.
+      - name: Dispatch policy scan on bump branch
+        if: steps.bump.outputs.pr-url != ''
        env:
          GH_TOKEN: ${{ github.token }}
-          PR_URLS: ${{ steps.bump.outputs.pr-urls }}
-        run: |
-          set -euo pipefail
-          dispatch_failures="$(mktemp)"
-          jq -c '.[]' <<<"$PR_URLS" | while read -r entry; do
-            branch=$(jq -r '.branch' <<<"$entry")
-            name=$(jq -r '.name' <<<"$entry")
-            for wf in scan-plugins check-mcp-urls validate-plugins; do
-              echo "Dispatching ${wf}.yml against $branch ($name)"
-              if ! gh workflow run "${wf}.yml" --ref "$branch"; then
-                echo "::error::Failed to dispatch ${wf}.yml against $branch ($name) — required check will be missing; re-dispatch with: gh workflow run ${wf}.yml --ref $branch"
-                echo "${wf} ${branch}" >> "$dispatch_failures"
-              fi
-            done
-          done
-          if [ -s "$dispatch_failures" ]; then
-            echo "::error::$(wc -l < "$dispatch_failures" | tr -d ' ') required-check dispatch(es) failed; the affected bump PR(s) are blocked until re-dispatched (see annotations above)."
-            exit 1
-          fi
+        run: gh workflow run scan-plugins.yml --ref bump/plugin-shas
--- a/.github/workflows/validate-plugins.yml
+++ b/.github/workflows/validate-plugins.yml
@@ -12,14 +12,6 @@ on:
    branches: [main]
    paths:
      - '.claude-plugin/**'
-  # `validate` is a required status check on main. Bump PRs are opened with
-  # GITHUB_TOKEN, which doesn't fire on:pull_request (recursion guard), so the
-  # path-filtered trigger above never reports on them and the PR would be
-  # blocked forever. The bump workflow dispatches this against each per-entry
-  # bump branch instead; the check run lands on the branch HEAD (= PR head)
-  # and satisfies the required check. The validate job runs unconditionally,
-  # so a dispatch always reports.
-  workflow_dispatch:

 permissions:
  contents: read
--- a/plugins/security-guidance/.claude-plugin/plugin.json
+++ b/plugins/security-guidance/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
  "name": "security-guidance",
-  "version": "2.0.3",
+  "version": "2.0.0",
  "description": "Security review for Claude-generated code. Pattern-based warnings on edits, LLM-powered diff review on Stop, and an agentic commit reviewer that catches injection, XSS, SSRF, hardcoded secrets, and 25+ other vulnerability classes.",
  "author": {
    "name": "David Dworken",
--- a/plugins/security-guidance/hooks/_base.py
+++ b/plugins/security-guidance/hooks/_base.py
@@ -116,18 +116,7 @@ _PV = _read_plugin_version_int()
 # Emitted via _usage_metrics() into the existing emit_metrics() channel so
 # hook metrics rows carry per-invocation token/cost totals
 # alongside the existing skip_reason / vulns_found fields.
-_USAGE = {
-    "in": 0, "out": 0, "cr": 0, "cw": 0, "cost": 0.0, "n": 0,
-    # HTTP error visibility (#2098 visibility gap — see emit comment in
-    # _usage_metrics). Without this, API failures from `_call_claude` left
-    # zero fingerprint in telemetry: the call returns None, the caller's
-    # emit_metrics carries no api_calls field, and the failure is
-    # indistinguishable from "no review needed". The deprecation outage
-    # that broke every commit-review LLM call was invisible until users
-    # reported it manually.
-    "http_err_last": 0,    # most recent HTTP error code this invocation
-    "http_err_count": 0,   # total HTTP errors (4xx + 5xx + network)
-}
+_USAGE = {"in": 0, "out": 0, "cr": 0, "cw": 0, "cost": 0.0, "n": 0}
 _USAGE_LOCK = threading.Lock()

 # $/Mtok (input, output). Used only for the raw-HTTP path; the SDK path
@@ -177,55 +166,19 @@ def _record_usage(usage, model, cost_usd=None):
        _USAGE["n"] += 1


-def _record_http_error(status):
-    """Record an HTTP error from an LLM API call. `status` is the HTTP
-    status code (integer 400–599) or -1 for network/timeout errors. Stored
-    in `_USAGE["http_err_last"]` (most recent) and counted in
-    `_USAGE["http_err_count"]`. Snapshot via `_usage_metrics()` so every
-    subsequent `emit_metrics` includes the failure fingerprint.
-
-    Background: without this, the most recent example was the #2098
-    deprecation 400. Every hook fire's LLM call returned HTTP 400; the
-    plugin caught it and returned None; the emit_metrics carried no
-    api_calls field; aggregate dashboards looked normal. The failure
-    only became visible when a user manually reported errors out of
-    their debug log. With this field, a category-of-failure spike (4xx,
-    5xx, or -1 network) is queryable from BQ in real time.
-    """
-    try:
-        s = int(status)
-    except (TypeError, ValueError):
-        return
-    with _USAGE_LOCK:
-        _USAGE["http_err_last"] = s
-        _USAGE["http_err_count"] += 1
-
-
 def _usage_metrics():
    """Snapshot the accumulator as metric keys. Returns {} when no API calls
-    AND no HTTP errors were made so skip-path emits don't burn key budget.
-    cost_usd rounded to 1e-6 to keep the float finite/short for the zod
-    schema.
-
-    HTTP errors (`http_err_last`, `http_err_count`) emitted ONLY when
-    `http_err_count > 0` so successful calls don't pad every metrics row
-    with two zero fields.
-    """
+    were made so skip-path emits don't burn key budget. cost_usd is rounded
+    to 6 decimal places to keep the float short for the zod schema."""
    with _USAGE_LOCK:
-        if _USAGE["n"] == 0 and _USAGE["http_err_count"] == 0:
+        if _USAGE["n"] == 0:
            return {}
-        out = {}
-        if _USAGE["n"] > 0:
-            out.update({
-                "tok_in": _USAGE["in"],
-                "tok_out": _USAGE["out"],
-                "tok_cache_r": _USAGE["cr"],
-                "tok_cache_w": _USAGE["cw"],
-                "cost_usd": round(_USAGE["cost"], 6),
-                "api_calls": _USAGE["n"],
-            })
-        if _USAGE["http_err_count"] > 0:
-            out["http_err_last"] = _USAGE["http_err_last"]
-            out["http_err_count"] = _USAGE["http_err_count"]
-        return out
+        return {
+            "tok_in": _USAGE["in"],
+            "tok_out": _USAGE["out"],
+            "tok_cache_r": _USAGE["cr"],
+            "tok_cache_w": _USAGE["cw"],
+            "cost_usd": round(_USAGE["cost"], 6),
+            "api_calls": _USAGE["n"],
+        }

--- a/plugins/security-guidance/hooks/ensure_agent_sdk.py
+++ b/plugins/security-guidance/hooks/ensure_agent_sdk.py
@@ -42,122 +42,6 @@ HOOK_PY_INCOMPATIBLE = 6  # hook interpreter is <3.10 — SDK syntax can't load
                          # here no matter how the venv was built. See #2071.


-# Phase + err-kind integer encoding for sdk_bootstrap_phase / sdk_bootstrap_err.
-#
-# Earlier versions emitted these as STRINGS (e.g. "pip", "dns_fail"). CC's
-# plugin-metrics pipeline silently drops plugin-emitted string values —
-# only `bool|finite-number` plugin metrics reach BigQuery. (CC-core
-# metrics like `subscription_type` are exempt because they're injected
-# downstream of plugin validation.) Confirmed empirically: 185K
-# BUILD_FAILED rows in BQ had `sdk_bootstrap_phase`/`sdk_bootstrap_err`
-# = NULL despite the Python code emitting them. This left ~28K
-# BUILD_FAILED sessions/day with no diagnostic split — flying blind on
-# the real failure modes (pip-no-match vs dns-fail vs ssl-verify etc.).
-#
-# Fix: encode as small integers per the maps below. Values are
-# APPEND-ONLY for telemetry stability. Reserve 99 as the "unknown /
-# uncategorized" bucket so an unmapped err_kind (e.g., a new exception
-# type) still emits a non-zero signal.
-SDK_BOOTSTRAP_PHASE_CODES = {
-    "pre":  1,  # pre-venv (state_dir.mkdir, sentinel open)
-    "venv": 2,  # python -m venv --clear
-    "pip":  3,  # pip install
-    "main": 4,  # uncaught exception above main()
-}
-SDK_BOOTSTRAP_ERR_CODES = {
-    "pip_no_match":         1,
-    "dns_fail":             2,
-    "conn_refused":         3,
-    "ssl_verify":           4,
-    "perm_denied":          5,
-    "no_pip":               6,
-    "disk_full":            7,
-    "proxy_auth":           8,
-    "stderr_timeout":       9,   # pip stderr containing "timeout"/"timed out"
-    "subprocess_timeout":   10,  # subprocess.TimeoutExpired (>120s)
-    # Venv-stage specific categories added after PR #2112 telemetry surfaced
-    # 2,406 phase=2/err=99 sessions in the first 3h of v2.0.1 — venv phase
-    # failing in ways the original pip-flavored patterns didn't catch. These
-    # all split out of what was previously collapsing to _uncategorized.
-    "venv_ensurepip_fail":  11,  # Debian/Ubuntu missing python3-venv;
-                                 # stderr mentions ensurepip non-zero exit
-                                 # or "ensurepip is not available"
-    "venv_path_too_long":   12,  # Windows MAX_PATH (260) or POSIX
-                                 # ENAMETOOLONG — venv writes deep paths
-                                 # under state_dir/agent-sdk-venv/Lib/...
-    "venv_no_module":       13,  # `python3 -m venv` itself missing — "No
-                                 # module named 'venv'" / "No module named venv"
-    "venv_already_exists":  14,  # Errno 17 / "file exists" — sentinel race
-                                 # past O_EXCL or stale dir survived --clear
-    "venv_setup_failed":    15,  # Generic "virtual environment was not
-                                 # created successfully" — catches the long
-                                 # tail of venv setup failures that don't
-                                 # match a more specific category above
-    # 16–98 reserved for future categories; APPEND-ONLY.
-    # 99 catches everything else (including "exc:<TypeName>" and "other:<tail>"
-    # — the original string is debug-loggable but the integer is what makes
-    # it to telemetry). For the "other:" tail, `sdk_bootstrap_stderr_sig`
-    # carries a bounded integer hash so we can still distinguish patterns
-    # in BQ aggregation.
-    "_uncategorized":       99,
-}
-
-
-def _encode_phase(s):
-    """Map err_phase string to its telemetry integer code, or 0 if unset.
-    Empty/None → 0 lets `if encoded:` cleanly skip emission. Per
-    SDK_BOOTSTRAP_PHASE_CODES, valid codes are 1-4."""
-    return SDK_BOOTSTRAP_PHASE_CODES.get((s or "").strip(), 0)
-
-
-def _encode_err_kind(s):
-    """Map err_kind string to its telemetry integer code, or 0 if unset.
-    Direct hits use the static map; "exc:<X>" and "other:<tail>" both
-    collapse to _uncategorized (99) — the raw string survives in debug
-    logs, only the integer reaches BQ."""
-    s = (s or "").strip()
-    if not s:
-        return 0
-    if s in SDK_BOOTSTRAP_ERR_CODES:
-        return SDK_BOOTSTRAP_ERR_CODES[s]
-    # Prefix matches for the catch-all categories
-    if s.startswith("exc:") or s.startswith("other:") or s == "other":
-        return SDK_BOOTSTRAP_ERR_CODES["_uncategorized"]
-    # Unknown string — still emit as uncategorized rather than dropping
-    return SDK_BOOTSTRAP_ERR_CODES["_uncategorized"]
-
-
-def _encode_stderr_sig(err_kind):
-    """Bounded integer hash of the stderr tail captured in "other:<tail>"
-    err_kinds. Lets us distinguish patterns INSIDE the _uncategorized
-    (code 99) bucket without unbounded cardinality.
-
-    Returns 0 for non-"other:" err_kinds (so the field auto-omits from
-    emit_metrics on categorized failures — see the emit block in main()).
-
-    Strategy: take the tail's first ~30 chars (post-lowercase, post-trim),
-    SHA-1, fold the first 2 bytes to 0–999. Different stderr messages
-    cluster into different buckets; same stderr always maps to the same
-    bucket. Cardinality is bounded at 1000, well below any "high
-    cardinality" alarm — and a real failure mode typically produces
-    near-identical stderr across thousands of machines, so 1000 buckets
-    is comfortably wide.
-
-    Why first ~30 chars: stderr like "ERROR: Command failed: <full
-    path>" varies the tail wildly (paths) but the categorization signal
-    is in the leading words. Dropping the suffix focuses the hash on
-    the discriminative part.
-    """
-    if not err_kind or not err_kind.startswith("other:"):
-        return 0
-    import hashlib
-    tail = err_kind[len("other:"):].strip().lower()[:30]
-    if not tail:
-        return 0
-    h = hashlib.sha1(tail.encode("utf-8", errors="replace")).digest()
-    return int.from_bytes(h[:2], "big") % 1000
-
-
 def _sdk_on_syspath() -> bool:
    # find_spec is ~10ms; actually importing the SDK pulls in
    # transitive deps and costs ~800ms — too heavy for a
@@ -296,34 +180,7 @@ def main() -> tuple[int, str, str]:
        else:
            stderr_str = str(stderr_b)
        s = stderr_str.lower()
-        # Venv-specific patterns checked FIRST — they overlap with some pip
-        # patterns (e.g. "no module named ensurepip" could match no_pip OR
-        # venv_ensurepip_fail; the venv-stage interpretation is the right
-        # one when err_phase=="venv"). Order is venv-most-specific →
-        # pip-historical → generic.
-        if err_phase == "venv" and (
-            "ensurepip is not available" in s
-            or ("ensurepip" in s and "returned non-zero" in s)
-            or "the virtual environment was not created" in s and "ensurepip" in s
-        ):
-            err_kind = "venv_ensurepip_fail"
-        elif err_phase == "venv" and (
-            "[errno 36]" in s
-            or "file name too long" in s
-            or "path too long" in s
-        ):
-            err_kind = "venv_path_too_long"
-        elif err_phase == "venv" and (
-            "no module named venv" in s
-            or "no module named 'venv'" in s
-        ):
-            err_kind = "venv_no_module"
-        elif err_phase == "venv" and (
-            "[errno 17]" in s
-            or ("file exists" in s and "venv" in s)
-        ):
-            err_kind = "venv_already_exists"
-        elif "no matching distribution" in s or "could not find a version" in s:
+        if "no matching distribution" in s or "could not find a version" in s:
            err_kind = "pip_no_match"
        elif "name or service not known" in s or "name resolution" in s \
                or "nodename nor servname" in s or "temporary failure in name" in s:
@@ -342,15 +199,6 @@ def main() -> tuple[int, str, str]:
            err_kind = "proxy_auth"
        elif "timeout" in s or "timed out" in s:
            err_kind = "stderr_timeout"
-        elif err_phase == "venv" and (
-            "virtual environment was not created" in s
-            or "error: command" in s and "venv" in s
-        ):
-            # Generic venv-setup catch-all — matched AFTER the more specific
-            # venv patterns above so we don't shadow them, but BEFORE the
-            # other: fallback so generic venv setup failures get their own
-            # bucket instead of polluting the long-tail signature space.
-            err_kind = "venv_setup_failed"
        else:
            # First 60 chars of the last non-empty stderr line — bounded to
            # stay inside CC's metric value-length budget. Real failure modes
@@ -440,33 +288,21 @@ if __name__ == "__main__":
    # and takes the FIRST non-{"async":...} JSON line as the hook response;
    # its `metrics` key is forwarded to the hook metrics event on the
    # next attachments pass. Must be a single line — the registry splits on
-    # \n and json-parses each independently.
-    #
-    # IMPORTANT — values must be bool|finite-number. The validation comment
-    # has historically said "or short strings" but that was wrong: CC's
-    # plugin-metrics pipeline silently drops plugin-emitted string values.
-    # Stay inside the 10-key emit cap.
+    # \n and json-parses each independently. Values must be bool|number or
+    # short non-null strings (NOTE(review): confirm CC really forwards string
+    # metric values to telemetry). Stay inside the 10-key emit cap.
    metrics: dict[str, object] = {
        "sdk_bootstrap": outcome,
        "sdk_bootstrap_ms": round((time.perf_counter() - t0) * 1000),
    }
    if err_kind:
-        # Encode phase + err_kind as integer codes (see
-        # SDK_BOOTSTRAP_PHASE_CODES / SDK_BOOTSTRAP_ERR_CODES). Earlier
-        # versions emitted these as strings and CC dropped them — restoring
-        # the diagnostic split that 28K BUILD_FAILED/day need to triage by
-        # root cause. err_phase defaults to "pre" when empty (pre-venv
-        # failure path, e.g. state_dir.mkdir perm-denied).
-        metrics["sdk_bootstrap_phase"] = _encode_phase(err_phase or "pre")
-        metrics["sdk_bootstrap_err"] = _encode_err_kind(err_kind)
-        # For "other:<tail>" (encoded err==99), emit a bounded integer
-        # hash of the stderr tail so BQ can distinguish patterns inside
-        # the _uncategorized bucket without unbounded cardinality. Zero
-        # when err_kind is categorized — the schema reader treats 0 as
-        # "no signal", matching the absence convention.
-        sig = _encode_stderr_sig(err_kind)
-        if sig:
-            metrics["sdk_bootstrap_stderr_sig"] = sig
+        # Truncate defensively; categorized values are <40 chars but the
+        # `other:<tail>` mode could be longer. err_phase may be empty for
+        # pre-venv failures (state_dir.mkdir perm-denied, sentinel O_EXCL
+        # raising a non-FileExistsError OSError) — emit as "pre" so the
+        # err_kind isn't silently dropped.
+        metrics["sdk_bootstrap_phase"] = (err_phase or "pre")[:16]
+        metrics["sdk_bootstrap_err"] = err_kind[:96]
    pv = _plugin_version_int()
    if pv:
        metrics["pv"] = pv
--- a/plugins/security-guidance/hooks/llm.py
+++ b/plugins/security-guidance/hooks/llm.py
@@ -27,7 +27,7 @@ from typing import Optional, Tuple, Dict, Any, List

 import extensibility
 import review_api
-from _base import debug_log, _record_usage, _record_http_error, _PV, PROVENANCE_TAG, state_dir as _resolve_state_dir  # noqa: F401
+from _base import debug_log, _record_usage, _PV, PROVENANCE_TAG, state_dir as _resolve_state_dir  # noqa: F401
 from session_state import with_locked_state


@@ -368,7 +368,6 @@ def _call_claude_via_sdk(prompt, output_schema, *, max_tokens=16000, model=None)
        except Exception as e:
            debug_log(f"3P sdk-single-turn: SDK unavailable ({e})")
            _last_call_claude_http_error = -1
-            _record_http_error(-1)
            return None

    cli_path = os.environ.get("SG_AGENTIC_CLI_PATH") or None
@@ -426,7 +425,6 @@ def _call_claude_via_sdk(prompt, output_schema, *, max_tokens=16000, model=None)
    except _asyncio.TimeoutError:
        debug_log("3P sdk-single-turn: timeout after 60s")
        _last_call_claude_http_error = -1
-        _record_http_error(-1)
        return None
    except Exception as e:
        debug_log(f"3P sdk-single-turn: query failed ({e})")
@@ -435,7 +433,6 @@ def _call_claude_via_sdk(prompt, output_schema, *, max_tokens=16000, model=None)
            for _l in _captured_stderr[:20]:
                debug_log(f"  | {_l.rstrip()}")
        _last_call_claude_http_error = -1
-        _record_http_error(-1)
        return None


@@ -545,7 +542,6 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
                error_body = e.read().decode("utf-8") if e.fp else ""
                debug_log(f"API error: {e.code} - {error_body[:200]}")
                _last_call_claude_http_error = e.code
-                _record_http_error(e.code)
                return None
        except (urllib.error.URLError, TimeoutError) as e:
            if attempt < 2:
@@ -555,7 +551,6 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
            else:
                debug_log(f"Request failed after retries: {e}")
                _last_call_claude_http_error = -1
-                _record_http_error(-1)
                return None

    if not response_data:
@@ -564,7 +559,6 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
        # call uses the token; record the 401 so callers don't see error=None.
        if _last_call_claude_http_error is None:
            _last_call_claude_http_error = 401
-            _record_http_error(401)
        return None

    # Find the text block (skip thinking blocks)
--- a/plugins/security-guidance/hooks/security_reminder_hook.py
+++ b/plugins/security-guidance/hooks/security_reminder_hook.py
@@ -221,34 +221,15 @@ def emit_metrics(
    task-notification one-liner. Must be in the same JSON line as the metrics
    because CC stops scanning stdout after the first {-prefixed line.

-    `additional_context` (asyncRewake findings): model-visible guidance text.
-    Delivery channel depends on `hook_event_name` because CC's hook-output
-    contract is NOT symmetric across events:
-
-      - PostToolUse (commit-review, push-sweep): surfaced via the modern
-        hookSpecificOutput.additionalContext protocol. `PostToolUse` is a
-        member of CC's hookSpecificOutput discriminated union
-        (coreSchemas.ts), so the JSON validates and metrics/rewakeSummary
-        are consumed. See #1375 / #1783 for why this replaced the legacy
-        stderr + exit(2) shape for PostToolUse.
-
-      - Stop / SubagentStop: there is NO `Stop` member in that union, so
-        emitting hookSpecificOutput{hookEventName:"Stop"} makes the whole
-        line fail isSyncHookJSONOutput validation — which on the asyncRewake
-        path silently drops metrics AND rewakeSummary, and (because the
-        legacy stderr write was removed) leaks the raw JSON to the model as
-        the rewake body. CC's asyncRewake delivery actually reads
-        `stderr || stdout` for the model-visible body and only scans stdout
-        JSON for metrics+rewakeSummary — it never reads additionalContext
-        on this path. So for Stop we use the documented clean pattern:
-        guidance on stderr, valid JSON (metrics + rewakeSummary +
-        top-level decision/reason) on stdout. The top-level decision:"block"
-        + reason also covers the sync-fallback path (single-shot `claude -p`,
-        where asyncRewake degrades to a sync Stop hook that reads
-        decision/reason). See #2159.
-
-    Empty/None additional_context emits neither channel (back-compat for
-    metrics-only callers).
+    `additional_context` (asyncRewake findings): model-visible guidance text
+    that CC surfaces via the modern hook-output protocol
+    (hookSpecificOutput.additionalContext) instead of the legacy stderr +
+    exit(2) pair. The caller passes the finding-explanation text it would
+    have written to stderr; the JSON channel carries it cleanly so CC's UI
+    shows the reason properly instead of "Permission denied with no reason".
+    See anthropics/claude-plugins-official#1375 and #1783. Empty/None
+    means no hookSpecificOutput field is emitted (preserves backward compat
+    for legacy emit-sites that only want metrics).

    `system_message` (optional, asyncRewake only): user-visible TUI message,
    distinct from rewakeSummary which is the task-notification one-liner.
@@ -256,9 +237,10 @@ def emit_metrics(
    surface; systemMessage adds a per-fire override when the static
    rewakeMessage isn't specific enough for the finding being shown.

-    `hook_event_name` (used only when additional_context is set): selects the
-    delivery channel above. Defaults to "PostToolUse" (commit-review and
-    push-sweep are the most common callers); handle_stop_hook passes "Stop".
+    `hook_event_name` (used only when additional_context is set): which event
+    the hookSpecificOutput attaches to. Defaults to "PostToolUse" since the
+    commit-review and push-sweep handlers are the most common callers;
+    handle_stop_hook explicitly passes "Stop".
    """
    head = {}
    if _PV and "pv" not in metrics:
@@ -270,23 +252,14 @@ def emit_metrics(
    if rewake_summary:
        out["rewakeSummary"] = rewake_summary
    if additional_context:
-        if hook_event_name in ("Stop", "SubagentStop"):
-            # Stop is NOT in CC's hookSpecificOutput union — emitting it there
-            # fails schema validation and drops metrics+rewakeSummary (#2159).
-            # Clean pattern: guidance on stderr (the asyncRewake body channel,
-            # delivered via `stderr || stdout`), top-level decision/reason for
-            # the sync-fallback path. stdout JSON stays valid so metrics +
-            # rewakeSummary survive.
-            sys.stderr.write(additional_context)
-            sys.stderr.flush()
-            out["decision"] = "block"
-            out["reason"] = additional_context
-        else:
-            # PostToolUse et al. — valid union member; modern protocol.
-            out["hookSpecificOutput"] = {
-                "hookEventName": hook_event_name,
-                "additionalContext": additional_context,
-            }
+        # Wrap in hookSpecificOutput per CC's modern hook-output contract.
+        # Drops the legacy `sys.stderr.write(...) + sys.exit(2)` shape that
+        # left CC's UI showing "denied with no reason" (#1783) and triggered
+        # "json output validation failed" on older CC versions (#1375).
+        out["hookSpecificOutput"] = {
+            "hookEventName": hook_event_name,
+            "additionalContext": additional_context,
+        }
    if system_message:
        out["systemMessage"] = system_message
    print(json.dumps(out), flush=True)