mirror of
https://github.com/anthropics/claude-plugins-official.git
synced 2026-06-11 02:33:33 +00:00
Compare commits
6 Commits
fix-2099-t
...
telemetry-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
475038edfc | ||
|
|
3d368d2972 | ||
|
|
84011d43b1 | ||
|
|
2a822c0787 | ||
|
|
a40c9f1e83 | ||
|
|
c7a3e2ffa0 |
@@ -116,7 +116,18 @@ _PV = _read_plugin_version_int()
|
||||
# Emitted via _usage_metrics() into the existing emit_metrics() channel so
|
||||
# hook metrics rows carry per-invocation token/cost totals
|
||||
# alongside the existing skip_reason / vulns_found fields.
|
||||
_USAGE = {"in": 0, "out": 0, "cr": 0, "cw": 0, "cost": 0.0, "n": 0}
|
||||
_USAGE = {
|
||||
"in": 0, "out": 0, "cr": 0, "cw": 0, "cost": 0.0, "n": 0,
|
||||
# HTTP error visibility (#2098 visibility gap — see emit comment in
|
||||
# _usage_metrics). Without this, API failures from `_call_claude` left
|
||||
# zero fingerprint in telemetry: the call returns None, the caller's
|
||||
# emit_metrics carries no api_calls field, and the failure is
|
||||
# indistinguishable from "no review needed". The deprecation outage
|
||||
# that broke every commit-review LLM call was invisible until users
|
||||
# reported it manually.
|
||||
"http_err_last": 0, # most recent HTTP error code this invocation
|
||||
"http_err_count": 0, # total HTTP errors (4xx + 5xx + network)
|
||||
}
|
||||
_USAGE_LOCK = threading.Lock()
|
||||
|
||||
# $/Mtok (input, output). Used only for the raw-HTTP path; the SDK path
|
||||
@@ -166,19 +177,55 @@ def _record_usage(usage, model, cost_usd=None):
|
||||
_USAGE["n"] += 1
|
||||
|
||||
|
||||
def _record_http_error(status):
|
||||
"""Record an HTTP error from an LLM API call. `status` is the HTTP
|
||||
status code (integer 400–599) or -1 for network/timeout errors. Stored
|
||||
in `_USAGE["http_err_last"]` (most recent) and counted in
|
||||
`_USAGE["http_err_count"]`. Snapshot via `_usage_metrics()` so every
|
||||
subsequent `emit_metrics` includes the failure fingerprint.
|
||||
|
||||
Background: without this, the most recent example was the #2098
|
||||
deprecation 400. Every hook fire's LLM call returned HTTP 400; the
|
||||
plugin caught it and returned None; the emit_metrics carried no
|
||||
api_calls field; aggregate dashboards looked normal. The failure
|
||||
only became visible when a user manually reported errors out of
|
||||
their debug log. With this field, a category-of-failure spike (4xx,
|
||||
5xx, or -1 network) is queryable from BQ in real time.
|
||||
"""
|
||||
try:
|
||||
s = int(status)
|
||||
except (TypeError, ValueError):
|
||||
return
|
||||
with _USAGE_LOCK:
|
||||
_USAGE["http_err_last"] = s
|
||||
_USAGE["http_err_count"] += 1
|
||||
|
||||
|
||||
def _usage_metrics():
|
||||
"""Snapshot the accumulator as metric keys. Returns {} when no API calls
|
||||
were made so skip-path emits don't burn key budget. cost_usd rounded to
|
||||
1e-6 to keep the float finite/short for the zod schema."""
|
||||
with _USAGE_LOCK:
|
||||
if _USAGE["n"] == 0:
|
||||
return {}
|
||||
return {
|
||||
"tok_in": _USAGE["in"],
|
||||
"tok_out": _USAGE["out"],
|
||||
"tok_cache_r": _USAGE["cr"],
|
||||
"tok_cache_w": _USAGE["cw"],
|
||||
"cost_usd": round(_USAGE["cost"], 6),
|
||||
"api_calls": _USAGE["n"],
|
||||
}
|
||||
AND no HTTP errors were made so skip-path emits don't burn key budget.
|
||||
cost_usd rounded to 1e-6 to keep the float finite/short for the zod
|
||||
schema.
|
||||
|
||||
HTTP errors (`http_err_last`, `http_err_count`) emitted ONLY when
|
||||
`http_err_count > 0` so successful calls don't pad every metrics row
|
||||
with two zero fields.
|
||||
"""
|
||||
with _USAGE_LOCK:
|
||||
if _USAGE["n"] == 0 and _USAGE["http_err_count"] == 0:
|
||||
return {}
|
||||
out = {}
|
||||
if _USAGE["n"] > 0:
|
||||
out.update({
|
||||
"tok_in": _USAGE["in"],
|
||||
"tok_out": _USAGE["out"],
|
||||
"tok_cache_r": _USAGE["cr"],
|
||||
"tok_cache_w": _USAGE["cw"],
|
||||
"cost_usd": round(_USAGE["cost"], 6),
|
||||
"api_calls": _USAGE["n"],
|
||||
})
|
||||
if _USAGE["http_err_count"] > 0:
|
||||
out["http_err_last"] = _USAGE["http_err_last"]
|
||||
out["http_err_count"] = _USAGE["http_err_count"]
|
||||
return out
|
||||
|
||||
|
||||
@@ -355,9 +355,9 @@ def _list_untracked(cwd):
|
||||
the holdouts."""
|
||||
try:
|
||||
repo = _git_toplevel(cwd) or cwd
|
||||
# core.quotePath=false comes from GIT_CMD globally (see gitutil.py).
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "-c", "core.quotePath=false", "ls-files",
|
||||
"--others", "--exclude-standard", "-z"],
|
||||
[*GIT_CMD, "ls-files", "--others", "--exclude-standard", "-z"],
|
||||
cwd=repo, capture_output=True, timeout=15,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
|
||||
@@ -42,6 +42,71 @@ HOOK_PY_INCOMPATIBLE = 6 # hook interpreter is <3.10 — SDK syntax can't load
|
||||
# here no matter how the venv was built. See #2071.
|
||||
|
||||
|
||||
# Phase + err-kind integer encoding for sdk_bootstrap_phase / sdk_bootstrap_err.
|
||||
#
|
||||
# Earlier versions emitted these as STRINGS (e.g. "pip", "dns_fail"). CC's
|
||||
# plugin-metrics pipeline silently drops plugin-emitted string values —
|
||||
# only `bool|finite-number` plugin metrics reach BigQuery. (CC-core
|
||||
# metrics like `subscription_type` are exempt because they're injected
|
||||
# downstream of plugin validation.) Confirmed empirically: 185K
|
||||
# BUILD_FAILED rows in BQ had `sdk_bootstrap_phase`/`sdk_bootstrap_err`
|
||||
# = NULL despite the Python code emitting them. This left ~28K
|
||||
# BUILD_FAILED sessions/day with no diagnostic split — flying blind on
|
||||
# the real failure modes (pip-no-match vs dns-fail vs ssl-verify etc.).
|
||||
#
|
||||
# Fix: encode as small integers per the maps below. Values are
|
||||
# APPEND-ONLY for telemetry stability. Reserve 99 as the "unknown /
|
||||
# uncategorized" bucket so an unmapped err_kind (e.g., a new exception
|
||||
# type) still emits a non-zero signal.
|
||||
SDK_BOOTSTRAP_PHASE_CODES = {
|
||||
"pre": 1, # pre-venv (state_dir.mkdir, sentinel open)
|
||||
"venv": 2, # python -m venv --clear
|
||||
"pip": 3, # pip install
|
||||
"main": 4, # uncaught exception above main()
|
||||
}
|
||||
SDK_BOOTSTRAP_ERR_CODES = {
|
||||
"pip_no_match": 1,
|
||||
"dns_fail": 2,
|
||||
"conn_refused": 3,
|
||||
"ssl_verify": 4,
|
||||
"perm_denied": 5,
|
||||
"no_pip": 6,
|
||||
"disk_full": 7,
|
||||
"proxy_auth": 8,
|
||||
"stderr_timeout": 9, # pip stderr containing "timeout"/"timed out"
|
||||
"subprocess_timeout": 10, # subprocess.TimeoutExpired (>120s)
|
||||
# 11–98 reserved for future categories; APPEND-ONLY.
|
||||
# 99 catches everything else (including "exc:<TypeName>" and "other:<tail>"
|
||||
# — the original string is debug-loggable but the integer is what makes
|
||||
# it to telemetry).
|
||||
"_uncategorized": 99,
|
||||
}
|
||||
|
||||
|
||||
def _encode_phase(s):
|
||||
"""Map err_phase string to its telemetry integer code, or 0 if unset.
|
||||
Empty/None → 0 lets `if encoded:` cleanly skip emission. Per
|
||||
SDK_BOOTSTRAP_PHASE_CODES, valid codes are 1-4."""
|
||||
return SDK_BOOTSTRAP_PHASE_CODES.get((s or "").strip(), 0)
|
||||
|
||||
|
||||
def _encode_err_kind(s):
|
||||
"""Map err_kind string to its telemetry integer code, or 0 if unset.
|
||||
Direct hits use the static map; "exc:<X>" and "other:<tail>" both
|
||||
collapse to _uncategorized (99) — the raw string survives in debug
|
||||
logs, only the integer reaches BQ."""
|
||||
s = (s or "").strip()
|
||||
if not s:
|
||||
return 0
|
||||
if s in SDK_BOOTSTRAP_ERR_CODES:
|
||||
return SDK_BOOTSTRAP_ERR_CODES[s]
|
||||
# Prefix matches for the catch-all categories
|
||||
if s.startswith("exc:") or s.startswith("other:") or s == "other":
|
||||
return SDK_BOOTSTRAP_ERR_CODES["_uncategorized"]
|
||||
# Unknown string — still emit as uncategorized rather than dropping
|
||||
return SDK_BOOTSTRAP_ERR_CODES["_uncategorized"]
|
||||
|
||||
|
||||
def _sdk_on_syspath() -> bool:
|
||||
# find_spec is ~10ms; actually importing the SDK pulls in
|
||||
# transitive deps and costs ~800ms — too heavy for a
|
||||
@@ -288,21 +353,25 @@ if __name__ == "__main__":
|
||||
# and takes the FIRST non-{"async":...} JSON line as the hook response;
|
||||
# its `metrics` key is forwarded to the hook metrics event on the
|
||||
# next attachments pass. Must be a single line — the registry splits on
|
||||
# \n and json-parses each independently. Values must be bool|number OR
|
||||
# short strings (CC accepts string metric values if they're not
|
||||
# null). Stay inside the 10-key emit cap.
|
||||
# \n and json-parses each independently.
|
||||
#
|
||||
# IMPORTANT — values must be bool|finite-number. The validation comment
|
||||
# has historically said "or short strings" but that was wrong: CC's
|
||||
# plugin-metrics pipeline silently drops plugin-emitted string values.
|
||||
# Stay inside the 10-key emit cap.
|
||||
metrics: dict[str, object] = {
|
||||
"sdk_bootstrap": outcome,
|
||||
"sdk_bootstrap_ms": round((time.perf_counter() - t0) * 1000),
|
||||
}
|
||||
if err_kind:
|
||||
# Truncate defensively; categorized values are <40 chars but the
|
||||
# `other:<tail>` mode could be longer. err_phase may be empty for
|
||||
# pre-venv failures (state_dir.mkdir perm-denied, sentinel O_EXCL
|
||||
# raising a non-FileExistsError OSError) — emit as "pre" so the
|
||||
# err_kind isn't silently dropped.
|
||||
metrics["sdk_bootstrap_phase"] = (err_phase or "pre")[:16]
|
||||
metrics["sdk_bootstrap_err"] = err_kind[:96]
|
||||
# Encode phase + err_kind as integer codes (see
|
||||
# SDK_BOOTSTRAP_PHASE_CODES / SDK_BOOTSTRAP_ERR_CODES). Earlier
|
||||
# versions emitted these as strings and CC dropped them — restoring
|
||||
# the diagnostic split that 28K BUILD_FAILED/day need to triage by
|
||||
# root cause. err_phase defaults to "pre" when empty (pre-venv
|
||||
# failure path, e.g. state_dir.mkdir perm-denied).
|
||||
metrics["sdk_bootstrap_phase"] = _encode_phase(err_phase or "pre")
|
||||
metrics["sdk_bootstrap_err"] = _encode_err_kind(err_kind)
|
||||
pv = _plugin_version_int()
|
||||
if pv:
|
||||
metrics["pv"] = pv
|
||||
|
||||
@@ -26,6 +26,17 @@ GIT_CMD = [
|
||||
"git",
|
||||
"-c", "core.fsmonitor=false",
|
||||
"-c", "core.hooksPath=/dev/null",
|
||||
# core.quotePath=false: emit raw UTF-8 in path-emitting commands instead
|
||||
# of C-quoting non-ASCII bytes (default `"\\303\\201vila/..."` vs
|
||||
# `Ávila/...`). Downstream parsers — both ours (parse_diff_into_files,
|
||||
# extract_file_paths_from_diff) and Python stdlib (os.path.isabs,
|
||||
# os.path.join) — expect raw paths and silently drop / mishandle the
|
||||
# quoted form. Adding the flag globally to GIT_CMD covers every
|
||||
# subprocess.run site that uses the splat — diff feeders, rev-parse
|
||||
# path queries (--show-toplevel, --git-dir, --git-common-dir),
|
||||
# reflog %gs subjects, ls-files, status, etc. — without per-site
|
||||
# flag duplication. See #2082, #2099.
|
||||
"-c", "core.quotePath=false",
|
||||
]
|
||||
|
||||
|
||||
@@ -222,15 +233,12 @@ def _git_diff_range(repo_root, base, head="HEAD"):
|
||||
them reviewed — otherwise unreviewed commits get permanently silenced.
|
||||
"""
|
||||
try:
|
||||
# core.quotePath=false makes git emit raw UTF-8 in `diff --git a/... b/...`
|
||||
# headers instead of C-quoting non-ASCII path bytes (`"a/\303\201vila/..."`
|
||||
# vs `a/Ávila/...`). The downstream `re.match(r'^a/(.+?) b/(.+)$', ...)`
|
||||
# in parse_diff_into_files / extract_file_paths_from_diff matches the
|
||||
# raw form only — quoted headers slip past and the entire file is
|
||||
# silently dropped from review. See #2082 (sibling of #2056 / #2075).
|
||||
# GIT_CMD globally passes core.quotePath=false (see definition) so
|
||||
# non-ASCII paths in `diff --git a/... b/...` headers come through as
|
||||
# raw UTF-8, not C-quoted. Required by the downstream
|
||||
# parse_diff_into_files / extract_file_paths_from_diff regex.
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "-c", "core.quotePath=false",
|
||||
"diff", "-p", "--no-color", "--no-ext-diff", base, head],
|
||||
[*GIT_CMD, "diff", "-p", "--no-color", "--no-ext-diff", base, head],
|
||||
cwd=repo_root, capture_output=True, timeout=30,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
@@ -355,8 +363,9 @@ def _git_name_only(cwd, base, include_untracked=False):
|
||||
# result.stdout=None, and propagate AttributeError out of the helper.
|
||||
# Same fix shape as diffstate._list_untracked. See #2056.
|
||||
def _run(env):
|
||||
# core.quotePath=false comes from GIT_CMD globally (see definition).
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "-c", "core.quotePath=false", "diff", "--name-only", "-z", base],
|
||||
[*GIT_CMD, "diff", "--name-only", "-z", base],
|
||||
cwd=cwd, capture_output=True, timeout=30,
|
||||
env=env,
|
||||
)
|
||||
@@ -393,9 +402,9 @@ def _git_status_porcelain(cwd):
|
||||
# sibling helpers — a non-ASCII path in the worktree would otherwise
|
||||
# crash the cp1252 reader thread on Windows. See #2056.
|
||||
try:
|
||||
# core.quotePath=false comes from GIT_CMD globally (see definition).
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "-c", "core.quotePath=false", "status",
|
||||
"--porcelain=v1", "-uall", "-z"],
|
||||
[*GIT_CMD, "status", "--porcelain=v1", "-uall", "-z"],
|
||||
cwd=cwd, capture_output=True, timeout=30,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
@@ -471,11 +480,8 @@ def get_git_diff(cwd, baseline_sha, full_context=False, paths=None, untracked_pa
|
||||
# change exists to fix.
|
||||
return ""
|
||||
|
||||
# core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...` headers
|
||||
# so non-ASCII paths aren't C-quoted past the downstream parse_diff_into_files
|
||||
# regex. See #2082 (sibling of #2056 / #2075).
|
||||
cmd = [*GIT_CMD, "-c", "core.quotePath=false",
|
||||
"diff", "--no-color", "--no-ext-diff", baseline_sha] + (["--unified=99999"] if full_context else []) + pathspec
|
||||
# core.quotePath=false comes from GIT_CMD globally (see definition).
|
||||
cmd = [*GIT_CMD, "diff", "--no-color", "--no-ext-diff", baseline_sha] + (["--unified=99999"] if full_context else []) + pathspec
|
||||
try:
|
||||
with _temp_index(cwd, untracked_paths) as env:
|
||||
# env is None when no index could be found (bare repo / not a
|
||||
|
||||
@@ -27,7 +27,7 @@ from typing import Optional, Tuple, Dict, Any, List
|
||||
|
||||
import extensibility
|
||||
import review_api
|
||||
from _base import debug_log, _record_usage, _PV, PROVENANCE_TAG, state_dir as _resolve_state_dir # noqa: F401
|
||||
from _base import debug_log, _record_usage, _record_http_error, _PV, PROVENANCE_TAG, state_dir as _resolve_state_dir # noqa: F401
|
||||
from session_state import with_locked_state
|
||||
|
||||
|
||||
@@ -368,6 +368,7 @@ def _call_claude_via_sdk(prompt, output_schema, *, max_tokens=16000, model=None)
|
||||
except Exception as e:
|
||||
debug_log(f"3P sdk-single-turn: SDK unavailable ({e})")
|
||||
_last_call_claude_http_error = -1
|
||||
_record_http_error(-1)
|
||||
return None
|
||||
|
||||
cli_path = os.environ.get("SG_AGENTIC_CLI_PATH") or None
|
||||
@@ -425,6 +426,7 @@ def _call_claude_via_sdk(prompt, output_schema, *, max_tokens=16000, model=None)
|
||||
except _asyncio.TimeoutError:
|
||||
debug_log("3P sdk-single-turn: timeout after 60s")
|
||||
_last_call_claude_http_error = -1
|
||||
_record_http_error(-1)
|
||||
return None
|
||||
except Exception as e:
|
||||
debug_log(f"3P sdk-single-turn: query failed ({e})")
|
||||
@@ -433,6 +435,7 @@ def _call_claude_via_sdk(prompt, output_schema, *, max_tokens=16000, model=None)
|
||||
for _l in _captured_stderr[:20]:
|
||||
debug_log(f" | {_l.rstrip()}")
|
||||
_last_call_claude_http_error = -1
|
||||
_record_http_error(-1)
|
||||
return None
|
||||
|
||||
|
||||
@@ -479,10 +482,21 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
|
||||
"max_tokens": max_tokens,
|
||||
"system": CLAUDE_CODE_SYSTEM_PROMPT,
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"output_format": {
|
||||
"type": "json_schema",
|
||||
"schema": output_schema
|
||||
}
|
||||
# API moved the structured-output schema from top-level `output_format`
|
||||
# to `output_config.format` per
|
||||
# https://platform.claude.com/docs/en/build-with-claude/structured-outputs.
|
||||
# The old form "continues to work for a transition period" for some
|
||||
# auth modes (API key + non-streaming), but is rejected with
|
||||
# `invalid_request_error: output_format: This field is deprecated.
|
||||
# Use 'output_config.format' instead.` for others (OAuth Bearer +
|
||||
# newer CLI versions hit it consistently — reporter saw 462 errors
|
||||
# in one day). See #2098.
|
||||
"output_config": {
|
||||
"format": {
|
||||
"type": "json_schema",
|
||||
"schema": output_schema,
|
||||
},
|
||||
},
|
||||
}
|
||||
if thinking_budget > 0:
|
||||
# Models trained on adaptive thinking (4.6+) reject the budget_tokens
|
||||
@@ -490,7 +504,10 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
|
||||
# models (4.5 and earlier, all 3.x) reject adaptive. Pick by model.
|
||||
if _model_supports_adaptive_thinking(payload["model"]):
|
||||
payload["thinking"] = {"type": "adaptive"}
|
||||
payload["output_config"] = {"effort": "high"}
|
||||
# Merge `effort` into the existing output_config dict (which
|
||||
# now carries the `format` schema) rather than reassigning —
|
||||
# otherwise the schema is silently overwritten. See #2098.
|
||||
payload["output_config"]["effort"] = "high"
|
||||
else:
|
||||
payload["thinking"] = {
|
||||
"type": "enabled",
|
||||
@@ -528,6 +545,7 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
|
||||
error_body = e.read().decode("utf-8") if e.fp else ""
|
||||
debug_log(f"API error: {e.code} - {error_body[:200]}")
|
||||
_last_call_claude_http_error = e.code
|
||||
_record_http_error(e.code)
|
||||
return None
|
||||
except (urllib.error.URLError, TimeoutError) as e:
|
||||
if attempt < 2:
|
||||
@@ -537,6 +555,7 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
|
||||
else:
|
||||
debug_log(f"Request failed after retries: {e}")
|
||||
_last_call_claude_http_error = -1
|
||||
_record_http_error(-1)
|
||||
return None
|
||||
|
||||
if not response_data:
|
||||
@@ -545,6 +564,7 @@ def _call_claude(prompt, output_schema, thinking_budget=10000, max_tokens=16000,
|
||||
# call uses the token; record the 401 so callers don't see error=None.
|
||||
if _last_call_claude_http_error is None:
|
||||
_last_call_claude_http_error = 401
|
||||
_record_http_error(401)
|
||||
return None
|
||||
|
||||
# Find the text block (skip thinking blocks)
|
||||
|
||||
@@ -1197,18 +1197,18 @@ def handle_commit_review_posttooluse(input_data):
|
||||
# core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...`
|
||||
# headers so non-ASCII paths aren't C-quoted past the downstream
|
||||
# parse_diff_into_files regex (sibling of #2056 / #2075). See #2082.
|
||||
# core.quotePath=false comes from GIT_CMD globally (see gitutil.py).
|
||||
if pre_amend_sha:
|
||||
# Delta review: pre-amend → post-amend. `git diff` (not show)
|
||||
# so the output is a pure unified diff with no commit header.
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "-c", "core.quotePath=false",
|
||||
"diff", "--no-color", "--no-ext-diff", pre_amend_sha, sha, "--"],
|
||||
[*GIT_CMD, "diff", "--no-color", "--no-ext-diff",
|
||||
pre_amend_sha, sha, "--"],
|
||||
cwd=repo_root, capture_output=True, timeout=15
|
||||
)
|
||||
else:
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "-c", "core.quotePath=false",
|
||||
"show", "-p", "--no-color", "--no-ext-diff", sha, "--"],
|
||||
[*GIT_CMD, "show", "-p", "--no-color", "--no-ext-diff", sha, "--"],
|
||||
cwd=repo_root, capture_output=True, timeout=15
|
||||
)
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
|
||||
|
||||
Reference in New Issue
Block a user