mirror of
https://github.com/anthropics/claude-plugins-official.git
synced 2026-06-17 14:53:28 +00:00
Compare commits
6 Commits
fix-2071-m
...
fix-2082-d
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
38b298d5b2 | ||
|
|
68a700837c | ||
|
|
3d349d40b9 | ||
|
|
6a63e35e75 | ||
|
|
12a5376e20 | ||
|
|
04127de5d1 |
@@ -138,7 +138,17 @@ def restore_unreviewed_stop_state(session_id, paths, baseline_sha):
|
||||
|
||||
|
||||
def get_baseline_file_content(session_id, file_path, cwd):
|
||||
"""Get the content of a file at the baseline SHA. Returns None if unavailable."""
|
||||
"""Get the content of a file at the baseline SHA. Returns None if unavailable.
|
||||
|
||||
Decode the file content as UTF-8 with errors="replace" rather than using
|
||||
text=True: source files in user repos can be latin-1 / cp1252 / shift-jis
|
||||
/ etc., and on Windows text=True would decode via locale.getpreferredencoding()
|
||||
in strict mode and raise UnicodeDecodeError in the subprocess reader
|
||||
thread — leaving result.stdout=None and propagating AttributeError when
|
||||
the caller tries to use it. Same class as the existing migrations at
|
||||
security_reminder_hook.py:540 (reflog subjects) and :1115 (commit
|
||||
diffs); this helper was missed in that pass. See
|
||||
anthropics/claude-plugins-official#2056."""
|
||||
baseline_sha = load_baseline_sha(session_id)
|
||||
if not baseline_sha:
|
||||
return None
|
||||
@@ -151,12 +161,12 @@ def get_baseline_file_content(session_id, file_path, cwd):
|
||||
return None
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "show", f"{baseline_sha}:{rel_path}"],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=5
|
||||
cwd=cwd, capture_output=True, timeout=5
|
||||
)
|
||||
if result.returncode == 0:
|
||||
return result.stdout
|
||||
return (result.stdout or b"").decode("utf-8", errors="replace")
|
||||
return None
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
@@ -173,11 +183,16 @@ def capture_git_baseline(cwd):
|
||||
and `compute_v2_review_set` subtracts that set so pre-existing untracked
|
||||
files are not reviewed as Claude-authored.
|
||||
"""
|
||||
# stdout is a SHA so text=True is safe on stdout, but a non-ASCII
|
||||
# filename in `git stash create`'s STDERR warning (e.g. a worktree
|
||||
# with `Ávila_report.txt` triggers a quotePath/locale warning) would
|
||||
# trip the stderr reader thread on Windows cp1252. Decode both streams
|
||||
# leniently for symmetry with _list_untracked. See #2056.
|
||||
try:
|
||||
# Check if HEAD exists (i.e., repo has at least one commit)
|
||||
head_check = subprocess.run(
|
||||
[*GIT_CMD, "rev-parse", "HEAD"],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=5
|
||||
cwd=cwd, capture_output=True, timeout=5
|
||||
)
|
||||
if head_check.returncode != 0:
|
||||
# No commits yet — skip review rather than creating commits in the user's repo
|
||||
@@ -186,20 +201,20 @@ def capture_git_baseline(cwd):
|
||||
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "stash", "create"],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=15
|
||||
cwd=cwd, capture_output=True, timeout=15
|
||||
)
|
||||
sha = result.stdout.strip()
|
||||
sha = (result.stdout or b"").decode("utf-8", errors="replace").strip()
|
||||
if sha:
|
||||
return sha
|
||||
|
||||
# Working tree is clean — stash create returns empty. Use HEAD.
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "rev-parse", "HEAD"],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=5
|
||||
cwd=cwd, capture_output=True, timeout=5
|
||||
)
|
||||
sha = result.stdout.strip()
|
||||
sha = (result.stdout or b"").decode("utf-8", errors="replace").strip()
|
||||
return sha if sha else None
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError, ValueError) as e:
|
||||
debug_log(f"Failed to capture git baseline: {e}")
|
||||
return None
|
||||
|
||||
@@ -323,19 +338,35 @@ def _list_untracked(cwd):
|
||||
mtime is captured so an in-place edit during the turn is still reviewed.
|
||||
|
||||
Uses ls-files (not status) for the UPS path: the index diff isn't needed,
|
||||
and ls-files --others only walks the worktree against .gitignore."""
|
||||
and ls-files --others only walks the worktree against .gitignore.
|
||||
|
||||
Decodes stdout/stderr as UTF-8 with errors="replace" instead of using
|
||||
text=True. With core.quotePath=false git emits raw UTF-8 bytes for
|
||||
non-ASCII filenames; text=True decodes via locale.getpreferredencoding()
|
||||
in strict mode — on Windows that's cp1252 with several undefined bytes
|
||||
(0x81/0x8D/0x8F/0x90/0x9D), all of which appear in UTF-8 encodings of
|
||||
common accented capitals (Á Í Ï Ð Ý) and most CJK/emoji codepoints.
|
||||
A non-ASCII filename in the worktree crashed the subprocess reader
|
||||
thread, left r.stdout=None, and propagated AttributeError out of the
|
||||
helper — silently losing the baseline snapshot every UserPromptSubmit.
|
||||
See anthropics/claude-plugins-official#2056. The sibling helpers in
|
||||
gitutil.py already follow the lenient pattern; this function and
|
||||
capture_git_baseline / _git_name_only / _git_status_porcelain were
|
||||
the holdouts."""
|
||||
try:
|
||||
repo = _git_toplevel(cwd) or cwd
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "-c", "core.quotePath=false", "ls-files",
|
||||
"--others", "--exclude-standard", "-z"],
|
||||
cwd=repo, capture_output=True, text=True, timeout=15,
|
||||
cwd=repo, capture_output=True, timeout=15,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
debug_log(f"_list_untracked rc={r.returncode}: {r.stderr[:200]}")
|
||||
stderr_str = (r.stderr or b"").decode("utf-8", errors="replace")
|
||||
debug_log(f"_list_untracked rc={r.returncode}: {stderr_str[:200]}")
|
||||
return {}
|
||||
stdout = (r.stdout or b"").decode("utf-8", errors="replace")
|
||||
out = {}
|
||||
for p in r.stdout.split("\0"):
|
||||
for p in stdout.split("\0"):
|
||||
if not p:
|
||||
continue
|
||||
try:
|
||||
@@ -346,7 +377,9 @@ def _list_untracked(cwd):
|
||||
debug_log(f"_list_untracked: capped at {UNTRACKED_BASELINE_CAP}")
|
||||
break
|
||||
return out
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError, ValueError) as e:
|
||||
# ValueError guards against any future strict-decode regression
|
||||
# so the helper degrades to {} instead of crashing the hook.
|
||||
debug_log(f"_list_untracked error: {e}")
|
||||
return {}
|
||||
|
||||
|
||||
@@ -199,8 +199,15 @@ def _git_diff_range(repo_root, base, head="HEAD"):
|
||||
them reviewed — otherwise unreviewed commits get permanently silenced.
|
||||
"""
|
||||
try:
|
||||
# core.quotePath=false makes git emit raw UTF-8 in `diff --git a/... b/...`
|
||||
# headers instead of C-quoting non-ASCII path bytes (`"a/\303\201vila/..."`
|
||||
# vs `a/Ávila/...`). The downstream `re.match(r'^a/(.+?) b/(.+)$', ...)`
|
||||
# in parse_diff_into_files / extract_file_paths_from_diff matches the
|
||||
# raw form only — quoted headers slip past and the entire file is
|
||||
# silently dropped from review. See #2082 (sibling of #2056 / #2075).
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "diff", "-p", "--no-color", "--no-ext-diff", base, head],
|
||||
[*GIT_CMD, "-c", "core.quotePath=false",
|
||||
"diff", "-p", "--no-color", "--no-ext-diff", base, head],
|
||||
cwd=repo_root, capture_output=True, timeout=30,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
@@ -259,19 +266,29 @@ def _git_reflog_recent_commits(repo_root, max_age_s=120, max_n=5):
|
||||
# %gs (the reflog subject) is `commit: <commit-msg first line>` and can
|
||||
# contain `|`; put it LAST so split("|", 2) leaves it intact. %H is
|
||||
# hex and %ct is integer, so the first two fields are delimiter-safe.
|
||||
#
|
||||
# Bytes + decode utf-8/replace: %gs embeds commit-message subjects
|
||||
# which git stores as raw bytes — commits can be authored in
|
||||
# latin-1 / cp1252 / shift-jis etc., and text=True would raise
|
||||
# UnicodeDecodeError in the subprocess reader thread on Windows
|
||||
# cp1252 (subprocess.run returns r.stdout=None, then
|
||||
# r.stdout.splitlines() AttributeErrors). Mirrors the existing
|
||||
# migration at security_reminder_hook.py:540 — same pattern was
|
||||
# missed here. See anthropics/claude-plugins-official#2056.
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "log", "-g", "-n", str(max_n),
|
||||
"--format=%H|%ct|%gs", "HEAD"],
|
||||
cwd=repo_root, capture_output=True, text=True, timeout=5,
|
||||
cwd=repo_root, capture_output=True, timeout=5,
|
||||
)
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError, ValueError):
|
||||
return [], 0
|
||||
if r.returncode != 0:
|
||||
return [], 0
|
||||
stdout = (r.stdout or b"").decode("utf-8", errors="replace")
|
||||
import time as _time
|
||||
now = int(_time.time())
|
||||
fresh, stale = [], 0
|
||||
for idx, line in enumerate(r.stdout.splitlines()):
|
||||
for idx, line in enumerate(stdout.splitlines()):
|
||||
parts = line.split("|", 2)
|
||||
if len(parts) != 3:
|
||||
continue
|
||||
@@ -306,23 +323,31 @@ def _git_name_only(cwd, base, include_untracked=False):
|
||||
must distinguish None (error → don't trust as a filter) from set()
|
||||
(genuinely nothing changed). `-c core.quotePath=false -z` keeps non-ASCII
|
||||
and space-containing paths intact."""
|
||||
# Decode stdout/stderr as UTF-8 with errors="replace" instead of using
|
||||
# text=True. core.quotePath=false makes git emit raw UTF-8 for non-ASCII
|
||||
# paths, and text=True on Windows decodes via cp1252 strict — a non-ASCII
|
||||
# changed path would crash the subprocess reader thread, leave
|
||||
# result.stdout=None, and propagate AttributeError out of the helper.
|
||||
# Same fix shape as diffstate._list_untracked. See #2056.
|
||||
def _run(env):
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "-c", "core.quotePath=false", "diff", "--name-only", "-z", base],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=30,
|
||||
cwd=cwd, capture_output=True, timeout=30,
|
||||
env=env,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
debug_log(f"_git_name_only({base!r}) rc={result.returncode}: {result.stderr[:200]}")
|
||||
stderr_str = (result.stderr or b"").decode("utf-8", errors="replace")
|
||||
debug_log(f"_git_name_only({base!r}) rc={result.returncode}: {stderr_str[:200]}")
|
||||
return None
|
||||
return {p for p in result.stdout.split("\0") if p}
|
||||
stdout = (result.stdout or b"").decode("utf-8", errors="replace")
|
||||
return {p for p in stdout.split("\0") if p}
|
||||
|
||||
try:
|
||||
if not include_untracked:
|
||||
return _run(None)
|
||||
with _temp_index(cwd) as env:
|
||||
return _run(env)
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError, ValueError) as e:
|
||||
debug_log(f"_git_name_only({base!r}) error: {e}")
|
||||
return None
|
||||
|
||||
@@ -339,17 +364,22 @@ def _git_status_porcelain(cwd):
|
||||
collapses to `dir/`). Required so the untracked set subtracts cleanly
|
||||
against the UPS-time `_list_untracked` snapshot, which uses ls-files and
|
||||
therefore always lists individual files."""
|
||||
# Lenient decode: same UTF-8 + errors="replace" pattern as the
|
||||
# sibling helpers — a non-ASCII path in the worktree would otherwise
|
||||
# crash the cp1252 reader thread on Windows. See #2056.
|
||||
try:
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "-c", "core.quotePath=false", "status",
|
||||
"--porcelain=v1", "-uall", "-z"],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=30,
|
||||
cwd=cwd, capture_output=True, timeout=30,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
debug_log(f"_git_status_porcelain rc={r.returncode}: {r.stderr[:200]}")
|
||||
stderr_str = (r.stderr or b"").decode("utf-8", errors="replace")
|
||||
debug_log(f"_git_status_porcelain rc={r.returncode}: {stderr_str[:200]}")
|
||||
return None, None
|
||||
tracked, untracked = set(), set()
|
||||
entries = r.stdout.split("\0")
|
||||
stdout = (r.stdout or b"").decode("utf-8", errors="replace")
|
||||
entries = stdout.split("\0")
|
||||
i = 0
|
||||
while i < len(entries):
|
||||
e = entries[i]
|
||||
@@ -368,7 +398,9 @@ def _git_status_porcelain(cwd):
|
||||
i += 1
|
||||
i += 1
|
||||
return tracked, untracked
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError, ValueError) as e:
|
||||
# ValueError guards against any future strict-decode regression
|
||||
# so the helper degrades to (None, None) instead of crashing.
|
||||
debug_log(f"_git_status_porcelain error: {e}")
|
||||
return None, None
|
||||
|
||||
@@ -411,7 +443,11 @@ def get_git_diff(cwd, baseline_sha, full_context=False, paths=None, untracked_pa
|
||||
# change exists to fix.
|
||||
return ""
|
||||
|
||||
cmd = [*GIT_CMD, "diff", "--no-color", "--no-ext-diff", baseline_sha] + (["--unified=99999"] if full_context else []) + pathspec
|
||||
# core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...` headers
|
||||
# so non-ASCII paths aren't C-quoted past the downstream parse_diff_into_files
|
||||
# regex. See #2082 (sibling of #2056 / #2075).
|
||||
cmd = [*GIT_CMD, "-c", "core.quotePath=false",
|
||||
"diff", "--no-color", "--no-ext-diff", baseline_sha] + (["--unified=99999"] if full_context else []) + pathspec
|
||||
try:
|
||||
with _temp_index(cwd, untracked_paths) as env:
|
||||
# env is None when no index could be found (bare repo / not a
|
||||
|
||||
@@ -94,6 +94,9 @@ Only use exec() if you absolutely need shell features and the input is guarantee
|
||||
},
|
||||
{
|
||||
"ruleName": "new_function_injection",
|
||||
# JS-only construct: gate to JS/TS files so docs/.md and other prose
|
||||
# mentioning "new Function" don't trip the warning.
|
||||
"path_filter": lambda p: p.endswith(_JS_EXTS),
|
||||
"substrings": ["new Function"],
|
||||
"reminder": "\u26a0\ufe0f Security Warning: Using new Function() with string interpolation is a CODE INJECTION vulnerability. If any variable is concatenated or interpolated into the function body string, an attacker controlling that variable can execute arbitrary code. Use safe alternatives: for property access use obj[key] or array.reduce((o, k) => o[k], root); for computation use a safe expression parser. NEVER interpolate untrusted strings into new Function() bodies.",
|
||||
},
|
||||
@@ -107,16 +110,24 @@ Only use exec() if you absolutely need shell features and the input is guarantee
|
||||
},
|
||||
{
|
||||
"ruleName": "react_dangerously_set_html",
|
||||
# JS/TS-only (React); gate so .md docs / .py / .go files don't trip.
|
||||
"path_filter": lambda p: p.endswith(_JS_EXTS),
|
||||
"substrings": ["dangerouslySetInnerHTML"],
|
||||
"reminder": "⚠️ Security Warning: dangerouslySetInnerHTML can lead to XSS vulnerabilities if used with untrusted content. Ensure all content is properly sanitized using an HTML sanitizer library like DOMPurify, or use safe alternatives.",
|
||||
},
|
||||
{
|
||||
"ruleName": "document_write_xss",
|
||||
# Browser DOM API: only meaningful in JS/TS source.
|
||||
"path_filter": lambda p: p.endswith(_JS_EXTS),
|
||||
"substrings": ["document.write"],
|
||||
"reminder": "⚠️ Security Warning: document.write() can be exploited for XSS attacks and has performance issues. Use DOM manipulation methods like createElement() and appendChild() instead.",
|
||||
},
|
||||
{
|
||||
"ruleName": "innerHTML_xss",
|
||||
# Browser DOM API: only meaningful in JS/TS source. Closes FPs like
|
||||
# docs/example HTML, playground/self-contained skills that hardcode
|
||||
# innerHTML strings with zero user input (#410).
|
||||
"path_filter": lambda p: p.endswith(_JS_EXTS),
|
||||
"substrings": [".innerHTML =", ".innerHTML="],
|
||||
"reminder": "⚠️ Security Warning: Setting innerHTML with untrusted content can lead to XSS vulnerabilities. Use textContent for plain text or safe DOM methods for HTML content. If you need HTML support, consider using an HTML sanitizer library such as DOMPurify.",
|
||||
},
|
||||
@@ -217,11 +228,15 @@ Additionally, validate user inputs:
|
||||
},
|
||||
{
|
||||
"ruleName": "outerHTML_xss",
|
||||
# Browser DOM API: only meaningful in JS/TS source.
|
||||
"path_filter": lambda p: p.endswith(_JS_EXTS),
|
||||
"substrings": [".outerHTML =", ".outerHTML="],
|
||||
"reminder": "⚠️ Security Warning: Use textContent or sanitize with DOMPurify. outerHTML assignment is an XSS sink equivalent to innerHTML.",
|
||||
},
|
||||
{
|
||||
"ruleName": "insertAdjacentHTML_xss",
|
||||
# Browser DOM API: only meaningful in JS/TS source.
|
||||
"path_filter": lambda p: p.endswith(_JS_EXTS),
|
||||
"substrings": [".insertAdjacentHTML("],
|
||||
"reminder": "⚠️ Security Warning: Use insertAdjacentText() or sanitize with DOMPurify. insertAdjacentHTML is an XSS sink.",
|
||||
},
|
||||
|
||||
@@ -1118,16 +1118,21 @@ def handle_commit_review_posttooluse(input_data):
|
||||
resolved = 0
|
||||
for sha in shas:
|
||||
try:
|
||||
# core.quotePath=false: emit raw UTF-8 in `diff --git a/... b/...`
|
||||
# headers so non-ASCII paths aren't C-quoted past the downstream
|
||||
# parse_diff_into_files regex (sibling of #2056 / #2075). See #2082.
|
||||
if pre_amend_sha:
|
||||
# Delta review: pre-amend → post-amend. `git diff` (not show)
|
||||
# so the output is a pure unified diff with no commit header.
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "diff", "--no-color", "--no-ext-diff", pre_amend_sha, sha, "--"],
|
||||
[*GIT_CMD, "-c", "core.quotePath=false",
|
||||
"diff", "--no-color", "--no-ext-diff", pre_amend_sha, sha, "--"],
|
||||
cwd=repo_root, capture_output=True, timeout=15
|
||||
)
|
||||
else:
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "show", "-p", "--no-color", "--no-ext-diff", sha, "--"],
|
||||
[*GIT_CMD, "-c", "core.quotePath=false",
|
||||
"show", "-p", "--no-color", "--no-ext-diff", sha, "--"],
|
||||
cwd=repo_root, capture_output=True, timeout=15
|
||||
)
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
|
||||
|
||||
Reference in New Issue
Block a user