mirror of
https://github.com/anthropics/claude-plugins-official.git
synced 2026-06-10 02:03:34 +00:00
Compare commits
1 Commits
fix-xss-ru
...
fix-2056-w
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6a63e35e75 |
@@ -138,7 +138,17 @@ def restore_unreviewed_stop_state(session_id, paths, baseline_sha):
|
||||
|
||||
|
||||
def get_baseline_file_content(session_id, file_path, cwd):
|
||||
"""Get the content of a file at the baseline SHA. Returns None if unavailable."""
|
||||
"""Get the content of a file at the baseline SHA. Returns None if unavailable.
|
||||
|
||||
Decode the file content as UTF-8 with errors="replace" rather than using
|
||||
text=True: source files in user repos can be latin-1 / cp1252 / shift-jis
|
||||
/ etc., and on Windows text=True would decode via locale.getpreferredencoding()
|
||||
in strict mode and raise UnicodeDecodeError in the subprocess reader
|
||||
thread — leaving result.stdout=None and propagating AttributeError when
|
||||
the caller tries to use it. Same class as the existing migrations at
|
||||
security_reminder_hook.py:540 (reflog subjects) and :1115 (commit
|
||||
diffs); this helper was missed in that pass. See
|
||||
anthropics/claude-plugins-official#2056."""
|
||||
baseline_sha = load_baseline_sha(session_id)
|
||||
if not baseline_sha:
|
||||
return None
|
||||
@@ -151,12 +161,12 @@ def get_baseline_file_content(session_id, file_path, cwd):
|
||||
return None
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "show", f"{baseline_sha}:{rel_path}"],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=5
|
||||
cwd=cwd, capture_output=True, timeout=5
|
||||
)
|
||||
if result.returncode == 0:
|
||||
return result.stdout
|
||||
return (result.stdout or b"").decode("utf-8", errors="replace")
|
||||
return None
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
@@ -173,11 +183,16 @@ def capture_git_baseline(cwd):
|
||||
and `compute_v2_review_set` subtracts that set so pre-existing untracked
|
||||
files are not reviewed as Claude-authored.
|
||||
"""
|
||||
# stdout is a SHA so text=True is safe on stdout, but a non-ASCII
|
||||
# filename in `git stash create`'s STDERR warning (e.g. a worktree
|
||||
# with `Ávila_report.txt` triggers a quotePath/locale warning) would
|
||||
# trip the stderr reader thread on Windows cp1252. Decode both streams
|
||||
# leniently for symmetry with _list_untracked. See #2056.
|
||||
try:
|
||||
# Check if HEAD exists (i.e., repo has at least one commit)
|
||||
head_check = subprocess.run(
|
||||
[*GIT_CMD, "rev-parse", "HEAD"],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=5
|
||||
cwd=cwd, capture_output=True, timeout=5
|
||||
)
|
||||
if head_check.returncode != 0:
|
||||
# No commits yet — skip review rather than creating commits in the user's repo
|
||||
@@ -186,20 +201,20 @@ def capture_git_baseline(cwd):
|
||||
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "stash", "create"],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=15
|
||||
cwd=cwd, capture_output=True, timeout=15
|
||||
)
|
||||
sha = result.stdout.strip()
|
||||
sha = (result.stdout or b"").decode("utf-8", errors="replace").strip()
|
||||
if sha:
|
||||
return sha
|
||||
|
||||
# Working tree is clean — stash create returns empty. Use HEAD.
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "rev-parse", "HEAD"],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=5
|
||||
cwd=cwd, capture_output=True, timeout=5
|
||||
)
|
||||
sha = result.stdout.strip()
|
||||
sha = (result.stdout or b"").decode("utf-8", errors="replace").strip()
|
||||
return sha if sha else None
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError, ValueError) as e:
|
||||
debug_log(f"Failed to capture git baseline: {e}")
|
||||
return None
|
||||
|
||||
@@ -323,19 +338,35 @@ def _list_untracked(cwd):
|
||||
mtime is captured so an in-place edit during the turn is still reviewed.
|
||||
|
||||
Uses ls-files (not status) for the UPS path: the index diff isn't needed,
|
||||
and ls-files --others only walks the worktree against .gitignore."""
|
||||
and ls-files --others only walks the worktree against .gitignore.
|
||||
|
||||
Decodes stdout/stderr as UTF-8 with errors="replace" instead of using
|
||||
text=True. With core.quotePath=false git emits raw UTF-8 bytes for
|
||||
non-ASCII filenames; text=True decodes via locale.getpreferredencoding()
|
||||
in strict mode — on Windows that's cp1252 with several undefined bytes
|
||||
(0x81/0x8D/0x8F/0x90/0x9D), all of which appear in UTF-8 encodings of
|
||||
common accented capitals (Á Í Ï Ð Ý) and most CJK/emoji codepoints.
|
||||
A non-ASCII filename in the worktree crashed the subprocess reader
|
||||
thread, left r.stdout=None, and propagated AttributeError out of the
|
||||
helper — silently losing the baseline snapshot every UserPromptSubmit.
|
||||
See anthropics/claude-plugins-official#2056. The sibling helpers in
|
||||
gitutil.py already follow the lenient pattern; this function and
|
||||
capture_git_baseline / _git_name_only / _git_status_porcelain were
|
||||
the holdouts."""
|
||||
try:
|
||||
repo = _git_toplevel(cwd) or cwd
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "-c", "core.quotePath=false", "ls-files",
|
||||
"--others", "--exclude-standard", "-z"],
|
||||
cwd=repo, capture_output=True, text=True, timeout=15,
|
||||
cwd=repo, capture_output=True, timeout=15,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
debug_log(f"_list_untracked rc={r.returncode}: {r.stderr[:200]}")
|
||||
stderr_str = (r.stderr or b"").decode("utf-8", errors="replace")
|
||||
debug_log(f"_list_untracked rc={r.returncode}: {stderr_str[:200]}")
|
||||
return {}
|
||||
stdout = (r.stdout or b"").decode("utf-8", errors="replace")
|
||||
out = {}
|
||||
for p in r.stdout.split("\0"):
|
||||
for p in stdout.split("\0"):
|
||||
if not p:
|
||||
continue
|
||||
try:
|
||||
@@ -346,7 +377,9 @@ def _list_untracked(cwd):
|
||||
debug_log(f"_list_untracked: capped at {UNTRACKED_BASELINE_CAP}")
|
||||
break
|
||||
return out
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError, ValueError) as e:
|
||||
# ValueError guards against any future strict-decode regression
|
||||
# so the helper degrades to {} instead of crashing the hook.
|
||||
debug_log(f"_list_untracked error: {e}")
|
||||
return {}
|
||||
|
||||
|
||||
@@ -259,19 +259,29 @@ def _git_reflog_recent_commits(repo_root, max_age_s=120, max_n=5):
|
||||
# %gs (the reflog subject) is `commit: <commit-msg first line>` and can
|
||||
# contain `|`; put it LAST so split("|", 2) leaves it intact. %H is
|
||||
# hex and %ct is integer, so the first two fields are delimiter-safe.
|
||||
#
|
||||
# Bytes + decode utf-8/replace: %gs embeds commit-message subjects
|
||||
# which git stores as raw bytes — commits can be authored in
|
||||
# latin-1 / cp1252 / shift-jis etc., and text=True would raise
|
||||
# UnicodeDecodeError in the subprocess reader thread on Windows
|
||||
# cp1252 (subprocess.run returns r.stdout=None, then
|
||||
# r.stdout.splitlines() AttributeErrors). Mirrors the existing
|
||||
# migration at security_reminder_hook.py:540 — same pattern was
|
||||
# missed here. See anthropics/claude-plugins-official#2056.
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "log", "-g", "-n", str(max_n),
|
||||
"--format=%H|%ct|%gs", "HEAD"],
|
||||
cwd=repo_root, capture_output=True, text=True, timeout=5,
|
||||
cwd=repo_root, capture_output=True, timeout=5,
|
||||
)
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError, ValueError):
|
||||
return [], 0
|
||||
if r.returncode != 0:
|
||||
return [], 0
|
||||
stdout = (r.stdout or b"").decode("utf-8", errors="replace")
|
||||
import time as _time
|
||||
now = int(_time.time())
|
||||
fresh, stale = [], 0
|
||||
for idx, line in enumerate(r.stdout.splitlines()):
|
||||
for idx, line in enumerate(stdout.splitlines()):
|
||||
parts = line.split("|", 2)
|
||||
if len(parts) != 3:
|
||||
continue
|
||||
@@ -306,23 +316,31 @@ def _git_name_only(cwd, base, include_untracked=False):
|
||||
must distinguish None (error → don't trust as a filter) from set()
|
||||
(genuinely nothing changed). `-c core.quotePath=false -z` keeps non-ASCII
|
||||
and space-containing paths intact."""
|
||||
# Decode stdout/stderr as UTF-8 with errors="replace" instead of using
|
||||
# text=True. core.quotePath=false makes git emit raw UTF-8 for non-ASCII
|
||||
# paths, and text=True on Windows decodes via cp1252 strict — a non-ASCII
|
||||
# changed path would crash the subprocess reader thread, leave
|
||||
# result.stdout=None, and propagate AttributeError out of the helper.
|
||||
# Same fix shape as diffstate._list_untracked. See #2056.
|
||||
def _run(env):
|
||||
result = subprocess.run(
|
||||
[*GIT_CMD, "-c", "core.quotePath=false", "diff", "--name-only", "-z", base],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=30,
|
||||
cwd=cwd, capture_output=True, timeout=30,
|
||||
env=env,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
debug_log(f"_git_name_only({base!r}) rc={result.returncode}: {result.stderr[:200]}")
|
||||
stderr_str = (result.stderr or b"").decode("utf-8", errors="replace")
|
||||
debug_log(f"_git_name_only({base!r}) rc={result.returncode}: {stderr_str[:200]}")
|
||||
return None
|
||||
return {p for p in result.stdout.split("\0") if p}
|
||||
stdout = (result.stdout or b"").decode("utf-8", errors="replace")
|
||||
return {p for p in stdout.split("\0") if p}
|
||||
|
||||
try:
|
||||
if not include_untracked:
|
||||
return _run(None)
|
||||
with _temp_index(cwd) as env:
|
||||
return _run(env)
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError, ValueError) as e:
|
||||
debug_log(f"_git_name_only({base!r}) error: {e}")
|
||||
return None
|
||||
|
||||
@@ -339,17 +357,22 @@ def _git_status_porcelain(cwd):
|
||||
collapses to `dir/`). Required so the untracked set subtracts cleanly
|
||||
against the UPS-time `_list_untracked` snapshot, which uses ls-files and
|
||||
therefore always lists individual files."""
|
||||
# Lenient decode: same UTF-8 + errors="replace" pattern as the
|
||||
# sibling helpers — a non-ASCII path in the worktree would otherwise
|
||||
# crash the cp1252 reader thread on Windows. See #2056.
|
||||
try:
|
||||
r = subprocess.run(
|
||||
[*GIT_CMD, "-c", "core.quotePath=false", "status",
|
||||
"--porcelain=v1", "-uall", "-z"],
|
||||
cwd=cwd, capture_output=True, text=True, timeout=30,
|
||||
cwd=cwd, capture_output=True, timeout=30,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
debug_log(f"_git_status_porcelain rc={r.returncode}: {r.stderr[:200]}")
|
||||
stderr_str = (r.stderr or b"").decode("utf-8", errors="replace")
|
||||
debug_log(f"_git_status_porcelain rc={r.returncode}: {stderr_str[:200]}")
|
||||
return None, None
|
||||
tracked, untracked = set(), set()
|
||||
entries = r.stdout.split("\0")
|
||||
stdout = (r.stdout or b"").decode("utf-8", errors="replace")
|
||||
entries = stdout.split("\0")
|
||||
i = 0
|
||||
while i < len(entries):
|
||||
e = entries[i]
|
||||
@@ -368,7 +391,9 @@ def _git_status_porcelain(cwd):
|
||||
i += 1
|
||||
i += 1
|
||||
return tracked, untracked
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError, ValueError) as e:
|
||||
# ValueError guards against any future strict-decode regression
|
||||
# so the helper degrades to (None, None) instead of crashing.
|
||||
debug_log(f"_git_status_porcelain error: {e}")
|
||||
return None, None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user