Compare commits

..

1 Commits

Author SHA1 Message Date
Boris Cherny
20ba9a34a5 feat: update dedupe backfill script to filter by issue number
Replace date-based filtering with issue number filtering to only process issues older than #4050. This provides more precise control over which issues are processed for duplicate detection backfill.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-18 09:54:40 -07:00
8 changed files with 195 additions and 107 deletions

View File

@@ -65,8 +65,8 @@ ENV PATH=$PATH:/usr/local/share/npm-global/bin
ENV SHELL=/bin/zsh
# Set the default editor and visual
ENV EDITOR=nano
ENV VISUAL=nano
ENV EDITOR nano
ENV VISUAL nano
# Default powerline10k theme
ARG ZSH_IN_DOCKER_VERSION=1.2.0

View File

@@ -16,7 +16,6 @@
"customizations": {
"vscode": {
"extensions": [
"anthropic.claude-code",
"dbaeumer.vscode-eslint",
"esbenp.prettier-vscode",
"eamodio.gitlens"
@@ -52,6 +51,5 @@
},
"workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=delegated",
"workspaceFolder": "/workspace",
"postStartCommand": "sudo /usr/local/bin/init-firewall.sh",
"waitFor": "postStartCommand"
"postCreateCommand": "sudo /usr/local/bin/init-firewall.sh"
}

View File

@@ -69,12 +69,9 @@ for domain in \
"api.anthropic.com" \
"sentry.io" \
"statsig.anthropic.com" \
"statsig.com" \
"marketplace.visualstudio.com" \
"vscode.blob.core.windows.net" \
"update.code.visualstudio.com"; do
"statsig.com"; do
echo "Resolving $domain..."
ips=$(dig +noall +answer A "$domain" | awk '$4 == "A" {print $5}')
ips=$(dig +short A "$domain")
if [ -z "$ips" ]; then
echo "ERROR: Failed to resolve $domain"
exit 1
@@ -116,9 +113,6 @@ iptables -A OUTPUT -m state --state ESTABLISHED,RELATED -j ACCEPT
# Then allow only specific outbound traffic to allowed domains
iptables -A OUTPUT -m set --match-set allowed-domains dst -j ACCEPT
# Explicitly REJECT all other outbound traffic for immediate feedback
iptables -A OUTPUT -j REJECT --reject-with icmp-admin-prohibited
echo "Firewall configuration complete"
echo "Verifying firewall rules..."
if curl --connect-timeout 5 https://example.com >/dev/null 2>&1; then

View File

@@ -1,40 +1,176 @@
name: Log Issue Events to Statsig
name: Log GitHub Issue Events
on:
issues:
types: [opened]
types: [opened, closed]
jobs:
log-to-statsig:
log-issue-created:
if: github.event.action == 'opened'
runs-on: ubuntu-latest
timeout-minutes: 5
permissions:
contents: read
issues: read
steps:
- name: Log issue creation to Statsig
env:
STATSIG_API_KEY: ${{ secrets.STATSIG_API_KEY }}
ISSUE_NUMBER: ${{ github.event.issue.number }}
REPO: ${{ github.repository }}
ISSUE_TITLE: ${{ github.event.issue.title }}
AUTHOR: ${{ github.event.issue.user.login }}
CREATED_AT: ${{ github.event.issue.created_at }}
run: |
# All values are now safely passed via environment variables
# No direct templating in the shell script to prevent injection attacks
ISSUE_NUMBER=${{ github.event.issue.number }}
REPO=${{ github.repository }}
ISSUE_TITLE=$(echo '${{ github.event.issue.title }}' | sed "s/'/'\\\\''/g")
AUTHOR="${{ github.event.issue.user.login }}"
CREATED_AT="${{ github.event.issue.created_at }}"
curl -X POST "https://events.statsigapi.net/v1/log_event" \
-H "Content-Type: application/json" \
-H "statsig-api-key: $STATSIG_API_KEY" \
-d '{
"events": [{
"eventName": "github_issue_created",
"metadata": {
"issue_number": "'"$ISSUE_NUMBER"'",
"repository": "'"$REPO"'",
"title": "'"$(echo "$ISSUE_TITLE" | sed "s/\"/\\\\\"/g")"'",
"author": "'"$AUTHOR"'",
"created_at": "'"$CREATED_AT"'"
if [ -z "$STATSIG_API_KEY" ]; then
echo "STATSIG_API_KEY not found, skipping Statsig logging"
exit 0
fi
# Prepare the event payload
EVENT_PAYLOAD=$(jq -n \
--arg issue_number "$ISSUE_NUMBER" \
--arg repo "$REPO" \
--arg title "$ISSUE_TITLE" \
--arg author "$AUTHOR" \
--arg created_at "$CREATED_AT" \
'{
events: [{
eventName: "github_issue_created",
value: 1,
metadata: {
repository: $repo,
issue_number: ($issue_number | tonumber),
issue_title: $title,
issue_author: $author,
created_at: $created_at
},
"time": '"$(date +%s)000"'
time: (now | floor | tostring)
}]
}'
}')
# Send to Statsig API
echo "Logging issue creation to Statsig for issue #${ISSUE_NUMBER}"
RESPONSE=$(curl -s -w "\n%{http_code}" -X POST https://events.statsigapi.net/v1/log_event \
-H "Content-Type: application/json" \
-H "STATSIG-API-KEY: ${STATSIG_API_KEY}" \
-d "$EVENT_PAYLOAD")
HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
BODY=$(echo "$RESPONSE" | head -n-1)
if [ "$HTTP_CODE" -eq 200 ] || [ "$HTTP_CODE" -eq 202 ]; then
echo "Successfully logged issue creation for issue #${ISSUE_NUMBER}"
else
echo "Failed to log issue creation for issue #${ISSUE_NUMBER}. HTTP ${HTTP_CODE}: ${BODY}"
fi
log-issue-closed:
if: github.event.action == 'closed'
runs-on: ubuntu-latest
timeout-minutes: 5
permissions:
contents: read
issues: read
steps:
- name: Log issue closure to Statsig
env:
STATSIG_API_KEY: ${{ secrets.STATSIG_API_KEY }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
ISSUE_NUMBER=${{ github.event.issue.number }}
REPO=${{ github.repository }}
ISSUE_TITLE=$(echo '${{ github.event.issue.title }}' | sed "s/'/'\\\\''/g")
CLOSED_BY="${{ github.event.issue.closed_by.login }}"
CLOSED_AT="${{ github.event.issue.closed_at }}"
STATE_REASON="${{ github.event.issue.state_reason }}"
if [ -z "$STATSIG_API_KEY" ]; then
echo "STATSIG_API_KEY not found, skipping Statsig logging"
exit 0
fi
# Get additional issue data via GitHub API
echo "Fetching additional issue data for #${ISSUE_NUMBER}"
ISSUE_DATA=$(curl -s -H "Authorization: token ${GITHUB_TOKEN}" \
-H "Accept: application/vnd.github.v3+json" \
"https://api.github.com/repos/${REPO}/issues/${ISSUE_NUMBER}")
COMMENTS_COUNT=$(echo "$ISSUE_DATA" | jq -r '.comments')
# Get reactions data
REACTIONS_DATA=$(curl -s -H "Authorization: token ${GITHUB_TOKEN}" \
-H "Accept: application/vnd.github.v3+json" \
"https://api.github.com/repos/${REPO}/issues/${ISSUE_NUMBER}/reactions")
REACTIONS_COUNT=$(echo "$REACTIONS_DATA" | jq '. | length')
# Check if issue was closed automatically (by checking if closed_by is a bot)
CLOSED_AUTOMATICALLY="false"
if [[ "$CLOSED_BY" == *"[bot]"* ]]; then
CLOSED_AUTOMATICALLY="true"
fi
# Check if closed as duplicate by state_reason
CLOSED_AS_DUPLICATE="false"
if [ "$STATE_REASON" = "duplicate" ]; then
CLOSED_AS_DUPLICATE="true"
fi
# Prepare the event payload
EVENT_PAYLOAD=$(jq -n \
--arg issue_number "$ISSUE_NUMBER" \
--arg repo "$REPO" \
--arg title "$ISSUE_TITLE" \
--arg closed_by "$CLOSED_BY" \
--arg closed_at "$CLOSED_AT" \
--arg state_reason "$STATE_REASON" \
--arg comments_count "$COMMENTS_COUNT" \
--arg reactions_count "$REACTIONS_COUNT" \
--arg closed_automatically "$CLOSED_AUTOMATICALLY" \
--arg closed_as_duplicate "$CLOSED_AS_DUPLICATE" \
'{
events: [{
eventName: "github_issue_closed",
value: 1,
metadata: {
repository: $repo,
issue_number: ($issue_number | tonumber),
issue_title: $title,
closed_by: $closed_by,
closed_at: $closed_at,
state_reason: $state_reason,
comments_count: ($comments_count | tonumber),
reactions_count: ($reactions_count | tonumber),
closed_automatically: ($closed_automatically | test("true")),
closed_as_duplicate: ($closed_as_duplicate | test("true"))
},
time: (now | floor | tostring)
}]
}')
# Send to Statsig API
echo "Logging issue closure to Statsig for issue #${ISSUE_NUMBER}"
RESPONSE=$(curl -s -w "\n%{http_code}" -X POST https://events.statsigapi.net/v1/log_event \
-H "Content-Type: application/json" \
-H "STATSIG-API-KEY: ${STATSIG_API_KEY}" \
-d "$EVENT_PAYLOAD")
HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
BODY=$(echo "$RESPONSE" | head -n-1)
if [ "$HTTP_CODE" -eq 200 ] || [ "$HTTP_CODE" -eq 202 ]; then
echo "Successfully logged issue closure for issue #${ISSUE_NUMBER}"
echo "Closed by: $CLOSED_BY"
echo "Comments: $COMMENTS_COUNT"
echo "Reactions: $REACTIONS_COUNT"
echo "Closed automatically: $CLOSED_AUTOMATICALLY"
echo "Closed as duplicate: $CLOSED_AS_DUPLICATE"
else
echo "Failed to log issue closure for issue #${ISSUE_NUMBER}. HTTP ${HTTP_CODE}: ${BODY}"
fi

View File

@@ -1,52 +1,5 @@
# Changelog
## 1.0.93
- Windows: Add alt + v shortcut for pasting images from clipboard
## 1.0.90
- Settings file changes take effect immediately - no restart required
## 1.0.88
- Fixed issue causing "OAuth authentication is currently not supported"
- Status line input now includes `exceeds_200k_tokens`
- Fixed incorrect usage tracking in /cost.
- Introduced `ANTHROPIC_DEFAULT_SONNET_MODEL` and `ANTHROPIC_DEFAULT_OPUS_MODEL` for controlling model aliases opusplan, opus, and sonnet.
- Bedrock: Updated default Sonnet model to Sonnet 4
## 1.0.86
- Added /context to help users self-serve debug context issues
- SDK: Added UUID support for all SDK messages
- SDK: Added `--replay-user-messages` to replay user messages back to stdout
## 1.0.85
- Status line input now includes session cost info
- Hooks: Introduced SessionEnd hook
## 1.0.84
- Fix tool_use/tool_result id mismatch error when network is unstable
- Fix Claude sometimes ignoring real-time steering when wrapping up a task
- @-mention: Add ~/.claude/\* files to suggestions for easier agent, output style, and slash command editing
- Use built-in ripgrep by default; to opt out of this behavior, set USE_BUILTIN_RIPGREP=0
## 1.0.83
- @-mention: Support files with spaces in path
- New shimmering spinner
## 1.0.82
- SDK: Add request cancellation support
- SDK: New additionalDirectories option to search custom paths, improved slash command processing
- Settings: Validation prevents invalid fields in .claude/settings.json files
- MCP: Improve tool name consistency
- Bash: Fix crash when Claude tries to automatically read large files
## 1.0.81
- Released output styles, including new built-in educational output styles "Explanatory" and "Learning". Docs: https://docs.anthropic.com/en/docs/claude-code/output-styles

View File

@@ -24,10 +24,6 @@ npm install -g @anthropic-ai/claude-code
We welcome your feedback. Use the `/bug` command to report issues directly within Claude Code, or file a [GitHub issue](https://github.com/anthropics/claude-code/issues).
## Connect on Discord
Join the [Claude Developers Discord](https://anthropic.com/discord) to connect with other developers using Claude Code. Get help, share feedback, and discuss your projects with the community.
## Data collection, usage, and retention
When you use Claude Code, we collect feedback, which includes usage data (such as code acceptance or rejections), associated conversation data, and user feedback submitted via the `/bug` command.

View File

@@ -40,18 +40,14 @@ Write-Host "Using backend: $($Backend)"
# --- Prerequisite Check ---
Write-Host "Checking for required commands..."
try {
if (-not (Get-Command $Backend -ErrorAction SilentlyContinue)) {
throw "Required command '$($Backend)' not found."
}
Get-Command $Backend -ErrorAction Stop | Out-Null
Write-Host "- $($Backend) command found."
if (-not (Get-Command devcontainer -ErrorAction SilentlyContinue)) {
throw "Required command 'devcontainer' not found."
}
Get-Command devcontainer -ErrorAction Stop | Out-Null
Write-Host "- devcontainer command found."
}
catch {
Write-Error "A required command is not installed or not in your PATH. $($_.Exception.Message)"
Write-Error "Please ensure both '$Backend' and 'devcontainer' are installed and accessible in your system's PATH."
Write-Error "A required command is not installed or not in your PATH."
Write-Error "Please ensure '$($_.Exception.Message.Split(':')[0])' and 'devcontainer' are installed and accessible."
exit 1
}

View File

@@ -82,46 +82,61 @@ Usage:
Environment Variables:
GITHUB_TOKEN - GitHub personal access token with repo and actions permissions (required)
DRY_RUN - Set to "false" to actually trigger workflows (default: true for safety)
DAYS_BACK - How many days back to look for old issues (default: 90)`);
MAX_ISSUE_NUMBER - Only process issues with numbers less than this value (default: 4050)`);
}
console.log("[DEBUG] GitHub token found");
const owner = "anthropics";
const repo = "claude-code";
const dryRun = process.env.DRY_RUN !== "false";
const daysBack = parseInt(process.env.DAYS_BACK || "90", 10);
const maxIssueNumber = parseInt(process.env.MAX_ISSUE_NUMBER || "4050", 10);
const minIssueNumber = parseInt(process.env.MIN_ISSUE_NUMBER || "1", 10);
console.log(`[DEBUG] Repository: ${owner}/${repo}`);
console.log(`[DEBUG] Dry run mode: ${dryRun}`);
console.log(`[DEBUG] Looking back ${daysBack} days`);
console.log(`[DEBUG] Looking at issues between #${minIssueNumber} and #${maxIssueNumber}`);
const cutoffDate = new Date();
cutoffDate.setDate(cutoffDate.getDate() - daysBack);
console.log(`[DEBUG] Fetching issues created since ${cutoffDate.toISOString()}...`);
console.log(`[DEBUG] Fetching issues between #${minIssueNumber} and #${maxIssueNumber}...`);
const allIssues: GitHubIssue[] = [];
let page = 1;
const perPage = 100;
while (true) {
const pageIssues: GitHubIssue[] = await githubRequest(
`/repos/${owner}/${repo}/issues?state=all&per_page=${perPage}&page=${page}&since=${cutoffDate.toISOString()}`,
`/repos/${owner}/${repo}/issues?state=all&per_page=${perPage}&page=${page}&sort=created&direction=desc`,
token
);
if (pageIssues.length === 0) break;
allIssues.push(...pageIssues);
// Filter to only include issues within the specified range
const filteredIssues = pageIssues.filter(issue =>
issue.number >= minIssueNumber && issue.number < maxIssueNumber
);
allIssues.push(...filteredIssues);
// If the oldest issue in this page is still above our minimum, we need to continue
// but if the oldest issue is below our minimum, we can stop
const oldestIssueInPage = pageIssues[pageIssues.length - 1];
if (oldestIssueInPage && oldestIssueInPage.number >= maxIssueNumber) {
console.log(`[DEBUG] Oldest issue in page #${page} is #${oldestIssueInPage.number}, continuing...`);
} else if (oldestIssueInPage && oldestIssueInPage.number < minIssueNumber) {
console.log(`[DEBUG] Oldest issue in page #${page} is #${oldestIssueInPage.number}, below minimum, stopping`);
break;
} else if (filteredIssues.length === 0 && pageIssues.length > 0) {
console.log(`[DEBUG] No issues in page #${page} are in range #${minIssueNumber}-#${maxIssueNumber}, continuing...`);
}
page++;
// Safety limit to avoid infinite loops
if (page > 100) {
if (page > 200) {
console.log("[DEBUG] Reached page limit, stopping pagination");
break;
}
}
console.log(`[DEBUG] Found ${allIssues.length} issues from the last ${daysBack} days`);
console.log(`[DEBUG] Found ${allIssues.length} issues between #${minIssueNumber} and #${maxIssueNumber}`);
let processedCount = 0;
let candidateCount = 0;