mirror of
https://github.com/anthropics/claude-plugins-official.git
synced 2026-05-06 03:12:42 +00:00
Compare commits
29 Commits
add-42crun
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
06f52cd3ac | ||
|
|
574a879067 | ||
|
|
ac45fdae4b | ||
|
|
b392f51899 | ||
|
|
f0b80b185d | ||
|
|
8f221ca1e9 | ||
|
|
69fc2571cd | ||
|
|
fc98ea052c | ||
|
|
9c19d662fc | ||
|
|
920fdd0d32 | ||
|
|
5aba544c01 | ||
|
|
0742692199 | ||
|
|
e73e9a6257 | ||
|
|
ce721c1f1d | ||
|
|
5b643ee82a | ||
|
|
38b5056c44 | ||
|
|
18113ade5c | ||
|
|
99832739a1 | ||
|
|
c5837a2c23 | ||
|
|
f4b5494fb4 | ||
|
|
068a59e000 | ||
|
|
1c81b81299 | ||
|
|
7d42fe2132 | ||
|
|
71545a2994 | ||
|
|
458b2799c5 | ||
|
|
26973b887b | ||
|
|
6fc0a4b36a | ||
|
|
27cab8ee35 | ||
|
|
bdca23e8e4 |
@@ -8,14 +8,35 @@
|
||||
},
|
||||
"plugins": [
|
||||
{
|
||||
"name": "adlc",
|
||||
"description": "Agentforce Agent Development Life Cycle — author, discover, scaffold, deploy, test, and optimize .agent files",
|
||||
"category": "development",
|
||||
"source": {
|
||||
"source": "url",
|
||||
"url": "https://github.com/SalesforceAIResearch/agentforce-adlc.git"
|
||||
"name": "42crunch-api-security-testing",
|
||||
"description": "Automate API security directly in Claude Code with 42Crunch - automatically audit OpenAPI specs, detect vulnerabilities aligned with OWASP API Security risks (including BOLA/BFLA), and apply AI-powered fixes. Designed for AI-assisted development workflows, it provides continuous guardrails through an audit->scan->remediate->validate loop, ensuring APIs meet enterprise security standards before deployment.",
|
||||
"author": {
|
||||
"name": "42Crunch"
|
||||
},
|
||||
"homepage": "https://github.com/SalesforceAIResearch/agentforce-adlc"
|
||||
"category": "security",
|
||||
"source": {
|
||||
"source": "git-subdir",
|
||||
"url": "https://github.com/42Crunch-AI/claude-plugins.git",
|
||||
"path": "plugins/api-security-testing",
|
||||
"ref": "v1.0.1",
|
||||
"sha": "56273e0e20762d76640838300a7431c4260cad32"
|
||||
},
|
||||
"homepage": "https://42crunch.com"
|
||||
},
|
||||
{
|
||||
"name": "adobe-for-creativity",
|
||||
"description": "Harness Adobe's creative AI-powered tools to edit images, automate design workflows, and bring creative visions to life — from background removal to vectorization and professional retouching.",
|
||||
"author": {
|
||||
"name": "Adobe"
|
||||
},
|
||||
"category": "design",
|
||||
"source": {
|
||||
"source": "git-subdir",
|
||||
"url": "https://github.com/adobe/skills.git",
|
||||
"path": "plugins/creative-cloud/adobe-for-creativity",
|
||||
"ref": "main"
|
||||
},
|
||||
"homepage": "https://github.com/adobe/skills/tree/main/plugins/creative-cloud/adobe-for-creativity"
|
||||
},
|
||||
{
|
||||
"name": "adspirer-ads-agent",
|
||||
@@ -39,6 +60,16 @@
|
||||
"category": "development",
|
||||
"homepage": "https://github.com/anthropics/claude-plugins-public/tree/main/plugins/agent-sdk-dev"
|
||||
},
|
||||
{
|
||||
"name": "agentforce-adlc",
|
||||
"description": "Agentforce Agent Development Life Cycle — author, discover, scaffold, deploy, test, and optimize .agent files",
|
||||
"category": "development",
|
||||
"source": {
|
||||
"source": "url",
|
||||
"url": "https://github.com/SalesforceAIResearch/agentforce-adlc.git"
|
||||
},
|
||||
"homepage": "https://github.com/SalesforceAIResearch/agentforce-adlc"
|
||||
},
|
||||
{
|
||||
"name": "ai-firstify",
|
||||
"description": "AI-first project auditor and re-engineer based on the 9 design principles and 7 design patterns from the TechWolf AI-First Bootcamp",
|
||||
@@ -71,6 +102,20 @@
|
||||
},
|
||||
"homepage": "https://github.com/AikidoSec/aikido-claude-plugin"
|
||||
},
|
||||
{
|
||||
"name": "aiven",
|
||||
"description": "Easily deploy managed PostgreSQL, Kafka, OpenSearch, Clickhouse, and other databases, streaming, and apps through Aiven. Free tier available, up and running in minutes.",
|
||||
"author": {
|
||||
"name": "Aiven"
|
||||
},
|
||||
"category": "database",
|
||||
"source": {
|
||||
"source": "github",
|
||||
"repo": "aiven/aiven-ai-plugins",
|
||||
"commit": "d2a7697b53826588d0faf795f39d2aa2362330da"
|
||||
},
|
||||
"homepage": "https://aiven.io"
|
||||
},
|
||||
{
|
||||
"name": "alloydb",
|
||||
"description": "Create, connect, and interact with an AlloyDB for PostgreSQL database and data.",
|
||||
@@ -223,6 +268,22 @@
|
||||
},
|
||||
"homepage": "https://github.com/awslabs/agent-plugins"
|
||||
},
|
||||
{
|
||||
"name": "aws-dev-toolkit",
|
||||
"description": "AWS development toolkit — 34 skills, 11 agents, and 3 MCP servers for building, migrating, and performing architecture reviews on AWS.",
|
||||
"author": {
|
||||
"name": "aws-samples"
|
||||
},
|
||||
"category": "development",
|
||||
"source": {
|
||||
"source": "git-subdir",
|
||||
"url": "https://github.com/aws-samples/sample-claude-code-plugins-for-startups.git",
|
||||
"path": "plugins/aws-dev-toolkit",
|
||||
"ref": "main",
|
||||
"sha": "ddea7fdd605b42ed3900374815f358a2d4600db5"
|
||||
},
|
||||
"homepage": "https://github.com/aws-samples/sample-claude-code-plugins-for-startups"
|
||||
},
|
||||
{
|
||||
"name": "aws-serverless",
|
||||
"description": "Design, build, deploy, test, and debug serverless applications with AWS Serverless services.",
|
||||
@@ -435,6 +496,17 @@
|
||||
},
|
||||
"homepage": "https://github.com/cockroachdb/claude-plugin"
|
||||
},
|
||||
{
|
||||
"name": "code-modernization",
|
||||
"description": "Modernize legacy codebases (COBOL, legacy Java/C++, monolith web apps) with a structured assess / map / extract-rules / reimagine / transform / harden workflow and specialist review agents",
|
||||
"author": {
|
||||
"name": "Anthropic",
|
||||
"email": "support@anthropic.com"
|
||||
},
|
||||
"source": "./plugins/code-modernization",
|
||||
"category": "development",
|
||||
"homepage": "https://github.com/anthropics/claude-plugins-official/tree/main/plugins/code-modernization"
|
||||
},
|
||||
{
|
||||
"name": "code-review",
|
||||
"description": "Automated code review for pull requests using multiple specialized agents with confidence-based scoring to filter false positives",
|
||||
@@ -463,9 +535,9 @@
|
||||
"category": "productivity",
|
||||
"source": {
|
||||
"source": "url",
|
||||
"url": "https://github.com/coderabbitai/claude-plugin.git"
|
||||
"url": "https://github.com/coderabbitai/skills.git"
|
||||
},
|
||||
"homepage": "https://github.com/coderabbitai/claude-plugin.git"
|
||||
"homepage": "https://github.com/coderabbitai/skills"
|
||||
},
|
||||
{
|
||||
"name": "commit-commands",
|
||||
@@ -488,6 +560,19 @@
|
||||
"community-managed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "crowdstrike-falcon-foundry",
|
||||
"description": "CrowdStrike Falcon Foundry development skills for building cybersecurity applications on the Falcon platform. Includes UI development, collections, functions, workflows, API integration, security patterns, and debugging workflows.",
|
||||
"author": {
|
||||
"name": "CrowdStrike"
|
||||
},
|
||||
"category": "security",
|
||||
"source": {
|
||||
"source": "url",
|
||||
"url": "https://github.com/CrowdStrike/foundry-skills.git"
|
||||
},
|
||||
"homepage": "https://github.com/CrowdStrike/foundry-skills"
|
||||
},
|
||||
{
|
||||
"name": "csharp-lsp",
|
||||
"description": "C# language server for code intelligence",
|
||||
@@ -508,6 +593,18 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "cwc-makers",
|
||||
"description": "Onboard a Code-with-Claude Makers Cardputer with one /maker-setup command — clones the build-with-claude repo, flashes UIFlow firmware, and installs the Claude Buddy app bundle.",
|
||||
"version": "1.0.0",
|
||||
"author": {
|
||||
"name": "Anthropic",
|
||||
"email": "support@anthropic.com"
|
||||
},
|
||||
"source": "./plugins/cwc-makers",
|
||||
"category": "productivity",
|
||||
"homepage": "https://claude.com/cwc-makers"
|
||||
},
|
||||
{
|
||||
"name": "data",
|
||||
"description": "Data engineering for Apache Airflow and Astronomer. Author DAGs with best practices, debug pipeline failures, trace data lineage, profile tables, migrate Airflow 2 to 3, and manage local and cloud deployments.",
|
||||
@@ -567,6 +664,20 @@
|
||||
},
|
||||
"homepage": "https://www.datadoghq.com/"
|
||||
},
|
||||
{
|
||||
"name": "datarobot-agent-skills",
|
||||
"description": "DataRobot skills for AI/ML workflows — model training, deployment, predictions, feature engineering, monitoring, explainability, data preparation, App Framework CI/CD, and external agent monitoring.",
|
||||
"author": {
|
||||
"name": "DataRobot"
|
||||
},
|
||||
"category": "development",
|
||||
"source": {
|
||||
"source": "url",
|
||||
"url": "https://github.com/datarobot-oss/datarobot-agent-skills.git",
|
||||
"sha": "b3e8fd33d7c36592c802359026c15f3e067a0646"
|
||||
},
|
||||
"homepage": "https://datarobot.com"
|
||||
},
|
||||
{
|
||||
"name": "dataverse",
|
||||
"description": "Agent skills for building on, analyzing, and managing Microsoft Dataverse — with Dataverse MCP, PAC CLI, and Python SDK.",
|
||||
@@ -591,6 +702,22 @@
|
||||
},
|
||||
"homepage": "https://github.com/awslabs/agent-plugins"
|
||||
},
|
||||
{
|
||||
"name": "desktop-commander",
|
||||
"description": "MCP server for terminal commands, process management, and file operations across text, code, PDF, DOCX, Excel, images, and structured data.",
|
||||
"author": {
|
||||
"name": "Desktop Commander"
|
||||
},
|
||||
"category": "productivity",
|
||||
"source": {
|
||||
"source": "git-subdir",
|
||||
"url": "https://github.com/wonderwhy-er/DesktopCommanderMCP.git",
|
||||
"path": "plugins/claude",
|
||||
"ref": "main",
|
||||
"sha": "8c03d3392d1633923057f4492f2b5014e2c4a6bf"
|
||||
},
|
||||
"homepage": "https://desktopcommander.app"
|
||||
},
|
||||
{
|
||||
"name": "discord",
|
||||
"description": "Discord messaging bridge with built-in access control. Manage pairing, allowlists, and policy via /discord:access.",
|
||||
@@ -749,6 +876,20 @@
|
||||
"category": "development",
|
||||
"homepage": "https://github.com/anthropics/claude-plugins-public/tree/main/plugins/frontend-design"
|
||||
},
|
||||
{
|
||||
"name": "fullstory",
|
||||
"description": "Connect Claude to Fullstory to query behavioral analytics, session replays, and customer experience insights.",
|
||||
"author": {
|
||||
"name": "Fullstory"
|
||||
},
|
||||
"category": "monitoring",
|
||||
"source": {
|
||||
"source": "github",
|
||||
"repo": "fullstorydev/fullstory-skills",
|
||||
"commit": "1ec5865e7ab1449f9a0859d164c4b6a8c53b6e2f"
|
||||
},
|
||||
"homepage": "https://www.fullstory.com"
|
||||
},
|
||||
{
|
||||
"name": "github",
|
||||
"description": "Official GitHub MCP server for repository management. Create issues, manage pull requests, review code, search repositories, and interact with GitHub's full API directly from Claude Code.",
|
||||
@@ -871,6 +1012,21 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "jfrog",
|
||||
"description": "Use the JFrog Platform from Claude Code: Artifactory repos and artifacts, security findings and exposures, Catalog package safety and downloads, workflows across the SDLC, and platform administration.",
|
||||
"author": {
|
||||
"name": "JFrog Ltd.",
|
||||
"url": "https://jfrog.com"
|
||||
},
|
||||
"category": "security",
|
||||
"source": {
|
||||
"source": "github",
|
||||
"repo": "jfrog/claude-plugin",
|
||||
"commit": "761921eaa12b845beba1688d699a2d45091dfe83"
|
||||
},
|
||||
"homepage": "https://jfrog.com"
|
||||
},
|
||||
{
|
||||
"name": "kotlin-lsp",
|
||||
"description": "Kotlin language server for code intelligence",
|
||||
@@ -966,6 +1122,21 @@
|
||||
},
|
||||
"homepage": "https://github.com/Shopify/liquid-skills/tree/main/plugins/liquid-skills"
|
||||
},
|
||||
{
|
||||
"name": "logfire",
|
||||
"description": "Add Logfire observability to Python applications with auto-instrumentation for FastAPI, httpx, asyncpg, SQLAlchemy, and more",
|
||||
"author": {
|
||||
"name": "Pydantic"
|
||||
},
|
||||
"category": "monitoring",
|
||||
"source": {
|
||||
"source": "git-subdir",
|
||||
"url": "https://github.com/pydantic/skills.git",
|
||||
"path": "plugins/logfire",
|
||||
"ref": "main"
|
||||
},
|
||||
"homepage": "https://github.com/pydantic/skills/tree/main/plugins/logfire"
|
||||
},
|
||||
{
|
||||
"name": "lua-lsp",
|
||||
"description": "Lua language server for code intelligence",
|
||||
@@ -1149,6 +1320,22 @@
|
||||
},
|
||||
"homepage": "https://getoptimal.ai"
|
||||
},
|
||||
{
|
||||
"name": "oracle-ai-data-platform-workbench-spark-connectors",
|
||||
"description": "Oracle AI Data Platform Workbench Spark connectors for Claude Code. 18 connector skills covering every data source workbench customers commonly need: Oracle Autonomous DB family (ALH/ADW/ATP) via wallet/IAM-DB-Token/API-key, ExaCS, Fusion ERP REST, Fusion BICC, EPM Cloud Planning, Essbase 21c, OCI Streaming (Kafka), OCI Object Storage, Apache Iceberg, plus external systems (PostgreSQL, MySQL/HeatWave, SQL Server, Snowflake, Azure ADLS Gen2, AWS S3, generic REST, custom JDBC, Excel). Live-validated on the workbench `tpcds` cluster (Spark 3.5.0): 17 PASS / 4 ship-as-is out of 21 test rows.",
|
||||
"author": {
|
||||
"name": "Oracle"
|
||||
},
|
||||
"category": "development",
|
||||
"source": {
|
||||
"source": "git-subdir",
|
||||
"url": "https://github.com/oracle-samples/oracle-aidp-samples.git",
|
||||
"path": "ai/claude-code-plugins/oracle-ai-data-platform-workbench-spark-connectors",
|
||||
"ref": "main",
|
||||
"sha": "f436f3a40dfaedbef6a076ad3992b697ba5dcef6"
|
||||
},
|
||||
"homepage": "https://docs.oracle.com/en/cloud/paas/ai-data-platform/index.html"
|
||||
},
|
||||
{
|
||||
"name": "pagerduty",
|
||||
"description": "Enhance code quality and security through PagerDuty risk scoring and incident correlation. Score pre-commit diffs against historical incident data and surface deployment risk before you ship.",
|
||||
@@ -1368,6 +1555,21 @@
|
||||
},
|
||||
"homepage": "https://quarkus.io"
|
||||
},
|
||||
{
|
||||
"name": "rails-query",
|
||||
"description": "Run read-only database queries against a Ruby on Rails 8.2+ app's database via `rails query` — ActiveRecord or SQL, schema/model introspection, EXPLAIN, pagination, and remote execution via Kamal.",
|
||||
"author": {
|
||||
"name": "Lewis Buckley",
|
||||
"url": "https://github.com/lewispb"
|
||||
},
|
||||
"category": "development",
|
||||
"source": {
|
||||
"source": "github",
|
||||
"repo": "lewispb/rails-query-skill",
|
||||
"commit": "0f53fa861089e1f46097db9a92aea311f340c355"
|
||||
},
|
||||
"homepage": "https://github.com/lewispb/rails-query-skill"
|
||||
},
|
||||
{
|
||||
"name": "railway",
|
||||
"description": "Deploy and manage apps, databases, and infrastructure on Railway. Covers project setup, deploys, environment configuration, networking, troubleshooting, and monitoring.",
|
||||
@@ -1494,6 +1696,20 @@
|
||||
},
|
||||
"homepage": "https://www.sanity.io"
|
||||
},
|
||||
{
|
||||
"name": "sap-mdk-server",
|
||||
"description": "MCP server for SAP Mobile Development Kit (MDK). Build and modify MDK applications with AI assistance — schema lookups, action validation, rule editing, and project scaffolding.",
|
||||
"author": {
|
||||
"name": "SAP"
|
||||
},
|
||||
"category": "development",
|
||||
"source": {
|
||||
"source": "url",
|
||||
"url": "https://github.com/SAP/mdk-mcp-server.git",
|
||||
"sha": "af81fe6c2421c5748388c65241da6a1b319a2c8f"
|
||||
},
|
||||
"homepage": "https://help.sap.com/docs/MDK"
|
||||
},
|
||||
{
|
||||
"name": "searchfit-seo",
|
||||
"description": "Free AI-powered SEO toolkit — audit websites, plan content strategy, optimize pages, generate schema markup, cluster keywords, and track AI visibility. Works with any website or codebase.",
|
||||
@@ -1546,6 +1762,22 @@
|
||||
"community-managed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "servicenow-sdk",
|
||||
"description": "Create, edit, and deploy ServiceNow applications with the Fluent SDK effortlessly through Claude AI.",
|
||||
"author": {
|
||||
"name": "ServiceNow"
|
||||
},
|
||||
"category": "development",
|
||||
"source": {
|
||||
"source": "git-subdir",
|
||||
"url": "https://github.com/ServiceNow/sdk.git",
|
||||
"path": "providers/claude/plugin",
|
||||
"ref": "main",
|
||||
"sha": "06adf37ca78c270a57f93e7b9dfbb7bf16e24611"
|
||||
},
|
||||
"homepage": "https://servicenow.github.io/sdk/"
|
||||
},
|
||||
{
|
||||
"name": "session-report",
|
||||
"description": "Generate an explorable HTML report of Claude Code session usage — tokens, cache efficiency, subagents, skills, and the most expensive prompts — from local ~/.claude/projects transcripts.",
|
||||
@@ -1604,6 +1836,22 @@
|
||||
},
|
||||
"homepage": "https://github.com/slackapi/slack-mcp-plugin/tree/main"
|
||||
},
|
||||
{
|
||||
"name": "snowflake-cortex-code",
|
||||
"description": "Automatically route Snowflake prompts from Claude Code to Cortex Code for execution. Provides slash commands for code review and task delegation, plus skills for routing, run, and setup.",
|
||||
"author": {
|
||||
"name": "Snowflake"
|
||||
},
|
||||
"category": "development",
|
||||
"source": {
|
||||
"source": "git-subdir",
|
||||
"url": "https://github.com/Snowflake-Labs/snowflake-ai-kit.git",
|
||||
"path": "plugins/cortex-code",
|
||||
"ref": "main",
|
||||
"sha": "28192345cae4a758a909f5e510e24fea10666400"
|
||||
},
|
||||
"homepage": "https://docs.snowflake.com/en/user-guide/cortex-code"
|
||||
},
|
||||
{
|
||||
"name": "sonarqube",
|
||||
"description": "Automatically enforce SonarQube code quality and security in the agent coding loop — 7,000+ rules, secrets scanning, agentic analysis, and quality gates across 40+ languages. PostToolUse hooks run analysis after every file edit. Pre-tool secrets scanning prevents 450+ patterns from reaching the LLM. Slash commands give on-demand access to quality gate status, coverage, duplication, and dependency risks. Includes SonarQube CLI, MCP Server, skills, hooks, and slash commands.",
|
||||
@@ -1638,6 +1886,19 @@
|
||||
},
|
||||
"homepage": "https://sourcegraph.com"
|
||||
},
|
||||
{
|
||||
"name": "speakai",
|
||||
"description": "Search transcripts, summarize meetings, extract quotes, create clips, and manage Speak AI media through MCP.",
|
||||
"author": {
|
||||
"name": "Speak AI"
|
||||
},
|
||||
"category": "productivity",
|
||||
"source": {
|
||||
"source": "url",
|
||||
"url": "https://github.com/speakai/speakai-mcp.git"
|
||||
},
|
||||
"homepage": "https://mcp.speakai.co"
|
||||
},
|
||||
{
|
||||
"name": "spotify-ads-api",
|
||||
"description": "Manage Spotify ad campaigns with natural language. Create campaigns, ad sets, ads, pull reports, and handle OAuth — all through conversation.",
|
||||
@@ -1818,8 +2079,7 @@
|
||||
"category": "security",
|
||||
"source": {
|
||||
"source": "url",
|
||||
"url": "https://github.com/VantaInc/vanta-mcp-plugin.git",
|
||||
"sha": "46e5bebf0484f08fc4a3c4054437cf5ec06298c9"
|
||||
"url": "https://github.com/VantaInc/vanta-mcp-plugin.git"
|
||||
},
|
||||
"homepage": "https://help.vanta.com/en/articles/14094979-connecting-to-vanta-mcp#h_887ce3f337"
|
||||
},
|
||||
@@ -1892,6 +2152,20 @@
|
||||
},
|
||||
"homepage": "https://developer.wordpress.com/wordpress-com-claude-code-plugin/"
|
||||
},
|
||||
{
|
||||
"name": "youdotcom-agent-skills",
|
||||
"description": "You.com agent skills for web search, research with citations, and content extraction. Guided integrations for Vercel AI SDK, Claude Agent SDK, OpenAI Agents SDK, crewAI, LangChain, Microsoft Teams.ai, direct REST API, and bash CLI.",
|
||||
"author": {
|
||||
"name": "You.com"
|
||||
},
|
||||
"category": "productivity",
|
||||
"source": {
|
||||
"source": "url",
|
||||
"url": "https://github.com/youdotcom-oss/agent-skills.git",
|
||||
"sha": "362d510732362bd679e1647f72f734ca2d2fa710"
|
||||
},
|
||||
"homepage": "https://you.com"
|
||||
},
|
||||
{
|
||||
"name": "zapier",
|
||||
"description": "Connect 8,000+ apps to your AI workflow. Discover, enable, and execute Zapier actions directly from your client.",
|
||||
|
||||
15
.github/workflows/validate-frontmatter.yml
vendored
15
.github/workflows/validate-frontmatter.yml
vendored
@@ -9,6 +9,10 @@ on:
|
||||
|
||||
jobs:
|
||||
validate:
|
||||
# Fork PRs are auto-closed by close-external-prs.yml, so skip validation
|
||||
# for them entirely. This also prevents untrusted filenames from forks
|
||||
# from ever reaching the shell steps below.
|
||||
if: github.event.pull_request.head.repo.full_name == github.repository
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -20,16 +24,19 @@ jobs:
|
||||
|
||||
- name: Get changed frontmatter files
|
||||
id: changed
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
run: |
|
||||
# Use diff-filter=AMRC to exclude deleted files (D) - only Added, Modified, Renamed, Copied
|
||||
FILES=$(gh pr diff ${{ github.event.pull_request.number }} --name-only --diff-filter=AMRC | grep -E '(agents/.*\.md|skills/.*/SKILL\.md|commands/.*\.md)$' || true)
|
||||
FILES=$(gh pr diff "$PR_NUMBER" --name-only --diff-filter=AMRC | grep -E '(agents/.*\.md|skills/.*/SKILL\.md|commands/.*\.md)$' || true)
|
||||
echo "files<<EOF" >> "$GITHUB_OUTPUT"
|
||||
echo "$FILES" >> "$GITHUB_OUTPUT"
|
||||
echo "EOF" >> "$GITHUB_OUTPUT"
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
|
||||
- name: Validate frontmatter
|
||||
if: steps.changed.outputs.files != ''
|
||||
env:
|
||||
FILES: ${{ steps.changed.outputs.files }}
|
||||
run: |
|
||||
echo "${{ steps.changed.outputs.files }}" | xargs bun .github/scripts/validate-frontmatter.ts
|
||||
printf '%s\n' "$FILES" | xargs bun .github/scripts/validate-frontmatter.ts
|
||||
|
||||
8
plugins/code-modernization/.claude-plugin/plugin.json
Normal file
8
plugins/code-modernization/.claude-plugin/plugin.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"name": "code-modernization",
|
||||
"description": "Modernize legacy codebases (COBOL, legacy Java/C++, monolith web apps) with a structured assess → map → extract-rules → reimagine → transform → harden workflow and specialist review agents",
|
||||
"author": {
|
||||
"name": "Anthropic",
|
||||
"email": "support@anthropic.com"
|
||||
}
|
||||
}
|
||||
202
plugins/code-modernization/LICENSE
Normal file
202
plugins/code-modernization/LICENSE
Normal file
@@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
107
plugins/code-modernization/README.md
Normal file
107
plugins/code-modernization/README.md
Normal file
@@ -0,0 +1,107 @@
|
||||
# Code Modernization Plugin
|
||||
|
||||
A structured workflow and set of specialist agents for modernizing legacy codebases — COBOL, legacy Java/C++, monolith web apps — into current stacks while preserving behavior.
|
||||
|
||||
## Overview
|
||||
|
||||
Legacy modernization fails most often not because the target technology is wrong, but because teams skip steps: they transform code before understanding it, reimagine architecture before extracting business rules, or ship without a harness that would catch behavior drift. This plugin enforces a sequence:
|
||||
|
||||
```
|
||||
assess → map → extract-rules → reimagine → transform → harden
|
||||
```
|
||||
|
||||
Each step has a dedicated slash command. Specialist agents (legacy analyst, business rules extractor, architecture critic, security auditor, test engineer) are invoked from within those commands — or directly — to keep the work honest.
|
||||
|
||||
## Commands
|
||||
|
||||
The commands are designed to be run in order, but each produces a standalone artifact so you can stop, review, and resume.
|
||||
|
||||
### `/modernize-brief`
|
||||
Capture the modernization brief: what's being modernized, why now, constraints (regulatory, data, runtime), non-goals, and success criteria. Produces `analysis/brief.md`. Run this first.
|
||||
|
||||
### `/modernize-assess`
|
||||
Inventory the legacy codebase: languages, line counts, module boundaries, external integrations, build system, test coverage, known pain points. Produces `analysis/assessment.md`. Uses the `legacy-analyst` agent for deep reads on unfamiliar dialects.
|
||||
|
||||
### `/modernize-map`
|
||||
Map the legacy structure onto a target architecture: which legacy modules become which target services/packages, data-flow diagrams, migration sequencing. Produces `analysis/map.md`. Uses the `architecture-critic` agent to pressure-test the design.
|
||||
|
||||
### `/modernize-extract-rules`
|
||||
Extract business rules from the legacy code — the rules that are encoded in procedural logic, COBOL copybooks, stored procedures, or config files — into human-readable form with citations back to source. Produces `analysis/rules.md`. Uses the `business-rules-extractor` agent.
|
||||
|
||||
### `/modernize-reimagine`
|
||||
Propose the target design: APIs, data model, runtime. Explicitly list what changes from legacy and what stays identical. Produces `analysis/design.md`. Uses the `architecture-critic` agent to challenge over-engineering.
|
||||
|
||||
### `/modernize-transform`
|
||||
Do the actual code transformation — module by module. Writes to `modernized/`. Pairs each transformed module with a test suite that pins the pre-transform behavior.
|
||||
|
||||
### `/modernize-harden`
|
||||
Post-transform review pass: security audit, test coverage, error handling, observability. Uses `security-auditor` and `test-engineer` agents. Produces a findings report ranked Blocker / High / Medium / Nit.
|
||||
|
||||
## Agents
|
||||
|
||||
- **`legacy-analyst`** — Reads legacy code (COBOL, legacy Java/C++, procedural PHP, classic ASP) and produces structured summaries. Good at spotting implicit dependencies, copybook inheritance, and "JOBOL" patterns (procedural code wearing a modern syntax).
|
||||
- **`business-rules-extractor`** — Extracts business rules from procedural code with source citations. Each rule includes: what, where it's implemented, which conditions fire it, and any corner cases hidden in data.
|
||||
- **`architecture-critic`** — Adversarial reviewer for target architectures and transformed code. Default stance is skeptical: asks "do we actually need this?" Flags microservices-for-the-resume, ceremonial error handling, abstractions with one implementation.
|
||||
- **`security-auditor`** — Reviews transformed code for auth, input validation, secret handling, and dependency CVEs. Tuned for the kinds of issues that appear when translating security primitives across stacks (e.g., session handling from servlet to stateless JWT).
|
||||
- **`test-engineer`** — Audits test suites for behavior-pinning vs. coverage-theater. Flags tests that exercise code paths without asserting outcomes.
|
||||
|
||||
## Installation
|
||||
|
||||
```
|
||||
/plugin install code-modernization@claude-plugins-official
|
||||
```
|
||||
|
||||
## Recommended Workspace Setup
|
||||
|
||||
This plugin ships commands and agents, but modernization projects benefit from a workspace permission layout that enforces the "never touch legacy, freely edit modernized" rule. A starting-point `.claude/settings.json` for the project directory you're modernizing:
|
||||
|
||||
```json
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(git diff:*)",
|
||||
"Bash(git log:*)",
|
||||
"Bash(git status:*)",
|
||||
"Read(**)",
|
||||
"Write(analysis/**)",
|
||||
"Write(modernized/**)",
|
||||
"Edit(analysis/**)",
|
||||
"Edit(modernized/**)"
|
||||
],
|
||||
"deny": [
|
||||
"Edit(legacy/**)"
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Adjust `legacy/` and `modernized/` to match your actual layout. The key invariants: `Edit` under `legacy/` is denied, and writes are scoped to `analysis/` (for documents) and `modernized/` (for the new code).
|
||||
|
||||
## Typical Workflow
|
||||
|
||||
```bash
|
||||
# 1. Write the brief — what are we modernizing and why?
|
||||
/modernize-brief
|
||||
|
||||
# 2. Inventory the legacy code
|
||||
/modernize-assess
|
||||
|
||||
# 3. Extract business rules before touching the code
|
||||
/modernize-extract-rules
|
||||
|
||||
# 4. Map legacy structure to target
|
||||
/modernize-map
|
||||
|
||||
# 5. Propose the target design and review it
|
||||
/modernize-reimagine
|
||||
|
||||
# 6. Transform module by module
|
||||
/modernize-transform
|
||||
|
||||
# 7. Harden: security, tests, observability
|
||||
/modernize-harden
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
Apache 2.0. See `LICENSE`.
|
||||
36
plugins/code-modernization/agents/architecture-critic.md
Normal file
36
plugins/code-modernization/agents/architecture-critic.md
Normal file
@@ -0,0 +1,36 @@
|
||||
---
|
||||
name: architecture-critic
|
||||
description: Reviews proposed target architectures and transformed code against modern best practice. Adversarial — looks for over-engineering, missed requirements, and simpler alternatives.
|
||||
tools: Read, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are a principal engineer reviewing a modernization design or a freshly
|
||||
transformed module. Your default stance is **skeptical**. The team is excited
|
||||
about the new shiny; your job is to ask "do we actually need this?"
|
||||
|
||||
## Review lens
|
||||
|
||||
For **architecture proposals**:
|
||||
- Does every service boundary correspond to a real domain seam, or is this
|
||||
microservices-for-the-resume?
|
||||
- What's the simplest design that meets the stated requirements? How does
|
||||
the proposal compare?
|
||||
- Which non-functional requirements (latency, throughput, consistency) are
|
||||
unstated, and does the design accidentally violate them?
|
||||
- What's the data migration story? "We'll figure it out" is a finding.
|
||||
- What happens when service X is down? Trace one failure mode end-to-end.
|
||||
|
||||
For **transformed code**:
|
||||
- Is this idiomatic for the target stack, or is legacy structure leaking
|
||||
through? (Flag "JOBOL" — procedural Java with COBOL variable names.)
|
||||
- Is error handling meaningful or ceremonial?
|
||||
- Are there abstractions with exactly one implementation and no second use
|
||||
case in sight?
|
||||
- Does the test suite actually pin behavior, or just exercise code paths?
|
||||
- What would the on-call engineer need at 3am that isn't here?
|
||||
|
||||
## Output
|
||||
|
||||
Findings ranked **Blocker / High / Medium / Nit**. Each with: what, where,
|
||||
why it matters, and a concrete suggested change. End with one paragraph:
|
||||
"If I could only change one thing, it would be ___."
|
||||
@@ -0,0 +1,46 @@
|
||||
---
|
||||
name: business-rules-extractor
|
||||
description: Mines domain logic, calculations, validations, and policies from legacy code into testable Given/When/Then specifications. Use when you need to separate "what the business requires" from "how the old code happened to implement it."
|
||||
tools: Read, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are a business analyst who reads code. Your job is to find the **rules**
|
||||
hidden inside legacy systems — the calculations, thresholds, eligibility
|
||||
checks, and policies that define how the business actually operates — and
|
||||
express them in a form that survives the rewrite.
|
||||
|
||||
## What counts as a business rule
|
||||
|
||||
- **Calculations**: interest, fees, taxes, discounts, scores, aggregates
|
||||
- **Validations**: required fields, format checks, range limits, cross-field
|
||||
- **Eligibility / authorization**: who can do what, when, under which conditions
|
||||
- **State transitions**: status lifecycles, what triggers each transition
|
||||
- **Policies**: retention periods, retry limits, cutoff times, rounding rules
|
||||
|
||||
## What does NOT count
|
||||
|
||||
Infrastructure, logging, error handling, UI layout, technical retries,
|
||||
connection pooling. If a rule would be the same regardless of what language
|
||||
the system was written in, it's a business rule. If it only exists because
|
||||
of the technology, skip it.
|
||||
|
||||
## Extraction discipline
|
||||
|
||||
1. Find the rule in code. Record exact `file:line-line`.
|
||||
2. State it in plain English a non-engineer would recognize.
|
||||
3. Encode it as Given/When/Then with **concrete values**:
|
||||
```
|
||||
Given an account with balance $1,250.00 and APR 18.5%
|
||||
When the monthly interest batch runs
|
||||
Then the interest charged is $19.27 (balance × APR ÷ 12, rounded half-up to cents)
|
||||
```
|
||||
4. List the parameters (rates, limits, magic numbers) with their current
|
||||
hardcoded values — these often need to become configuration.
|
||||
5. Rate your confidence: **High** (logic is explicit), **Medium** (inferred
|
||||
from structure/names), **Low** (ambiguous; needs SME).
|
||||
6. If confidence < High, write the exact question an SME must answer.
|
||||
|
||||
## Output format
|
||||
|
||||
One "Rule Card" per rule (see the format in the modernize:extract-rules
|
||||
command). Group by category. Lead with a summary table.
|
||||
39
plugins/code-modernization/agents/legacy-analyst.md
Normal file
39
plugins/code-modernization/agents/legacy-analyst.md
Normal file
@@ -0,0 +1,39 @@
|
||||
---
|
||||
name: legacy-analyst
|
||||
description: Deep-reads legacy codebases (COBOL, Java, .NET, Node, anything) to build structural and behavioral understanding. Use for discovery, dependency mapping, dead-code detection, and "what does this system actually do" questions.
|
||||
tools: Read, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are a senior legacy systems analyst with 20 years of experience reading
|
||||
code nobody else wants to read — COBOL, JCL, RPG, classic ASP, EJB 2,
|
||||
Struts 1, raw servlets, Perl CGI.
|
||||
|
||||
Your job is **understanding, not judgment**. The code in front of you kept a
|
||||
business running for decades. Treat it with respect, figure out what it does,
|
||||
and explain it in terms a modern engineer can act on.
|
||||
|
||||
## How you work
|
||||
|
||||
- **Read before you grep.** Open the entry points (main programs, JCL jobs,
|
||||
controllers, routes) and trace the actual flow. Pattern-matching on names
|
||||
lies; control flow doesn't.
|
||||
- **Cite everything.** Every claim gets a `path/to/file:line` reference.
|
||||
If you can't point to a line, you don't know it — say so.
|
||||
- **Distinguish "is" from "appears to be."** When you're inferring intent
|
||||
from structure, flag it: "appears to handle X (inferred from variable
|
||||
names; no comments confirm)."
|
||||
- **Use the right vocabulary for the stack.** COBOL has paragraphs,
|
||||
copybooks, and FD entries. CICS has transactions and BMS maps. JCL has
|
||||
steps and DD statements. Java has packages and beans. Use the native
|
||||
terms so SMEs trust your output.
|
||||
- **Find the data first.** In legacy systems, the data structures (copybooks,
|
||||
DDL, schemas) are usually more stable and truthful than the procedural
|
||||
code. Map the data, then map who touches it.
|
||||
- **Note what's missing.** Unhandled error paths, TODO comments, commented-out
|
||||
blocks, magic numbers — these are signals about history and risk.
|
||||
|
||||
## Output format
|
||||
|
||||
Default to structured markdown: tables for inventories, Mermaid for graphs,
|
||||
bullet lists for findings. Always include a "Confidence & Gaps" footer
|
||||
listing what you couldn't determine and what you'd ask an SME.
|
||||
47
plugins/code-modernization/agents/security-auditor.md
Normal file
47
plugins/code-modernization/agents/security-auditor.md
Normal file
@@ -0,0 +1,47 @@
|
||||
---
|
||||
name: security-auditor
|
||||
description: Adversarial security reviewer — OWASP Top 10, CWE, dependency CVEs, secrets, injection. Use for security debt scanning and pre-modernization hardening.
|
||||
tools: Read, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are an application security engineer performing an adversarial review.
|
||||
Assume the code is hostile until proven otherwise. Your job is to find
|
||||
vulnerabilities a real attacker would find — and explain them in terms an
|
||||
engineer can fix.
|
||||
|
||||
## Coverage checklist
|
||||
|
||||
Work through systematically:
|
||||
- **Injection** (SQL, NoSQL, OS command, LDAP, XPath, template) — trace every
|
||||
user-controlled input to every sink
|
||||
- **Authentication / session** — hardcoded creds, weak session handling,
|
||||
missing auth checks on sensitive routes
|
||||
- **Sensitive data exposure** — secrets in source, weak crypto, PII in logs
|
||||
- **Access control** — IDOR, missing ownership checks, privilege escalation paths
|
||||
- **XSS / CSRF** — unescaped output, missing tokens
|
||||
- **Insecure deserialization** — pickle/yaml.load/ObjectInputStream on
|
||||
untrusted data
|
||||
- **Vulnerable dependencies** — run `npm audit` / `pip-audit` /
|
||||
read manifests and flag versions with known CVEs
|
||||
- **SSRF / path traversal / open redirect**
|
||||
- **Security misconfiguration** — debug mode, verbose errors, default creds
|
||||
|
||||
## Tooling
|
||||
|
||||
Use available SAST where it helps (npm audit, pip-audit, grep for known-bad
|
||||
patterns) but **read the code** — tools miss logic flaws. Show tool output
|
||||
verbatim, then add your manual findings.
|
||||
|
||||
## Reporting standard
|
||||
|
||||
For each finding:
|
||||
| Field | Content |
|
||||
|---|---|
|
||||
| **ID** | SEC-NNN |
|
||||
| **CWE** | CWE-XXX with name |
|
||||
| **Severity** | Critical / High / Medium / Low (CVSS-ish reasoning) |
|
||||
| **Location** | `file:line` |
|
||||
| **Exploit scenario** | One sentence: how an attacker uses this |
|
||||
| **Fix** | Concrete code-level remediation |
|
||||
|
||||
No hand-waving. If you can't write the exploit scenario, downgrade severity.
|
||||
36
plugins/code-modernization/agents/test-engineer.md
Normal file
36
plugins/code-modernization/agents/test-engineer.md
Normal file
@@ -0,0 +1,36 @@
|
||||
---
|
||||
name: test-engineer
|
||||
description: Writes characterization, contract, and equivalence tests that pin down legacy behavior so transformation can be proven correct. Use before any rewrite.
|
||||
tools: Read, Write, Edit, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are a test engineer specializing in **characterization testing** —
|
||||
writing tests that capture what legacy code *actually does* (not what
|
||||
someone thinks it should do) so that a rewrite can be proven equivalent.
|
||||
|
||||
## Principles
|
||||
|
||||
- **The legacy code is the oracle.** If the legacy computes 19.27 and the
|
||||
spec says 19.28, the test asserts 19.27 and you flag the discrepancy
|
||||
separately. We're proving equivalence first; fixing bugs is a separate
|
||||
decision.
|
||||
- **Concrete over abstract.** Every test has literal input values and literal
|
||||
expected outputs. No "should calculate correctly" — instead "given balance
|
||||
1250.00 and APR 18.5%, returns 19.27".
|
||||
- **Cover the edges the legacy covers.** Read the legacy code's branches.
|
||||
Every IF/EVALUATE/switch arm gets at least one test case. Boundary values
|
||||
(zero, negative, max, empty) get explicit cases.
|
||||
- **Tests must run against BOTH.** Structure tests so the same inputs can be
|
||||
fed to the legacy implementation (or a recorded trace of it) and the modern
|
||||
one. The test harness compares.
|
||||
- **Executable, not aspirational.** Tests compile and run from day one.
|
||||
Behaviors not yet implemented in the target are marked
|
||||
`@Disabled("pending RULE-NNN")` / `@pytest.mark.skip` / `it.todo()` — never
|
||||
deleted.
|
||||
|
||||
## Output
|
||||
|
||||
Idiomatic tests for the requested target stack (JUnit 5 / pytest / Vitest /
|
||||
xUnit), one test class/file per legacy module, test method names that read
|
||||
as specifications. Include a `README.md` in the test directory explaining
|
||||
how to run them and how to add a new case.
|
||||
142
plugins/code-modernization/commands/modernize-assess.md
Normal file
142
plugins/code-modernization/commands/modernize-assess.md
Normal file
@@ -0,0 +1,142 @@
|
||||
---
|
||||
description: Full discovery & portfolio analysis of a legacy system — inventory, complexity, debt, effort estimation
|
||||
argument-hint: <system-dir> | --portfolio <parent-dir>
|
||||
---
|
||||
|
||||
**Mode select.** If `$ARGUMENTS` starts with `--portfolio`, run **Portfolio
|
||||
mode** against the directory that follows. Otherwise run **Single-system
|
||||
mode** against `legacy/$1`.
|
||||
|
||||
---
|
||||
|
||||
# Portfolio mode (`--portfolio <parent-dir>`)
|
||||
|
||||
Sweep every immediate subdirectory of the parent dir and produce a
|
||||
heat-map a steering committee can use to sequence a multi-year program.
|
||||
|
||||
## Step P1 — Per-system metrics
|
||||
|
||||
For each subdirectory `<sys>`:
|
||||
|
||||
```bash
|
||||
cloc --quiet --csv <parent>/<sys> # LOC by language
|
||||
lizard -s cyclomatic_complexity <parent>/<sys> 2>/dev/null | tail -1
|
||||
```
|
||||
|
||||
Capture: total SLOC, dominant language, file count, mean & max
|
||||
cyclomatic complexity (CCN). For dependency freshness, locate the
|
||||
manifest (`package.json`, `pom.xml`, `*.csproj`, `requirements*.txt`,
|
||||
copybook dir) and note its age / pinned-version count.
|
||||
|
||||
## Step P2 — COCOMO-II effort
|
||||
|
||||
Compute person-months per system using COCOMO-II basic:
|
||||
`PM = 2.94 × (KSLOC)^1.10` (nominal scale factors). Show the formula and
|
||||
inputs so the figure is defensible, not a guess.
|
||||
|
||||
## Step P3 — Documentation coverage
|
||||
|
||||
For each system, count source files with vs without a header comment
|
||||
block, and list architecture docs present (`README`, `docs/`, ADRs).
|
||||
Report coverage % and the top undocumented subsystems.
|
||||
|
||||
## Step P4 — Render the heat-map
|
||||
|
||||
Write `analysis/portfolio.html` (dark `#1e1e1e` bg, `#d4d4d4` text,
|
||||
`#cc785c` accent, system-ui font, all CSS inline). One row per system;
|
||||
columns: **System · Lang · KSLOC · Files · Mean CCN · Max CCN · Dep
|
||||
Freshness · Doc Coverage % · COCOMO PM · Risk**. Color-grade the PM and
|
||||
Risk cells (green→amber→red). Below the table, a 2-3 sentence
|
||||
sequencing recommendation: which system first and why.
|
||||
|
||||
Then stop. Tell the user to open `analysis/portfolio.html`.
|
||||
|
||||
---
|
||||
|
||||
# Single-system mode
|
||||
|
||||
Perform a complete **modernization assessment** of `legacy/$1`.
|
||||
|
||||
This is the discovery phase — the goal is a fact-grounded executive brief that
|
||||
a VP of Engineering could take into a budget meeting. Work in this order:
|
||||
|
||||
## Step 1 — Quantitative inventory
|
||||
|
||||
Run and show the output of:
|
||||
```bash
|
||||
scc legacy/$1
|
||||
```
|
||||
Then run `scc --by-file -s complexity legacy/$1 | head -25` to identify the
|
||||
highest-complexity files. Capture the COCOMO effort/cost estimate scc provides.
|
||||
|
||||
## Step 2 — Technology fingerprint
|
||||
|
||||
Identify, with file evidence:
|
||||
- Languages, frameworks, and runtime versions in use
|
||||
- Build system and dependency manifest locations
|
||||
- Data stores (schemas, copybooks, DDL, ORM configs)
|
||||
- Integration points (queues, APIs, batch interfaces, screen maps)
|
||||
- Test presence and approximate coverage signal
|
||||
|
||||
## Step 3 — Parallel deep analysis
|
||||
|
||||
Spawn three subagents **concurrently** using the Task tool:
|
||||
|
||||
1. **legacy-analyst** — "Build a structural map of legacy/$1: what are the
|
||||
5-10 major functional domains, which source files belong to each, and how
|
||||
do they depend on each other? Return a markdown table + a Mermaid
|
||||
`graph TD` of domain-level dependencies. Cite file paths."
|
||||
|
||||
2. **legacy-analyst** — "Identify technical debt in legacy/$1: dead code,
|
||||
deprecated APIs, copy-paste duplication, god objects/programs, missing
|
||||
error handling, hardcoded config. Return the top 10 findings ranked by
|
||||
remediation value, each with file:line evidence."
|
||||
|
||||
3. **security-auditor** — "Scan legacy/$1 for security vulnerabilities:
|
||||
injection, auth weaknesses, hardcoded secrets, vulnerable dependencies,
|
||||
missing input validation. Return findings in CWE-tagged table form with
|
||||
file:line evidence and severity."
|
||||
|
||||
Wait for all three. Synthesize their findings.
|
||||
|
||||
## Step 4 — Production runtime overlay (observability)
|
||||
|
||||
If the system has batch jobs (e.g. JCL members under `app/jcl/`), call the
|
||||
`observability` MCP tool `get_batch_runtimes` for each business-relevant
|
||||
job name (interest, posting, statement, reporting). Use the returned
|
||||
p50/p95/p99 and 90-day series to:
|
||||
|
||||
- Tag each functional domain from Step 3 with its production wall-clock
|
||||
cost and **p99 variance** (p99/p50 ratio).
|
||||
- Flag the highest-variance domain as the highest operational risk —
|
||||
this is telemetry-grounded, not a static-analysis opinion.
|
||||
|
||||
Include a small **Batch Runtime** table (Job · Domain · p50 · p95 · p99 ·
|
||||
p99/p50) in the assessment.
|
||||
|
||||
## Step 5 — Documentation gap analysis
|
||||
|
||||
Compare what the code *does* against what README/docs/comments *say*. List
|
||||
the top 5 undocumented behaviors or subsystems that a new engineer would
|
||||
need explained.
|
||||
|
||||
## Step 6 — Write the assessment
|
||||
|
||||
Create `analysis/$1/ASSESSMENT.md` with these sections:
|
||||
- **Executive Summary** (3-4 sentences: what it is, how big, how risky, headline recommendation)
|
||||
- **System Inventory** (the scc table + tech fingerprint)
|
||||
- **Architecture-at-a-Glance** (the domain table; reference the diagram)
|
||||
- **Production Runtime Profile** (the batch-runtime table from Step 4, with the highest-variance domain called out)
|
||||
- **Technical Debt** (top 10, ranked)
|
||||
- **Security Findings** (CWE table)
|
||||
- **Documentation Gaps** (top 5)
|
||||
- **Effort Estimation** (COCOMO-derived person-months, ±range, key cost drivers)
|
||||
- **Recommended Modernization Pattern** (one of: Rehost / Replatform / Refactor / Rearchitect / Rebuild / Replace — with one-paragraph rationale)
|
||||
|
||||
Also create `analysis/$1/ARCHITECTURE.mmd` containing the Mermaid domain
|
||||
dependency diagram from the legacy-analyst.
|
||||
|
||||
## Step 7 — Present
|
||||
|
||||
Tell the user the assessment is ready and suggest:
|
||||
`glow -p analysis/$1/ASSESSMENT.md`
|
||||
60
plugins/code-modernization/commands/modernize-brief.md
Normal file
60
plugins/code-modernization/commands/modernize-brief.md
Normal file
@@ -0,0 +1,60 @@
|
||||
---
|
||||
description: Generate a phased Modernization Brief — the approved plan that transformation agents will execute against
|
||||
argument-hint: <system-dir> [target-stack]
|
||||
---
|
||||
|
||||
Synthesize everything in `analysis/$1/` into a **Modernization Brief** — the
|
||||
single document a steering committee approves and engineering executes.
|
||||
|
||||
Target stack: `$2` (if blank, recommend one based on the assessment findings).
|
||||
|
||||
Read `analysis/$1/ASSESSMENT.md`, `TOPOLOGY.md`, and `BUSINESS_RULES.md` first.
|
||||
If any are missing, say so and stop.
|
||||
|
||||
## The Brief
|
||||
|
||||
Write `analysis/$1/MODERNIZATION_BRIEF.md`:
|
||||
|
||||
### 1. Objective
|
||||
One paragraph: from what, to what, why now.
|
||||
|
||||
### 2. Target Architecture
|
||||
Mermaid C4 Container diagram of the *end state*. Name every service, data
|
||||
store, and integration. Below it, a table mapping legacy component → target
|
||||
component(s).
|
||||
|
||||
### 3. Phased Sequence
|
||||
Break the work into 3-6 phases using **strangler-fig ordering** — lowest-risk,
|
||||
fewest-dependencies first. For each phase:
|
||||
- Scope (which legacy modules, which target services)
|
||||
- Entry criteria (what must be true to start)
|
||||
- Exit criteria (what tests/metrics prove it's done)
|
||||
- Estimated effort (person-weeks, derived from COCOMO + complexity data)
|
||||
- Risk level + top 2 risks + mitigation
|
||||
|
||||
Render the phases as a Mermaid `gantt` chart.
|
||||
|
||||
### 4. Behavior Contract
|
||||
List the **P0 behaviors** from BUSINESS_RULES.md that MUST be proven
|
||||
equivalent before any phase ships. These become the regression suite.
|
||||
|
||||
### 5. Validation Strategy
|
||||
State which combination applies: characterization tests, contract tests,
|
||||
parallel-run / dual-execution diff, property-based tests, manual UAT.
|
||||
Justify per phase.
|
||||
|
||||
### 6. Open Questions
|
||||
Anything requiring human/SME decision before Phase 1 starts. Each as a
|
||||
checkbox the approver must tick.
|
||||
|
||||
### 7. Approval Block
|
||||
```
|
||||
Approved by: ________________ Date: __________
|
||||
Approval covers: Phase 1 only | Full plan
|
||||
```
|
||||
|
||||
## Present
|
||||
|
||||
Enter **plan mode** and present a summary of the brief. Do NOT proceed to any
|
||||
transformation until the user explicitly approves. This gate is the
|
||||
human-in-the-loop control point.
|
||||
@@ -0,0 +1,68 @@
|
||||
---
|
||||
description: Mine business logic from legacy code into testable, human-readable rule specifications
|
||||
argument-hint: <system-dir> [module-pattern]
|
||||
---
|
||||
|
||||
Extract the **business rules** embedded in `legacy/$1` into a structured,
|
||||
testable specification — the institutional knowledge that's currently locked
|
||||
in code and in the heads of engineers who are about to retire.
|
||||
|
||||
Scope: if a module pattern was given (`$2`), focus there; otherwise cover the
|
||||
entire system. Either way, prioritize calculation, validation, eligibility,
|
||||
and state-transition logic over plumbing.
|
||||
|
||||
## Method
|
||||
|
||||
Spawn **three business-rules-extractor subagents in parallel**, each assigned
|
||||
a different lens. If `$2` is non-empty, include "focusing on files matching
|
||||
$2" in each prompt.
|
||||
|
||||
1. **Calculations** — "Find every formula, rate, threshold, and computed value
|
||||
in legacy/$1. For each: what does it compute, what are the inputs, what is
|
||||
the exact formula/algorithm, where is it implemented (file:line), and what
|
||||
edge cases does the code handle?"
|
||||
|
||||
2. **Validations & eligibility** — "Find every business validation, eligibility
|
||||
check, and guard condition in legacy/$1. For each: what is being checked,
|
||||
what happens on pass/fail, where is it (file:line)?"
|
||||
|
||||
3. **State & lifecycle** — "Find every status field, state machine, and
|
||||
lifecycle transition in legacy/$1. For each entity: what states exist,
|
||||
what triggers transitions, what side-effects fire?"
|
||||
|
||||
## Synthesize
|
||||
|
||||
Merge the three result sets. Deduplicate. For each distinct rule, write a
|
||||
**Rule Card** in this exact format:
|
||||
|
||||
```
|
||||
### RULE-NNN: <plain-English name>
|
||||
**Category:** Calculation | Validation | Lifecycle | Policy
|
||||
**Source:** `path/to/file.ext:line-line`
|
||||
**Plain English:** One sentence a business analyst would recognize.
|
||||
**Specification:**
|
||||
Given <precondition>
|
||||
When <trigger>
|
||||
Then <outcome>
|
||||
[And <additional outcome>]
|
||||
**Parameters:** <constants, rates, thresholds with their current values>
|
||||
**Edge cases handled:** <list>
|
||||
**Confidence:** High | Medium | Low — <why>
|
||||
```
|
||||
|
||||
Write all rule cards to `analysis/$1/BUSINESS_RULES.md` with:
|
||||
- A summary table at top (ID, name, category, source, confidence)
|
||||
- Rule cards grouped by category
|
||||
- A final **"Rules requiring SME confirmation"** section listing every
|
||||
Medium/Low confidence rule with the specific question a human needs to answer
|
||||
|
||||
## Generate the DTO catalog
|
||||
|
||||
As a companion, create `analysis/$1/DATA_OBJECTS.md` cataloging the core
|
||||
data transfer objects / records / entities: name, fields with types, which
|
||||
rules consume/produce them, source location.
|
||||
|
||||
## Present
|
||||
|
||||
Report: total rules found, breakdown by category, count needing SME review.
|
||||
Suggest: `glow -p analysis/$1/BUSINESS_RULES.md`
|
||||
46
plugins/code-modernization/commands/modernize-harden.md
Normal file
46
plugins/code-modernization/commands/modernize-harden.md
Normal file
@@ -0,0 +1,46 @@
|
||||
---
|
||||
description: Security vulnerability scan + remediation — OWASP, CVE, secrets, injection
|
||||
argument-hint: <system-dir>
|
||||
---
|
||||
|
||||
Run a **security hardening pass** on `legacy/$1`: find vulnerabilities, rank
|
||||
them, and fix the critical ones.
|
||||
|
||||
## Scan
|
||||
|
||||
Spawn the **security-auditor** subagent:
|
||||
|
||||
"Adversarially audit legacy/$1 for security vulnerabilities. Cover:
|
||||
OWASP Top 10 (injection, broken auth, XSS, SSRF, etc.), hardcoded secrets,
|
||||
vulnerable dependency versions (check package manifests against known CVEs),
|
||||
missing input validation, insecure deserialization, path traversal.
|
||||
For each finding return: CWE ID, severity (Critical/High/Med/Low), file:line,
|
||||
one-sentence exploit scenario, and recommended fix. Also run any available
|
||||
SAST tooling (npm audit, pip-audit, OWASP dependency-check) and include
|
||||
its raw output."
|
||||
|
||||
## Triage
|
||||
|
||||
Write `analysis/$1/SECURITY_FINDINGS.md`:
|
||||
- Summary scorecard (count by severity, top CWE categories)
|
||||
- Findings table sorted by severity
|
||||
- Dependency CVE table (package, installed version, CVE, fixed version)
|
||||
|
||||
## Remediate
|
||||
|
||||
For each **Critical** and **High** finding, fix it directly in the source.
|
||||
Make minimal, targeted changes. After each fix, add a one-line entry under
|
||||
"Remediation Log" in SECURITY_FINDINGS.md: finding ID → commit-style summary
|
||||
of what changed.
|
||||
|
||||
Show the cumulative diff:
|
||||
```bash
|
||||
git -C legacy/$1 diff
|
||||
```
|
||||
|
||||
## Verify
|
||||
|
||||
Re-run the security-auditor against the patched code to confirm the
|
||||
Critical/High findings are resolved. Update the scorecard with before/after.
|
||||
|
||||
Suggest: `glow -p analysis/$1/SECURITY_FINDINGS.md`
|
||||
66
plugins/code-modernization/commands/modernize-map.md
Normal file
66
plugins/code-modernization/commands/modernize-map.md
Normal file
@@ -0,0 +1,66 @@
|
||||
---
|
||||
description: Dependency & topology mapping — call graphs, data lineage, batch flows, rendered as navigable diagrams
|
||||
argument-hint: <system-dir>
|
||||
---
|
||||
|
||||
Build a **dependency and topology map** of `legacy/$1` and render it visually.
|
||||
|
||||
The assessment gave us domains. Now go one level deeper: how do the *pieces*
|
||||
connect? This is the map an engineer needs before touching anything.
|
||||
|
||||
## What to produce
|
||||
|
||||
Write a one-off analysis script (Python or shell — your choice) that parses
|
||||
the source under `legacy/$1` and extracts:
|
||||
|
||||
- **Program/module call graph** — who calls whom (for COBOL: `CALL` statements
|
||||
and CICS `LINK`/`XCTL`; for Java: class-level imports/invocations; for Node:
|
||||
`require`/`import`)
|
||||
- **Data dependency graph** — which programs read/write which data stores
|
||||
(COBOL: copybooks + VSAM/DB2 in JCL DD statements; Java: JPA entities/tables;
|
||||
Node: model files)
|
||||
- **Entry points** — batch jobs, transaction IDs, HTTP routes, CLI commands
|
||||
- **Dead-end candidates** — modules with no inbound edges (potential dead code)
|
||||
|
||||
Save the script as `analysis/$1/extract_topology.py` (or `.sh`) so it can be
|
||||
re-run and audited. Run it. Show the raw output.
|
||||
|
||||
## Render
|
||||
|
||||
From the extracted data, generate **three Mermaid diagrams** and write them
|
||||
to `analysis/$1/TOPOLOGY.html` so the artifact pane renders them live.
|
||||
|
||||
The HTML page must use: dark `#1e1e1e` background, `#d4d4d4` text,
|
||||
`#cc785c` for `<h2>`/accents, `system-ui` font, all CSS **inline** (no
|
||||
external stylesheets). Each diagram goes in a
|
||||
`<pre class="mermaid">...</pre>` block — the artifact server loads
|
||||
mermaid.js and renders client-side. Do **not** wrap diagrams in
|
||||
markdown ` ``` ` fences inside the HTML.
|
||||
|
||||
1. **`graph TD` — Module call graph.** Cluster by domain (use `subgraph`).
|
||||
Highlight entry points in a distinct style. Cap at ~40 nodes — if larger,
|
||||
show domain-level with one expanded domain.
|
||||
|
||||
2. **`graph LR` — Data lineage.** Programs → data stores.
|
||||
Mark read vs write edges.
|
||||
|
||||
3. **`flowchart TD` — Critical path.** Trace ONE end-to-end business flow
|
||||
(e.g., "monthly billing run" or "process payment") through every program
|
||||
and data store it touches, in execution order. If the `observability`
|
||||
MCP server is connected, annotate each batch step with its p50/p99
|
||||
wall-clock from `get_batch_runtimes`.
|
||||
|
||||
Also export the three diagrams as standalone `.mmd` files for re-use:
|
||||
`analysis/$1/call-graph.mmd`, `analysis/$1/data-lineage.mmd`,
|
||||
`analysis/$1/critical-path.mmd`.
|
||||
|
||||
## Annotate
|
||||
|
||||
Below each `<pre class="mermaid">` block in TOPOLOGY.html, add a `<ul>`
|
||||
with 3-5 **architect observations**: tight coupling clusters, single
|
||||
points of failure, candidates for service extraction, data stores
|
||||
touched by too many writers.
|
||||
|
||||
## Present
|
||||
|
||||
Tell the user to open `analysis/$1/TOPOLOGY.html` in the artifact pane.
|
||||
82
plugins/code-modernization/commands/modernize-reimagine.md
Normal file
82
plugins/code-modernization/commands/modernize-reimagine.md
Normal file
@@ -0,0 +1,82 @@
|
||||
---
|
||||
description: Multi-agent greenfield rebuild — extract specs from legacy, design AI-native, scaffold & validate with HITL
|
||||
argument-hint: <system-dir> <target-vision>
|
||||
---
|
||||
|
||||
**Reimagine** `legacy/$1` as: $2
|
||||
|
||||
This is not a port — it's a rebuild from extracted intent. The legacy system
|
||||
becomes the *specification source*, not the structural template. This command
|
||||
orchestrates a multi-agent team with explicit human checkpoints.
|
||||
|
||||
## Phase A — Specification mining (parallel agents)
|
||||
|
||||
Spawn concurrently and show the user that all three are running:
|
||||
|
||||
1. **business-rules-extractor** — "Extract every business rule from legacy/$1
|
||||
into Given/When/Then form. Output to a structured list I can parse."
|
||||
|
||||
2. **legacy-analyst** — "Catalog every external interface of legacy/$1:
|
||||
inbound (screens, APIs, batch triggers, queues) and outbound (reports,
|
||||
files, downstream calls, DB writes). For each: name, direction, payload
|
||||
shape, frequency/SLA if discernible."
|
||||
|
||||
3. **legacy-analyst** — "Identify the core domain entities in legacy/$1 and
|
||||
their relationships. Return as an entity list + Mermaid erDiagram."
|
||||
|
||||
Collect results. Write `analysis/$1/AI_NATIVE_SPEC.md` containing:
|
||||
- **Capabilities** (what the system must do — derived from rules + interfaces)
|
||||
- **Domain Model** (entities + erDiagram)
|
||||
- **Interface Contracts** (each external interface as an OpenAPI fragment or
|
||||
AsyncAPI fragment)
|
||||
- **Non-functional requirements** inferred from legacy (batch windows, volumes)
|
||||
- **Behavior Contract** (the Given/When/Then rules — these are the acceptance tests)
|
||||
|
||||
## Phase B — HITL checkpoint #1
|
||||
|
||||
Present the spec summary. Ask the user **one focused question**: "Which of
|
||||
these capabilities are P0 for the reimagined system, and are there any we
|
||||
should deliberately drop?" Wait for the answer. Record it in the spec.
|
||||
|
||||
## Phase C — Architecture (single agent, then critique)
|
||||
|
||||
Design the target architecture for "$2":
|
||||
- Mermaid C4 Container diagram
|
||||
- Service boundaries with rationale (which rules/entities live where)
|
||||
- Technology choices with one-line justification each
|
||||
- Data migration approach from legacy stores
|
||||
|
||||
Then spawn **architecture-critic**: "Review this proposed architecture for
|
||||
$2 against the spec in analysis/$1/AI_NATIVE_SPEC.md. Identify over-engineering,
|
||||
missed requirements, scaling risks, and simpler alternatives." Incorporate
|
||||
the critique. Write the result to `analysis/$1/REIMAGINED_ARCHITECTURE.md`.
|
||||
|
||||
## Phase D — HITL checkpoint #2
|
||||
|
||||
Enter plan mode. Present the architecture. Wait for approval.
|
||||
|
||||
## Phase E — Parallel scaffolding
|
||||
|
||||
For each service in the approved architecture (cap at 3 for the demo), spawn
|
||||
a **general-purpose agent in parallel**:
|
||||
|
||||
"Scaffold the <service-name> service per analysis/$1/REIMAGINED_ARCHITECTURE.md
|
||||
and AI_NATIVE_SPEC.md. Create: project skeleton, domain model, API stubs
|
||||
matching the interface contracts, and **executable acceptance tests** for every
|
||||
behavior-contract rule assigned to this service (mark unimplemented ones as
|
||||
expected-failure/skip with the rule ID). Write to modernized/$1-reimagined/<service-name>/."
|
||||
|
||||
Show the agents' progress. When all complete, run the acceptance test suites
|
||||
and report: total tests, passing (scaffolded behavior), pending (rule IDs
|
||||
awaiting implementation).
|
||||
|
||||
## Phase F — Knowledge graph handoff
|
||||
|
||||
Write `modernized/$1-reimagined/CLAUDE.md` — the persistent context file for
|
||||
the new system, containing: architecture summary, service responsibilities,
|
||||
where the spec lives, how to run tests, and the legacy→modern traceability
|
||||
map. This file IS the knowledge graph that future agents and engineers will
|
||||
load.
|
||||
|
||||
Report: services scaffolded, acceptance tests defined, % behaviors with a
|
||||
home, location of all artifacts.
|
||||
78
plugins/code-modernization/commands/modernize-transform.md
Normal file
78
plugins/code-modernization/commands/modernize-transform.md
Normal file
@@ -0,0 +1,78 @@
|
||||
---
|
||||
description: Transform one legacy module to the target stack — idiomatic rewrite with behavior-equivalence tests
|
||||
argument-hint: <system-dir> <module> <target-stack>
|
||||
---
|
||||
|
||||
Transform `legacy/$1` module **`$2`** into **$3**, with proof of behavioral
|
||||
equivalence.
|
||||
|
||||
This is a surgical, single-module transformation — one vertical slice of the
|
||||
strangler fig. Output goes to `modernized/$1/$2/`.
|
||||
|
||||
## Step 0 — Plan (HITL gate)
|
||||
|
||||
Read the source module and any business rules in `analysis/$1/BUSINESS_RULES.md`
|
||||
that reference it. Then **enter plan mode** and present:
|
||||
- Which source files are in scope
|
||||
- The target module structure (packages/classes/files you'll create)
|
||||
- Which business rules / behaviors this module implements
|
||||
- How you'll prove equivalence (test strategy)
|
||||
- Anything ambiguous that needs a human decision NOW
|
||||
|
||||
Wait for approval before writing any code.
|
||||
|
||||
## Step 1 — Characterization tests FIRST
|
||||
|
||||
Before writing target code, spawn the **test-engineer** subagent:
|
||||
|
||||
"Write characterization tests for legacy/$1 module $2. Read the source,
|
||||
identify every observable behavior, and encode each as a test case with
|
||||
concrete input → expected output pairs derived from the legacy logic.
|
||||
Target framework: <appropriate for $3>. Write to
|
||||
`modernized/$1/$2/src/test/`. These tests define 'done' — the new code
|
||||
must pass all of them."
|
||||
|
||||
Show the user the test file. Get a 👍 before proceeding.
|
||||
|
||||
## Step 2 — Idiomatic transformation
|
||||
|
||||
Write the target implementation in `modernized/$1/$2/src/main/`.
|
||||
|
||||
**Critical:** Write code a senior $3 engineer would write from the
|
||||
*specification*, not from the legacy structure. Do NOT mirror COBOL paragraphs
|
||||
as methods, do NOT preserve legacy variable names like `WS-TEMP-AMT-X`.
|
||||
Use the target language's idioms: records/dataclasses, streams, dependency
|
||||
injection, proper error types, etc.
|
||||
|
||||
Include: domain model, service logic, API surface (REST controller or
|
||||
equivalent), and configuration. Add concise Javadoc/docstrings linking each
|
||||
class back to the rule IDs it implements.
|
||||
|
||||
## Step 3 — Prove it
|
||||
|
||||
Run the characterization tests:
|
||||
```bash
|
||||
cd modernized/$1/$2 && <appropriate test command for $3>
|
||||
```
|
||||
Show the output. If anything fails, fix and re-run until green.
|
||||
|
||||
## Step 4 — Side-by-side review
|
||||
|
||||
Generate `modernized/$1/$2/TRANSFORMATION_NOTES.md`:
|
||||
- Mapping table: legacy file:lines → target file:lines, per behavior
|
||||
- Deliberate deviations from legacy behavior (with rationale)
|
||||
- What was NOT migrated (dead code, unreachable branches) and why
|
||||
- Follow-ups for the next module that depends on this one
|
||||
|
||||
Then show a visual diff of one representative behavior, legacy vs modern:
|
||||
```bash
|
||||
delta --side-by-side <(sed -n '<lines>p' legacy/$1/<file>) modernized/$1/$2/src/main/<file>
|
||||
```
|
||||
|
||||
## Step 5 — Architecture review
|
||||
|
||||
Spawn the **architecture-critic** subagent to review the transformed code
|
||||
against $3 best practices. Apply any HIGH-severity feedback; list the rest
|
||||
in TRANSFORMATION_NOTES.md.
|
||||
|
||||
Report: tests passing, lines of legacy retired, location of artifacts.
|
||||
21
plugins/cwc-makers/.claude-plugin/plugin.json
Normal file
21
plugins/cwc-makers/.claude-plugin/plugin.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "cwc-makers",
|
||||
"version": "1.0.0",
|
||||
"description": "Seamless onboarding for the Code-with-Claude Makers Cardputer: one /maker-setup command clones the build-with-claude repo, flashes UIFlow firmware, and installs the Claude Buddy app bundle onto a freshly-plugged-in M5Stack Cardputer-Adv.",
|
||||
"author": {
|
||||
"name": "Anthropic",
|
||||
"email": "support@anthropic.com"
|
||||
},
|
||||
"homepage": "https://claude.com/cwc-makers",
|
||||
"repository": "https://github.com/moremas/build-with-claude",
|
||||
"license": "Apache-2.0",
|
||||
"keywords": [
|
||||
"cardputer",
|
||||
"m5stack",
|
||||
"esp32",
|
||||
"hardware",
|
||||
"maker",
|
||||
"onboarding",
|
||||
"cwc"
|
||||
]
|
||||
}
|
||||
202
plugins/cwc-makers/LICENSE
Normal file
202
plugins/cwc-makers/LICENSE
Normal file
@@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
38
plugins/cwc-makers/README.md
Normal file
38
plugins/cwc-makers/README.md
Normal file
@@ -0,0 +1,38 @@
|
||||
# cwc-makers
|
||||
|
||||
Seamless onboarding for the [Code-with-Claude Makers](https://claude.com/cwc-makers) Cardputer kit.
|
||||
|
||||
## What it does
|
||||
|
||||
Plug in your M5Stack Cardputer-Adv over USB-C, type `/maker-setup`, and Claude will:
|
||||
|
||||
1. Clone [`moremas/build-with-claude`](https://github.com/moremas/build-with-claude)
|
||||
2. Detect the device, flash UIFlow 2.0 firmware, and install the Claude Buddy + Hello + Snake app bundle
|
||||
3. Walk you through the one physical step (the download-mode button press on the back of the device)
|
||||
4. Hand you a working pocket computer that pairs with Claude Desktop over BLE
|
||||
|
||||
Then ask Claude to build whatever you want next — a magic 8-ball, a pixel pet, a weather ticker — and it'll write the MicroPython and push it to the device without re-flashing.
|
||||
|
||||
## Install
|
||||
|
||||
```
|
||||
/plugin install cwc-makers@claude-plugins-official
|
||||
```
|
||||
|
||||
## Components
|
||||
|
||||
| Path | Type | User-invocable | Purpose |
|
||||
|------|------|----------------|---------|
|
||||
| `commands/maker-setup.md` | slash command | ✅ `/maker-setup` | Entry point — clone repo + run full onboarding |
|
||||
| `skills/m5-onboard/` | skill | ✅ `/m5-onboard` | Full provisioning playbook (detect, flash, install, every gotcha) |
|
||||
| `skills/cardputer-buddy/` | skill | ✅ `/cardputer-buddy` | Iterate on apps after onboarding (push, tail, REPL) |
|
||||
|
||||
`/maker-setup` is the intended entry point; the skills are also auto-triggered by Claude when relevant. Skill content is vendored from the upstream repo so Claude has the domain knowledge in-context without symlinking anything into `~/.claude/skills/`.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Python 3.10+ on the host machine (git is optional — `/maker-setup` falls back to a curl+tar download if it's missing). The onboarding scripts auto-install `esptool` on first run; `pyserial` is vendored in the upstream repo.
|
||||
|
||||
## License
|
||||
|
||||
Apache-2.0. Skill content vendored from [`moremas/build-with-claude`](https://github.com/moremas/build-with-claude) (Apache-2.0).
|
||||
15
plugins/cwc-makers/commands/maker-setup.md
Normal file
15
plugins/cwc-makers/commands/maker-setup.md
Normal file
@@ -0,0 +1,15 @@
|
||||
---
|
||||
description: Onboard a Code-with-Claude Makers Cardputer — fetch the build-with-claude repo, flash firmware, and install the Claude Buddy apps.
|
||||
disable-model-invocation: true
|
||||
---
|
||||
|
||||
The user has a Cardputer-Adv from claude.com/cwc-makers plugged in over USB-C.
|
||||
|
||||
1. Get https://github.com/moremas/build-with-claude into a `build-with-claude/` directory under cwd:
|
||||
- If `git` is available: `git clone` (or `git pull` if it already exists).
|
||||
- If `git` is **not** available: don't install it. Download the GitHub tarball instead — `curl` and `tar` ship with macOS, Linux, and Windows 10+ out of the box:
|
||||
- macOS / Linux: `curl -L https://github.com/moremas/build-with-claude/archive/refs/heads/main.tar.gz | tar xz && mv build-with-claude-main build-with-claude`
|
||||
- Windows (PowerShell): `curl.exe -L -o bwc.zip https://github.com/moremas/build-with-claude/archive/refs/heads/main.zip; tar -xf bwc.zip; Rename-Item build-with-claude-main build-with-claude`
|
||||
- Re-running `/maker-setup` later just re-downloads (~500KB) — no update mechanism needed.
|
||||
2. Invoke the `m5-onboard` skill and follow it to run `onboard/scripts/onboard.py --apps buddy` from inside `build-with-claude/`, surfacing the download-mode button prompt to the user.
|
||||
3. When done, tell the user how to launch Claude Buddy and ask what they want to build next (see the `cardputer-buddy` skill for iterating).
|
||||
46
plugins/cwc-makers/skills/cardputer-buddy/SKILL.md
Normal file
46
plugins/cwc-makers/skills/cardputer-buddy/SKILL.md
Normal file
@@ -0,0 +1,46 @@
|
||||
---
|
||||
name: cardputer-buddy
|
||||
description: Iterate on the Cardputer-Adv MicroPython app bundle (Claude Buddy, Snake, Hello) after the device is already provisioned via m5-onboard. Use when the user wants to add a new app, push a single changed .py without re-flashing, watch device serial logs, or run a one-shot REPL command. Trigger on "add an app", "push to the cardputer", "tail the device", "run on the device", or follow-up work after /maker-setup.
|
||||
---
|
||||
|
||||
# Cardputer Buddy app bundle
|
||||
|
||||
The `buddy/` directory in the local `build-with-claude` clone is the MicroPython payload that `m5-onboard` installs onto `/flash/`. Work inside that clone.
|
||||
|
||||
## Device layout
|
||||
|
||||
```
|
||||
/flash/
|
||||
├── main.py launcher menu (replaces UIFlow's boot flow)
|
||||
├── buddy_*.py shared libs (BLE, UI, state, protocol, chars)
|
||||
├── burst_frames.py sprite frames
|
||||
└── apps/
|
||||
├── claude_buddy.py BLE client → Claude Desktop's Hardware Buddy
|
||||
├── hello_cardputer.py
|
||||
└── snake.py
|
||||
```
|
||||
|
||||
`main.py` scans `/flash/apps/` at boot and lists every `.py` as a menu entry. Drop a file into `buddy/device/apps/`, push it, and it appears on next boot.
|
||||
|
||||
## Adding an app
|
||||
|
||||
Crib from `buddy/device/apps/hello_cardputer.py` — smallest example of keyboard polling, font, and exit conventions. Then push without re-flashing:
|
||||
|
||||
```bash
|
||||
python3 onboard/scripts/install_apps.py --port <PORT> --src buddy
|
||||
```
|
||||
|
||||
`<PORT>` is whatever `detect.py` reported last run (e.g. `/dev/cu.usbmodem1101`, `/dev/ttyACM0`, `COM3`).
|
||||
|
||||
## Dev loop tooling (`buddy/scripts/`)
|
||||
|
||||
```bash
|
||||
# Push a subset of files over USB-serial
|
||||
python3 buddy/scripts/push.py --port <PORT> --files apps/snake.py
|
||||
|
||||
# Watch device logs
|
||||
python3 buddy/scripts/tail_serial.py --port <PORT>
|
||||
|
||||
# One-shot REPL exec
|
||||
python3 buddy/scripts/repl_run.py --port <PORT> --script "import os; print(os.listdir('/flash'))"
|
||||
```
|
||||
185
plugins/cwc-makers/skills/m5-onboard/SKILL.md
Normal file
185
plugins/cwc-makers/skills/m5-onboard/SKILL.md
Normal file
@@ -0,0 +1,185 @@
|
||||
---
|
||||
name: m5-onboard
|
||||
description: End-to-end onboarding for a freshly-plugged-in M5Stack ESP32 device (Cardputer, Cardputer-Adv, Core, CoreS3, Stick) — detect on USB, flash UIFlow 2.0 firmware, and install the Claude Buddy MicroPython app bundle. Use whenever the user plugs in or wants to flash/provision/reset an M5Stack or ESP32 board, or says "m5-onboard go".
|
||||
---
|
||||
|
||||
# M5Stack Onboarding
|
||||
|
||||
This skill automates the full cold-start workflow for an M5Stack ESP32 device: detect on USB, identify model, flash UIFlow 2.0, and push a MicroPython app bundle onto `/flash/` so the device boots into user software. The apps we ship (Claude Buddy, Snake, Hello) talk over BLE or USB. The workflow runs on macOS, Linux, and Windows; the skill was developed against an M5Stack Basic v2.6 (CH9102 bridge, ESP32-D0WDQ6-V3, 16 MB flash) and generalized to cover the rest of the Core family, with the Cardputer-Adv (ESP32-S3, native USB) as the current default target.
|
||||
|
||||
## Where the scripts live
|
||||
|
||||
This skill ships as part of the `cwc-makers` plugin for reference, but the executable scripts and the `buddy/` app bundle live in a local clone of https://github.com/moremas/build-with-claude (the `/maker-setup` command creates this clone). Run every `scripts/*.py` invocation below from inside that clone's `onboard/` directory so `--apps buddy` resolves to the sibling `buddy/device/` payload.
|
||||
|
||||
|
||||
## When to use
|
||||
|
||||
Use this when a user plugs in an M5Stack device and wants it provisioned. The decision tree:
|
||||
|
||||
- **Fresh/unknown device** → run `onboard.py --apps buddy` end-to-end (detect → identify → flash → install apps). This is the default path.
|
||||
- **Already-flashed device, user just wants apps installed/refreshed** → run `install_apps.py --src buddy` (or any `--src <path>` to a directory of `.py` files).
|
||||
- **Flashed device, something feels broken** → run `smoke_test.py` (I2C + LCD + speaker + button check).
|
||||
- **User wants to know what's on the bus / what the device can do** → `smoke_test.py`.
|
||||
|
||||
If multiple devices are plugged in, ask which port to target — don't guess. If the user is provisioning a device they previously worked with (e.g. "same thing as last time" or "another Buddy"), default to `--apps buddy` unless they say otherwise.
|
||||
|
||||
### Which variant to assume
|
||||
|
||||
The rig this skill lives on provisions **Cardputer-Adv** boards overwhelmingly, so `onboard.py` now defaults to `--variant cardputer-adv`. In practice that means:
|
||||
|
||||
- If the user says nothing about the model, go with the default. They're almost certainly holding a Cardputer-Adv.
|
||||
- If the user says "Cardputer" (no "Adv"), ask — the two models share a form factor but take different firmware images, and flashing the wrong one boot-loops the device.
|
||||
- If the user names any other board ("Core2", "CoreS3", "Basic", "Fire"), pass the matching `--variant` explicitly — the default won't apply.
|
||||
- The chip is ESP32-S3 either way, and `detect.py` won't be able to tell Cardputer from Cardputer-Adv before UIFlow is flashed (same native USB-JTAG VID, no pre-flash I2C probe). So this is a user-intent question, not a hardware-fingerprint one.
|
||||
|
||||
## The workflow
|
||||
|
||||
The main orchestrator is `scripts/onboard.py`. It drives the sub-scripts in order and handles the handoffs between them (waiting for reboots, capturing MAC, reporting progress). Prefer calling it directly over stitching the sub-scripts yourself unless the user asks for a partial run.
|
||||
|
||||
The default provisioning command (fresh Cardputer-Adv, install the buddy bundle):
|
||||
|
||||
```
|
||||
python3 scripts/onboard.py --apps buddy
|
||||
```
|
||||
|
||||
**How to invoke this from Claude Code's Bash tool.** Do NOT call `onboard.py` as a foreground Bash command. The Bash tool captures output and does not stream it back to the assistant until the command exits — and this command runs 2–3 minutes. That silence looks identical to a hang, and the assistant will usually give up before the button-dance prompt ever reaches the user. Instead, always run with `run_in_background: true`, `tee` to a log file, and then use the Monitor tool (or periodic `tail` via Read) to surface stage banners, heartbeats, and prompts to the user in real time. `2>&1` is not the fix — all progress already writes to stderr, which a terminal shows fine. The fix is streaming semantics, not redirection. The pattern that works:
|
||||
|
||||
```
|
||||
# Launch (background, tee log):
|
||||
python3 scripts/onboard.py --apps buddy 2>&1 | tee /tmp/m5-onboard.log
|
||||
|
||||
# Monitor (surfaces key events without drowning in byte-progress spam):
|
||||
tail -f /tmp/m5-onboard.log | grep -E --line-buffered \
|
||||
"^====|heartbeat|Heads up|Enter download mode|download mode!|rebooted into UIFlow|Manual reset|DONE|ERROR|Error|Traceback|FAIL|failed|No USB|not detected|Attempt [0-9]|Device already in download|Download mode port|Post-flash port|Waiting for device"
|
||||
```
|
||||
|
||||
### Relaying physical steps to the user (REQUIRED)
|
||||
|
||||
The flash stage **cannot proceed without a manual button press** on native-USB boards — there is no software path. When the monitored log shows `Enter download mode` (or the script appears to wait at the FLASH stage), you MUST stop and tell the user to do the following on the **back of the Cardputer**, in your own words, before continuing:
|
||||
|
||||
1. Press and **hold** the **G0** button
|
||||
2. While still holding G0, briefly press and release the **RST** button
|
||||
3. Keep holding G0 for about one more second, then release it
|
||||
4. The screen should go fully dark — that means download mode is active
|
||||
|
||||
If the device reboots into UIFlow instead of going dark, tell the user G0 was released too early and to try again holding it longer. Do not move on, retry the script, or attempt a software workaround until the user confirms the screen is dark — the flash will not start otherwise. The same applies to any later `Manual reset` prompt: relay the physical step and wait for the user.
|
||||
|
||||
Users running `onboard.py` directly in their own terminal (not via Claude Code) will see all output live — no changes needed there.
|
||||
|
||||
If `--port` is omitted, `detect.py` picks the most likely candidate across all three OSes: native-USB ESP32-S3 (`/dev/cu.usbmodem*` on macOS, `/dev/ttyACM*` on Linux, `COMx` on Windows), or a CH9102/CP210x UART bridge on older boards. Bluetooth-serial ports are filtered out. If multiple candidates are present, it asks.
|
||||
|
||||
The known apps name `buddy` resolves to the `buddy/device/` directory in this repo (custom launcher + Hello + Claude Buddy BLE client + Snake). Any other `--apps` value is treated as a filesystem path.
|
||||
|
||||
To skip re-flashing and just push (or refresh) the apps onto an already-provisioned device:
|
||||
|
||||
```
|
||||
python3 scripts/install_apps.py --port <PORT> --src buddy
|
||||
```
|
||||
|
||||
Where `<PORT>` is whatever `detect.py` printed on the last full run — for example `/dev/cu.usbmodem1101`, `/dev/ttyACM0`, or `COM3`.
|
||||
|
||||
### Stages
|
||||
|
||||
1. **Detect** (`detect.py`) — enumerate serial ports, filter to USB-UART bridges (CH9102 vendor `0x1A86`, Silabs CP210x `0x10C4`, FTDI `0x0403`) or the ESP32-S3 native USB-JTAG interface (`0x303A`). Probe with esptool to confirm the chip. Port names differ per OS (`/dev/cu.usbmodem*` on macOS, `/dev/ttyACM*`/`ttyUSB*` on Linux, `COMx` on Windows) but pyserial abstracts that.
|
||||
2. **Identify** (`detect.py`) — alongside port discovery, `detect.py` reads the factory-test partition signature and/or scans I2C once UIFlow is on, and cross-references `references/hardware_signatures.md` to suggest the right firmware variant (Basic-16MB, Core2, CoreS3, Cardputer-Adv, etc.). User-facing variant choice happens via `onboard.py --variant`; there is no separate `detect.py --identify` flag.
|
||||
3. **Fetch firmware** (`fetch_firmware.py`) — query the M5Burner manifest API and download the appropriate UIFlow 2.0 binary into the system temp dir. Cached between runs — safe to clear the cache anytime, it just re-downloads.
|
||||
4. **Flash** (`flash.py`) — `esptool write_flash 0x0 <image>` at **460800 baud** for UART bridges, `--no-stub` at 115200 baud for native-USB S3 devices. 921600 fails intermittently on the CH9102 bridge — do not increase it. Native-USB flash can intermittently throw `Lost connection, retrying` mid-erase; esptool recovers. The post-flash `watchdog-reset` teardown step can fail even when the flash itself succeeded — `flash.py` parses esptool's stdout, treats that specific failure pattern as non-fatal when `Hash of data verified` appeared, and `onboard.py` falls back to `flash.native_reset()` and then manual-RESET coaching if needed.
|
||||
5. **Install apps** (optional, `install_apps.py`) — paste-mode REPL upload of every `.py` from a source directory into `/flash/`, then reboot via `repl_reset` (DTR/RTS is a no-op on native USB — don't reach for it). Source layout: root `*.py` → `/flash/`, `apps/*.py` → `/flash/apps/` (UIFlow's stock launcher scans that). When the bundle ships a root `main.py`, `install_apps.py` also sets NVS `boot_option=2` so UIFlow's own launcher doesn't run and our `main.py` takes over the boot flow — critical for BLE-using apps on ESP32-S3 (see gotchas below).
|
||||
6. **Smoke test** (optional, `smoke_test.py`) — I2C scan, LCD test pattern, speaker beep, button read.
|
||||
|
||||
## Critical gotchas (baked into the scripts — do not second-guess)
|
||||
|
||||
These are things the scripts already handle correctly but which you should not override if the user asks you to "just run esptool manually" or similar:
|
||||
|
||||
- **Native-USB ESP32-S3 boards (Cardputer, Cardputer-Adv, CoreS3) require a physical BtnG0+BtnRST dance to enter download mode.** There is no software path. The chip has no DTR/RTS bridge, so nothing esptool or pyserial can do will put it into the ROM bootloader — the user has to hold GPIO0 low across a reset pulse with the hardware buttons. On Cardputer-Adv specifically both buttons (BtnG0 and BtnRST) are on the **back of the device** — small, flush-mounted, often easiest to press with a fingernail. `onboard.py:_wait_for_download_port` prompts for this at runtime during FLASH: *press and HOLD BtnG0, briefly press BtnRST, release BtnRST first, keep holding BtnG0 for ~1 more second, release BtnG0, screen should be fully dark.* If the device reboots back into UIFlow instead, BtnG0 was released too early — the coaching retries and tells the user to hold it longer. Do NOT try to automate this with `esptool --before default_reset` or pyserial's DTR/RTS; both are no-ops on native USB (the pins aren't wired to EN), and adding them just hides the real prompt.
|
||||
- **Do not unplug the device during FLASH.** Especially on native USB. A mid-flash disconnect leaves the internal flash in an inconsistent state. Mask ROM is usually reachable afterwards (press BtnG0 alone on the back, or do the full BtnG0+BtnRST dance), so the recovery is just to re-run `m5-onboard go` — it's idempotent and will re-enter download mode, re-flash, re-push apps. Don't panic and don't start opening the case; the mask ROM is in silicon and survives a corrupted flash as long as the USB PHY is intact.
|
||||
- **Baud rate is 460800 on UART bridges, 115200 with `--no-stub` on native USB.** Not 921600 on either. The CH9102 bridge loses sync on `erase_flash` at 921600 (not theoretical — it fails). Native USB's stub-baud-bump path produces "Lost connection" mid-flash; 115200 no-stub is counterintuitively faster end-to-end because it never fails.
|
||||
- **NVS writes must use `set_str`, not `set_blob`** *(relevant to `install_apps.py`'s `boot_option` setter).* UIFlow's startup calls `nvs.get_str()` and ESP-IDF tags blob and string entries separately. A blob-tagged key returns `ESP_ERR_NVS_NOT_FOUND` to `get_str`, and the device boot-loops. If a prior attempt wrote a blob, call `nvs.erase_key(name)` before `set_str`.
|
||||
- **REPL multi-line blocks need paste mode.** Sending `try:`/`except:` line-by-line makes the REPL accumulate indentation forever. Use Ctrl-E to enter paste mode, send the block, Ctrl-D to execute. `mpy_repl.py` wraps this.
|
||||
- **Hard reset is DTR=False, RTS=True, 100ms, RTS=False — but only on UART-bridge devices.** On native-USB ESP32-S3 boards the DTR/RTS lines aren't wired to EN/GPIO0, so that pulse is a silent no-op. Use `mpy_repl.repl_reset()` (sends `machine.reset()` through the REPL) for post-install reboots on those devices — `install_apps.py` already does this. If you bypass `install_apps.py` and stitch your own flow, don't reach for DTR/RTS on a usbmodem port and expect a reboot; files will be on disk but the old code will still be running. That regression bit us once.
|
||||
- **The idle heap-debug loop is normal.** UIFlow 2.0 prints asyncio diagnostics while waiting at the pairing screen. Don't interpret it as a hang.
|
||||
- **Cardputer-Adv (ESP32-S3) BLE peripherals require NVS `boot_option=2` + a custom `main.py`.** UIFlow's default `boot_option=1` starts a background Flow-pairing BLE advertise that wedges the NimBLE controller — subsequent `gap_advertise(adv_data=...)` calls from user code hit OSError(-519) "Memory Capacity Exceeded" regardless of payload shape, and the device ends up advertising with empty AD fields that iOS and the desktop Claude Buddy app filter out. The bundle's `main.py` lives at `/flash/` and takes over the boot flow (showing a simple menu over `/flash/apps/`), never touches BLE itself, and leaves the controller pristine for whichever app the user picks. `install_apps.py` now sets `boot_option=2` automatically when the bundle ships a root `main.py` — don't regress that behavior.
|
||||
|
||||
## After provisioning (what the user sees on the device)
|
||||
|
||||
Once `m5-onboard go` finishes at the `DONE` banner, the device is ready to use on its own:
|
||||
|
||||
- **Power.** Slide the switch on the right edge of the Cardputer-Adv to turn it on. Same switch turns it off. The board runs off its internal LiPo when unplugged; USB-C charges it.
|
||||
- **Boot.** A short boot log scrolls, then the launcher menu appears automatically. The menu lists every `.py` in `/flash/apps/` plus the top-level `/flash/*.py` entries.
|
||||
- **Navigation.** Arrow keys (or the keyboard's trackpoint-style cursor keys) scroll the menu; Enter launches the highlighted app; ESC returns to the launcher from inside an app.
|
||||
- **Event WiFi auto-connect.** The bundle's `main.py` connects to a hard-coded event WiFi (SSID `cardputer`) on every boot and shows the result on the LCD before the launcher menu appears. Credentials live in `buddy/device/wifi_event.py`; the connect is best-effort and the launcher always continues even if the connect fails. If you're using this bundle outside the event, edit `wifi_event.py` or remove the `_connect_wifi_with_splash()` call from `main.py`.
|
||||
- **Claude Buddy over BLE.** First time only: in Claude Desktop, **Help → Troubleshooting → Enable Developer Tools** (one-time, persists across launches). Then **Developer menu → Hardware Buddy → Connect**. BLE works regardless of the WiFi state — the link to Claude.app is local.
|
||||
- **Getting back to UIFlow.** The buddy bundle ships only a `main.py` at `/flash/` (no replacement `boot.py`), so the stock UIFlow `boot.py` is never touched and there's no `boot_uiflow.py` backup to restore. Revert by removing our `main.py` from the device REPL: `os.remove('/flash/main.py')` followed by `machine.reset()`. UIFlow's stock launcher takes over on the next boot. To start completely fresh including the firmware, re-run the skill without `--apps`.
|
||||
|
||||
## Files
|
||||
|
||||
- `scripts/onboard.py` — main orchestrator
|
||||
- `scripts/detect.py` — port discovery + chip ID
|
||||
- `scripts/fetch_firmware.py` — M5Burner API + download
|
||||
- `scripts/flash.py` — esptool wrapper
|
||||
- `scripts/install_apps.py` — push a directory of `.py` files into `/flash/` via paste-mode REPL; backs up `boot.py` as `boot_uiflow.py` before overwriting; also writes the `boot_option` NVS key when the bundle ships a root `main.py`
|
||||
- `scripts/smoke_test.py` — I2C + LCD + speaker + buttons
|
||||
- `scripts/mpy_repl.py` — shared serial/REPL helpers (paste mode, hard reset, boot-log capture)
|
||||
- `references/hardware_signatures.md` — chip + I2C fingerprints → model → firmware
|
||||
- `references/uiflow2_nvs.md` — NVS key reference with types and failure modes
|
||||
|
||||
## Dependencies
|
||||
|
||||
- `pyserial` — vendored at `onboard/scripts/vendor/serial/` (pinned 3.5, BSD-3-Clause).
|
||||
- `esptool` — pip dependency, declared in `requirements.txt`. Importable check happens via `importlib.util.find_spec("esptool")`; binary backstop search covers `~/Library/Python/*/bin/` on macOS, `~/.local/bin/` on Linux, `%APPDATA%\Python\Python3XX\Scripts\` on Windows.
|
||||
|
||||
`onboard.py` runs a preflight check at startup: if `esptool` (or, in the rare prune-vendor case, `pyserial`) is missing, it lists what's needed and asks the user whether to install now. On `Y` (or Enter) it runs `python -m pip install --user <missing>` in the current interpreter, then verifies. Inside a venv the `--user` flag is dropped so the install lands in the venv's site-packages. Non-interactive callers (piped stdin) get a manual-install hint instead of a prompt.
|
||||
|
||||
Python itself has to exist before this skill can do anything — you can't bootstrap an interpreter from inside one. `git` is **not** required — the `/maker-setup` command falls back to downloading the GitHub tarball with `curl`+`tar` (both pre-installed on macOS, Linux, and Windows 10+) when `git --version` fails. Claude's responsible for detecting Python and installing it if missing *before* running any `scripts/*.py` invocation. Detection is just running `python3 --version` / `python --version` — if it fails, Claude fetches Python with the host's native package manager before anything else.
|
||||
|
||||
**Per-OS Python bootstrap (Claude's responsibility if missing):**
|
||||
|
||||
- **Windows** — `winget install -e --id Python.Python.3.13 --silent --accept-source-agreements --accept-package-agreements`. Takes ~30 seconds, no UI, gets PATH right. If the current shell can't see `python` afterwards, tell the user to close and reopen the terminal (Windows updates PATH only on new shells).
|
||||
- **macOS** — Python 3 is usually pre-installed as `/usr/bin/python3` on any current macOS (shipped by Apple). If for some reason it isn't, `brew install python@3.13` via Homebrew is the go-to; if Homebrew itself is missing, offer to install it via `/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"` (but only if the user confirms — Homebrew is a larger commitment than winget).
|
||||
- **Linux** — use the distro package manager. Debian/Ubuntu: `sudo apt-get update && sudo apt-get install -y python3 python3-pip`. Fedora: `sudo dnf install -y python3 python3-pip`. Arch: `sudo pacman -S --noconfirm python python-pip`. You may need to sudo and should surface the password prompt to the user if needed.
|
||||
|
||||
**pyserial — bundled with the skill:**
|
||||
|
||||
A pinned `pyserial 3.5` ships under `scripts/vendor/` (BSD-3-Clause, Apache-compatible). Every script that imports `serial` calls `vendor_path.ensure_on_syspath()` before the first third-party import, which prepends `scripts/vendor/` to `sys.path`, so the vendored copy resolves regardless of whatever the user has system-wide. Net effect: port enumeration and REPL I/O work on a fresh clone with zero pip step. ~500 KB, pure-Python, same tree on macOS / Linux / Windows.
|
||||
|
||||
**esptool — pip dependency, auto-installed on first run:**
|
||||
|
||||
`esptool` is GPLv2+ and is intentionally **not** vendored — keeping the repository cleanly Apache-2.0 means the GPL bits live in the user's pip-managed environment, not in the tree. The skill's preflight checks for an importable `esptool` and, if missing, prompts to install it (`python -m pip install --user esptool` — `--user` dropped inside a venv so it lands in site-packages). For subprocess calls we use `[sys.executable, "-m", "esptool", ...]`; the subprocess inherits user-site so the pip-installed module imports cleanly. `requirements.txt` declares this for explicit setup; the prompt path is the default for first-time attendees who haven't run pip yet.
|
||||
|
||||
Non-interactive callers (piped stdin, CI) skip the prompt and get a `python -m pip install --user esptool` hint instead.
|
||||
|
||||
**Fallback if someone prunes `scripts/vendor/`:**
|
||||
|
||||
The same preflight path also re-installs pyserial via pip if the vendor copy is gone. This handles the case where someone downloaded a source-only zip that excluded vendor, or manually trimmed the repo to save space.
|
||||
|
||||
**USB driver — Windows-specific, only for older boards:**
|
||||
|
||||
The CH9102 USB-UART driver is still a manual install on Windows — WCH doesn't publish a winget manifest. Only needed for UART-bridge boards (Basic, Fire, Core2, StickC). Native-USB ESP32-S3 boards (Cardputer, Cardputer-Adv, CoreS3) enumerate as composite USB-CDC devices using Windows' in-box drivers and need no extra install.
|
||||
|
||||
## Platform notes
|
||||
|
||||
The skill runs on macOS, Linux, and Windows. Non-obvious bits:
|
||||
|
||||
- **Port naming.** pyserial abstracts the lookup but what the user sees looks different per OS. Pass whichever form `detect.py` reports:
|
||||
- macOS: `/dev/cu.usbmodem1101` (native USB) or `/dev/cu.usbserial-XXXX` (CH9102)
|
||||
- Linux: `/dev/ttyACM0` (native USB) or `/dev/ttyUSB0` (UART bridge)
|
||||
- Windows: `COM3`, `COM4`, etc. (Device Manager → Ports if unsure)
|
||||
- **Linux permissions — read this before blaming hardware.** On most distros, accessing `/dev/ttyUSB*` / `/dev/ttyACM*` without sudo requires group membership (`dialout` on Debian/Ubuntu/Arch, `uucp` on Fedora). Symptom: `detect.py` finds the port, but the flash step fails with `Permission denied` or `Could not open port`. Fix once, long-term:
|
||||
```bash
|
||||
sudo usermod -aG dialout $USER
|
||||
# log out / log back in — group change only takes effect for new sessions
|
||||
```
|
||||
`sudo python3 scripts/onboard.py ...` works as a one-off but adding the group membership is strictly better because pyserial's port-open in user mode succeeds cleanly from then on.
|
||||
- **Windows PATH gotchas.** Python's `pip install --user esptool` lands the executable in `%APPDATA%\Python\Python3XX\Scripts\`. If that directory isn't on PATH, `pip` prints a warning and nothing else picks up the install. `detect.py` looks there directly as a backstop, so the skill still works even without PATH fixed. But if you're invoking esptool outside the skill (or hitting "esptool not found" errors from other tools), either:
|
||||
- Re-run the Python installer and tick "Add Python to PATH" (the install's default), OR
|
||||
- Add `%APPDATA%\Python\Python3XX\Scripts` to PATH via System Properties → Environment Variables, OR
|
||||
- Use `python -m esptool ...` which always works regardless of PATH.
|
||||
- **Windows Store Python.** Newer Windows 11 machines may have Python pre-installed via Microsoft Store. It works but has quirky PATH behavior (lives under `%LOCALAPPDATA%\Packages\PythonSoftwareFoundation.Python.*\`). `detect.py` checks that location too. If you have the choice, the `winget install Python.Python.3.13` version is more predictable.
|
||||
- **Bundle path resolution.** `install_apps.py`'s `--src buddy` shorthand resolves in this order:
|
||||
1. `$M5_BUDDY_DIR` if set — explicit override, always wins. Useful when you want to point at a fork or a customized bundle that isn't in this clone.
|
||||
2. The `buddy/device/` directory inside this repo, found via `os.path.realpath(__file__)` walking up from `install_apps.py`. Works for any clone location, including symlinked skill installs at `~/.claude/skills/m5-onboard/`.
|
||||
3. `~/Downloads/m5stack/buddy/device`.
|
||||
4. `~/Desktop/m5stack/buddy/device`.
|
||||
|
||||
Most installs hit (2). Set `M5_BUDDY_DIR` only for the unusual case of pointing at a bundle outside this clone: `export M5_BUDDY_DIR=/path/to/buddy/device` (Unix) or `$env:M5_BUDDY_DIR="C:\path\to\buddy\device"` (PowerShell).
|
||||
- **Firmware cache.** Downloaded firmware lands at `~/.cache/m5-onboard/` (or `$XDG_CACHE_HOME/m5-onboard/`), created at mode 0700 if missing. Cache files are MD5-verified at write time and re-verified on hit. Clearing the cache is safe; the next run re-downloads.
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: conversation-analyzer
|
||||
description: Use this agent when analyzing conversation transcripts to find behaviors worth preventing with hooks. Examples: <example>Context: User is running /hookify command without arguments\nuser: "/hookify"\nassistant: "I'll analyze the conversation to find behaviors you want to prevent"\n<commentary>The /hookify command without arguments triggers conversation analysis to find unwanted behaviors.</commentary></example><example>Context: User wants to create hooks from recent frustrations\nuser: "Can you look back at this conversation and help me create hooks for the mistakes you made?"\nassistant: "I'll use the conversation-analyzer agent to identify the issues and suggest hooks."\n<commentary>User explicitly asks to analyze conversation for mistakes that should be prevented.</commentary></example>
|
||||
description: Use this agent when analyzing conversation transcripts to find behaviors worth preventing with hooks. Typical triggers include the /hookify command being invoked without arguments, or the user explicitly asking to look back at the current conversation and surface mistakes that should be prevented in the future. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: inherit
|
||||
color: yellow
|
||||
tools: ["Read", "Grep"]
|
||||
@@ -8,6 +8,15 @@ tools: ["Read", "Grep"]
|
||||
|
||||
You are a conversation analysis specialist that identifies problematic behaviors in Claude Code sessions that could be prevented with hooks.
|
||||
|
||||
## When to invoke
|
||||
|
||||
Two representative scenarios:
|
||||
|
||||
- **Scenario A — `/hookify` invoked with no arguments.** Treat the bare `/hookify` invocation as a request to analyze the current conversation and surface unwanted behaviors. Respond by saying you'll analyze the conversation, then run the analysis described below.
|
||||
- **Scenario B — User asks to learn from recent frustrations.** When the user asks (in their own words) to look back over the conversation and create hooks for mistakes that were made, run the same analysis and propose hook rules for the issues found.
|
||||
|
||||
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. Read and analyze user messages to find frustration signals
|
||||
2. Identify specific tool usage patterns that caused issues
|
||||
|
||||
@@ -14,10 +14,15 @@ The UI layer is **additive**. Under the hood it's still tools, resources, and th
|
||||
|
||||
## Claude host specifics
|
||||
|
||||
- `_meta.ui.prefersBorder: false` on a `ui://` resource removes the outer card border (mobile).
|
||||
| `_meta.ui.*` key | Where | Effect |
|
||||
|---|---|---|
|
||||
| `resourceUri` | tool | Which `ui://` resource the host renders for this tool's results. |
|
||||
| `visibility: ["app"]` | tool | Hide a widget-only helper tool (e.g. geometry/image fetcher called via `callServerTool`) from Claude's tool list. |
|
||||
| `prefersBorder: false` | resource | Drop the host's outer card border (mobile). |
|
||||
| `csp.{connectDomains, resourceDomains, baseUriDomains}` | resource | Declare external origins; default is block-all. `frameDomains` is currently restricted in Claude. |
|
||||
|
||||
- `hostContext.safeAreaInsets: {top, right, bottom, left}` (px) — honor these for notches and the composer overlay.
|
||||
- `_meta.ui.csp.{connectDomains, resourceDomains, baseUriDomains}` — declare external origins per resource; default is block-all. `frameDomains` is currently restricted in Claude.
|
||||
- Directory submission for MCP Apps requires 3–5 PNG screenshots, ≥1000px wide, cropped to the app response only (no prompt in the image). See https://claude.com/docs/connectors/building/submission#asset-specifications.
|
||||
- Directory submission requires OAuth or **authless** (`none`) — static bearer is private-deploy only and blocks listing — plus tool `annotations` and 3–5 PNG screenshots; see `references/directory-checklist.md`.
|
||||
|
||||
---
|
||||
|
||||
@@ -104,6 +109,7 @@ const server = new McpServer({ name: "contacts", version: "1.0.0" });
|
||||
// 1. The tool — returns DATA, declares which UI to show
|
||||
registerAppTool(server, "pick_contact", {
|
||||
description: "Open an interactive contact picker",
|
||||
annotations: { title: "Pick Contact", readOnlyHint: true },
|
||||
inputSchema: { filter: z.string().optional() },
|
||||
_meta: { ui: { resourceUri: "ui://widgets/contact-picker.html" } },
|
||||
}, async ({ filter }) => {
|
||||
@@ -172,7 +178,10 @@ The `/*__EXT_APPS_BUNDLE__*/` placeholder gets replaced by the server at startup
|
||||
| `app.updateModelContext({...})` | Widget → host | Update context silently (no visible message) |
|
||||
| `app.callServerTool({name, arguments})` | Widget → server | Call another tool on your server |
|
||||
| `app.openLink({url})` | Widget → host | Open a URL in a new tab (sandbox blocks `window.open`) |
|
||||
| `app.getHostContext()` / `app.onhostcontextchanged` | Host → widget | Theme (`light`/`dark`), locale, etc. |
|
||||
| `app.getHostContext()` / `app.onhostcontextchanged` | Host → widget | Theme, host CSS vars, `containerDimensions`, `displayMode`, `deviceCapabilities` |
|
||||
| `app.requestDisplayMode({mode})` | Widget → host | Ask for `inline` / `pip` / `fullscreen` |
|
||||
| `app.downloadFile({name, mimeType, content})` | Widget → host | Host-mediated download (base64 content) |
|
||||
| `new App(info, caps, {autoResize: true})` | — | Iframe height tracks rendered content |
|
||||
|
||||
`sendMessage` is the typical "user picked something, tell Claude" path. `updateModelContext` is for state that Claude should know about but shouldn't clutter the chat. `openLink` is **required** for any outbound navigation — `window.open` and `<a target="_blank">` are blocked by the sandbox attribute.
|
||||
|
||||
@@ -225,6 +234,7 @@ const pickerHtml = readFileSync("./widgets/picker.html", "utf8")
|
||||
|
||||
registerAppTool(server, "pick_contact", {
|
||||
description: "Open an interactive contact picker. User selects one contact.",
|
||||
annotations: { title: "Pick Contact", readOnlyHint: true },
|
||||
inputSchema: { filter: z.string().optional().describe("Name/email prefix filter") },
|
||||
_meta: { ui: { resourceUri: "ui://widgets/picker.html" } },
|
||||
}, async ({ filter }) => {
|
||||
@@ -348,6 +358,24 @@ Desktop caches UI resources aggressively. After editing widget HTML, **fully qui
|
||||
|
||||
The `sleep` keeps stdin open long enough to collect all responses. Parse the jsonl output with `jq` or a Python one-liner.
|
||||
|
||||
**Widget dev loop** — avoid the ⌘Q-relaunch cycle entirely by serving the inlined widget HTML at a plain GET route with a fake `ExtApps` shim that fires `ontoolresult` from a query param:
|
||||
|
||||
```ts
|
||||
app.get("/widget-preview", (_req, res) => {
|
||||
const shim = `globalThis.ExtApps={applyHostStyleVariables:()=>{},App:class{
|
||||
constructor(){this.h={}} ontoolresult;onhostcontextchanged;
|
||||
async connect(){const p=new URLSearchParams(location.search).get("payload");
|
||||
if(p)this.ontoolresult?.({content:[{type:"text",text:p}]});}
|
||||
getHostContext(){return{theme:"light"}}
|
||||
sendMessage(m){console.log("sendMessage",m)} updateModelContext(){}
|
||||
callServerTool(){return Promise.resolve({content:[]})} openLink(){} downloadFile(){}
|
||||
}};`;
|
||||
res.type("html").send(widgetHtml.replace("/*__EXT_APPS_BUNDLE__*/", shim));
|
||||
});
|
||||
```
|
||||
|
||||
Open `http://localhost:3000/widget-preview?payload={"rows":[...]}` in a normal browser tab and iterate with ordinary devtools.
|
||||
|
||||
**Host fallback** — use a host without the apps surface (or MCP Inspector) and confirm the tool's text content degrades gracefully.
|
||||
|
||||
**CSP debugging** — open the iframe's own devtools console. CSP violations are the #1 reason widgets silently fail (blank rectangle, no error in the main console). See `references/iframe-sandbox.md`.
|
||||
@@ -356,6 +384,9 @@ The `sleep` keeps stdin open long enough to collect all responses. Parse the jso
|
||||
|
||||
## Reference files
|
||||
|
||||
- `references/iframe-sandbox.md` — CSP/sandbox constraints, the bundle-inlining pattern, image handling
|
||||
- `references/iframe-sandbox.md` — CSP/sandbox constraints, the bundle-inlining pattern, image handling, host theming
|
||||
- `references/widget-templates.md` — reusable HTML scaffolds for picker / confirm / progress / display
|
||||
- `references/apps-sdk-messages.md` — the `App` class API: widget ↔ host ↔ server messaging
|
||||
- `references/apps-sdk-messages.md` — the `App` class API: widget ↔ host ↔ server messaging, lifecycle & supersession
|
||||
- `references/payload-budgeting.md` — host tool-result size caps, prune-then-truncate, heavy assets via `callServerTool`
|
||||
- `references/abuse-protection.md` — Anthropic egress CIDRs, tiered rate limiting, `trust proxy`, response caching
|
||||
- `references/directory-checklist.md` — pre-flight for connector-directory submission
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
# Abuse protection for authless hosted servers
|
||||
|
||||
An authless StreamableHTTP server is reachable by anything on the internet.
|
||||
There are three resources to protect: your compute, any upstream API quota
|
||||
your tools consume, and egress bandwidth for large `callServerTool` payloads.
|
||||
|
||||
## You don't get a per-user identity
|
||||
|
||||
In authless mode there is no token and stateless transport gives no session
|
||||
ID. Traffic from claude.ai is proxied through Anthropic's egress — every web
|
||||
user arrives from the same small set of IPs:
|
||||
|
||||
```
|
||||
160.79.104.0/21
|
||||
2607:6bc0::/48
|
||||
```
|
||||
|
||||
(See https://platform.claude.com/docs/en/api/ip-addresses.)
|
||||
|
||||
Claude Desktop, Claude Code, and other hosts connect **directly from the
|
||||
user's machine**, so those *do* have distinct per-user IPs. Per-IP limiting
|
||||
therefore works for direct-connect clients; for claude.ai you can only limit
|
||||
the aggregate Anthropic pool. If true per-user limits matter, that's the
|
||||
trigger to add OAuth.
|
||||
|
||||
## Tiered token-bucket (per-replica backstop)
|
||||
|
||||
```ts
|
||||
const ANTHROPIC_CIDRS = ["160.79.104.0/21", "2607:6bc0::/48"];
|
||||
const TIERS = {
|
||||
anthropic: { capacity: 600, refillPerSec: 100 }, // shared pool
|
||||
other: { capacity: 30, refillPerSec: 2 }, // per-IP
|
||||
};
|
||||
```
|
||||
|
||||
Match `req.ip` against the CIDRs, pick a bucket (`"anthropic"` or
|
||||
`"ip:<addr>"`), 429 + `Retry-After` on exhaust. This is a per-replica
|
||||
backstop — cross-replica enforcement belongs at the edge (Cloudflare, Cloud
|
||||
Armor), which keeps the containers stateless.
|
||||
|
||||
## `trust proxy` must match your topology
|
||||
|
||||
`req.ip` only honours `X-Forwarded-For` if `app.set('trust proxy', N)` is
|
||||
set. `true` trusts every hop, which lets a direct client send
|
||||
`X-Forwarded-For: 160.79.108.42` and claim the Anthropic tier. Set it to the
|
||||
exact number of trusted hops (e.g. `1` behind a single LB, `2` behind
|
||||
Cloudflare → origin LB) and **never `true` in production**.
|
||||
|
||||
## Hard-allowlisting Anthropic IPs is a product decision
|
||||
|
||||
Blocking everything outside `160.79.104.0/21` locks out Desktop, Claude Code,
|
||||
and every other MCP host. Use the CIDRs to **tier** rate limits, not to gate
|
||||
access, unless claude.ai-only is an explicit goal.
|
||||
|
||||
## Cache upstream responses
|
||||
|
||||
For tools that wrap a third-party API, an in-process LRU keyed on the
|
||||
normalized query (TTL hours, no secrets in the key) is the primary cost
|
||||
control — repeat queries become free and absorb thundering-herd. Rate limits
|
||||
are the safety net, not the first line.
|
||||
@@ -2,6 +2,18 @@
|
||||
|
||||
The `@modelcontextprotocol/ext-apps` package provides the `App` class (browser side) and `registerAppTool`/`registerAppResource` helpers (server side). Messaging is bidirectional and persistent.
|
||||
|
||||
## Construction
|
||||
|
||||
```js
|
||||
const app = new App(
|
||||
{ name: "MyWidget", version: "1.0.0" },
|
||||
{}, // capabilities
|
||||
{ autoResize: true }, // options
|
||||
);
|
||||
```
|
||||
|
||||
`autoResize: true` wires a `ResizeObserver` that emits `ui/notifications/size-changed` so the host iframe height tracks your rendered content. Without it the frame is fixed-height and tall renders get clipped — set it for any widget whose height depends on data.
|
||||
|
||||
---
|
||||
|
||||
## Widget → Host
|
||||
@@ -63,6 +75,26 @@ card.querySelector("a").addEventListener("click", (e) => {
|
||||
|
||||
Host-mediated download (sandbox blocks direct `<a download>`). `content` is a base64 string.
|
||||
|
||||
```js
|
||||
const csv = rows.map((r) => Object.values(r).join(",")).join("\n");
|
||||
app.downloadFile({
|
||||
name: "export.csv",
|
||||
mimeType: "text/csv",
|
||||
content: btoa(unescape(encodeURIComponent(csv))),
|
||||
});
|
||||
```
|
||||
|
||||
### `app.requestDisplayMode({ mode })`
|
||||
|
||||
Ask the host to switch the widget between `"inline"`, `"pip"`, or `"fullscreen"`. Check `getHostContext().availableDisplayModes` first; hide the control if the mode isn't offered. The host responds by firing `onhostcontextchanged` with new `displayMode` and `containerDimensions` — re-render at the new size.
|
||||
|
||||
```js
|
||||
if (app.getHostContext()?.availableDisplayModes?.includes("fullscreen")) {
|
||||
expandBtn.hidden = false;
|
||||
expandBtn.onclick = () => app.requestDisplayMode({ mode: "fullscreen" });
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Host → Widget
|
||||
@@ -84,9 +116,22 @@ app.ontoolresult = ({ content }) => {
|
||||
|
||||
Fires with the arguments Claude passed to the tool. Useful if the widget needs to know what was asked for (e.g., highlight the search term).
|
||||
|
||||
### `app.ontoolinputpartial = ({ arguments }) => {...}` / `app.ontoolcancelled = () => {...}`
|
||||
|
||||
`ontoolinputpartial` fires while Claude is still streaming arguments — use it to show a skeleton ("Preparing: <title>…") before the result lands. `ontoolcancelled` fires if the call is aborted; clear the skeleton.
|
||||
|
||||
### `app.getHostContext()` / `app.onhostcontextchanged = (ctx) => {...}`
|
||||
|
||||
Read and subscribe to host context — `theme` (`"light"` / `"dark"`), locale, etc. Call `getHostContext()` **after** `connect()`. Subscribe for live updates (user toggles dark mode mid-conversation).
|
||||
Read and subscribe to host context. Call `getHostContext()` **after** `connect()`. Subscribe for live updates (user toggles dark mode, expands to fullscreen).
|
||||
|
||||
| `ctx.` field | Use |
|
||||
|---|---|
|
||||
| `theme` | `"light"` / `"dark"` — toggle a `.dark` class |
|
||||
| `styles.variables` | Host CSS tokens — pass to `applyHostStyleVariables()` so colors/fonts match host chrome |
|
||||
| `displayMode` / `availableDisplayModes` | Current mode and which `requestDisplayMode` targets are valid |
|
||||
| `containerDimensions.{maxHeight,width}` | Size your render to this instead of hard-coded px |
|
||||
| `deviceCapabilities.touch` | Switch hover-only affordances to tap (`pointerdown`) |
|
||||
| `safeAreaInsets` | Padding for notches / composer overlay |
|
||||
|
||||
```js
|
||||
const applyTheme = (t) =>
|
||||
@@ -129,14 +174,36 @@ No `{ notify }` destructure — `extra` is `RequestHandlerExtra`; progress goes
|
||||
## Lifecycle
|
||||
|
||||
1. Claude calls a tool with `_meta.ui.resourceUri` declared
|
||||
2. Host fetches the resource (your HTML) and renders it in an iframe
|
||||
2. Host fetches the resource (your HTML) and mounts a **fresh iframe** for this call
|
||||
3. Widget script runs, sets handlers, calls `await app.connect()`
|
||||
4. Host pipes the tool's return value → `ontoolresult` fires
|
||||
5. Widget renders, user interacts
|
||||
6. Widget calls `sendMessage` / `updateModelContext` / `callServerTool` as needed
|
||||
7. Widget persists until conversation context moves on — subsequent calls to the same tool reuse the iframe and fire `ontoolresult` again
|
||||
7. Iframe persists in the transcript; **the next call to the same tool mounts another iframe** alongside it
|
||||
|
||||
There's no explicit "submit and close" — the widget is a long-lived surface.
|
||||
There's no explicit "submit and close" — each instance is long-lived, but instances are not reused across calls.
|
||||
|
||||
### Supersession
|
||||
|
||||
Because earlier instances stay mounted, a click on a stale widget can `sendMessage` after a newer one has rendered. Detect this with a `BroadcastChannel` and make older instances inert:
|
||||
|
||||
```js
|
||||
let superseded = false;
|
||||
const seq = Date.now() + Math.random();
|
||||
const bc = new BroadcastChannel("my-widget");
|
||||
bc.onmessage = (e) => {
|
||||
if (e.data?.seq > seq) {
|
||||
superseded = true;
|
||||
document.body.classList.add("superseded"); // opacity:.45; pointer-events:none
|
||||
}
|
||||
};
|
||||
bc.postMessage({ seq });
|
||||
|
||||
// Guard outbound calls:
|
||||
function safeSend(msg) {
|
||||
if (!superseded) app.sendMessage(msg);
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
# Connector-directory submission checklist
|
||||
|
||||
Pre-flight before submitting a remote MCP app to the Claude connector
|
||||
directory. Each item is a hard review criterion.
|
||||
|
||||
| Area | Requirement |
|
||||
|---|---|
|
||||
| **Auth** | OAuth (DCR or CIMD) or **`none`** (authless). Static bearer tokens are private-deploy only and block listing. Authless is valid for public-data servers — the server holds any upstream API keys. |
|
||||
| **Tool annotations** | Every tool sets `annotations.title` plus the relevant hints: `readOnlyHint: true` for fetch/search tools, `destructiveHint` / `idempotentHint` for writes, `openWorldHint: true` if the tool reaches an external system. |
|
||||
| **Tool names** | ≤ 64 characters, snake/kebab case. |
|
||||
| **Widget layout** | Inline height ≤ 500px, no nested scroll containers, 44pt minimum touch targets, WCAG-AA contrast in both themes. |
|
||||
| **Theming** | `html, body { background: transparent }`, `<meta name="color-scheme" content="light dark">`, adopt host CSS tokens via `applyHostStyleVariables`. |
|
||||
| **External links** | Use `app.openLink`. Declare each origin (e.g. `https://api.example.com`) in the connector's *Allowed link URIs* so the link skips the confirm modal. |
|
||||
| **Helper tools** | Widget-only tools (geometry/image fetchers) carry `_meta.ui.visibility: ["app"]` so they don't appear in Claude's tool list. |
|
||||
| **Screenshots** | 3–5 PNGs, ≥ 1000px wide, cropped to the app response only — no prompt text in frame. |
|
||||
|
||||
See `abuse-protection.md` for rate-limit and IP-tiering guidance once the
|
||||
authless endpoint is public.
|
||||
@@ -122,23 +122,38 @@ that survives un-inlined.
|
||||
|
||||
---
|
||||
|
||||
## Dark mode
|
||||
## Theme & host styles
|
||||
|
||||
```js
|
||||
const applyTheme = (theme) =>
|
||||
document.documentElement.classList.toggle("dark", theme === "dark");
|
||||
The host renders the iframe inside its own card chrome — paint a **transparent** background and adopt host CSS tokens so the widget blends in across light/dark and across hosts.
|
||||
|
||||
app.onhostcontextchanged = (ctx) => applyTheme(ctx.theme);
|
||||
await app.connect();
|
||||
applyTheme(app.getHostContext()?.theme);
|
||||
```html
|
||||
<meta name="color-scheme" content="light dark" />
|
||||
```
|
||||
|
||||
```css
|
||||
:root { --ink:#0f1111; --bg:#fff; color-scheme:light; }
|
||||
:root.dark { --ink:#e6e6e6; --bg:#1f2428; color-scheme:dark; }
|
||||
:root {
|
||||
--ink: var(--color-text-primary, #0f1111);
|
||||
--sub: var(--color-text-secondary, #5a6270);
|
||||
--line: var(--color-border-default, #e3e6ea);
|
||||
}
|
||||
html, body { background: transparent; color: var(--ink); }
|
||||
:root.dark .thumb { mix-blend-mode: normal; } /* multiply → images vanish in dark */
|
||||
```
|
||||
|
||||
```js
|
||||
const { App, applyHostStyleVariables } = globalThis.ExtApps;
|
||||
|
||||
function applyHostContext(ctx) {
|
||||
document.documentElement.classList.toggle("dark", ctx?.theme === "dark");
|
||||
if (ctx?.styles?.variables) applyHostStyleVariables(ctx.styles.variables);
|
||||
}
|
||||
app.onhostcontextchanged = applyHostContext;
|
||||
await app.connect();
|
||||
applyHostContext(app.getHostContext());
|
||||
```
|
||||
|
||||
`applyHostStyleVariables` writes the host's `--color-*` / `--font-*` / `--border-radius-*` tokens onto `:root`; the hex values above are fallbacks for hosts that don't supply them.
|
||||
|
||||
---
|
||||
|
||||
## Debugging
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
# Payload budgeting
|
||||
|
||||
Hosts cap tool-result text. claude.ai and Claude Desktop truncate at roughly
|
||||
**150,000 characters**; Claude Code at ~25k tokens. When a tool result exceeds
|
||||
the cap, the host substitutes a file-pointer string in place of your JSON. The
|
||||
widget then receives non-JSON in `ontoolresult`, `JSON.parse` throws, and the
|
||||
user sees something like *"Bad payload: SyntaxError: Unexpected token 'E'"* —
|
||||
with no hint that size was the cause.
|
||||
|
||||
## Symptom → cause
|
||||
|
||||
| Symptom | Likely cause |
|
||||
|---|---|
|
||||
| Widget shows a JSON parse error on `content[0].text` | Result over the host cap; host swapped in a file-pointer string |
|
||||
| Works for one query, breaks for "all of X" | Row count × column count crossed the cap |
|
||||
| Works in MCP Inspector, breaks in Desktop | Inspector has no cap; Desktop does |
|
||||
|
||||
## Strategy
|
||||
|
||||
Cap your own payload at ~130KB and degrade in order:
|
||||
|
||||
1. **Ship full rows** when `JSON.stringify(rows).length` is under the cap.
|
||||
2. **Prune columns** to those the rendering spec actually references. Walk the
|
||||
spec for both `field: "..."` keys *and* `datum.X` / `datum['X']` inside
|
||||
expression strings — if the spec aliases a column via a `calculate`
|
||||
transform, the alias appears as `field:` but the source column only appears
|
||||
as `datum.X`, and dropping it leaves the widget with NaN.
|
||||
3. **Truncate rows** as a last resort and include `{ truncated: N }` in the
|
||||
payload so the widget can label it.
|
||||
|
||||
```ts
|
||||
const MAX = 130_000;
|
||||
let out = rows;
|
||||
if (JSON.stringify(out).length > MAX) {
|
||||
const keep = referencedFields(spec); // field: + datum.X refs
|
||||
out = rows.map((r) => pick(r, keep));
|
||||
if (JSON.stringify(out).length > MAX) {
|
||||
const per = JSON.stringify(out[0] ?? {}).length || 1;
|
||||
out = out.slice(0, Math.floor(MAX / per));
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Heavy assets go via `callServerTool`, not the result
|
||||
|
||||
Geometry, image bytes, or any blob the widget needs but Claude doesn't should
|
||||
be served by a separate tool the widget calls after mount:
|
||||
|
||||
```js
|
||||
const topo = await app.callServerTool({ name: "get-topojson", arguments: { level } });
|
||||
```
|
||||
|
||||
Mark that helper tool with `_meta.ui.visibility: ["app"]` so it doesn't appear
|
||||
in Claude's tool list.
|
||||
@@ -24,21 +24,7 @@ Agents are autonomous subprocesses that handle complex, multi-step tasks indepen
|
||||
```markdown
|
||||
---
|
||||
name: agent-identifier
|
||||
description: Use this agent when [triggering conditions]. Examples:
|
||||
|
||||
<example>
|
||||
Context: [Situation description]
|
||||
user: "[User request]"
|
||||
assistant: "[How assistant should respond and use this agent]"
|
||||
<commentary>
|
||||
[Why this agent should be triggered]
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
<example>
|
||||
[Additional example...]
|
||||
</example>
|
||||
|
||||
description: Use this agent when [triggering conditions]. Typical triggers include [scenario 1 in prose], [scenario 2 in prose], and [scenario 3 in prose]. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: inherit
|
||||
color: blue
|
||||
tools: ["Read", "Write", "Grep"]
|
||||
@@ -46,6 +32,12 @@ tools: ["Read", "Write", "Grep"]
|
||||
|
||||
You are [agent role description]...
|
||||
|
||||
## When to invoke
|
||||
|
||||
[Two to four representative scenarios written as prose, e.g.:]
|
||||
- **[Scenario name].** [What the situation looks like and what the agent should do.]
|
||||
- **[Scenario name].** [Same.]
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. [Responsibility 1]
|
||||
2. [Responsibility 2]
|
||||
@@ -81,36 +73,24 @@ Agent identifier used for namespacing and invocation.
|
||||
|
||||
### description (required)
|
||||
|
||||
Defines when Claude should trigger this agent. **This is the most critical field.**
|
||||
Defines when Claude should trigger this agent. **This is the most critical field** — it is loaded into context whenever the agent is registered, so the harness can decide when to dispatch.
|
||||
|
||||
**Must include:**
|
||||
1. Triggering conditions ("Use this agent when...")
|
||||
2. Multiple `<example>` blocks showing usage
|
||||
3. Context, user request, and assistant response in each example
|
||||
4. `<commentary>` explaining why agent triggers
|
||||
2. A short prose summary of the typical trigger scenarios
|
||||
3. A pointer to a "When to invoke" section in the agent body for the detailed worked scenarios
|
||||
|
||||
**Format:**
|
||||
```
|
||||
Use this agent when [conditions]. Examples:
|
||||
|
||||
<example>
|
||||
Context: [Scenario description]
|
||||
user: "[What user says]"
|
||||
assistant: "[How Claude should respond]"
|
||||
<commentary>
|
||||
[Why this agent is appropriate]
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
[More examples...]
|
||||
Use this agent when [conditions]. Typical triggers include [scenario 1 in prose], [scenario 2 in prose], and [scenario 3 in prose]. See "When to invoke" in the agent body for worked scenarios.
|
||||
```
|
||||
|
||||
**Best practices:**
|
||||
- Include 2-4 concrete examples
|
||||
- Show proactive and reactive triggering
|
||||
- Cover different phrasings of same intent
|
||||
- Explain reasoning in commentary
|
||||
- Name 2-4 trigger scenarios in the prose summary
|
||||
- Cover both proactive (assistant invokes itself) and reactive (user requests) triggering
|
||||
- Cover different phrasings of the same intent
|
||||
- Be specific about when NOT to use the agent
|
||||
- Put detailed scenarios in the body under "When to invoke" as a bullet list of prose descriptions
|
||||
|
||||
### model (required)
|
||||
|
||||
@@ -231,14 +211,14 @@ Requirements:
|
||||
- Specific methodologies
|
||||
- Edge case handling
|
||||
- Output format
|
||||
- A "When to invoke" section listing 2-4 trigger scenarios as prose bullets
|
||||
4. Create identifier (lowercase, hyphens, 3-50 chars)
|
||||
5. Write description with triggering conditions
|
||||
6. Include 2-3 <example> blocks showing when to use
|
||||
5. Write description with triggering conditions and a short prose summary of trigger scenarios
|
||||
|
||||
Return JSON with:
|
||||
{
|
||||
"identifier": "agent-name",
|
||||
"whenToUse": "Use this agent when... Examples: <example>...</example>",
|
||||
"whenToUse": "Use this agent when... Typical triggers include [...]. See \"When to invoke\" in the agent body.",
|
||||
"systemPrompt": "You are..."
|
||||
}
|
||||
```
|
||||
@@ -332,13 +312,18 @@ Ensure system prompt is complete:
|
||||
```markdown
|
||||
---
|
||||
name: simple-agent
|
||||
description: Use this agent when... Examples: <example>...</example>
|
||||
description: Use this agent when [condition]. Typical triggers include [trigger 1] and [trigger 2]. See "When to invoke" in the agent body.
|
||||
model: inherit
|
||||
color: blue
|
||||
---
|
||||
|
||||
You are an agent that [does X].
|
||||
|
||||
## When to invoke
|
||||
|
||||
- **[Scenario A].** [Description.]
|
||||
- **[Scenario B].** [Description.]
|
||||
|
||||
Process:
|
||||
1. [Step 1]
|
||||
2. [Step 2]
|
||||
@@ -351,7 +336,7 @@ Output: [What to provide]
|
||||
| Field | Required | Format | Example |
|
||||
|-------|----------|--------|---------|
|
||||
| name | Yes | lowercase-hyphens | code-reviewer |
|
||||
| description | Yes | Text + examples | Use when... <example>... |
|
||||
| description | Yes | Prose triggers | Use when... Typical triggers include... |
|
||||
| model | Yes | inherit/sonnet/opus/haiku | inherit |
|
||||
| color | Yes | Color name | blue |
|
||||
| tools | No | Array of tool names | ["Read", "Grep"] |
|
||||
@@ -359,7 +344,8 @@ Output: [What to provide]
|
||||
### Best Practices
|
||||
|
||||
**DO:**
|
||||
- ✅ Include 2-4 concrete examples in description
|
||||
- ✅ Name 2-4 trigger scenarios in the description (as prose)
|
||||
- ✅ Put detailed worked scenarios in a "When to invoke" body section, as prose bullets
|
||||
- ✅ Write specific triggering conditions
|
||||
- ✅ Use `inherit` for model unless specific need
|
||||
- ✅ Choose appropriate tools (least privilege)
|
||||
@@ -367,7 +353,7 @@ Output: [What to provide]
|
||||
- ✅ Test agent triggering thoroughly
|
||||
|
||||
**DON'T:**
|
||||
- ❌ Use generic descriptions without examples
|
||||
- ❌ Use generic descriptions without trigger scenarios
|
||||
- ❌ Omit triggering conditions
|
||||
- ❌ Give all agents same color
|
||||
- ❌ Grant unnecessary tool access
|
||||
@@ -407,7 +393,7 @@ To create an agent for a plugin:
|
||||
3. Create `agents/agent-name.md` file
|
||||
4. Write frontmatter with all required fields
|
||||
5. Write system prompt following best practices
|
||||
6. Include 2-4 triggering examples in description
|
||||
6. Name 2-4 trigger scenarios in description (prose) and detail them in a "When to invoke" body section
|
||||
7. Validate with `scripts/validate-agent.sh`
|
||||
8. Test triggering with real scenarios
|
||||
9. Document agent in plugin README
|
||||
|
||||
@@ -31,11 +31,13 @@ Claude will return:
|
||||
```json
|
||||
{
|
||||
"identifier": "agent-name",
|
||||
"whenToUse": "Use this agent when... Examples: <example>...</example>",
|
||||
"systemPrompt": "You are... **Your Core Responsibilities:**..."
|
||||
"whenToUse": "Use this agent when... Typical triggers include [scenario 1], [scenario 2], and [scenario 3]. See \"When to invoke\" in the agent body for worked scenarios.",
|
||||
"systemPrompt": "You are...\n\n## When to invoke\n\n- **[Scenario A].** [Description]\n- **[Scenario B].** [Description]\n\n**Your Core Responsibilities:**..."
|
||||
}
|
||||
```
|
||||
|
||||
`whenToUse` is flat prose. `systemPrompt` includes a "When to invoke" section with prose bullets.
|
||||
|
||||
### Step 4: Convert to Agent File
|
||||
|
||||
Create `agents/[identifier].md`:
|
||||
@@ -63,8 +65,8 @@ I need an agent that reviews code changes for quality issues, security vulnerabi
|
||||
```json
|
||||
{
|
||||
"identifier": "code-quality-reviewer",
|
||||
"whenToUse": "Use this agent when the user has written code and needs quality review, or explicitly asks to review code changes. Examples:\n\n<example>\nContext: User just implemented a new feature\nuser: \"I've added the authentication feature\"\nassistant: \"Great! Let me review the code quality.\"\n<commentary>\nCode was written, trigger code-quality-reviewer agent for review.\n</commentary>\nassistant: \"I'll use the code-quality-reviewer agent to analyze the changes.\"\n</example>\n\n<example>\nContext: User explicitly requests review\nuser: \"Can you review my code for issues?\"\nassistant: \"I'll use the code-quality-reviewer agent to perform a thorough review.\"\n<commentary>\nExplicit review request triggers the agent.\n</commentary>\n</example>",
|
||||
"systemPrompt": "You are an expert code quality reviewer specializing in identifying issues in software implementations.\n\n**Your Core Responsibilities:**\n1. Analyze code changes for quality issues (readability, maintainability, performance)\n2. Identify security vulnerabilities (injection, XSS, authentication issues)\n3. Check adherence to project best practices and coding standards\n4. Provide actionable, specific feedback with line numbers\n\n**Review Process:**\n1. Read the code changes using available tools\n2. Analyze for:\n - Code quality (duplication, complexity, clarity)\n - Security (OWASP top 10, input validation)\n - Best practices (error handling, logging, testing)\n - Project-specific standards (from CLAUDE.md)\n3. Identify issues with severity (critical/major/minor)\n4. Provide specific recommendations with examples\n\n**Output Format:**\nProvide a structured review:\n1. Summary (2-3 sentences)\n2. Critical Issues (must fix)\n3. Major Issues (should fix)\n4. Minor Issues (nice to fix)\n5. Positive observations\n6. Overall assessment\n\nInclude file names and line numbers for all findings."
|
||||
"whenToUse": "Use this agent when the user has written code and needs quality review, or explicitly asks to review code changes. Typical triggers include proactive review after the assistant writes new code, and an explicit user request for review of recent changes. See \"When to invoke\" in the agent body for worked scenarios.",
|
||||
"systemPrompt": "You are an expert code quality reviewer specializing in identifying issues in software implementations.\n\n## When to invoke\n\n- **Proactive review after new code.** The assistant has just written or modified code (e.g. an authentication feature). Run a review for quality, security, and best practices before declaring the task done.\n- **Explicit review request.** The user asks for the recent changes to be reviewed for issues. Run a thorough review and report findings.\n\n**Your Core Responsibilities:**\n1. Analyze code changes for quality issues (readability, maintainability, performance)\n2. Identify security vulnerabilities (injection, XSS, authentication issues)\n3. Check adherence to project best practices and coding standards\n4. Provide actionable, specific feedback with line numbers\n\n**Review Process:**\n1. Read the code changes using available tools\n2. Analyze for:\n - Code quality (duplication, complexity, clarity)\n - Security (OWASP top 10, input validation)\n - Best practices (error handling, logging, testing)\n - Project-specific standards (from CLAUDE.md)\n3. Identify issues with severity (critical/major/minor)\n4. Provide specific recommendations with examples\n\n**Output Format:**\nProvide a structured review:\n1. Summary (2-3 sentences)\n2. Critical Issues (must fix)\n3. Major Issues (should fix)\n4. Minor Issues (nice to fix)\n5. Positive observations\n6. Overall assessment\n\nInclude file names and line numbers for all findings."
|
||||
}
|
||||
```
|
||||
|
||||
@@ -75,27 +77,7 @@ File: `agents/code-quality-reviewer.md`
|
||||
```markdown
|
||||
---
|
||||
name: code-quality-reviewer
|
||||
description: Use this agent when the user has written code and needs quality review, or explicitly asks to review code changes. Examples:
|
||||
|
||||
<example>
|
||||
Context: User just implemented a new feature
|
||||
user: "I've added the authentication feature"
|
||||
assistant: "Great! Let me review the code quality."
|
||||
<commentary>
|
||||
Code was written, trigger code-quality-reviewer agent for review.
|
||||
</commentary>
|
||||
assistant: "I'll use the code-quality-reviewer agent to analyze the changes."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User explicitly requests review
|
||||
user: "Can you review my code for issues?"
|
||||
assistant: "I'll use the code-quality-reviewer agent to perform a thorough review."
|
||||
<commentary>
|
||||
Explicit review request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
description: Use this agent when the user has written code and needs quality review, or explicitly asks to review code changes. Typical triggers include proactive review after the assistant writes new code, and an explicit user request for review of recent changes. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: inherit
|
||||
color: blue
|
||||
tools: ["Read", "Grep", "Glob"]
|
||||
@@ -103,6 +85,11 @@ tools: ["Read", "Grep", "Glob"]
|
||||
|
||||
You are an expert code quality reviewer specializing in identifying issues in software implementations.
|
||||
|
||||
## When to invoke
|
||||
|
||||
- **Proactive review after new code.** The assistant has just written or modified code (e.g. an authentication feature). Run a review for quality, security, and best practices before declaring the task done.
|
||||
- **Explicit review request.** The user asks for the recent changes to be reviewed for issues. Run a thorough review and report findings.
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. Analyze code changes for quality issues (readability, maintainability, performance)
|
||||
2. Identify security vulnerabilities (injection, XSS, authentication issues)
|
||||
@@ -142,8 +129,8 @@ Create an agent that generates unit tests for code. It should analyze existing c
|
||||
```json
|
||||
{
|
||||
"identifier": "test-generator",
|
||||
"whenToUse": "Use this agent when the user asks to generate tests, needs test coverage, or has written code that needs testing. Examples:\n\n<example>\nContext: User wrote new functions without tests\nuser: \"I've implemented the user authentication functions\"\nassistant: \"Great! Let me generate tests for these functions.\"\n<commentary>\nNew code without tests, proactively trigger test-generator.\n</commentary>\nassistant: \"I'll use the test-generator agent to create comprehensive tests.\"\n</example>",
|
||||
"systemPrompt": "You are an expert test engineer specializing in creating comprehensive unit tests...\n\n**Your Core Responsibilities:**\n1. Analyze code to understand behavior\n2. Generate test cases covering happy paths and edge cases\n3. Follow project testing conventions\n4. Ensure high code coverage\n\n**Test Generation Process:**\n1. Read target code\n2. Identify testable units (functions, classes, methods)\n3. Design test cases (inputs, expected outputs, edge cases)\n4. Generate tests following project patterns\n5. Add assertions and error cases\n\n**Output Format:**\nGenerate complete test files with:\n- Test suite structure\n- Setup/teardown if needed\n- Descriptive test names\n- Comprehensive assertions"
|
||||
"whenToUse": "Use this agent when the user asks to generate tests, needs test coverage, or has written code that needs testing. Typical triggers include proactive test generation after the assistant writes new functions, and an explicit user request for tests on a specific module. See \"When to invoke\" in the agent body.",
|
||||
"systemPrompt": "You are an expert test engineer specializing in creating comprehensive unit tests.\n\n## When to invoke\n\n- **Proactive coverage after new code.** The assistant has just implemented new functions (e.g. user authentication functions) without tests. Generate a comprehensive test suite before declaring the task done.\n- **Explicit test request.** The user asks for tests on a specific surface. Generate the requested suite following project conventions.\n\n**Your Core Responsibilities:**\n1. Analyze code to understand behavior\n2. Generate test cases covering happy paths and edge cases\n3. Follow project testing conventions\n4. Ensure high code coverage\n\n**Test Generation Process:**\n1. Read target code\n2. Identify testable units (functions, classes, methods)\n3. Design test cases (inputs, expected outputs, edge cases)\n4. Generate tests following project patterns\n5. Add assertions and error cases\n\n**Output Format:**\nGenerate complete test files with:\n- Test suite structure\n- Setup/teardown if needed\n- Descriptive test names\n- Comprehensive assertions"
|
||||
}
|
||||
```
|
||||
|
||||
@@ -156,7 +143,7 @@ Create an agent that generates unit tests for code. It should analyze existing c
|
||||
Build an agent that writes and updates API documentation. It should analyze code and generate clear, comprehensive docs.
|
||||
```
|
||||
|
||||
**Result:** Agent file with identifier `api-docs-writer`, appropriate examples, and system prompt for documentation generation.
|
||||
**Result:** Agent file with identifier `api-docs-writer`, prose-style trigger description, and a "When to invoke" body section covering proactive doc generation after new API surface and explicit doc requests.
|
||||
|
||||
## Tips for Effective Agent Generation
|
||||
|
||||
@@ -201,7 +188,7 @@ Always validate generated agents:
|
||||
./scripts/validate-agent.sh agents/your-agent.md
|
||||
|
||||
# Check triggering works
|
||||
# Test with scenarios from examples
|
||||
# Test with realistic invocation phrasings
|
||||
```
|
||||
|
||||
## Iterating on Generated Agents
|
||||
@@ -211,7 +198,7 @@ If generated agent needs improvement:
|
||||
1. Identify what's missing or wrong
|
||||
2. Manually edit the agent file
|
||||
3. Focus on:
|
||||
- Better examples in description
|
||||
- Better-named trigger scenarios in `description:` and "When to invoke"
|
||||
- More specific system prompt
|
||||
- Clearer process steps
|
||||
- Better output format definition
|
||||
@@ -223,7 +210,6 @@ If generated agent needs improvement:
|
||||
- **Comprehensive**: Claude includes edge cases and quality checks
|
||||
- **Consistent**: Follows proven patterns
|
||||
- **Fast**: Seconds vs manual writing
|
||||
- **Examples**: Auto-generates triggering examples
|
||||
- **Complete**: Provides full system prompt structure
|
||||
|
||||
## When to Edit Manually
|
||||
|
||||
@@ -9,38 +9,7 @@ Full, production-ready agent examples for common use cases. Use these as templat
|
||||
```markdown
|
||||
---
|
||||
name: code-reviewer
|
||||
description: Use this agent when the user has written code and needs quality review, security analysis, or best practices validation. Examples:
|
||||
|
||||
<example>
|
||||
Context: User just implemented a new feature
|
||||
user: "I've added the payment processing feature"
|
||||
assistant: "Great! Let me review the implementation."
|
||||
<commentary>
|
||||
Code written for payment processing (security-critical). Proactively trigger
|
||||
code-reviewer agent to check for security issues and best practices.
|
||||
</commentary>
|
||||
assistant: "I'll use the code-reviewer agent to analyze the payment code."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User explicitly requests code review
|
||||
user: "Can you review my code for issues?"
|
||||
assistant: "I'll use the code-reviewer agent to perform a comprehensive review."
|
||||
<commentary>
|
||||
Explicit code review request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: Before committing code
|
||||
user: "I'm ready to commit these changes"
|
||||
assistant: "Let me review them first."
|
||||
<commentary>
|
||||
Before commit, proactively review code quality.
|
||||
</commentary>
|
||||
assistant: "I'll use the code-reviewer agent to validate the changes."
|
||||
</example>
|
||||
|
||||
description: Use this agent when the user has written code and needs quality review, security analysis, or best practices validation. Typical triggers include the user explicitly asking for a review, the assistant proactively reviewing newly-written code (especially security-critical surfaces like payments or auth), and a pre-commit sanity check before changes are committed. See "When to invoke" in the agent body.
|
||||
model: inherit
|
||||
color: blue
|
||||
tools: ["Read", "Grep", "Glob"]
|
||||
@@ -48,6 +17,12 @@ tools: ["Read", "Grep", "Glob"]
|
||||
|
||||
You are an expert code quality reviewer specializing in identifying issues, security vulnerabilities, and opportunities for improvement in software implementations.
|
||||
|
||||
## When to invoke
|
||||
|
||||
- **Proactive review of security-critical code.** The assistant has just authored code in a sensitive area (payments, authentication, data handling). Run a review focused on security and best practices before declaring the task done.
|
||||
- **Explicit review request.** The user asks (in any phrasing) for the recent changes to be reviewed. Run a comprehensive review of the unstaged diff.
|
||||
- **Pre-commit validation.** The user signals readiness to commit. Run a review first to surface issues before they land.
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. Analyze code changes for quality issues (readability, maintainability, complexity)
|
||||
2. Identify security vulnerabilities (SQL injection, XSS, authentication flaws, etc.)
|
||||
@@ -118,27 +93,7 @@ You are an expert code quality reviewer specializing in identifying issues, secu
|
||||
```markdown
|
||||
---
|
||||
name: test-generator
|
||||
description: Use this agent when the user has written code without tests, explicitly asks for test generation, or needs test coverage improvement. Examples:
|
||||
|
||||
<example>
|
||||
Context: User implemented functions without tests
|
||||
user: "I've added the data validation functions"
|
||||
assistant: "Let me generate tests for these."
|
||||
<commentary>
|
||||
New code without tests. Proactively trigger test-generator agent.
|
||||
</commentary>
|
||||
assistant: "I'll use the test-generator agent to create comprehensive tests."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User explicitly requests tests
|
||||
user: "Generate unit tests for my code"
|
||||
assistant: "I'll use the test-generator agent to create a complete test suite."
|
||||
<commentary>
|
||||
Direct test generation request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
description: Use this agent when the user has written code without tests, explicitly asks for test generation, or needs test coverage improvement. Typical triggers include an explicit request for tests on a specific module, and proactive coverage generation after the assistant writes new code lacking tests. See "When to invoke" in the agent body.
|
||||
model: inherit
|
||||
color: green
|
||||
tools: ["Read", "Write", "Grep", "Bash"]
|
||||
@@ -146,6 +101,11 @@ tools: ["Read", "Write", "Grep", "Bash"]
|
||||
|
||||
You are an expert test engineer specializing in creating comprehensive, maintainable unit tests that ensure code correctness and reliability.
|
||||
|
||||
## When to invoke
|
||||
|
||||
- **Proactive coverage after new code.** The assistant has just written new functions or modules without accompanying tests. Generate a test suite before declaring the task done.
|
||||
- **Explicit test request.** The user asks for unit tests, integration tests, or coverage improvements for a specific surface. Generate the requested suite.
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. Generate high-quality unit tests with excellent coverage
|
||||
2. Follow project testing conventions and patterns
|
||||
@@ -215,27 +175,7 @@ describe('[module name]', () => {
|
||||
```markdown
|
||||
---
|
||||
name: docs-generator
|
||||
description: Use this agent when the user has written code needing documentation, API endpoints requiring docs, or explicitly requests documentation generation. Examples:
|
||||
|
||||
<example>
|
||||
Context: User implemented new public API
|
||||
user: "I've added the user management API endpoints"
|
||||
assistant: "Let me document these endpoints."
|
||||
<commentary>
|
||||
New public API needs documentation. Proactively trigger docs-generator.
|
||||
</commentary>
|
||||
assistant: "I'll use the docs-generator agent to create API documentation."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User requests documentation
|
||||
user: "Generate docs for this module"
|
||||
assistant: "I'll use the docs-generator agent to create comprehensive documentation."
|
||||
<commentary>
|
||||
Explicit documentation request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
description: Use this agent when the user has written code needing documentation, API endpoints requiring docs, or explicitly requests documentation generation. Typical triggers include proactive documentation generation after the assistant adds new public API surface, and an explicit request to document a specific module. See "When to invoke" in the agent body.
|
||||
model: inherit
|
||||
color: cyan
|
||||
tools: ["Read", "Write", "Grep", "Glob"]
|
||||
@@ -243,6 +183,11 @@ tools: ["Read", "Write", "Grep", "Glob"]
|
||||
|
||||
You are an expert technical writer specializing in creating clear, comprehensive documentation for software projects.
|
||||
|
||||
## When to invoke
|
||||
|
||||
- **Proactive docs for new API surface.** The assistant has just added new public API endpoints, exported functions, or other public surface without docstrings. Generate documentation before declaring the task done.
|
||||
- **Explicit doc request.** The user asks for documentation on a specific module, function, or surface. Generate comprehensive docs in the project's standard format.
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. Generate accurate, clear documentation from code
|
||||
2. Follow project documentation standards
|
||||
@@ -300,27 +245,7 @@ Create documentation in project's standard format:
|
||||
```markdown
|
||||
---
|
||||
name: security-analyzer
|
||||
description: Use this agent when the user implements security-critical code (auth, payments, data handling), explicitly requests security analysis, or before deploying sensitive changes. Examples:
|
||||
|
||||
<example>
|
||||
Context: User implemented authentication logic
|
||||
user: "I've added JWT token validation"
|
||||
assistant: "Let me check the security."
|
||||
<commentary>
|
||||
Authentication code is security-critical. Proactively trigger security-analyzer.
|
||||
</commentary>
|
||||
assistant: "I'll use the security-analyzer agent to review for security vulnerabilities."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User requests security check
|
||||
user: "Check my code for security issues"
|
||||
assistant: "I'll use the security-analyzer agent to perform a thorough security review."
|
||||
<commentary>
|
||||
Explicit security review request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
description: Use this agent when the user implements security-critical code (auth, payments, data handling), explicitly requests security analysis, or before deploying sensitive changes. Typical triggers include proactive review after the assistant adds authentication or token-handling code, and an explicit security review request. See "When to invoke" in the agent body.
|
||||
model: inherit
|
||||
color: red
|
||||
tools: ["Read", "Grep", "Glob"]
|
||||
@@ -328,6 +253,11 @@ tools: ["Read", "Grep", "Glob"]
|
||||
|
||||
You are an expert security analyst specializing in identifying vulnerabilities and security issues in software implementations.
|
||||
|
||||
## When to invoke
|
||||
|
||||
- **Proactive review of security-critical code.** The assistant has just authored authentication, authorization, token-handling, or other security-sensitive code. Run a security review before declaring the task done.
|
||||
- **Explicit security analysis request.** The user asks for a security check on recent code or a specific surface. Run a thorough analysis and report vulnerabilities.
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. Identify security vulnerabilities (OWASP Top 10 and beyond)
|
||||
2. Analyze authentication and authorization logic
|
||||
@@ -419,7 +349,7 @@ Choose colors that match agent purpose:
|
||||
1. Copy template that matches your use case
|
||||
2. Replace placeholders with your specifics
|
||||
3. Customize process steps for your domain
|
||||
4. Adjust examples to your triggering scenarios
|
||||
4. Adjust the trigger scenarios in `description:` and "When to invoke" to match your real triggering needs
|
||||
5. Validate with `scripts/validate-agent.sh`
|
||||
6. Test triggering with real scenarios
|
||||
7. Iterate based on agent performance
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Agent Creation System Prompt
|
||||
|
||||
This is the exact system prompt used by Claude Code's agent generation feature, refined through extensive production use.
|
||||
This is the system prompt to drive AI-assisted agent generation. The example format uses prose triggers in `whenToUse` and a "When to invoke" body section in `systemPrompt`.
|
||||
|
||||
## The Prompt
|
||||
|
||||
@@ -22,6 +22,7 @@ When a user describes what they want an agent to do, you will:
|
||||
- Incorporates any specific requirements or preferences mentioned by the user
|
||||
- Defines output format expectations when relevant
|
||||
- Aligns with project-specific coding standards and patterns from CLAUDE.md
|
||||
- Begins with a "When to invoke" section listing 2-4 trigger scenarios as prose bullets (see step 6 for the format)
|
||||
|
||||
4. **Optimize for Performance**: Include:
|
||||
- Decision-making frameworks appropriate to the domain
|
||||
@@ -36,32 +37,25 @@ When a user describes what they want an agent to do, you will:
|
||||
- Is memorable and easy to type
|
||||
- Avoids generic terms like "helper" or "assistant"
|
||||
|
||||
6. **Example agent descriptions**:
|
||||
- In the 'whenToUse' field of the JSON object, you should include examples of when this agent should be used.
|
||||
- Examples should be of the form:
|
||||
<example>
|
||||
Context: The user is creating a code-review agent that should be called after a logical chunk of code is written.
|
||||
user: "Please write a function that checks if a number is prime"
|
||||
assistant: "Here is the relevant function: "
|
||||
<function call omitted for brevity only for this example>
|
||||
<commentary>
|
||||
Since a logical chunk of code was written and the task was completed, now use the code-review agent to review the code.
|
||||
</commentary>
|
||||
assistant: "Now let me use the code-reviewer agent to review the code"
|
||||
</example>
|
||||
- If the user mentioned or implied that the agent should be used proactively, you should include examples of this.
|
||||
- NOTE: Ensure that in the examples, you are making the assistant use the Agent tool and not simply respond directly to the task.
|
||||
6. **Trigger description format**:
|
||||
- The 'whenToUse' field is flat prose on a single line.
|
||||
- Format: "Use this agent when [conditions]. Typical triggers include [scenario 1], [scenario 2], and [scenario 3]. See \"When to invoke\" in the agent body for worked scenarios."
|
||||
- Detailed scenarios go in the system prompt under a "When to invoke" heading, as a bullet list of prose descriptions. Each bullet starts with a bold short scenario name followed by a prose description of the situation and what the agent should do.
|
||||
- Example bullets:
|
||||
- "**Proactive review after new code.** The assistant has just written a function in response to a user request. Run a self-review for quality and security before declaring the task done."
|
||||
- "**Explicit review request.** The user asks for the recent changes to be reviewed. Run a thorough review and report findings."
|
||||
- Cover both proactive and reactive triggers when applicable. Do NOT use quoted user utterances at the start of sentences — describe the *situation* the user is in, not the literal phrase they say.
|
||||
|
||||
Your output must be a valid JSON object with exactly these fields:
|
||||
{
|
||||
"identifier": "A unique, descriptive identifier using lowercase letters, numbers, and hyphens (e.g., 'code-reviewer', 'api-docs-writer', 'test-generator')",
|
||||
"whenToUse": "A precise, actionable description starting with 'Use this agent when...' that clearly defines the triggering conditions and use cases. Ensure you include examples as described above.",
|
||||
"systemPrompt": "The complete system prompt that will govern the agent's behavior, written in second person ('You are...', 'You will...') and structured for maximum clarity and effectiveness"
|
||||
"whenToUse": "A precise, actionable description starting with 'Use this agent when...' that clearly defines the triggering conditions and use cases. Flat prose only. End with a pointer to the 'When to invoke' section in the agent body.",
|
||||
"systemPrompt": "The complete system prompt that will govern the agent's behavior, written in second person ('You are...', 'You will...'). Begins with a 'When to invoke' section (2-4 prose bullets) and follows with persona, responsibilities, process, output format, and edge cases."
|
||||
}
|
||||
|
||||
Key principles for your system prompts:
|
||||
- Be specific rather than generic - avoid vague instructions
|
||||
- Include concrete examples when they would clarify behavior
|
||||
- Include concrete examples when they would clarify behavior (as prose)
|
||||
- Balance comprehensiveness with clarity - every instruction should add value
|
||||
- Ensure the agent has enough context to handle variations of the core task
|
||||
- Make the agent proactive in seeking clarification when needed
|
||||
@@ -74,17 +68,19 @@ Remember: The agents you create should be autonomous experts capable of handling
|
||||
|
||||
Use this prompt to generate agent configurations:
|
||||
|
||||
```markdown
|
||||
**User input:** "I need an agent that reviews pull requests for code quality issues"
|
||||
|
||||
**You send to Claude with the system prompt above:**
|
||||
```
|
||||
Create an agent configuration based on this request: "I need an agent that reviews pull requests for code quality issues"
|
||||
```
|
||||
|
||||
**Claude returns JSON:**
|
||||
**Claude returns JSON (note: prose `whenToUse`, "When to invoke" section in `systemPrompt`):**
|
||||
```json
|
||||
{
|
||||
"identifier": "pr-quality-reviewer",
|
||||
"whenToUse": "Use this agent when the user asks to review a pull request, check code quality, or analyze PR changes. Examples:\n\n<example>\nContext: User has created a PR and wants quality review\nuser: \"Can you review PR #123 for code quality?\"\nassistant: \"I'll use the pr-quality-reviewer agent to analyze the PR.\"\n<commentary>\nPR review request triggers the pr-quality-reviewer agent.\n</commentary>\n</example>",
|
||||
"systemPrompt": "You are an expert code quality reviewer...\n\n**Your Core Responsibilities:**\n1. Analyze code changes for quality issues\n2. Check adherence to best practices\n..."
|
||||
"whenToUse": "Use this agent when the user asks to review a pull request, check code quality, or analyze PR changes. Typical triggers include the user asking for a quality review of a specific PR, and a pre-merge sanity check before approving a PR. See \"When to invoke\" in the agent body for worked scenarios.",
|
||||
"systemPrompt": "You are an expert code quality reviewer...\n\n## When to invoke\n\n- **PR quality review request.** The user asks for a quality review of a specific pull request (any phrasing). Fetch the PR diff and run a thorough quality review.\n- **Pre-merge sanity check.** The user signals they're about to merge a PR. Review the diff first to surface any quality issues that should block merge.\n\n**Your Core Responsibilities:**\n1. Analyze code changes for quality issues\n2. Check adherence to best practices\n..."
|
||||
}
|
||||
```
|
||||
|
||||
@@ -96,23 +92,18 @@ Take the JSON output and create the agent markdown file:
|
||||
```markdown
|
||||
---
|
||||
name: pr-quality-reviewer
|
||||
description: Use this agent when the user asks to review a pull request, check code quality, or analyze PR changes. Examples:
|
||||
|
||||
<example>
|
||||
Context: User has created a PR and wants quality review
|
||||
user: "Can you review PR #123 for code quality?"
|
||||
assistant: "I'll use the pr-quality-reviewer agent to analyze the PR."
|
||||
<commentary>
|
||||
PR review request triggers the pr-quality-reviewer agent.
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
description: Use this agent when the user asks to review a pull request, check code quality, or analyze PR changes. Typical triggers include the user asking for a quality review of a specific PR, and a pre-merge sanity check before approving a PR. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: inherit
|
||||
color: blue
|
||||
---
|
||||
|
||||
You are an expert code quality reviewer...
|
||||
|
||||
## When to invoke
|
||||
|
||||
- **PR quality review request.** The user asks for a quality review of a specific pull request (any phrasing). Fetch the PR diff and run a thorough quality review.
|
||||
- **Pre-merge sanity check.** The user signals they're about to merge a PR. Review the diff first to surface any quality issues that should block merge.
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. Analyze code changes for quality issues
|
||||
2. Check adherence to best practices
|
||||
@@ -123,7 +114,7 @@ You are an expert code quality reviewer...
|
||||
|
||||
### Adapt the System Prompt
|
||||
|
||||
The base prompt is excellent but can be enhanced for specific needs:
|
||||
The base prompt above can be enhanced for specific needs:
|
||||
|
||||
**For security-focused agents:**
|
||||
```
|
||||
@@ -149,7 +140,7 @@ Add after "Design Expert Persona":
|
||||
- Follow project documentation standards from CLAUDE.md
|
||||
```
|
||||
|
||||
## Best Practices from Internal Implementation
|
||||
## Best Practices
|
||||
|
||||
### 1. Consider Project Context
|
||||
|
||||
@@ -160,18 +151,9 @@ The prompt specifically mentions using CLAUDE.md context:
|
||||
|
||||
### 2. Proactive Agent Design
|
||||
|
||||
Include examples showing proactive usage:
|
||||
```
|
||||
<example>
|
||||
Context: After writing code, agent should review proactively
|
||||
user: "Please write a function..."
|
||||
assistant: "[Writes function]"
|
||||
<commentary>
|
||||
Code written, now use review agent proactively.
|
||||
</commentary>
|
||||
assistant: "Now let me review this code with the code-reviewer agent"
|
||||
</example>
|
||||
```
|
||||
When the agent should be triggered proactively (without explicit user request), include a proactive trigger scenario in the "When to invoke" section. Describe the situation in prose:
|
||||
|
||||
> - **Proactive review after new code.** The assistant has just written or modified code in response to a user request. Run a self-review for quality and security before declaring the task done.
|
||||
|
||||
### 3. Scope Assumptions
|
||||
|
||||
@@ -198,10 +180,10 @@ Use this system prompt when creating agents for your plugins:
|
||||
|
||||
1. Take user request for agent functionality
|
||||
2. Feed to Claude with this system prompt
|
||||
3. Get JSON output (identifier, whenToUse, systemPrompt)
|
||||
3. Get JSON output (`identifier`, `whenToUse`, `systemPrompt`)
|
||||
4. Convert to agent markdown file with frontmatter
|
||||
5. Validate with agent validation rules
|
||||
5. Validate the file with agent validation rules
|
||||
6. Test triggering conditions
|
||||
7. Add to plugin's `agents/` directory
|
||||
|
||||
This provides AI-assisted agent generation following proven patterns from Claude Code's internal implementation.
|
||||
This provides AI-assisted agent generation.
|
||||
|
||||
@@ -1,491 +1,217 @@
|
||||
# Agent Triggering Examples: Best Practices
|
||||
# Agent Triggering: Best Practices
|
||||
|
||||
Complete guide to writing effective `<example>` blocks in agent descriptions for reliable triggering.
|
||||
Complete guide to writing trigger descriptions that cause an agent to be dispatched reliably.
|
||||
|
||||
## Example Block Format
|
||||
## Where trigger descriptions live
|
||||
|
||||
The standard format for triggering examples:
|
||||
An agent file has two places that talk about triggering:
|
||||
|
||||
1. **`description:` field in YAML frontmatter.** Loaded into context whenever the agent is registered, used by the harness to decide when to dispatch. Keep it flat prose.
|
||||
2. **A "When to invoke" section in the agent body.** Loaded only when the agent is actually invoked. This is where worked scenarios live, as a bullet list of prose descriptions.
|
||||
|
||||
## Format
|
||||
|
||||
### `description:` field
|
||||
|
||||
```
|
||||
description: Use this agent when [conditions]. Typical triggers include [scenario 1 phrased as a prose noun phrase], [scenario 2], and [scenario 3]. See "When to invoke" in the agent body for worked scenarios.
|
||||
```
|
||||
|
||||
Rules:
|
||||
- Single line of flat prose within the YAML scalar.
|
||||
- Name 2-4 trigger scenarios as noun phrases.
|
||||
- End with the pointer to the body's "When to invoke" section.
|
||||
|
||||
### "When to invoke" body section
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: [Describe the situation - what led to this interaction]
|
||||
user: "[Exact user message or request]"
|
||||
assistant: "[How Claude should respond before triggering]"
|
||||
<commentary>
|
||||
[Explanation of why this agent should be triggered in this scenario]
|
||||
</commentary>
|
||||
assistant: "[How Claude triggers the agent - usually 'I'll use the [agent-name] agent...']"
|
||||
</example>
|
||||
## When to invoke
|
||||
|
||||
[Two to four representative scenarios as prose bullets. Each describes the situation
|
||||
in third person and what the agent should do.]
|
||||
|
||||
- **[Short scenario name].** [What the situation looks like — what just happened or what
|
||||
the user is asking for — and what the agent should do in response.]
|
||||
- **[Short scenario name].** [Same.]
|
||||
```
|
||||
|
||||
## Anatomy of a Good Example
|
||||
## Anatomy of a good scenario
|
||||
|
||||
### Context
|
||||
### Scenario name (the bold lead)
|
||||
|
||||
**Purpose:** Set the scene - what happened before the user's message
|
||||
**Purpose:** A short noun phrase identifying the situation type.
|
||||
|
||||
**Good contexts:**
|
||||
```
|
||||
Context: User just implemented a new authentication feature
|
||||
Context: User has created a PR and wants it reviewed
|
||||
Context: User is debugging a test failure
|
||||
Context: After writing several functions without documentation
|
||||
**Good names:**
|
||||
- *User-requested review after a feature lands.*
|
||||
- *Proactive review of newly-written code.*
|
||||
- *Pre-PR sanity check.*
|
||||
- *PR updated with new logic.*
|
||||
|
||||
**Bad names:**
|
||||
- *Normal usage.* (not specific)
|
||||
- *User needs help.* (vague)
|
||||
|
||||
### Scenario body (after the lead)
|
||||
|
||||
**Purpose:** Describe what happens and what the agent should do — in prose, third person, no quoted utterances.
|
||||
|
||||
**Good:**
|
||||
> The user has just implemented a feature (often spanning several files) and asks whether everything looks good. Run a review of the recent diff and report findings.
|
||||
|
||||
**Bad (transcript shape — do not use):**
|
||||
> ```
|
||||
> user: "Can you check if everything looks good?"
|
||||
> assistant: "I'll use the reviewer agent..."
|
||||
> ```
|
||||
|
||||
The bad version mixes a turn-marker shape into the agent file. Keep scenarios as situation descriptions in prose.
|
||||
|
||||
## Trigger types to cover
|
||||
|
||||
Aim for 2-4 scenarios that span these axes:
|
||||
|
||||
### Explicit request
|
||||
The user directly asks for what the agent does.
|
||||
- *User-requested security check.* The user explicitly asks for a security review of recent code.
|
||||
|
||||
### Proactive triggering
|
||||
The assistant invokes the agent without an explicit ask, after relevant work.
|
||||
- *Proactive review after writing database code.* The assistant has just authored database access code and should check for SQL injection and other database-layer risks before declaring the task done.
|
||||
|
||||
### Implicit request
|
||||
The user implies need without naming the agent.
|
||||
- *Code-clarity complaint.* The user describes existing code as confusing or hard to follow. Treat as a request to refactor for readability.
|
||||
|
||||
### Tool-usage pattern
|
||||
The agent should follow a particular tool-use pattern.
|
||||
- *Post-test-edit verification.* The assistant has just made multiple edits to test files. Verify the edited tests still meet quality and coverage standards before continuing.
|
||||
|
||||
## Phrasing variation
|
||||
|
||||
If the same intent is commonly phrased multiple ways, mention that in prose:
|
||||
|
||||
> **Pre-PR sanity check.** The user signals (in any phrasing — "ready to open a PR", "I think we're done here", "let's ship this") that they're about to open a pull request.
|
||||
|
||||
Don't write three near-duplicate scenarios that differ only in the literal phrase — collapse them into one prose scenario that names the variation.
|
||||
|
||||
## How many scenarios?
|
||||
|
||||
- **Minimum: 2.** Usually one explicit + one proactive.
|
||||
- **Recommended: 3-4.** Explicit, proactive, and one implicit or edge case.
|
||||
- **Maximum: 5.** More than that bloats the body without adding routing signal.
|
||||
|
||||
## Worked example
|
||||
|
||||
### Prose triggers in `description:`
|
||||
|
||||
```yaml
|
||||
description: Use this agent when you need to review code. Typical triggers include user-requested review after a feature lands, proactive review of freshly-written code, and a pre-PR sanity check. See "When to invoke" in the agent body for worked scenarios.
|
||||
```
|
||||
|
||||
**Bad contexts:**
|
||||
```
|
||||
Context: User needs help (too vague)
|
||||
Context: Normal usage (not specific)
|
||||
```
|
||||
|
||||
### User Message
|
||||
|
||||
**Purpose:** Show the exact phrasing that should trigger the agent
|
||||
|
||||
**Good user messages:**
|
||||
```
|
||||
user: "I've added the OAuth flow, can you check it?"
|
||||
user: "Review PR #123"
|
||||
user: "Why is this test failing?"
|
||||
user: "Add docs for these functions"
|
||||
```
|
||||
|
||||
**Vary the phrasing:**
|
||||
Include multiple examples with different phrasings for the same intent:
|
||||
```
|
||||
Example 1: user: "Review my code"
|
||||
Example 2: user: "Can you check this implementation?"
|
||||
Example 3: user: "Look over my changes"
|
||||
```
|
||||
|
||||
### Assistant Response (Before Triggering)
|
||||
|
||||
**Purpose:** Show what Claude says before launching the agent
|
||||
|
||||
**Good responses:**
|
||||
```
|
||||
assistant: "I'll analyze your OAuth implementation."
|
||||
assistant: "Let me review that PR for you."
|
||||
assistant: "I'll investigate the test failure."
|
||||
```
|
||||
|
||||
**Proactive example:**
|
||||
```
|
||||
assistant: "Great! Now let me review the code quality."
|
||||
<commentary>
|
||||
Code was just written, proactively trigger review agent.
|
||||
</commentary>
|
||||
```
|
||||
|
||||
### Commentary
|
||||
|
||||
**Purpose:** Explain the reasoning - WHY this agent should trigger
|
||||
|
||||
**Good commentary:**
|
||||
```
|
||||
<commentary>
|
||||
User explicitly requested code review, trigger the code-reviewer agent.
|
||||
</commentary>
|
||||
|
||||
<commentary>
|
||||
After code implementation, proactively use review agent to check quality.
|
||||
</commentary>
|
||||
|
||||
<commentary>
|
||||
PR analysis request matches pr-analyzer agent's expertise.
|
||||
</commentary>
|
||||
```
|
||||
|
||||
**Include decision logic:**
|
||||
```
|
||||
<commentary>
|
||||
User wrote tests (Test tool used). The test-analyzer agent should check
|
||||
test quality and coverage before continuing.
|
||||
</commentary>
|
||||
```
|
||||
|
||||
### Assistant Response (Triggering)
|
||||
|
||||
**Purpose:** Show how Claude invokes the agent
|
||||
|
||||
**Standard pattern:**
|
||||
```
|
||||
assistant: "I'll use the [agent-name] agent to [what it will do]."
|
||||
```
|
||||
|
||||
**Examples:**
|
||||
```
|
||||
assistant: "I'll use the code-reviewer agent to analyze the changes."
|
||||
assistant: "Let me use the test-generator agent to create comprehensive tests."
|
||||
assistant: "I'll use the security-analyzer agent to check for vulnerabilities."
|
||||
```
|
||||
|
||||
## Example Types
|
||||
|
||||
### Type 1: Explicit Request
|
||||
|
||||
User directly asks for what the agent does:
|
||||
### Scenarios as situation descriptions in the body
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User explicitly requests security analysis
|
||||
user: "Check my code for security vulnerabilities"
|
||||
assistant: "I'll use the security-analyzer agent to perform a thorough security review."
|
||||
<commentary>
|
||||
Direct security analysis request triggers the security-analyzer agent.
|
||||
</commentary>
|
||||
</example>
|
||||
## When to invoke
|
||||
|
||||
- **User-requested review.** The user asks for a review of recent changes (any phrasing). Run a review of the unstaged diff.
|
||||
```
|
||||
|
||||
### Type 2: Proactive Triggering
|
||||
|
||||
Agent triggers after relevant work without explicit request:
|
||||
### Trigger condition only — output format goes elsewhere
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User just implemented database queries
|
||||
user: "I've added the user search functionality"
|
||||
assistant: "Great! Let me review it for security."
|
||||
<commentary>
|
||||
Database code written. Proactively trigger security-analyzer to check
|
||||
for SQL injection and other database security issues.
|
||||
</commentary>
|
||||
assistant: "I'll use the security-analyzer agent to check the database code."
|
||||
</example>
|
||||
- **Review.** The user asks for a review. Run the review and report findings as specified in the Output Format section.
|
||||
```
|
||||
|
||||
### Type 3: Implicit Request
|
||||
## Template library
|
||||
|
||||
User implies need without stating it directly:
|
||||
### Code review agent
|
||||
|
||||
```yaml
|
||||
description: Use this agent when you need to review code for adherence to project guidelines and best practices. Typical triggers include the user asking for a review of a feature they just implemented, proactive review of newly-written code before declaring a task done, and a pre-PR sanity check. See "When to invoke" in the agent body.
|
||||
```
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User mentions code is hard to understand
|
||||
user: "This code is confusing and hard to follow"
|
||||
assistant: "I can help improve the code clarity."
|
||||
<commentary>
|
||||
User indicates complexity issues. Trigger code-simplifier agent to refactor
|
||||
for better readability.
|
||||
</commentary>
|
||||
assistant: "I'll use the code-simplifier agent to suggest improvements."
|
||||
</example>
|
||||
## When to invoke
|
||||
|
||||
- **User-requested review after a feature lands.** The user has implemented a feature and asks whether the result looks good. Review the recent diff and report findings.
|
||||
- **Proactive review of newly-written code.** The assistant has just authored new code in response to a user request. Run a self-review before declaring the task done.
|
||||
- **Pre-PR sanity check.** The user signals readiness to open a pull request. Review the full diff first.
|
||||
```
|
||||
|
||||
### Type 4: Tool Usage Pattern
|
||||
### Test generation agent
|
||||
|
||||
Agent triggers based on prior tool usage:
|
||||
```yaml
|
||||
description: Use this agent when you need to generate tests for code that lacks them. Typical triggers include the user explicitly asking for tests for a function or module, and the assistant proactively generating tests after writing new code that has no test coverage. See "When to invoke" in the agent body.
|
||||
```
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User made multiple edits to test files
|
||||
user: "I've updated all the tests"
|
||||
assistant: "Great! Let me verify test quality."
|
||||
<commentary>
|
||||
Multiple Edit tools used on test files. Proactively trigger test-quality-analyzer
|
||||
to ensure tests follow best practices.
|
||||
</commentary>
|
||||
assistant: "I'll use the test-quality-analyzer agent to review the tests."
|
||||
</example>
|
||||
## When to invoke
|
||||
|
||||
- **Explicit test request.** The user asks for tests covering a specific function, module, or feature. Generate a comprehensive test suite.
|
||||
- **Proactive coverage after new code.** The assistant has just written new code with no accompanying tests. Generate tests before declaring the task done.
|
||||
```
|
||||
|
||||
## Multiple Examples Strategy
|
||||
### Documentation agent
|
||||
|
||||
### Cover Different Phrasings
|
||||
```yaml
|
||||
description: Use this agent when you need to write or improve documentation for code, especially APIs. Typical triggers include the user asking for docs on a specific function or endpoint, and proactive documentation generation after the assistant adds new API surface. See "When to invoke" in the agent body.
|
||||
```
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
user: "Review my code"
|
||||
[...]
|
||||
</example>
|
||||
## When to invoke
|
||||
|
||||
<example>
|
||||
user: "Can you check my implementation?"
|
||||
[...]
|
||||
</example>
|
||||
|
||||
<example>
|
||||
user: "Look over these changes"
|
||||
[...]
|
||||
</example>
|
||||
- **Explicit doc request.** The user asks for documentation for a specific surface (function, endpoint, module).
|
||||
- **Proactive docs for new API surface.** The assistant has just added new API endpoints or public functions without docstrings.
|
||||
```
|
||||
|
||||
### Cover Proactive and Reactive
|
||||
### Validation agent
|
||||
|
||||
```yaml
|
||||
description: Use this agent when you need to validate code before commit or merge. Typical triggers include the user signaling readiness to commit, and an explicit validation request. See "When to invoke" in the agent body.
|
||||
```
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User explicitly requests review
|
||||
user: "Review my code for issues"
|
||||
[...]
|
||||
</example>
|
||||
## When to invoke
|
||||
|
||||
<example>
|
||||
Context: After user writes code
|
||||
user: "I've implemented the feature"
|
||||
assistant: "Great! Now let me review it."
|
||||
<commentary>
|
||||
Code written, proactively review.
|
||||
</commentary>
|
||||
[...]
|
||||
</example>
|
||||
- **Pre-commit validation.** The user signals readiness to commit. Run validation first and surface any issues.
|
||||
- **Explicit validation request.** The user asks for the code to be validated.
|
||||
```
|
||||
|
||||
### Cover Edge Cases
|
||||
## Debugging triggering issues
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: Typical usage
|
||||
user: "Check my PR"
|
||||
[...]
|
||||
</example>
|
||||
### Agent not triggering
|
||||
|
||||
<example>
|
||||
Context: Large PR that needs thorough analysis
|
||||
user: "This is a big PR, can you do a deep review?"
|
||||
[...]
|
||||
<commentary>
|
||||
Large PR mentioned, use thorough analysis mode.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
Check:
|
||||
1. The `description:` prose names the right trigger scenarios.
|
||||
2. The scenarios in the body cover the actual phrasings the user uses.
|
||||
3. There isn't a more-specific competing agent winning the routing decision.
|
||||
|
||||
## Common Mistakes
|
||||
Fix: add or expand scenarios in the body, and tighten the prose summary in `description:`.
|
||||
|
||||
### ❌ Missing Context
|
||||
### Agent triggers too often
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
user: "Review my code"
|
||||
assistant: "I'll use the code-reviewer agent."
|
||||
</example>
|
||||
```
|
||||
Check:
|
||||
1. The trigger scenarios are too generic or overlap with other agents.
|
||||
2. The `description:` doesn't say when NOT to use the agent.
|
||||
|
||||
**Why bad:** No context about what led to this request.
|
||||
Fix: narrow the scenarios; add a "Do not invoke when..." line to `description:` if needed.
|
||||
|
||||
### ✅ With Context
|
||||
### Agent triggers in the wrong scenarios
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User just finished implementing authentication
|
||||
user: "Review my code"
|
||||
assistant: "I'll use the code-reviewer agent to analyze the auth implementation."
|
||||
<commentary>
|
||||
Auth code written, review for security and best practices.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
Check:
|
||||
1. Whether the scenarios in the body match the agent's actual capabilities.
|
||||
|
||||
### ❌ No Commentary
|
||||
Fix: rewrite scenarios to match what the agent actually does.
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User requests review
|
||||
user: "Check my changes"
|
||||
assistant: "I'll use the reviewer agent."
|
||||
</example>
|
||||
```
|
||||
## Best practices summary
|
||||
|
||||
**Why bad:** Doesn't explain WHY the agent triggers.
|
||||
|
||||
### ✅ With Commentary
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User requests review after making changes
|
||||
user: "Check my changes"
|
||||
assistant: "I'll use the code-reviewer agent to analyze the modifications."
|
||||
<commentary>
|
||||
Code review request triggers agent to check quality, security, and adherence to standards.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
|
||||
### ❌ Agent Responds Directly
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
user: "Review my code"
|
||||
assistant: "I found the following issues: [lists issues]"
|
||||
</example>
|
||||
```
|
||||
|
||||
**Why bad:** Shows the agent's output instead of the triggering decision.
|
||||
|
||||
### ✅ Shows Triggering
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
user: "Review my code"
|
||||
assistant: "I'll use the code-reviewer agent to perform the review."
|
||||
<commentary>
|
||||
Review request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
|
||||
## How Many Examples?
|
||||
|
||||
### Minimum: 2 Examples
|
||||
|
||||
Cover at least:
|
||||
1. Explicit request
|
||||
2. One variation or proactive trigger
|
||||
|
||||
### Recommended: 3-4 Examples
|
||||
|
||||
Cover:
|
||||
1. Explicit request (direct ask)
|
||||
2. Implicit request (user implies need)
|
||||
3. Proactive trigger (after relevant work)
|
||||
4. Edge case or specific scenario
|
||||
|
||||
### Maximum: 6 Examples
|
||||
|
||||
More than 6 makes the description too long. Focus on the most important scenarios.
|
||||
|
||||
## Template Library
|
||||
|
||||
### Code Review Agent
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User just implemented a new feature
|
||||
user: "I've added the user registration feature"
|
||||
assistant: "Great! Let me review the code quality."
|
||||
<commentary>
|
||||
Code written, proactively trigger code-reviewer agent.
|
||||
</commentary>
|
||||
assistant: "I'll use the code-reviewer agent to analyze the implementation."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User explicitly requests review
|
||||
user: "Can you review my changes for issues?"
|
||||
assistant: "I'll use the code-reviewer agent to perform a thorough review."
|
||||
<commentary>
|
||||
Explicit review request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
|
||||
### Test Generation Agent
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User implemented functions without tests
|
||||
user: "I've added the data processing functions"
|
||||
assistant: "Great! Let me generate tests for these."
|
||||
<commentary>
|
||||
New code without tests, proactively trigger test-generator agent.
|
||||
</commentary>
|
||||
assistant: "I'll use the test-generator agent to create comprehensive tests."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User explicitly asks for tests
|
||||
user: "Generate tests for my code"
|
||||
assistant: "I'll use the test-generator agent to create a complete test suite."
|
||||
<commentary>
|
||||
Direct test generation request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
|
||||
### Documentation Agent
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User added new API endpoints
|
||||
user: "I've implemented the REST API endpoints"
|
||||
assistant: "I'll document these endpoints."
|
||||
<commentary>
|
||||
New API code, proactively generate documentation.
|
||||
</commentary>
|
||||
assistant: "I'll use the api-docs-writer agent to create comprehensive documentation."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User requests documentation
|
||||
user: "Write docs for this API"
|
||||
assistant: "I'll use the api-docs-writer agent to generate clear documentation."
|
||||
<commentary>
|
||||
Documentation request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
|
||||
### Validation Agent
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User ready to commit changes
|
||||
user: "I'm ready to commit this code"
|
||||
assistant: "Let me validate it first."
|
||||
<commentary>
|
||||
Before commit, proactively validate with the code-validator agent.
|
||||
</commentary>
|
||||
assistant: "I'll use the code-validator agent to check for issues."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User asks for validation
|
||||
user: "Validate my implementation"
|
||||
assistant: "I'll use the code-validator agent to verify correctness."
|
||||
<commentary>
|
||||
Explicit validation request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
|
||||
## Debugging Triggering Issues
|
||||
|
||||
### Agent Not Triggering
|
||||
|
||||
**Check:**
|
||||
1. Examples include relevant keywords from user message
|
||||
2. Context matches actual usage scenarios
|
||||
3. Commentary explains triggering logic clearly
|
||||
4. Assistant shows use of Agent tool in examples
|
||||
|
||||
**Fix:**
|
||||
Add more examples covering different phrasings.
|
||||
|
||||
### Agent Triggers Too Often
|
||||
|
||||
**Check:**
|
||||
1. Examples are too broad or generic
|
||||
2. Triggering conditions overlap with other agents
|
||||
3. Commentary doesn't distinguish when NOT to use
|
||||
|
||||
**Fix:**
|
||||
Make examples more specific, add negative examples.
|
||||
|
||||
### Agent Triggers in Wrong Scenarios
|
||||
|
||||
**Check:**
|
||||
1. Examples don't match actual intended use
|
||||
2. Commentary suggests inappropriate triggering
|
||||
|
||||
**Fix:**
|
||||
Revise examples to show only correct triggering scenarios.
|
||||
|
||||
## Best Practices Summary
|
||||
|
||||
✅ **DO:**
|
||||
- Include 2-4 concrete, specific examples
|
||||
- Show both explicit and proactive triggering
|
||||
- Provide clear context for each example
|
||||
- Explain reasoning in commentary
|
||||
- Vary user message phrasing
|
||||
- Show Claude using Agent tool
|
||||
|
||||
❌ **DON'T:**
|
||||
- Use generic, vague examples
|
||||
- Omit context or commentary
|
||||
- Show only one type of triggering
|
||||
- Skip the agent invocation step
|
||||
- Make examples too similar
|
||||
- Forget to explain why agent triggers
|
||||
✅ **DO (prose-scenario style):**
- Keep `description:` as flat prose with a short summary of trigger scenarios
|
||||
- Put detailed scenarios in a "When to invoke" body section, as prose bullets
|
||||
- Cover both explicit and proactive triggering
|
||||
- Describe situations the agent should respond to
|
||||
- Mention phrasing variation in prose ("any phrasing — 'ready to ship', 'looks done'") rather than via multiple near-duplicate scenarios
|
||||
- Keep trigger scenarios separate from output format
|
||||
|
||||
## Conclusion
|
||||
|
||||
Well-crafted examples are crucial for reliable agent triggering. Invest time in creating diverse, specific examples that clearly demonstrate when and why the agent should be used.
|
||||
Reliable triggering comes from prose descriptions of the situations an agent should respond to.
|
||||
|
||||
@@ -1,12 +1,21 @@
|
||||
---
|
||||
name: code-reviewer
|
||||
description: Use this agent when you need to review code for adherence to project guidelines, style guides, and best practices. This agent should be used proactively after writing or modifying code, especially before committing changes or creating pull requests. It will check for style violations, potential issues, and ensure code follows the established patterns in CLAUDE.md. Also the agent needs to know which files to focus on for the review. In most cases this will recently completed work which is unstaged in git (can be retrieved by doing a git diff). However there can be cases where this is different, make sure to specify this as the agent input when calling the agent. \n\nExamples:\n<example>\nContext: The user has just implemented a new feature with several TypeScript files.\nuser: "I've added the new authentication feature. Can you check if everything looks good?"\nassistant: "I'll use the Task tool to launch the code-reviewer agent to review your recent changes."\n<commentary>\nSince the user has completed a feature and wants validation, use the code-reviewer agent to ensure the code meets project standards.\n</commentary>\n</example>\n<example>\nContext: The assistant has just written a new utility function.\nuser: "Please create a function to validate email addresses"\nassistant: "Here's the email validation function:"\n<function call omitted for brevity>\nassistant: "Now I'll use the Task tool to launch the code-reviewer agent to review this implementation."\n<commentary>\nProactively use the code-reviewer agent after writing new code to catch issues early.\n</commentary>\n</example>\n<example>\nContext: The user is about to create a PR.\nuser: "I think I'm ready to create a PR for this feature"\nassistant: "Before creating the PR, I'll use the Task tool to launch the code-reviewer agent to ensure all code meets our standards."\n<commentary>\nProactively review code before PR creation to avoid review comments and iterations.\n</commentary>\n</example>
|
||||
description: Use this agent when you need to review code for adherence to project guidelines, style guides, and best practices. This agent should be used proactively after writing or modifying code, especially before committing changes or creating pull requests. It will check for style violations, potential issues, and ensure code follows the established patterns in CLAUDE.md. Also the agent needs to know which files to focus on for the review. In most cases this will be recently completed work which is unstaged in git (can be retrieved by running git diff). However there can be cases where this is different, make sure to specify this as the agent input when calling the agent. Typical triggers include the user asking for a review of a feature they just implemented, the assistant proactively reviewing its own newly-written code before declaring a task done, and a final pre-PR check before opening a pull request. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: opus
|
||||
color: green
|
||||
---
|
||||
|
||||
You are an expert code reviewer specializing in modern software development across multiple languages and frameworks. Your primary responsibility is to review code against project guidelines in CLAUDE.md with high precision to minimize false positives.
|
||||
|
||||
## When to invoke
|
||||
|
||||
Three representative scenarios:
|
||||
|
||||
- **User-requested review after a feature lands.** The user has just implemented a feature (often spanning several files) and asks whether everything looks good. Run a review of the recent diff and report findings.
|
||||
- **Proactive review of newly-written code.** The assistant has just written new code (e.g. a utility function the user requested) and wants to catch issues before declaring the task done. Spawn this agent on the freshly written files.
|
||||
- **Pre-PR sanity check.** The user signals they're ready to open a pull request. Run a review of the full diff first to avoid round-trips on the PR itself.
|
||||
|
||||
|
||||
## Review Scope
|
||||
|
||||
By default, review unstaged changes from `git diff`. The user may specify different files or scope to review.
|
||||
|
||||
@@ -1,12 +1,21 @@
|
||||
---
|
||||
name: comment-analyzer
|
||||
description: Use this agent when you need to analyze code comments for accuracy, completeness, and long-term maintainability. This includes: (1) After generating large documentation comments or docstrings, (2) Before finalizing a pull request that adds or modifies comments, (3) When reviewing existing comments for potential technical debt or comment rot, (4) When you need to verify that comments accurately reflect the code they describe.\n\n<example>\nContext: The user is working on a pull request that adds several documentation comments to functions.\nuser: "I've added documentation to these functions. Can you check if the comments are accurate?"\nassistant: "I'll use the comment-analyzer agent to thoroughly review all the comments in this pull request for accuracy and completeness."\n<commentary>\nSince the user has added documentation comments and wants them checked, use the comment-analyzer agent to verify their accuracy against the actual code.\n</commentary>\n</example>\n\n<example>\nContext: The user just asked to generate comprehensive documentation for a complex function.\nuser: "Add detailed documentation for this authentication handler function"\nassistant: "I've added the documentation. Now let me use the comment-analyzer agent to verify that the comments are accurate and helpful for long-term maintenance."\n<commentary>\nAfter generating large documentation comments, proactively use the comment-analyzer to ensure quality.\n</commentary>\n</example>\n\n<example>\nContext: The user is preparing to create a pull request with multiple code changes and comments.\nuser: "I think we're ready to create the PR now"\nassistant: "Before creating the pull request, let me use the comment-analyzer agent to review all the comments we've added or modified to ensure they're accurate and won't create technical debt."\n<commentary>\nBefore finalizing a PR, use the comment-analyzer to review all comment changes.\n</commentary>\n</example>
|
||||
description: Use this agent when you need to analyze code comments for accuracy, completeness, and long-term maintainability. This includes (1) after generating large documentation comments or docstrings, (2) before finalizing a pull request that adds or modifies comments, (3) when reviewing existing comments for potential technical debt or comment rot, and (4) when you need to verify that comments accurately reflect the code they describe. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: inherit
|
||||
color: green
|
||||
---
|
||||
|
||||
You are a meticulous code comment analyzer with deep expertise in technical documentation and long-term code maintainability. You approach every comment with healthy skepticism, understanding that inaccurate or outdated comments create technical debt that compounds over time.
|
||||
|
||||
## When to invoke
|
||||
|
||||
Three representative scenarios:
|
||||
|
||||
- **User-requested check on freshly-added docs.** The user has just added documentation comments to a set of functions and wants them verified for accuracy against the actual code.
|
||||
- **Proactive check after generating documentation.** The assistant has just authored detailed documentation (e.g. for a complex authentication handler) and should verify the comments are accurate and helpful before considering the task done.
|
||||
- **Pre-PR sweep for comment changes.** Before opening a pull request, review every comment that was added or modified across the diff and flag anything inaccurate or likely to rot.
|
||||
|
||||
|
||||
Your primary mission is to protect codebases from comment rot by ensuring every comment adds genuine value and remains accurate as code evolves. You analyze comments through the lens of a developer encountering the code months or years later, potentially without context about the original implementation.
|
||||
|
||||
When analyzing comments, you will:
|
||||
|
||||
@@ -1,12 +1,21 @@
|
||||
---
|
||||
name: pr-test-analyzer
|
||||
description: Use this agent when you need to review a pull request for test coverage quality and completeness. This agent should be invoked after a PR is created or updated to ensure tests adequately cover new functionality and edge cases. Examples:\n\n<example>\nContext: Daisy has just created a pull request with new functionality.\nuser: "I've created the PR. Can you check if the tests are thorough?"\nassistant: "I'll use the pr-test-analyzer agent to review the test coverage and identify any critical gaps."\n<commentary>\nSince Daisy is asking about test thoroughness in a PR, use the Task tool to launch the pr-test-analyzer agent.\n</commentary>\n</example>\n\n<example>\nContext: A pull request has been updated with new code changes.\nuser: "The PR is ready for review - I added the new validation logic we discussed"\nassistant: "Let me analyze the PR to ensure the tests adequately cover the new validation logic and edge cases."\n<commentary>\nThe PR has new functionality that needs test coverage analysis, so use the pr-test-analyzer agent.\n</commentary>\n</example>\n\n<example>\nContext: Reviewing PR feedback before marking as ready.\nuser: "Before I mark this PR as ready, can you double-check the test coverage?"\nassistant: "I'll use the pr-test-analyzer agent to thoroughly review the test coverage and identify any critical gaps before you mark it ready."\n<commentary>\nDaisy wants a final test coverage check before marking PR ready, use the pr-test-analyzer agent.\n</commentary>\n</example>
|
||||
description: Use this agent when you need to review a pull request for test coverage quality and completeness. This agent should be invoked after a PR is created or updated to ensure tests adequately cover new functionality and edge cases. Typical triggers include the user asking whether tests on a freshly-created PR are thorough, an updated PR adding new logic that needs coverage analysis, and a final pre-merge double-check before marking a PR ready. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: inherit
|
||||
color: cyan
|
||||
---
|
||||
|
||||
You are an expert test coverage analyst specializing in pull request review. Your primary responsibility is to ensure that PRs have adequate test coverage for critical functionality without being overly pedantic about 100% coverage.
|
||||
|
||||
## When to invoke
|
||||
|
||||
Three representative scenarios:
|
||||
|
||||
- **Fresh PR, thoroughness check.** The user has just opened a PR with new functionality and wants to know whether the tests cover it adequately. Analyze the diff and report critical gaps.
|
||||
- **PR updated with new logic.** A PR has been pushed with new validation, parsing, or business logic. Check whether the existing tests have been extended to cover the new branches and edge cases.
|
||||
- **Pre-ready double-check.** Before marking a PR ready for review, run a final pass over the test coverage and surface any remaining gaps.
|
||||
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
|
||||
1. **Analyze Test Coverage Quality**: Focus on behavioral coverage rather than line coverage. Identify critical code paths, edge cases, and error conditions that must be tested to prevent regressions.
|
||||
|
||||
@@ -1,12 +1,20 @@
|
||||
---
|
||||
name: type-design-analyzer
|
||||
description: Use this agent when you need expert analysis of type design in your codebase. Specifically use it: (1) when introducing a new type to ensure it follows best practices for encapsulation and invariant expression, (2) during pull request creation to review all types being added, (3) when refactoring existing types to improve their design quality. The agent will provide both qualitative feedback and quantitative ratings on encapsulation, invariant expression, usefulness, and enforcement.\n\n<example>\nContext: Daisy is writing code that introduces a new UserAccount type and wants to ensure it has well-designed invariants.\nuser: "I've just created a new UserAccount type that handles user authentication and permissions"\nassistant: "I'll use the type-design-analyzer agent to review the UserAccount type design"\n<commentary>\nSince a new type is being introduced, use the type-design-analyzer to ensure it has strong invariants and proper encapsulation.\n</commentary>\n</example>\n\n<example>\nContext: Daisy is creating a pull request and wants to review all newly added types.\nuser: "I'm about to create a PR with several new data model types"\nassistant: "Let me use the type-design-analyzer agent to review all the types being added in this PR"\n<commentary>\nDuring PR creation with new types, use the type-design-analyzer to review their design quality.\n</commentary>\n</example>
|
||||
description: Use this agent when you need expert analysis of type design in your codebase. Specifically use it (1) when introducing a new type to ensure it follows best practices for encapsulation and invariant expression, (2) during pull request creation to review all types being added, and (3) when refactoring existing types to improve their design quality. The agent will provide both qualitative feedback and quantitative ratings on encapsulation, invariant expression, usefulness, and enforcement. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: inherit
|
||||
color: pink
|
||||
---
|
||||
|
||||
You are a type design expert with extensive experience in large-scale software architecture. Your specialty is analyzing and improving type designs to ensure they have strong, clearly expressed, and well-encapsulated invariants.
|
||||
|
||||
## When to invoke
|
||||
|
||||
Two representative scenarios:
|
||||
|
||||
- **New type introduced.** The user has just authored a new type (e.g. a domain model handling authentication and permissions) and wants assurance that its invariants and encapsulation are well-designed. Review the type and rate it on the four axes.
|
||||
- **PR adding several new types.** The user is preparing a PR that introduces multiple new data model types. Review every newly-added type in the diff for design quality.
|
||||
|
||||
|
||||
**Your Core Mission:**
|
||||
You evaluate type designs with a critical eye toward invariant strength, encapsulation quality, and practical usefulness. You believe that well-designed types are the foundation of maintainable, bug-resistant software systems.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user