Compare commits

..

1 Commits

Author SHA1 Message Date
Bryan Thompson
32a559ba29 Add 42crunch-api-security-testing plugin 2026-04-24 12:07:55 -05:00
33 changed files with 687 additions and 1797 deletions

View File

@@ -33,21 +33,6 @@
},
"homepage": "https://github.com/SalesforceAIResearch/agentforce-adlc"
},
{
"name": "adobe-for-creativity",
"description": "Harness Adobe's creative AI-powered tools to edit images, automate design workflows, and bring creative visions to life — from background removal to vectorization and professional retouching.",
"author": {
"name": "Adobe"
},
"category": "design",
"source": {
"source": "git-subdir",
"url": "https://github.com/adobe/skills.git",
"path": "plugins/creative-cloud/adobe-for-creativity",
"ref": "main"
},
"homepage": "https://github.com/adobe/skills/tree/main/plugins/creative-cloud/adobe-for-creativity"
},
{
"name": "adspirer-ads-agent",
"description": "Cross-platform ad management for Google Ads, Meta Ads, TikTok Ads, and LinkedIn Ads. 91 tools for keyword research, campaign creation, performance analysis, and budget optimization.",
@@ -102,20 +87,6 @@
},
"homepage": "https://github.com/AikidoSec/aikido-claude-plugin"
},
{
"name": "aiven",
"description": "Easily deploy managed PostgreSQL, Kafka, OpenSearch, Clickhouse, and other databases, streaming, and apps through Aiven. Free tier available, up and running in minutes.",
"author": {
"name": "Aiven"
},
"category": "database",
"source": {
"source": "github",
"repo": "aiven/aiven-ai-plugins",
"commit": "d2a7697b53826588d0faf795f39d2aa2362330da"
},
"homepage": "https://aiven.io"
},
{
"name": "alloydb",
"description": "Create, connect, and interact with an AlloyDB for PostgreSQL database and data.",
@@ -268,22 +239,6 @@
},
"homepage": "https://github.com/awslabs/agent-plugins"
},
{
"name": "aws-dev-toolkit",
"description": "AWS development toolkit — 34 skills, 11 agents, and 3 MCP servers for building, migrating, and performing architecture reviews on AWS.",
"author": {
"name": "aws-samples"
},
"category": "development",
"source": {
"source": "git-subdir",
"url": "https://github.com/aws-samples/sample-claude-code-plugins-for-startups.git",
"path": "plugins/aws-dev-toolkit",
"ref": "main",
"sha": "ddea7fdd605b42ed3900374815f358a2d4600db5"
},
"homepage": "https://github.com/aws-samples/sample-claude-code-plugins-for-startups"
},
{
"name": "aws-serverless",
"description": "Design, build, deploy, test, and debug serverless applications with AWS Serverless services.",
@@ -496,17 +451,6 @@
},
"homepage": "https://github.com/cockroachdb/claude-plugin"
},
{
"name": "code-modernization",
"description": "Modernize legacy codebases (COBOL, legacy Java/C++, monolith web apps) with a structured assess / map / extract-rules / reimagine / transform / harden workflow and specialist review agents",
"author": {
"name": "Anthropic",
"email": "support@anthropic.com"
},
"source": "./plugins/code-modernization",
"category": "development",
"homepage": "https://github.com/anthropics/claude-plugins-official/tree/main/plugins/code-modernization"
},
{
"name": "code-review",
"description": "Automated code review for pull requests using multiple specialized agents with confidence-based scoring to filter false positives",
@@ -560,19 +504,6 @@
"community-managed"
]
},
{
"name": "crowdstrike-falcon-foundry",
"description": "CrowdStrike Falcon Foundry development skills for building cybersecurity applications on the Falcon platform. Includes UI development, collections, functions, workflows, API integration, security patterns, and debugging workflows.",
"author": {
"name": "CrowdStrike"
},
"category": "security",
"source": {
"source": "url",
"url": "https://github.com/CrowdStrike/foundry-skills.git"
},
"homepage": "https://github.com/CrowdStrike/foundry-skills"
},
{
"name": "csharp-lsp",
"description": "C# language server for code intelligence",
@@ -652,20 +583,6 @@
},
"homepage": "https://www.datadoghq.com/"
},
{
"name": "datarobot-agent-skills",
"description": "DataRobot skills for AI/ML workflows — model training, deployment, predictions, feature engineering, monitoring, explainability, data preparation, App Framework CI/CD, and external agent monitoring.",
"author": {
"name": "DataRobot"
},
"category": "development",
"source": {
"source": "url",
"url": "https://github.com/datarobot-oss/datarobot-agent-skills.git",
"sha": "b3e8fd33d7c36592c802359026c15f3e067a0646"
},
"homepage": "https://datarobot.com"
},
{
"name": "dataverse",
"description": "Agent skills for building on, analyzing, and managing Microsoft Dataverse — with Dataverse MCP, PAC CLI, and Python SDK.",
@@ -848,20 +765,6 @@
"category": "development",
"homepage": "https://github.com/anthropics/claude-plugins-public/tree/main/plugins/frontend-design"
},
{
"name": "fullstory",
"description": "Connect Claude to Fullstory to query behavioral analytics, session replays, and customer experience insights.",
"author": {
"name": "Fullstory"
},
"category": "monitoring",
"source": {
"source": "github",
"repo": "fullstorydev/fullstory-skills",
"commit": "1ec5865e7ab1449f9a0859d164c4b6a8c53b6e2f"
},
"homepage": "https://www.fullstory.com"
},
{
"name": "github",
"description": "Official GitHub MCP server for repository management. Create issues, manage pull requests, review code, search repositories, and interact with GitHub's full API directly from Claude Code.",
@@ -984,21 +887,6 @@
}
}
},
{
"name": "jfrog",
"description": "Use the JFrog Platform from Claude Code: Artifactory repos and artifacts, security findings and exposures, Catalog package safety and downloads, workflows across the SDLC, and platform administration.",
"author": {
"name": "JFrog Ltd.",
"url": "https://jfrog.com"
},
"category": "security",
"source": {
"source": "github",
"repo": "jfrog/claude-plugin",
"commit": "761921eaa12b845beba1688d699a2d45091dfe83"
},
"homepage": "https://jfrog.com"
},
{
"name": "kotlin-lsp",
"description": "Kotlin language server for code intelligence",
@@ -1094,21 +982,6 @@
},
"homepage": "https://github.com/Shopify/liquid-skills/tree/main/plugins/liquid-skills"
},
{
"name": "logfire",
"description": "Add Logfire observability to Python applications with auto-instrumentation for FastAPI, httpx, asyncpg, SQLAlchemy, and more",
"author": {
"name": "Pydantic"
},
"category": "monitoring",
"source": {
"source": "git-subdir",
"url": "https://github.com/pydantic/skills.git",
"path": "plugins/logfire",
"ref": "main"
},
"homepage": "https://github.com/pydantic/skills/tree/main/plugins/logfire"
},
{
"name": "lua-lsp",
"description": "Lua language server for code intelligence",
@@ -1511,21 +1384,6 @@
},
"homepage": "https://quarkus.io"
},
{
"name": "rails-query",
"description": "Run read-only database queries against a Ruby on Rails 8.2+ app's database via `rails query` — ActiveRecord or SQL, schema/model introspection, EXPLAIN, pagination, and remote execution via Kamal.",
"author": {
"name": "Lewis Buckley",
"url": "https://github.com/lewispb"
},
"category": "development",
"source": {
"source": "github",
"repo": "lewispb/rails-query-skill",
"commit": "0f53fa861089e1f46097db9a92aea311f340c355"
},
"homepage": "https://github.com/lewispb/rails-query-skill"
},
{
"name": "railway",
"description": "Deploy and manage apps, databases, and infrastructure on Railway. Covers project setup, deploys, environment configuration, networking, troubleshooting, and monitoring.",
@@ -1976,7 +1834,8 @@
"category": "security",
"source": {
"source": "url",
"url": "https://github.com/VantaInc/vanta-mcp-plugin.git"
"url": "https://github.com/VantaInc/vanta-mcp-plugin.git",
"sha": "46e5bebf0484f08fc4a3c4054437cf5ec06298c9"
},
"homepage": "https://help.vanta.com/en/articles/14094979-connecting-to-vanta-mcp#h_887ce3f337"
},

View File

@@ -9,10 +9,6 @@ on:
jobs:
validate:
# Fork PRs are auto-closed by close-external-prs.yml, so skip validation
# for them entirely. This also prevents untrusted filenames from forks
# from ever reaching the shell steps below.
if: github.event.pull_request.head.repo.full_name == github.repository
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
@@ -24,19 +20,16 @@ jobs:
- name: Get changed frontmatter files
id: changed
env:
GH_TOKEN: ${{ github.token }}
PR_NUMBER: ${{ github.event.pull_request.number }}
run: |
# Use diff-filter=AMRC to exclude deleted files (D) - only Added, Modified, Renamed, Copied
FILES=$(gh pr diff "$PR_NUMBER" --name-only --diff-filter=AMRC | grep -E '(agents/.*\.md|skills/.*/SKILL\.md|commands/.*\.md)$' || true)
FILES=$(gh pr diff ${{ github.event.pull_request.number }} --name-only --diff-filter=AMRC | grep -E '(agents/.*\.md|skills/.*/SKILL\.md|commands/.*\.md)$' || true)
echo "files<<EOF" >> "$GITHUB_OUTPUT"
echo "$FILES" >> "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
env:
GH_TOKEN: ${{ github.token }}
- name: Validate frontmatter
if: steps.changed.outputs.files != ''
env:
FILES: ${{ steps.changed.outputs.files }}
run: |
printf '%s\n' "$FILES" | xargs bun .github/scripts/validate-frontmatter.ts
echo "${{ steps.changed.outputs.files }}" | xargs bun .github/scripts/validate-frontmatter.ts

View File

@@ -1,8 +0,0 @@
{
"name": "code-modernization",
"description": "Modernize legacy codebases (COBOL, legacy Java/C++, monolith web apps) with a structured assess → map → extract-rules → reimagine → transform → harden workflow and specialist review agents",
"author": {
"name": "Anthropic",
"email": "support@anthropic.com"
}
}

View File

@@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -1,107 +0,0 @@
# Code Modernization Plugin
A structured workflow and set of specialist agents for modernizing legacy codebases — COBOL, legacy Java/C++, monolith web apps — into current stacks while preserving behavior.
## Overview
Legacy modernization fails most often not because the target technology is wrong, but because teams skip steps: they transform code before understanding it, reimagine architecture before extracting business rules, or ship without a harness that would catch behavior drift. This plugin enforces a sequence:
```
assess → map → extract-rules → reimagine → transform → harden
```
Each step has a dedicated slash command. Specialist agents (legacy analyst, business rules extractor, architecture critic, security auditor, test engineer) are invoked from within those commands — or directly — to keep the work honest.
## Commands
The commands are designed to be run in order, but each produces a standalone artifact so you can stop, review, and resume.
### `/modernize-brief`
Capture the modernization brief: what's being modernized, why now, constraints (regulatory, data, runtime), non-goals, and success criteria. Produces `analysis/brief.md`. Run this first.
### `/modernize-assess`
Inventory the legacy codebase: languages, line counts, module boundaries, external integrations, build system, test coverage, known pain points. Produces `analysis/assessment.md`. Uses the `legacy-analyst` agent for deep reads on unfamiliar dialects.
### `/modernize-map`
Map the legacy structure onto a target architecture: which legacy modules become which target services/packages, data-flow diagrams, migration sequencing. Produces `analysis/map.md`. Uses the `architecture-critic` agent to pressure-test the design.
### `/modernize-extract-rules`
Extract business rules from the legacy code — the rules that are encoded in procedural logic, COBOL copybooks, stored procedures, or config files — into human-readable form with citations back to source. Produces `analysis/rules.md`. Uses the `business-rules-extractor` agent.
### `/modernize-reimagine`
Propose the target design: APIs, data model, runtime. Explicitly list what changes from legacy and what stays identical. Produces `analysis/design.md`. Uses the `architecture-critic` agent to challenge over-engineering.
### `/modernize-transform`
Do the actual code transformation — module by module. Writes to `modernized/`. Pairs each transformed module with a test suite that pins the pre-transform behavior.
### `/modernize-harden`
Post-transform review pass: security audit, test coverage, error handling, observability. Uses `security-auditor` and `test-engineer` agents. Produces a findings report ranked Blocker / High / Medium / Nit.
## Agents
- **`legacy-analyst`** — Reads legacy code (COBOL, legacy Java/C++, procedural PHP, classic ASP) and produces structured summaries. Good at spotting implicit dependencies, copybook inheritance, and "JOBOL" patterns (procedural code wearing a modern syntax).
- **`business-rules-extractor`** — Extracts business rules from procedural code with source citations. Each rule includes: what, where it's implemented, which conditions fire it, and any corner cases hidden in data.
- **`architecture-critic`** — Adversarial reviewer for target architectures and transformed code. Default stance is skeptical: asks "do we actually need this?" Flags microservices-for-the-resume, ceremonial error handling, abstractions with one implementation.
- **`security-auditor`** — Reviews transformed code for auth, input validation, secret handling, and dependency CVEs. Tuned for the kinds of issues that appear when translating security primitives across stacks (e.g., session handling from servlet to stateless JWT).
- **`test-engineer`** — Audits test suites for behavior-pinning vs. coverage-theater. Flags tests that exercise code paths without asserting outcomes.
## Installation
```
/plugin install code-modernization@claude-plugins-official
```
## Recommended Workspace Setup
This plugin ships commands and agents, but modernization projects benefit from a workspace permission layout that enforces the "never touch legacy, freely edit modernized" rule. A starting-point `.claude/settings.json` for the project directory you're modernizing:
```json
{
"permissions": {
"allow": [
"Bash(git diff:*)",
"Bash(git log:*)",
"Bash(git status:*)",
"Read(**)",
"Write(analysis/**)",
"Write(modernized/**)",
"Edit(analysis/**)",
"Edit(modernized/**)"
],
"deny": [
"Edit(legacy/**)"
]
}
}
```
Adjust `legacy/` and `modernized/` to match your actual layout. The key invariants: `Edit` under `legacy/` is denied, and writes are scoped to `analysis/` (for documents) and `modernized/` (for the new code).
## Typical Workflow
```bash
# 1. Write the brief — what are we modernizing and why?
/modernize-brief
# 2. Inventory the legacy code
/modernize-assess
# 3. Extract business rules before touching the code
/modernize-extract-rules
# 4. Map legacy structure to target
/modernize-map
# 5. Propose the target design and review it
/modernize-reimagine
# 6. Transform module by module
/modernize-transform
# 7. Harden: security, tests, observability
/modernize-harden
```
## License
Apache 2.0. See `LICENSE`.

View File

@@ -1,36 +0,0 @@
---
name: architecture-critic
description: Reviews proposed target architectures and transformed code against modern best practice. Adversarial — looks for over-engineering, missed requirements, and simpler alternatives.
tools: Read, Glob, Grep, Bash
---
You are a principal engineer reviewing a modernization design or a freshly
transformed module. Your default stance is **skeptical**. The team is excited
about the new shiny; your job is to ask "do we actually need this?"
## Review lens
For **architecture proposals**:
- Does every service boundary correspond to a real domain seam, or is this
microservices-for-the-resume?
- What's the simplest design that meets the stated requirements? How does
the proposal compare?
- Which non-functional requirements (latency, throughput, consistency) are
unstated, and does the design accidentally violate them?
- What's the data migration story? "We'll figure it out" is a finding.
- What happens when service X is down? Trace one failure mode end-to-end.
For **transformed code**:
- Is this idiomatic for the target stack, or is legacy structure leaking
through? (Flag "JOBOL" — procedural Java with COBOL variable names.)
- Is error handling meaningful or ceremonial?
- Are there abstractions with exactly one implementation and no second use
case in sight?
- Does the test suite actually pin behavior, or just exercise code paths?
- What would the on-call engineer need at 3am that isn't here?
## Output
Findings ranked **Blocker / High / Medium / Nit**. Each with: what, where,
why it matters, and a concrete suggested change. End with one paragraph:
"If I could only change one thing, it would be ___."

View File

@@ -1,46 +0,0 @@
---
name: business-rules-extractor
description: Mines domain logic, calculations, validations, and policies from legacy code into testable Given/When/Then specifications. Use when you need to separate "what the business requires" from "how the old code happened to implement it."
tools: Read, Glob, Grep, Bash
---
You are a business analyst who reads code. Your job is to find the **rules**
hidden inside legacy systems — the calculations, thresholds, eligibility
checks, and policies that define how the business actually operates — and
express them in a form that survives the rewrite.
## What counts as a business rule
- **Calculations**: interest, fees, taxes, discounts, scores, aggregates
- **Validations**: required fields, format checks, range limits, cross-field
- **Eligibility / authorization**: who can do what, when, under which conditions
- **State transitions**: status lifecycles, what triggers each transition
- **Policies**: retention periods, retry limits, cutoff times, rounding rules
## What does NOT count
Infrastructure, logging, error handling, UI layout, technical retries,
connection pooling. If a rule would be the same regardless of what language
the system was written in, it's a business rule. If it only exists because
of the technology, skip it.
## Extraction discipline
1. Find the rule in code. Record exact `file:line-line`.
2. State it in plain English a non-engineer would recognize.
3. Encode it as Given/When/Then with **concrete values**:
```
Given an account with balance $1,250.00 and APR 18.5%
When the monthly interest batch runs
Then the interest charged is $19.27 (balance × APR ÷ 12, rounded half-up to cents)
```
4. List the parameters (rates, limits, magic numbers) with their current
hardcoded values — these often need to become configuration.
5. Rate your confidence: **High** (logic is explicit), **Medium** (inferred
from structure/names), **Low** (ambiguous; needs SME).
6. If confidence < High, write the exact question an SME must answer.
## Output format
One "Rule Card" per rule (see the format in the modernize:extract-rules
command). Group by category. Lead with a summary table.

View File

@@ -1,39 +0,0 @@
---
name: legacy-analyst
description: Deep-reads legacy codebases (COBOL, Java, .NET, Node, anything) to build structural and behavioral understanding. Use for discovery, dependency mapping, dead-code detection, and "what does this system actually do" questions.
tools: Read, Glob, Grep, Bash
---
You are a senior legacy systems analyst with 20 years of experience reading
code nobody else wants to read — COBOL, JCL, RPG, classic ASP, EJB 2,
Struts 1, raw servlets, Perl CGI.
Your job is **understanding, not judgment**. The code in front of you kept a
business running for decades. Treat it with respect, figure out what it does,
and explain it in terms a modern engineer can act on.
## How you work
- **Read before you grep.** Open the entry points (main programs, JCL jobs,
controllers, routes) and trace the actual flow. Pattern-matching on names
lies; control flow doesn't.
- **Cite everything.** Every claim gets a `path/to/file:line` reference.
If you can't point to a line, you don't know it — say so.
- **Distinguish "is" from "appears to be."** When you're inferring intent
from structure, flag it: "appears to handle X (inferred from variable
names; no comments confirm)."
- **Use the right vocabulary for the stack.** COBOL has paragraphs,
copybooks, and FD entries. CICS has transactions and BMS maps. JCL has
steps and DD statements. Java has packages and beans. Use the native
terms so SMEs trust your output.
- **Find the data first.** In legacy systems, the data structures (copybooks,
DDL, schemas) are usually more stable and truthful than the procedural
code. Map the data, then map who touches it.
- **Note what's missing.** Unhandled error paths, TODO comments, commented-out
blocks, magic numbers — these are signals about history and risk.
## Output format
Default to structured markdown: tables for inventories, Mermaid for graphs,
bullet lists for findings. Always include a "Confidence & Gaps" footer
listing what you couldn't determine and what you'd ask an SME.

View File

@@ -1,47 +0,0 @@
---
name: security-auditor
description: Adversarial security reviewer — OWASP Top 10, CWE, dependency CVEs, secrets, injection. Use for security debt scanning and pre-modernization hardening.
tools: Read, Glob, Grep, Bash
---
You are an application security engineer performing an adversarial review.
Assume the code is hostile until proven otherwise. Your job is to find
vulnerabilities a real attacker would find — and explain them in terms an
engineer can fix.
## Coverage checklist
Work through systematically:
- **Injection** (SQL, NoSQL, OS command, LDAP, XPath, template) — trace every
user-controlled input to every sink
- **Authentication / session** — hardcoded creds, weak session handling,
missing auth checks on sensitive routes
- **Sensitive data exposure** — secrets in source, weak crypto, PII in logs
- **Access control** — IDOR, missing ownership checks, privilege escalation paths
- **XSS / CSRF** — unescaped output, missing tokens
- **Insecure deserialization** — pickle/yaml.load/ObjectInputStream on
untrusted data
- **Vulnerable dependencies** — run `npm audit` / `pip-audit` /
read manifests and flag versions with known CVEs
- **SSRF / path traversal / open redirect**
- **Security misconfiguration** — debug mode, verbose errors, default creds
## Tooling
Use available SAST where it helps (npm audit, pip-audit, grep for known-bad
patterns) but **read the code** — tools miss logic flaws. Show tool output
verbatim, then add your manual findings.
## Reporting standard
For each finding:
| Field | Content |
|---|---|
| **ID** | SEC-NNN |
| **CWE** | CWE-XXX with name |
| **Severity** | Critical / High / Medium / Low (CVSS-ish reasoning) |
| **Location** | `file:line` |
| **Exploit scenario** | One sentence: how an attacker uses this |
| **Fix** | Concrete code-level remediation |
No hand-waving. If you can't write the exploit scenario, downgrade severity.

View File

@@ -1,36 +0,0 @@
---
name: test-engineer
description: Writes characterization, contract, and equivalence tests that pin down legacy behavior so transformation can be proven correct. Use before any rewrite.
tools: Read, Write, Edit, Glob, Grep, Bash
---
You are a test engineer specializing in **characterization testing**
writing tests that capture what legacy code *actually does* (not what
someone thinks it should do) so that a rewrite can be proven equivalent.
## Principles
- **The legacy code is the oracle.** If the legacy computes 19.27 and the
spec says 19.28, the test asserts 19.27 and you flag the discrepancy
separately. We're proving equivalence first; fixing bugs is a separate
decision.
- **Concrete over abstract.** Every test has literal input values and literal
expected outputs. No "should calculate correctly" — instead "given balance
1250.00 and APR 18.5%, returns 19.27".
- **Cover the edges the legacy covers.** Read the legacy code's branches.
Every IF/EVALUATE/switch arm gets at least one test case. Boundary values
(zero, negative, max, empty) get explicit cases.
- **Tests must run against BOTH.** Structure tests so the same inputs can be
fed to the legacy implementation (or a recorded trace of it) and the modern
one. The test harness compares.
- **Executable, not aspirational.** Tests compile and run from day one.
Behaviors not yet implemented in the target are marked
`@Disabled("pending RULE-NNN")` / `@pytest.mark.skip` / `it.todo()` — never
deleted.
## Output
Idiomatic tests for the requested target stack (JUnit 5 / pytest / Vitest /
xUnit), one test class/file per legacy module, test method names that read
as specifications. Include a `README.md` in the test directory explaining
how to run them and how to add a new case.

View File

@@ -1,142 +0,0 @@
---
description: Full discovery & portfolio analysis of a legacy system — inventory, complexity, debt, effort estimation
argument-hint: <system-dir> | --portfolio <parent-dir>
---
**Mode select.** If `$ARGUMENTS` starts with `--portfolio`, run **Portfolio
mode** against the directory that follows. Otherwise run **Single-system
mode** against `legacy/$1`.
---
# Portfolio mode (`--portfolio <parent-dir>`)
Sweep every immediate subdirectory of the parent dir and produce a
heat-map a steering committee can use to sequence a multi-year program.
## Step P1 — Per-system metrics
For each subdirectory `<sys>`:
```bash
cloc --quiet --csv <parent>/<sys> # LOC by language
lizard -s cyclomatic_complexity <parent>/<sys> 2>/dev/null | tail -1
```
Capture: total SLOC, dominant language, file count, mean & max
cyclomatic complexity (CCN). For dependency freshness, locate the
manifest (`package.json`, `pom.xml`, `*.csproj`, `requirements*.txt`,
copybook dir) and note its age / pinned-version count.
## Step P2 — COCOMO-II effort
Compute person-months per system using COCOMO-II basic:
`PM = 2.94 × (KSLOC)^1.10` (nominal scale factors). Show the formula and
inputs so the figure is defensible, not a guess.
## Step P3 — Documentation coverage
For each system, count source files with vs without a header comment
block, and list architecture docs present (`README`, `docs/`, ADRs).
Report coverage % and the top undocumented subsystems.
## Step P4 — Render the heat-map
Write `analysis/portfolio.html` (dark `#1e1e1e` bg, `#d4d4d4` text,
`#cc785c` accent, system-ui font, all CSS inline). One row per system;
columns: **System · Lang · KSLOC · Files · Mean CCN · Max CCN · Dep
Freshness · Doc Coverage % · COCOMO PM · Risk**. Color-grade the PM and
Risk cells (green→amber→red). Below the table, a 2-3 sentence
sequencing recommendation: which system first and why.
Then stop. Tell the user to open `analysis/portfolio.html`.
---
# Single-system mode
Perform a complete **modernization assessment** of `legacy/$1`.
This is the discovery phase — the goal is a fact-grounded executive brief that
a VP of Engineering could take into a budget meeting. Work in this order:
## Step 1 — Quantitative inventory
Run and show the output of:
```bash
scc legacy/$1
```
Then run `scc --by-file -s complexity legacy/$1 | head -25` to identify the
highest-complexity files. Capture the COCOMO effort/cost estimate scc provides.
## Step 2 — Technology fingerprint
Identify, with file evidence:
- Languages, frameworks, and runtime versions in use
- Build system and dependency manifest locations
- Data stores (schemas, copybooks, DDL, ORM configs)
- Integration points (queues, APIs, batch interfaces, screen maps)
- Test presence and approximate coverage signal
## Step 3 — Parallel deep analysis
Spawn three subagents **concurrently** using the Task tool:
1. **legacy-analyst** — "Build a structural map of legacy/$1: what are the
5-10 major functional domains, which source files belong to each, and how
do they depend on each other? Return a markdown table + a Mermaid
`graph TD` of domain-level dependencies. Cite file paths."
2. **legacy-analyst** — "Identify technical debt in legacy/$1: dead code,
deprecated APIs, copy-paste duplication, god objects/programs, missing
error handling, hardcoded config. Return the top 10 findings ranked by
remediation value, each with file:line evidence."
3. **security-auditor** — "Scan legacy/$1 for security vulnerabilities:
injection, auth weaknesses, hardcoded secrets, vulnerable dependencies,
missing input validation. Return findings in CWE-tagged table form with
file:line evidence and severity."
Wait for all three. Synthesize their findings.
## Step 4 — Production runtime overlay (observability)
If the system has batch jobs (e.g. JCL members under `app/jcl/`), call the
`observability` MCP tool `get_batch_runtimes` for each business-relevant
job name (interest, posting, statement, reporting). Use the returned
p50/p95/p99 and 90-day series to:
- Tag each functional domain from Step 3 with its production wall-clock
cost and **p99 variance** (p99/p50 ratio).
- Flag the highest-variance domain as the highest operational risk —
this is telemetry-grounded, not a static-analysis opinion.
Include a small **Batch Runtime** table (Job · Domain · p50 · p95 · p99 ·
p99/p50) in the assessment.
## Step 5 — Documentation gap analysis
Compare what the code *does* against what README/docs/comments *say*. List
the top 5 undocumented behaviors or subsystems that a new engineer would
need explained.
## Step 6 — Write the assessment
Create `analysis/$1/ASSESSMENT.md` with these sections:
- **Executive Summary** (3-4 sentences: what it is, how big, how risky, headline recommendation)
- **System Inventory** (the scc table + tech fingerprint)
- **Architecture-at-a-Glance** (the domain table; reference the diagram)
- **Production Runtime Profile** (the batch-runtime table from Step 4, with the highest-variance domain called out)
- **Technical Debt** (top 10, ranked)
- **Security Findings** (CWE table)
- **Documentation Gaps** (top 5)
- **Effort Estimation** (COCOMO-derived person-months, ±range, key cost drivers)
- **Recommended Modernization Pattern** (one of: Rehost / Replatform / Refactor / Rearchitect / Rebuild / Replace — with one-paragraph rationale)
Also create `analysis/$1/ARCHITECTURE.mmd` containing the Mermaid domain
dependency diagram from the legacy-analyst.
## Step 7 — Present
Tell the user the assessment is ready and suggest:
`glow -p analysis/$1/ASSESSMENT.md`

View File

@@ -1,60 +0,0 @@
---
description: Generate a phased Modernization Brief — the approved plan that transformation agents will execute against
argument-hint: <system-dir> [target-stack]
---
Synthesize everything in `analysis/$1/` into a **Modernization Brief** — the
single document a steering committee approves and engineering executes.
Target stack: `$2` (if blank, recommend one based on the assessment findings).
Read `analysis/$1/ASSESSMENT.md`, `TOPOLOGY.md`, and `BUSINESS_RULES.md` first.
If any are missing, say so and stop.
## The Brief
Write `analysis/$1/MODERNIZATION_BRIEF.md`:
### 1. Objective
One paragraph: from what, to what, why now.
### 2. Target Architecture
Mermaid C4 Container diagram of the *end state*. Name every service, data
store, and integration. Below it, a table mapping legacy component → target
component(s).
### 3. Phased Sequence
Break the work into 3-6 phases using **strangler-fig ordering** — lowest-risk,
fewest-dependencies first. For each phase:
- Scope (which legacy modules, which target services)
- Entry criteria (what must be true to start)
- Exit criteria (what tests/metrics prove it's done)
- Estimated effort (person-weeks, derived from COCOMO + complexity data)
- Risk level + top 2 risks + mitigation
Render the phases as a Mermaid `gantt` chart.
### 4. Behavior Contract
List the **P0 behaviors** from BUSINESS_RULES.md that MUST be proven
equivalent before any phase ships. These become the regression suite.
### 5. Validation Strategy
State which combination applies: characterization tests, contract tests,
parallel-run / dual-execution diff, property-based tests, manual UAT.
Justify per phase.
### 6. Open Questions
Anything requiring human/SME decision before Phase 1 starts. Each as a
checkbox the approver must tick.
### 7. Approval Block
```
Approved by: ________________ Date: __________
Approval covers: Phase 1 only | Full plan
```
## Present
Enter **plan mode** and present a summary of the brief. Do NOT proceed to any
transformation until the user explicitly approves. This gate is the
human-in-the-loop control point.

View File

@@ -1,68 +0,0 @@
---
description: Mine business logic from legacy code into testable, human-readable rule specifications
argument-hint: <system-dir> [module-pattern]
---
Extract the **business rules** embedded in `legacy/$1` into a structured,
testable specification — the institutional knowledge that's currently locked
in code and in the heads of engineers who are about to retire.
Scope: if a module pattern was given (`$2`), focus there; otherwise cover the
entire system. Either way, prioritize calculation, validation, eligibility,
and state-transition logic over plumbing.
## Method
Spawn **three business-rules-extractor subagents in parallel**, each assigned
a different lens. If `$2` is non-empty, include "focusing on files matching
$2" in each prompt.
1. **Calculations** — "Find every formula, rate, threshold, and computed value
in legacy/$1. For each: what does it compute, what are the inputs, what is
the exact formula/algorithm, where is it implemented (file:line), and what
edge cases does the code handle?"
2. **Validations & eligibility** — "Find every business validation, eligibility
check, and guard condition in legacy/$1. For each: what is being checked,
what happens on pass/fail, where is it (file:line)?"
3. **State & lifecycle** — "Find every status field, state machine, and
lifecycle transition in legacy/$1. For each entity: what states exist,
what triggers transitions, what side-effects fire?"
## Synthesize
Merge the three result sets. Deduplicate. For each distinct rule, write a
**Rule Card** in this exact format:
```
### RULE-NNN: <plain-English name>
**Category:** Calculation | Validation | Lifecycle | Policy
**Source:** `path/to/file.ext:line-line`
**Plain English:** One sentence a business analyst would recognize.
**Specification:**
Given <precondition>
When <trigger>
Then <outcome>
[And <additional outcome>]
**Parameters:** <constants, rates, thresholds with their current values>
**Edge cases handled:** <list>
**Confidence:** High | Medium | Low — <why>
```
Write all rule cards to `analysis/$1/BUSINESS_RULES.md` with:
- A summary table at top (ID, name, category, source, confidence)
- Rule cards grouped by category
- A final **"Rules requiring SME confirmation"** section listing every
Medium/Low confidence rule with the specific question a human needs to answer
## Generate the DTO catalog
As a companion, create `analysis/$1/DATA_OBJECTS.md` cataloging the core
data transfer objects / records / entities: name, fields with types, which
rules consume/produce them, source location.
## Present
Report: total rules found, breakdown by category, count needing SME review.
Suggest: `glow -p analysis/$1/BUSINESS_RULES.md`

View File

@@ -1,46 +0,0 @@
---
description: Security vulnerability scan + remediation — OWASP, CVE, secrets, injection
argument-hint: <system-dir>
---
Run a **security hardening pass** on `legacy/$1`: find vulnerabilities, rank
them, and fix the critical ones.
## Scan
Spawn the **security-auditor** subagent:
"Adversarially audit legacy/$1 for security vulnerabilities. Cover:
OWASP Top 10 (injection, broken auth, XSS, SSRF, etc.), hardcoded secrets,
vulnerable dependency versions (check package manifests against known CVEs),
missing input validation, insecure deserialization, path traversal.
For each finding return: CWE ID, severity (Critical/High/Med/Low), file:line,
one-sentence exploit scenario, and recommended fix. Also run any available
SAST tooling (npm audit, pip-audit, OWASP dependency-check) and include
its raw output."
## Triage
Write `analysis/$1/SECURITY_FINDINGS.md`:
- Summary scorecard (count by severity, top CWE categories)
- Findings table sorted by severity
- Dependency CVE table (package, installed version, CVE, fixed version)
## Remediate
For each **Critical** and **High** finding, fix it directly in the source.
Make minimal, targeted changes. After each fix, add a one-line entry under
"Remediation Log" in SECURITY_FINDINGS.md: finding ID → commit-style summary
of what changed.
Show the cumulative diff:
```bash
git -C legacy/$1 diff
```
## Verify
Re-run the security-auditor against the patched code to confirm the
Critical/High findings are resolved. Update the scorecard with before/after.
Suggest: `glow -p analysis/$1/SECURITY_FINDINGS.md`

View File

@@ -1,66 +0,0 @@
---
description: Dependency & topology mapping — call graphs, data lineage, batch flows, rendered as navigable diagrams
argument-hint: <system-dir>
---
Build a **dependency and topology map** of `legacy/$1` and render it visually.
The assessment gave us domains. Now go one level deeper: how do the *pieces*
connect? This is the map an engineer needs before touching anything.
## What to produce
Write a one-off analysis script (Python or shell — your choice) that parses
the source under `legacy/$1` and extracts:
- **Program/module call graph** — who calls whom (for COBOL: `CALL` statements
and CICS `LINK`/`XCTL`; for Java: class-level imports/invocations; for Node:
`require`/`import`)
- **Data dependency graph** — which programs read/write which data stores
(COBOL: copybooks + VSAM/DB2 in JCL DD statements; Java: JPA entities/tables;
Node: model files)
- **Entry points** — batch jobs, transaction IDs, HTTP routes, CLI commands
- **Dead-end candidates** — modules with no inbound edges (potential dead code)
Save the script as `analysis/$1/extract_topology.py` (or `.sh`) so it can be
re-run and audited. Run it. Show the raw output.
## Render
From the extracted data, generate **three Mermaid diagrams** and write them
to `analysis/$1/TOPOLOGY.html` so the artifact pane renders them live.
The HTML page must use: dark `#1e1e1e` background, `#d4d4d4` text,
`#cc785c` for `<h2>`/accents, `system-ui` font, all CSS **inline** (no
external stylesheets). Each diagram goes in a
`<pre class="mermaid">...</pre>` block — the artifact server loads
mermaid.js and renders client-side. Do **not** wrap diagrams in
markdown ` ``` ` fences inside the HTML.
1. **`graph TD` — Module call graph.** Cluster by domain (use `subgraph`).
Highlight entry points in a distinct style. Cap at ~40 nodes — if larger,
show domain-level with one expanded domain.
2. **`graph LR` — Data lineage.** Programs → data stores.
Mark read vs write edges.
3. **`flowchart TD` — Critical path.** Trace ONE end-to-end business flow
(e.g., "monthly billing run" or "process payment") through every program
and data store it touches, in execution order. If the `observability`
MCP server is connected, annotate each batch step with its p50/p99
wall-clock from `get_batch_runtimes`.
Also export the three diagrams as standalone `.mmd` files for re-use:
`analysis/$1/call-graph.mmd`, `analysis/$1/data-lineage.mmd`,
`analysis/$1/critical-path.mmd`.
## Annotate
Below each `<pre class="mermaid">` block in TOPOLOGY.html, add a `<ul>`
with 3-5 **architect observations**: tight coupling clusters, single
points of failure, candidates for service extraction, data stores
touched by too many writers.
## Present
Tell the user to open `analysis/$1/TOPOLOGY.html` in the artifact pane.

View File

@@ -1,82 +0,0 @@
---
description: Multi-agent greenfield rebuild — extract specs from legacy, design AI-native, scaffold & validate with HITL
argument-hint: <system-dir> <target-vision>
---
**Reimagine** `legacy/$1` as: $2
This is not a port — it's a rebuild from extracted intent. The legacy system
becomes the *specification source*, not the structural template. This command
orchestrates a multi-agent team with explicit human checkpoints.
## Phase A — Specification mining (parallel agents)
Spawn concurrently and show the user that all three are running:
1. **business-rules-extractor** — "Extract every business rule from legacy/$1
into Given/When/Then form. Output to a structured list I can parse."
2. **legacy-analyst** — "Catalog every external interface of legacy/$1:
inbound (screens, APIs, batch triggers, queues) and outbound (reports,
files, downstream calls, DB writes). For each: name, direction, payload
shape, frequency/SLA if discernible."
3. **legacy-analyst** — "Identify the core domain entities in legacy/$1 and
their relationships. Return as an entity list + Mermaid erDiagram."
Collect results. Write `analysis/$1/AI_NATIVE_SPEC.md` containing:
- **Capabilities** (what the system must do — derived from rules + interfaces)
- **Domain Model** (entities + erDiagram)
- **Interface Contracts** (each external interface as an OpenAPI fragment or
AsyncAPI fragment)
- **Non-functional requirements** inferred from legacy (batch windows, volumes)
- **Behavior Contract** (the Given/When/Then rules — these are the acceptance tests)
## Phase B — HITL checkpoint #1
Present the spec summary. Ask the user **one focused question**: "Which of
these capabilities are P0 for the reimagined system, and are there any we
should deliberately drop?" Wait for the answer. Record it in the spec.
## Phase C — Architecture (single agent, then critique)
Design the target architecture for "$2":
- Mermaid C4 Container diagram
- Service boundaries with rationale (which rules/entities live where)
- Technology choices with one-line justification each
- Data migration approach from legacy stores
Then spawn **architecture-critic**: "Review this proposed architecture for
$2 against the spec in analysis/$1/AI_NATIVE_SPEC.md. Identify over-engineering,
missed requirements, scaling risks, and simpler alternatives." Incorporate
the critique. Write the result to `analysis/$1/REIMAGINED_ARCHITECTURE.md`.
## Phase D — HITL checkpoint #2
Enter plan mode. Present the architecture. Wait for approval.
## Phase E — Parallel scaffolding
For each service in the approved architecture (cap at 3 for the demo), spawn
a **general-purpose agent in parallel**:
"Scaffold the <service-name> service per analysis/$1/REIMAGINED_ARCHITECTURE.md
and AI_NATIVE_SPEC.md. Create: project skeleton, domain model, API stubs
matching the interface contracts, and **executable acceptance tests** for every
behavior-contract rule assigned to this service (mark unimplemented ones as
expected-failure/skip with the rule ID). Write to modernized/$1-reimagined/<service-name>/."
Show the agents' progress. When all complete, run the acceptance test suites
and report: total tests, passing (scaffolded behavior), pending (rule IDs
awaiting implementation).
## Phase F — Knowledge graph handoff
Write `modernized/$1-reimagined/CLAUDE.md` — the persistent context file for
the new system, containing: architecture summary, service responsibilities,
where the spec lives, how to run tests, and the legacy→modern traceability
map. This file IS the knowledge graph that future agents and engineers will
load.
Report: services scaffolded, acceptance tests defined, % behaviors with a
home, location of all artifacts.

View File

@@ -1,78 +0,0 @@
---
description: Transform one legacy module to the target stack — idiomatic rewrite with behavior-equivalence tests
argument-hint: <system-dir> <module> <target-stack>
---
Transform `legacy/$1` module **`$2`** into **$3**, with proof of behavioral
equivalence.
This is a surgical, single-module transformation — one vertical slice of the
strangler fig. Output goes to `modernized/$1/$2/`.
## Step 0 — Plan (HITL gate)
Read the source module and any business rules in `analysis/$1/BUSINESS_RULES.md`
that reference it. Then **enter plan mode** and present:
- Which source files are in scope
- The target module structure (packages/classes/files you'll create)
- Which business rules / behaviors this module implements
- How you'll prove equivalence (test strategy)
- Anything ambiguous that needs a human decision NOW
Wait for approval before writing any code.
## Step 1 — Characterization tests FIRST
Before writing target code, spawn the **test-engineer** subagent:
"Write characterization tests for legacy/$1 module $2. Read the source,
identify every observable behavior, and encode each as a test case with
concrete input → expected output pairs derived from the legacy logic.
Target framework: <appropriate for $3>. Write to
`modernized/$1/$2/src/test/`. These tests define 'done' — the new code
must pass all of them."
Show the user the test file. Get a 👍 before proceeding.
## Step 2 — Idiomatic transformation
Write the target implementation in `modernized/$1/$2/src/main/`.
**Critical:** Write code a senior $3 engineer would write from the
*specification*, not from the legacy structure. Do NOT mirror COBOL paragraphs
as methods, do NOT preserve legacy variable names like `WS-TEMP-AMT-X`.
Use the target language's idioms: records/dataclasses, streams, dependency
injection, proper error types, etc.
Include: domain model, service logic, API surface (REST controller or
equivalent), and configuration. Add concise Javadoc/docstrings linking each
class back to the rule IDs it implements.
## Step 3 — Prove it
Run the characterization tests:
```bash
cd modernized/$1/$2 && <appropriate test command for $3>
```
Show the output. If anything fails, fix and re-run until green.
## Step 4 — Side-by-side review
Generate `modernized/$1/$2/TRANSFORMATION_NOTES.md`:
- Mapping table: legacy file:lines → target file:lines, per behavior
- Deliberate deviations from legacy behavior (with rationale)
- What was NOT migrated (dead code, unreachable branches) and why
- Follow-ups for the next module that depends on this one
Then show a visual diff of one representative behavior, legacy vs modern:
```bash
delta --side-by-side <(sed -n '<lines>p' legacy/$1/<file>) modernized/$1/$2/src/main/<file>
```
## Step 5 — Architecture review
Spawn the **architecture-critic** subagent to review the transformed code
against $3 best practices. Apply any HIGH-severity feedback; list the rest
in TRANSFORMATION_NOTES.md.
Report: tests passing, lines of legacy retired, location of artifacts.

View File

@@ -1,6 +1,6 @@
---
name: conversation-analyzer
description: Use this agent when analyzing conversation transcripts to find behaviors worth preventing with hooks. Typical triggers include the /hookify command being invoked without arguments, or the user explicitly asking to look back at the current conversation and surface mistakes that should be prevented in the future. See "When to invoke" in the agent body for worked scenarios.
description: Use this agent when analyzing conversation transcripts to find behaviors worth preventing with hooks. Examples: <example>Context: User is running /hookify command without arguments\nuser: "/hookify"\nassistant: "I'll analyze the conversation to find behaviors you want to prevent"\n<commentary>The /hookify command without arguments triggers conversation analysis to find unwanted behaviors.</commentary></example><example>Context: User wants to create hooks from recent frustrations\nuser: "Can you look back at this conversation and help me create hooks for the mistakes you made?"\nassistant: "I'll use the conversation-analyzer agent to identify the issues and suggest hooks."\n<commentary>User explicitly asks to analyze conversation for mistakes that should be prevented.</commentary></example>
model: inherit
color: yellow
tools: ["Read", "Grep"]
@@ -8,15 +8,6 @@ tools: ["Read", "Grep"]
You are a conversation analysis specialist that identifies problematic behaviors in Claude Code sessions that could be prevented with hooks.
## When to invoke
Two representative scenarios:
- **Scenario A — `/hookify` invoked with no arguments.** Treat the bare `/hookify` invocation as a request to analyze the current conversation and surface unwanted behaviors. Respond by saying you'll analyze the conversation, then run the analysis described below.
- **Scenario B — User asks to learn from recent frustrations.** When the user asks (in their own words) to look back over the conversation and create hooks for mistakes that were made, run the same analysis and propose hook rules for the issues found.
**Your Core Responsibilities:**
1. Read and analyze user messages to find frustration signals
2. Identify specific tool usage patterns that caused issues

View File

@@ -14,15 +14,10 @@ The UI layer is **additive**. Under the hood it's still tools, resources, and th
## Claude host specifics
| `_meta.ui.*` key | Where | Effect |
|---|---|---|
| `resourceUri` | tool | Which `ui://` resource the host renders for this tool's results. |
| `visibility: ["app"]` | tool | Hide a widget-only helper tool (e.g. geometry/image fetcher called via `callServerTool`) from Claude's tool list. |
| `prefersBorder: false` | resource | Drop the host's outer card border (mobile). |
| `csp.{connectDomains, resourceDomains, baseUriDomains}` | resource | Declare external origins; default is block-all. `frameDomains` is currently restricted in Claude. |
- `_meta.ui.prefersBorder: false` on a `ui://` resource removes the outer card border (mobile).
- `hostContext.safeAreaInsets: {top, right, bottom, left}` (px) — honor these for notches and the composer overlay.
- Directory submission requires OAuth or **authless** (`none`)static bearer is private-deploy only and blocks listing — plus tool `annotations` and 35 PNG screenshots; see `references/directory-checklist.md`.
- `_meta.ui.csp.{connectDomains, resourceDomains, baseUriDomains}`declare external origins per resource; default is block-all. `frameDomains` is currently restricted in Claude.
- Directory submission for MCP Apps requires 35 PNG screenshots, ≥1000px wide, cropped to the app response only (no prompt in the image). See https://claude.com/docs/connectors/building/submission#asset-specifications.
---
@@ -109,7 +104,6 @@ const server = new McpServer({ name: "contacts", version: "1.0.0" });
// 1. The tool — returns DATA, declares which UI to show
registerAppTool(server, "pick_contact", {
description: "Open an interactive contact picker",
annotations: { title: "Pick Contact", readOnlyHint: true },
inputSchema: { filter: z.string().optional() },
_meta: { ui: { resourceUri: "ui://widgets/contact-picker.html" } },
}, async ({ filter }) => {
@@ -178,10 +172,7 @@ The `/*__EXT_APPS_BUNDLE__*/` placeholder gets replaced by the server at startup
| `app.updateModelContext({...})` | Widget → host | Update context silently (no visible message) |
| `app.callServerTool({name, arguments})` | Widget → server | Call another tool on your server |
| `app.openLink({url})` | Widget → host | Open a URL in a new tab (sandbox blocks `window.open`) |
| `app.getHostContext()` / `app.onhostcontextchanged` | Host → widget | Theme, host CSS vars, `containerDimensions`, `displayMode`, `deviceCapabilities` |
| `app.requestDisplayMode({mode})` | Widget → host | Ask for `inline` / `pip` / `fullscreen` |
| `app.downloadFile({name, mimeType, content})` | Widget → host | Host-mediated download (base64 content) |
| `new App(info, caps, {autoResize: true})` | — | Iframe height tracks rendered content |
| `app.getHostContext()` / `app.onhostcontextchanged` | Host → widget | Theme (`light`/`dark`), locale, etc. |
`sendMessage` is the typical "user picked something, tell Claude" path. `updateModelContext` is for state that Claude should know about but shouldn't clutter the chat. `openLink` is **required** for any outbound navigation — `window.open` and `<a target="_blank">` are blocked by the sandbox attribute.
@@ -234,7 +225,6 @@ const pickerHtml = readFileSync("./widgets/picker.html", "utf8")
registerAppTool(server, "pick_contact", {
description: "Open an interactive contact picker. User selects one contact.",
annotations: { title: "Pick Contact", readOnlyHint: true },
inputSchema: { filter: z.string().optional().describe("Name/email prefix filter") },
_meta: { ui: { resourceUri: "ui://widgets/picker.html" } },
}, async ({ filter }) => {
@@ -358,24 +348,6 @@ Desktop caches UI resources aggressively. After editing widget HTML, **fully qui
The `sleep` keeps stdin open long enough to collect all responses. Parse the jsonl output with `jq` or a Python one-liner.
**Widget dev loop** — avoid the ⌘Q-relaunch cycle entirely by serving the inlined widget HTML at a plain GET route with a fake `ExtApps` shim that fires `ontoolresult` from a query param:
```ts
app.get("/widget-preview", (_req, res) => {
const shim = `globalThis.ExtApps={applyHostStyleVariables:()=>{},App:class{
constructor(){this.h={}} ontoolresult;onhostcontextchanged;
async connect(){const p=new URLSearchParams(location.search).get("payload");
if(p)this.ontoolresult?.({content:[{type:"text",text:p}]});}
getHostContext(){return{theme:"light"}}
sendMessage(m){console.log("sendMessage",m)} updateModelContext(){}
callServerTool(){return Promise.resolve({content:[]})} openLink(){} downloadFile(){}
}};`;
res.type("html").send(widgetHtml.replace("/*__EXT_APPS_BUNDLE__*/", shim));
});
```
Open `http://localhost:3000/widget-preview?payload={"rows":[...]}` in a normal browser tab and iterate with ordinary devtools.
**Host fallback** — use a host without the apps surface (or MCP Inspector) and confirm the tool's text content degrades gracefully.
**CSP debugging** — open the iframe's own devtools console. CSP violations are the #1 reason widgets silently fail (blank rectangle, no error in the main console). See `references/iframe-sandbox.md`.
@@ -384,9 +356,6 @@ Open `http://localhost:3000/widget-preview?payload={"rows":[...]}` in a normal b
## Reference files
- `references/iframe-sandbox.md` — CSP/sandbox constraints, the bundle-inlining pattern, image handling, host theming
- `references/iframe-sandbox.md` — CSP/sandbox constraints, the bundle-inlining pattern, image handling
- `references/widget-templates.md` — reusable HTML scaffolds for picker / confirm / progress / display
- `references/apps-sdk-messages.md` — the `App` class API: widget ↔ host ↔ server messaging, lifecycle & supersession
- `references/payload-budgeting.md` — host tool-result size caps, prune-then-truncate, heavy assets via `callServerTool`
- `references/abuse-protection.md` — Anthropic egress CIDRs, tiered rate limiting, `trust proxy`, response caching
- `references/directory-checklist.md` — pre-flight for connector-directory submission
- `references/apps-sdk-messages.md` — the `App` class API: widget ↔ host ↔ server messaging

View File

@@ -1,60 +0,0 @@
# Abuse protection for authless hosted servers
An authless StreamableHTTP server is reachable by anything on the internet.
There are three resources to protect: your compute, any upstream API quota
your tools consume, and egress bandwidth for large `callServerTool` payloads.
## You don't get a per-user identity
In authless mode there is no token and stateless transport gives no session
ID. Traffic from claude.ai is proxied through Anthropic's egress — every web
user arrives from the same small set of IPs:
```
160.79.104.0/21
2607:6bc0::/48
```
(See https://platform.claude.com/docs/en/api/ip-addresses.)
Claude Desktop, Claude Code, and other hosts connect **directly from the
user's machine**, so those *do* have distinct per-user IPs. Per-IP limiting
therefore works for direct-connect clients; for claude.ai you can only limit
the aggregate Anthropic pool. If true per-user limits matter, that's the
trigger to add OAuth.
## Tiered token-bucket (per-replica backstop)
```ts
const ANTHROPIC_CIDRS = ["160.79.104.0/21", "2607:6bc0::/48"];
const TIERS = {
anthropic: { capacity: 600, refillPerSec: 100 }, // shared pool
other: { capacity: 30, refillPerSec: 2 }, // per-IP
};
```
Match `req.ip` against the CIDRs, pick a bucket (`"anthropic"` or
`"ip:<addr>"`), 429 + `Retry-After` on exhaust. This is a per-replica
backstop — cross-replica enforcement belongs at the edge (Cloudflare, Cloud
Armor), which keeps the containers stateless.
## `trust proxy` must match your topology
`req.ip` only honours `X-Forwarded-For` if `app.set('trust proxy', N)` is
set. `true` trusts every hop, which lets a direct client send
`X-Forwarded-For: 160.79.108.42` and claim the Anthropic tier. Set it to the
exact number of trusted hops (e.g. `1` behind a single LB, `2` behind
Cloudflare → origin LB) and **never `true` in production**.
## Hard-allowlisting Anthropic IPs is a product decision
Blocking everything outside `160.79.104.0/21` locks out Desktop, Claude Code,
and every other MCP host. Use the CIDRs to **tier** rate limits, not to gate
access, unless claude.ai-only is an explicit goal.
## Cache upstream responses
For tools that wrap a third-party API, an in-process LRU keyed on the
normalized query (TTL hours, no secrets in the key) is the primary cost
control — repeat queries become free and absorb thundering-herd. Rate limits
are the safety net, not the first line.

View File

@@ -2,18 +2,6 @@
The `@modelcontextprotocol/ext-apps` package provides the `App` class (browser side) and `registerAppTool`/`registerAppResource` helpers (server side). Messaging is bidirectional and persistent.
## Construction
```js
const app = new App(
{ name: "MyWidget", version: "1.0.0" },
{}, // capabilities
{ autoResize: true }, // options
);
```
`autoResize: true` wires a `ResizeObserver` that emits `ui/notifications/size-changed` so the host iframe height tracks your rendered content. Without it the frame is fixed-height and tall renders get clipped — set it for any widget whose height depends on data.
---
## Widget → Host
@@ -75,26 +63,6 @@ card.querySelector("a").addEventListener("click", (e) => {
Host-mediated download (sandbox blocks direct `<a download>`). `content` is a base64 string.
```js
const csv = rows.map((r) => Object.values(r).join(",")).join("\n");
app.downloadFile({
name: "export.csv",
mimeType: "text/csv",
content: btoa(unescape(encodeURIComponent(csv))),
});
```
### `app.requestDisplayMode({ mode })`
Ask the host to switch the widget between `"inline"`, `"pip"`, or `"fullscreen"`. Check `getHostContext().availableDisplayModes` first; hide the control if the mode isn't offered. The host responds by firing `onhostcontextchanged` with new `displayMode` and `containerDimensions` — re-render at the new size.
```js
if (app.getHostContext()?.availableDisplayModes?.includes("fullscreen")) {
expandBtn.hidden = false;
expandBtn.onclick = () => app.requestDisplayMode({ mode: "fullscreen" });
}
```
---
## Host → Widget
@@ -116,22 +84,9 @@ app.ontoolresult = ({ content }) => {
Fires with the arguments Claude passed to the tool. Useful if the widget needs to know what was asked for (e.g., highlight the search term).
### `app.ontoolinputpartial = ({ arguments }) => {...}` / `app.ontoolcancelled = () => {...}`
`ontoolinputpartial` fires while Claude is still streaming arguments — use it to show a skeleton ("Preparing: <title>…") before the result lands. `ontoolcancelled` fires if the call is aborted; clear the skeleton.
### `app.getHostContext()` / `app.onhostcontextchanged = (ctx) => {...}`
Read and subscribe to host context. Call `getHostContext()` **after** `connect()`. Subscribe for live updates (user toggles dark mode, expands to fullscreen).
| `ctx.` field | Use |
|---|---|
| `theme` | `"light"` / `"dark"` — toggle a `.dark` class |
| `styles.variables` | Host CSS tokens — pass to `applyHostStyleVariables()` so colors/fonts match host chrome |
| `displayMode` / `availableDisplayModes` | Current mode and which `requestDisplayMode` targets are valid |
| `containerDimensions.{maxHeight,width}` | Size your render to this instead of hard-coded px |
| `deviceCapabilities.touch` | Switch hover-only affordances to tap (`pointerdown`) |
| `safeAreaInsets` | Padding for notches / composer overlay |
Read and subscribe to host context`theme` (`"light"` / `"dark"`), locale, etc. Call `getHostContext()` **after** `connect()`. Subscribe for live updates (user toggles dark mode mid-conversation).
```js
const applyTheme = (t) =>
@@ -174,36 +129,14 @@ No `{ notify }` destructure — `extra` is `RequestHandlerExtra`; progress goes
## Lifecycle
1. Claude calls a tool with `_meta.ui.resourceUri` declared
2. Host fetches the resource (your HTML) and mounts a **fresh iframe** for this call
2. Host fetches the resource (your HTML) and renders it in an iframe
3. Widget script runs, sets handlers, calls `await app.connect()`
4. Host pipes the tool's return value → `ontoolresult` fires
5. Widget renders, user interacts
6. Widget calls `sendMessage` / `updateModelContext` / `callServerTool` as needed
7. Iframe persists in the transcript; **the next call to the same tool mounts another iframe** alongside it
7. Widget persists until conversation context moves on — subsequent calls to the same tool reuse the iframe and fire `ontoolresult` again
There's no explicit "submit and close" — each instance is long-lived, but instances are not reused across calls.
### Supersession
Because earlier instances stay mounted, a click on a stale widget can `sendMessage` after a newer one has rendered. Detect this with a `BroadcastChannel` and make older instances inert:
```js
let superseded = false;
const seq = Date.now() + Math.random();
const bc = new BroadcastChannel("my-widget");
bc.onmessage = (e) => {
if (e.data?.seq > seq) {
superseded = true;
document.body.classList.add("superseded"); // opacity:.45; pointer-events:none
}
};
bc.postMessage({ seq });
// Guard outbound calls:
function safeSend(msg) {
if (!superseded) app.sendMessage(msg);
}
```
There's no explicit "submit and close" — the widget is a long-lived surface.
---

View File

@@ -1,18 +0,0 @@
# Connector-directory submission checklist
Pre-flight before submitting a remote MCP app to the Claude connector
directory. Each item is a hard review criterion.
| Area | Requirement |
|---|---|
| **Auth** | OAuth (DCR or CIMD) or **`none`** (authless). Static bearer tokens are private-deploy only and block listing. Authless is valid for public-data servers — the server holds any upstream API keys. |
| **Tool annotations** | Every tool sets `annotations.title` plus the relevant hints: `readOnlyHint: true` for fetch/search tools, `destructiveHint` / `idempotentHint` for writes, `openWorldHint: true` if the tool reaches an external system. |
| **Tool names** | ≤ 64 characters, snake/kebab case. |
| **Widget layout** | Inline height ≤ 500px, no nested scroll containers, 44pt minimum touch targets, WCAG-AA contrast in both themes. |
| **Theming** | `html, body { background: transparent }`, `<meta name="color-scheme" content="light dark">`, adopt host CSS tokens via `applyHostStyleVariables`. |
| **External links** | Use `app.openLink`. Declare each origin (e.g. `https://api.example.com`) in the connector's *Allowed link URIs* so the link skips the confirm modal. |
| **Helper tools** | Widget-only tools (geometry/image fetchers) carry `_meta.ui.visibility: ["app"]` so they don't appear in Claude's tool list. |
| **Screenshots** | 35 PNGs, ≥ 1000px wide, cropped to the app response only — no prompt text in frame. |
See `abuse-protection.md` for rate-limit and IP-tiering guidance once the
authless endpoint is public.

View File

@@ -122,38 +122,23 @@ that survives un-inlined.
---
## Theme & host styles
## Dark mode
The host renders the iframe inside its own card chrome — paint a **transparent** background and adopt host CSS tokens so the widget blends in across light/dark and across hosts.
```js
const applyTheme = (theme) =>
document.documentElement.classList.toggle("dark", theme === "dark");
```html
<meta name="color-scheme" content="light dark" />
app.onhostcontextchanged = (ctx) => applyTheme(ctx.theme);
await app.connect();
applyTheme(app.getHostContext()?.theme);
```
```css
:root {
--ink: var(--color-text-primary, #0f1111);
--sub: var(--color-text-secondary, #5a6270);
--line: var(--color-border-default, #e3e6ea);
}
html, body { background: transparent; color: var(--ink); }
:root { --ink:#0f1111; --bg:#fff; color-scheme:light; }
:root.dark { --ink:#e6e6e6; --bg:#1f2428; color-scheme:dark; }
:root.dark .thumb { mix-blend-mode: normal; } /* multiply → images vanish in dark */
```
```js
const { App, applyHostStyleVariables } = globalThis.ExtApps;
function applyHostContext(ctx) {
document.documentElement.classList.toggle("dark", ctx?.theme === "dark");
if (ctx?.styles?.variables) applyHostStyleVariables(ctx.styles.variables);
}
app.onhostcontextchanged = applyHostContext;
await app.connect();
applyHostContext(app.getHostContext());
```
`applyHostStyleVariables` writes the host's `--color-*` / `--font-*` / `--border-radius-*` tokens onto `:root`; the hex values above are fallbacks for hosts that don't supply them.
---
## Debugging

View File

@@ -1,54 +0,0 @@
# Payload budgeting
Hosts cap tool-result text. claude.ai and Claude Desktop truncate at roughly
**150,000 characters**; Claude Code at ~25k tokens. When a tool result exceeds
the cap, the host substitutes a file-pointer string in place of your JSON. The
widget then receives non-JSON in `ontoolresult`, `JSON.parse` throws, and the
user sees something like *"Bad payload: SyntaxError: Unexpected token 'E'"*
with no hint that size was the cause.
## Symptom → cause
| Symptom | Likely cause |
|---|---|
| Widget shows a JSON parse error on `content[0].text` | Result over the host cap; host swapped in a file-pointer string |
| Works for one query, breaks for "all of X" | Row count × column count crossed the cap |
| Works in MCP Inspector, breaks in Desktop | Inspector has no cap; Desktop does |
## Strategy
Cap your own payload at ~130KB and degrade in order:
1. **Ship full rows** when `JSON.stringify(rows).length` is under the cap.
2. **Prune columns** to those the rendering spec actually references. Walk the
spec for both `field: "..."` keys *and* `datum.X` / `datum['X']` inside
expression strings — if the spec aliases a column via a `calculate`
transform, the alias appears as `field:` but the source column only appears
as `datum.X`, and dropping it leaves the widget with NaN.
3. **Truncate rows** as a last resort and include `{ truncated: N }` in the
payload so the widget can label it.
```ts
const MAX = 130_000;
let out = rows;
if (JSON.stringify(out).length > MAX) {
const keep = referencedFields(spec); // field: + datum.X refs
out = rows.map((r) => pick(r, keep));
if (JSON.stringify(out).length > MAX) {
const per = JSON.stringify(out[0] ?? {}).length || 1;
out = out.slice(0, Math.floor(MAX / per));
}
}
```
## Heavy assets go via `callServerTool`, not the result
Geometry, image bytes, or any blob the widget needs but Claude doesn't should
be served by a separate tool the widget calls after mount:
```js
const topo = await app.callServerTool({ name: "get-topojson", arguments: { level } });
```
Mark that helper tool with `_meta.ui.visibility: ["app"]` so it doesn't appear
in Claude's tool list.

View File

@@ -24,7 +24,21 @@ Agents are autonomous subprocesses that handle complex, multi-step tasks indepen
```markdown
---
name: agent-identifier
description: Use this agent when [triggering conditions]. Typical triggers include [scenario 1 in prose], [scenario 2 in prose], and [scenario 3 in prose]. See "When to invoke" in the agent body for worked scenarios.
description: Use this agent when [triggering conditions]. Examples:
<example>
Context: [Situation description]
user: "[User request]"
assistant: "[How assistant should respond and use this agent]"
<commentary>
[Why this agent should be triggered]
</commentary>
</example>
<example>
[Additional example...]
</example>
model: inherit
color: blue
tools: ["Read", "Write", "Grep"]
@@ -32,12 +46,6 @@ tools: ["Read", "Write", "Grep"]
You are [agent role description]...
## When to invoke
[Two to four representative scenarios written as prose, e.g.:]
- **[Scenario name].** [What the situation looks like and what the agent should do.]
- **[Scenario name].** [Same.]
**Your Core Responsibilities:**
1. [Responsibility 1]
2. [Responsibility 2]
@@ -73,24 +81,36 @@ Agent identifier used for namespacing and invocation.
### description (required)
Defines when Claude should trigger this agent. **This is the most critical field** — it is loaded into context whenever the agent is registered, so the harness can decide when to dispatch.
Defines when Claude should trigger this agent. **This is the most critical field.**
**Must include:**
1. Triggering conditions ("Use this agent when...")
2. A short prose summary of the typical trigger scenarios
3. A pointer to a "When to invoke" section in the agent body for the detailed worked scenarios
2. Multiple `<example>` blocks showing usage
3. Context, user request, and assistant response in each example
4. `<commentary>` explaining why agent triggers
**Format:**
```
Use this agent when [conditions]. Typical triggers include [scenario 1 in prose], [scenario 2 in prose], and [scenario 3 in prose]. See "When to invoke" in the agent body for worked scenarios.
Use this agent when [conditions]. Examples:
<example>
Context: [Scenario description]
user: "[What user says]"
assistant: "[How Claude should respond]"
<commentary>
[Why this agent is appropriate]
</commentary>
</example>
[More examples...]
```
**Best practices:**
- Name 2-4 trigger scenarios in the prose summary
- Cover both proactive (assistant invokes itself) and reactive (user requests) triggering
- Cover different phrasings of the same intent
- Include 2-4 concrete examples
- Show proactive and reactive triggering
- Cover different phrasings of same intent
- Explain reasoning in commentary
- Be specific about when NOT to use the agent
- Put detailed scenarios in the body under "When to invoke" as a bullet list of prose descriptions
### model (required)
@@ -211,14 +231,14 @@ Requirements:
- Specific methodologies
- Edge case handling
- Output format
- A "When to invoke" section listing 2-4 trigger scenarios as prose bullets
4. Create identifier (lowercase, hyphens, 3-50 chars)
5. Write description with triggering conditions and a short prose summary of trigger scenarios
5. Write description with triggering conditions
6. Include 2-3 <example> blocks showing when to use
Return JSON with:
{
"identifier": "agent-name",
"whenToUse": "Use this agent when... Typical triggers include [...]. See \"When to invoke\" in the agent body.",
"whenToUse": "Use this agent when... Examples: <example>...</example>",
"systemPrompt": "You are..."
}
```
@@ -312,18 +332,13 @@ Ensure system prompt is complete:
```markdown
---
name: simple-agent
description: Use this agent when [condition]. Typical triggers include [trigger 1] and [trigger 2]. See "When to invoke" in the agent body.
description: Use this agent when... Examples: <example>...</example>
model: inherit
color: blue
---
You are an agent that [does X].
## When to invoke
- **[Scenario A].** [Description.]
- **[Scenario B].** [Description.]
Process:
1. [Step 1]
2. [Step 2]
@@ -336,7 +351,7 @@ Output: [What to provide]
| Field | Required | Format | Example |
|-------|----------|--------|---------|
| name | Yes | lowercase-hyphens | code-reviewer |
| description | Yes | Prose triggers | Use when... Typical triggers include... |
| description | Yes | Text + examples | Use when... <example>... |
| model | Yes | inherit/sonnet/opus/haiku | inherit |
| color | Yes | Color name | blue |
| tools | No | Array of tool names | ["Read", "Grep"] |
@@ -344,8 +359,7 @@ Output: [What to provide]
### Best Practices
**DO:**
-Name 2-4 trigger scenarios in the description (as prose)
- ✅ Put detailed worked scenarios in a "When to invoke" body section, as prose bullets
-Include 2-4 concrete examples in description
- ✅ Write specific triggering conditions
- ✅ Use `inherit` for model unless specific need
- ✅ Choose appropriate tools (least privilege)
@@ -353,7 +367,7 @@ Output: [What to provide]
- ✅ Test agent triggering thoroughly
**DON'T:**
- ❌ Use generic descriptions without trigger scenarios
- ❌ Use generic descriptions without examples
- ❌ Omit triggering conditions
- ❌ Give all agents same color
- ❌ Grant unnecessary tool access
@@ -393,7 +407,7 @@ To create an agent for a plugin:
3. Create `agents/agent-name.md` file
4. Write frontmatter with all required fields
5. Write system prompt following best practices
6. Name 2-4 trigger scenarios in description (prose) and detail them in a "When to invoke" body section
6. Include 2-4 triggering examples in description
7. Validate with `scripts/validate-agent.sh`
8. Test triggering with real scenarios
9. Document agent in plugin README

View File

@@ -31,13 +31,11 @@ Claude will return:
```json
{
"identifier": "agent-name",
"whenToUse": "Use this agent when... Typical triggers include [scenario 1], [scenario 2], and [scenario 3]. See \"When to invoke\" in the agent body for worked scenarios.",
"systemPrompt": "You are...\n\n## When to invoke\n\n- **[Scenario A].** [Description]\n- **[Scenario B].** [Description]\n\n**Your Core Responsibilities:**..."
"whenToUse": "Use this agent when... Examples: <example>...</example>",
"systemPrompt": "You are... **Your Core Responsibilities:**..."
}
```
`whenToUse` is flat prose. `systemPrompt` includes a "When to invoke" section with prose bullets.
### Step 4: Convert to Agent File
Create `agents/[identifier].md`:
@@ -65,8 +63,8 @@ I need an agent that reviews code changes for quality issues, security vulnerabi
```json
{
"identifier": "code-quality-reviewer",
"whenToUse": "Use this agent when the user has written code and needs quality review, or explicitly asks to review code changes. Typical triggers include proactive review after the assistant writes new code, and an explicit user request for review of recent changes. See \"When to invoke\" in the agent body for worked scenarios.",
"systemPrompt": "You are an expert code quality reviewer specializing in identifying issues in software implementations.\n\n## When to invoke\n\n- **Proactive review after new code.** The assistant has just written or modified code (e.g. an authentication feature). Run a review for quality, security, and best practices before declaring the task done.\n- **Explicit review request.** The user asks for the recent changes to be reviewed for issues. Run a thorough review and report findings.\n\n**Your Core Responsibilities:**\n1. Analyze code changes for quality issues (readability, maintainability, performance)\n2. Identify security vulnerabilities (injection, XSS, authentication issues)\n3. Check adherence to project best practices and coding standards\n4. Provide actionable, specific feedback with line numbers\n\n**Review Process:**\n1. Read the code changes using available tools\n2. Analyze for:\n - Code quality (duplication, complexity, clarity)\n - Security (OWASP top 10, input validation)\n - Best practices (error handling, logging, testing)\n - Project-specific standards (from CLAUDE.md)\n3. Identify issues with severity (critical/major/minor)\n4. Provide specific recommendations with examples\n\n**Output Format:**\nProvide a structured review:\n1. Summary (2-3 sentences)\n2. Critical Issues (must fix)\n3. Major Issues (should fix)\n4. Minor Issues (nice to fix)\n5. Positive observations\n6. Overall assessment\n\nInclude file names and line numbers for all findings."
"whenToUse": "Use this agent when the user has written code and needs quality review, or explicitly asks to review code changes. Examples:\n\n<example>\nContext: User just implemented a new feature\nuser: \"I've added the authentication feature\"\nassistant: \"Great! Let me review the code quality.\"\n<commentary>\nCode was written, trigger code-quality-reviewer agent for review.\n</commentary>\nassistant: \"I'll use the code-quality-reviewer agent to analyze the changes.\"\n</example>\n\n<example>\nContext: User explicitly requests review\nuser: \"Can you review my code for issues?\"\nassistant: \"I'll use the code-quality-reviewer agent to perform a thorough review.\"\n<commentary>\nExplicit review request triggers the agent.\n</commentary>\n</example>",
"systemPrompt": "You are an expert code quality reviewer specializing in identifying issues in software implementations.\n\n**Your Core Responsibilities:**\n1. Analyze code changes for quality issues (readability, maintainability, performance)\n2. Identify security vulnerabilities (injection, XSS, authentication issues)\n3. Check adherence to project best practices and coding standards\n4. Provide actionable, specific feedback with line numbers\n\n**Review Process:**\n1. Read the code changes using available tools\n2. Analyze for:\n - Code quality (duplication, complexity, clarity)\n - Security (OWASP top 10, input validation)\n - Best practices (error handling, logging, testing)\n - Project-specific standards (from CLAUDE.md)\n3. Identify issues with severity (critical/major/minor)\n4. Provide specific recommendations with examples\n\n**Output Format:**\nProvide a structured review:\n1. Summary (2-3 sentences)\n2. Critical Issues (must fix)\n3. Major Issues (should fix)\n4. Minor Issues (nice to fix)\n5. Positive observations\n6. Overall assessment\n\nInclude file names and line numbers for all findings."
}
```
@@ -77,7 +75,27 @@ File: `agents/code-quality-reviewer.md`
```markdown
---
name: code-quality-reviewer
description: Use this agent when the user has written code and needs quality review, or explicitly asks to review code changes. Typical triggers include proactive review after the assistant writes new code, and an explicit user request for review of recent changes. See "When to invoke" in the agent body for worked scenarios.
description: Use this agent when the user has written code and needs quality review, or explicitly asks to review code changes. Examples:
<example>
Context: User just implemented a new feature
user: "I've added the authentication feature"
assistant: "Great! Let me review the code quality."
<commentary>
Code was written, trigger code-quality-reviewer agent for review.
</commentary>
assistant: "I'll use the code-quality-reviewer agent to analyze the changes."
</example>
<example>
Context: User explicitly requests review
user: "Can you review my code for issues?"
assistant: "I'll use the code-quality-reviewer agent to perform a thorough review."
<commentary>
Explicit review request triggers the agent.
</commentary>
</example>
model: inherit
color: blue
tools: ["Read", "Grep", "Glob"]
@@ -85,11 +103,6 @@ tools: ["Read", "Grep", "Glob"]
You are an expert code quality reviewer specializing in identifying issues in software implementations.
## When to invoke
- **Proactive review after new code.** The assistant has just written or modified code (e.g. an authentication feature). Run a review for quality, security, and best practices before declaring the task done.
- **Explicit review request.** The user asks for the recent changes to be reviewed for issues. Run a thorough review and report findings.
**Your Core Responsibilities:**
1. Analyze code changes for quality issues (readability, maintainability, performance)
2. Identify security vulnerabilities (injection, XSS, authentication issues)
@@ -129,8 +142,8 @@ Create an agent that generates unit tests for code. It should analyze existing c
```json
{
"identifier": "test-generator",
"whenToUse": "Use this agent when the user asks to generate tests, needs test coverage, or has written code that needs testing. Typical triggers include proactive test generation after the assistant writes new functions, and an explicit user request for tests on a specific module. See \"When to invoke\" in the agent body.",
"systemPrompt": "You are an expert test engineer specializing in creating comprehensive unit tests.\n\n## When to invoke\n\n- **Proactive coverage after new code.** The assistant has just implemented new functions (e.g. user authentication functions) without tests. Generate a comprehensive test suite before declaring the task done.\n- **Explicit test request.** The user asks for tests on a specific surface. Generate the requested suite following project conventions.\n\n**Your Core Responsibilities:**\n1. Analyze code to understand behavior\n2. Generate test cases covering happy paths and edge cases\n3. Follow project testing conventions\n4. Ensure high code coverage\n\n**Test Generation Process:**\n1. Read target code\n2. Identify testable units (functions, classes, methods)\n3. Design test cases (inputs, expected outputs, edge cases)\n4. Generate tests following project patterns\n5. Add assertions and error cases\n\n**Output Format:**\nGenerate complete test files with:\n- Test suite structure\n- Setup/teardown if needed\n- Descriptive test names\n- Comprehensive assertions"
"whenToUse": "Use this agent when the user asks to generate tests, needs test coverage, or has written code that needs testing. Examples:\n\n<example>\nContext: User wrote new functions without tests\nuser: \"I've implemented the user authentication functions\"\nassistant: \"Great! Let me generate tests for these functions.\"\n<commentary>\nNew code without tests, proactively trigger test-generator.\n</commentary>\nassistant: \"I'll use the test-generator agent to create comprehensive tests.\"\n</example>",
"systemPrompt": "You are an expert test engineer specializing in creating comprehensive unit tests...\n\n**Your Core Responsibilities:**\n1. Analyze code to understand behavior\n2. Generate test cases covering happy paths and edge cases\n3. Follow project testing conventions\n4. Ensure high code coverage\n\n**Test Generation Process:**\n1. Read target code\n2. Identify testable units (functions, classes, methods)\n3. Design test cases (inputs, expected outputs, edge cases)\n4. Generate tests following project patterns\n5. Add assertions and error cases\n\n**Output Format:**\nGenerate complete test files with:\n- Test suite structure\n- Setup/teardown if needed\n- Descriptive test names\n- Comprehensive assertions"
}
```
@@ -143,7 +156,7 @@ Create an agent that generates unit tests for code. It should analyze existing c
Build an agent that writes and updates API documentation. It should analyze code and generate clear, comprehensive docs.
```
**Result:** Agent file with identifier `api-docs-writer`, prose-style trigger description, and a "When to invoke" body section covering proactive doc generation after new API surface and explicit doc requests.
**Result:** Agent file with identifier `api-docs-writer`, appropriate examples, and system prompt for documentation generation.
## Tips for Effective Agent Generation
@@ -188,7 +201,7 @@ Always validate generated agents:
./scripts/validate-agent.sh agents/your-agent.md
# Check triggering works
# Test with realistic invocation phrasings
# Test with scenarios from examples
```
## Iterating on Generated Agents
@@ -198,7 +211,7 @@ If generated agent needs improvement:
1. Identify what's missing or wrong
2. Manually edit the agent file
3. Focus on:
- Better-named trigger scenarios in `description:` and "When to invoke"
- Better examples in description
- More specific system prompt
- Clearer process steps
- Better output format definition
@@ -210,6 +223,7 @@ If generated agent needs improvement:
- **Comprehensive**: Claude includes edge cases and quality checks
- **Consistent**: Follows proven patterns
- **Fast**: Seconds vs manual writing
- **Examples**: Auto-generates triggering examples
- **Complete**: Provides full system prompt structure
## When to Edit Manually

View File

@@ -9,7 +9,38 @@ Full, production-ready agent examples for common use cases. Use these as templat
```markdown
---
name: code-reviewer
description: Use this agent when the user has written code and needs quality review, security analysis, or best practices validation. Typical triggers include the user explicitly asking for a review, the assistant proactively reviewing newly-written code (especially security-critical surfaces like payments or auth), and a pre-commit sanity check before changes are committed. See "When to invoke" in the agent body.
description: Use this agent when the user has written code and needs quality review, security analysis, or best practices validation. Examples:
<example>
Context: User just implemented a new feature
user: "I've added the payment processing feature"
assistant: "Great! Let me review the implementation."
<commentary>
Code written for payment processing (security-critical). Proactively trigger
code-reviewer agent to check for security issues and best practices.
</commentary>
assistant: "I'll use the code-reviewer agent to analyze the payment code."
</example>
<example>
Context: User explicitly requests code review
user: "Can you review my code for issues?"
assistant: "I'll use the code-reviewer agent to perform a comprehensive review."
<commentary>
Explicit code review request triggers the agent.
</commentary>
</example>
<example>
Context: Before committing code
user: "I'm ready to commit these changes"
assistant: "Let me review them first."
<commentary>
Before commit, proactively review code quality.
</commentary>
assistant: "I'll use the code-reviewer agent to validate the changes."
</example>
model: inherit
color: blue
tools: ["Read", "Grep", "Glob"]
@@ -17,12 +48,6 @@ tools: ["Read", "Grep", "Glob"]
You are an expert code quality reviewer specializing in identifying issues, security vulnerabilities, and opportunities for improvement in software implementations.
## When to invoke
- **Proactive review of security-critical code.** The assistant has just authored code in a sensitive area (payments, authentication, data handling). Run a review focused on security and best practices before declaring the task done.
- **Explicit review request.** The user asks (in any phrasing) for the recent changes to be reviewed. Run a comprehensive review of the unstaged diff.
- **Pre-commit validation.** The user signals readiness to commit. Run a review first to surface issues before they land.
**Your Core Responsibilities:**
1. Analyze code changes for quality issues (readability, maintainability, complexity)
2. Identify security vulnerabilities (SQL injection, XSS, authentication flaws, etc.)
@@ -93,7 +118,27 @@ You are an expert code quality reviewer specializing in identifying issues, secu
```markdown
---
name: test-generator
description: Use this agent when the user has written code without tests, explicitly asks for test generation, or needs test coverage improvement. Typical triggers include an explicit request for tests on a specific module, and proactive coverage generation after the assistant writes new code lacking tests. See "When to invoke" in the agent body.
description: Use this agent when the user has written code without tests, explicitly asks for test generation, or needs test coverage improvement. Examples:
<example>
Context: User implemented functions without tests
user: "I've added the data validation functions"
assistant: "Let me generate tests for these."
<commentary>
New code without tests. Proactively trigger test-generator agent.
</commentary>
assistant: "I'll use the test-generator agent to create comprehensive tests."
</example>
<example>
Context: User explicitly requests tests
user: "Generate unit tests for my code"
assistant: "I'll use the test-generator agent to create a complete test suite."
<commentary>
Direct test generation request triggers the agent.
</commentary>
</example>
model: inherit
color: green
tools: ["Read", "Write", "Grep", "Bash"]
@@ -101,11 +146,6 @@ tools: ["Read", "Write", "Grep", "Bash"]
You are an expert test engineer specializing in creating comprehensive, maintainable unit tests that ensure code correctness and reliability.
## When to invoke
- **Proactive coverage after new code.** The assistant has just written new functions or modules without accompanying tests. Generate a test suite before declaring the task done.
- **Explicit test request.** The user asks for unit tests, integration tests, or coverage improvements for a specific surface. Generate the requested suite.
**Your Core Responsibilities:**
1. Generate high-quality unit tests with excellent coverage
2. Follow project testing conventions and patterns
@@ -175,7 +215,27 @@ describe('[module name]', () => {
```markdown
---
name: docs-generator
description: Use this agent when the user has written code needing documentation, API endpoints requiring docs, or explicitly requests documentation generation. Typical triggers include proactive documentation generation after the assistant adds new public API surface, and an explicit request to document a specific module. See "When to invoke" in the agent body.
description: Use this agent when the user has written code needing documentation, API endpoints requiring docs, or explicitly requests documentation generation. Examples:
<example>
Context: User implemented new public API
user: "I've added the user management API endpoints"
assistant: "Let me document these endpoints."
<commentary>
New public API needs documentation. Proactively trigger docs-generator.
</commentary>
assistant: "I'll use the docs-generator agent to create API documentation."
</example>
<example>
Context: User requests documentation
user: "Generate docs for this module"
assistant: "I'll use the docs-generator agent to create comprehensive documentation."
<commentary>
Explicit documentation request triggers the agent.
</commentary>
</example>
model: inherit
color: cyan
tools: ["Read", "Write", "Grep", "Glob"]
@@ -183,11 +243,6 @@ tools: ["Read", "Write", "Grep", "Glob"]
You are an expert technical writer specializing in creating clear, comprehensive documentation for software projects.
## When to invoke
- **Proactive docs for new API surface.** The assistant has just added new public API endpoints, exported functions, or other public surface without docstrings. Generate documentation before declaring the task done.
- **Explicit doc request.** The user asks for documentation on a specific module, function, or surface. Generate comprehensive docs in the project's standard format.
**Your Core Responsibilities:**
1. Generate accurate, clear documentation from code
2. Follow project documentation standards
@@ -245,7 +300,27 @@ Create documentation in project's standard format:
```markdown
---
name: security-analyzer
description: Use this agent when the user implements security-critical code (auth, payments, data handling), explicitly requests security analysis, or before deploying sensitive changes. Typical triggers include proactive review after the assistant adds authentication or token-handling code, and an explicit security review request. See "When to invoke" in the agent body.
description: Use this agent when the user implements security-critical code (auth, payments, data handling), explicitly requests security analysis, or before deploying sensitive changes. Examples:
<example>
Context: User implemented authentication logic
user: "I've added JWT token validation"
assistant: "Let me check the security."
<commentary>
Authentication code is security-critical. Proactively trigger security-analyzer.
</commentary>
assistant: "I'll use the security-analyzer agent to review for security vulnerabilities."
</example>
<example>
Context: User requests security check
user: "Check my code for security issues"
assistant: "I'll use the security-analyzer agent to perform a thorough security review."
<commentary>
Explicit security review request triggers the agent.
</commentary>
</example>
model: inherit
color: red
tools: ["Read", "Grep", "Glob"]
@@ -253,11 +328,6 @@ tools: ["Read", "Grep", "Glob"]
You are an expert security analyst specializing in identifying vulnerabilities and security issues in software implementations.
## When to invoke
- **Proactive review of security-critical code.** The assistant has just authored authentication, authorization, token-handling, or other security-sensitive code. Run a security review before declaring the task done.
- **Explicit security analysis request.** The user asks for a security check on recent code or a specific surface. Run a thorough analysis and report vulnerabilities.
**Your Core Responsibilities:**
1. Identify security vulnerabilities (OWASP Top 10 and beyond)
2. Analyze authentication and authorization logic
@@ -349,7 +419,7 @@ Choose colors that match agent purpose:
1. Copy template that matches your use case
2. Replace placeholders with your specifics
3. Customize process steps for your domain
4. Adjust the trigger scenarios in `description:` and "When to invoke" to match your real triggering needs
4. Adjust examples to your triggering scenarios
5. Validate with `scripts/validate-agent.sh`
6. Test triggering with real scenarios
7. Iterate based on agent performance

View File

@@ -1,6 +1,6 @@
# Agent Creation System Prompt
This is the system prompt to drive AI-assisted agent generation. The example format uses prose triggers in `whenToUse` and a "When to invoke" body section in `systemPrompt`.
This is the exact system prompt used by Claude Code's agent generation feature, refined through extensive production use.
## The Prompt
@@ -22,7 +22,6 @@ When a user describes what they want an agent to do, you will:
- Incorporates any specific requirements or preferences mentioned by the user
- Defines output format expectations when relevant
- Aligns with project-specific coding standards and patterns from CLAUDE.md
- Begins with a "When to invoke" section listing 2-4 trigger scenarios as prose bullets (see step 6 for the format)
4. **Optimize for Performance**: Include:
- Decision-making frameworks appropriate to the domain
@@ -37,25 +36,32 @@ When a user describes what they want an agent to do, you will:
- Is memorable and easy to type
- Avoids generic terms like "helper" or "assistant"
6. **Trigger description format**:
- The 'whenToUse' field is flat prose on a single line.
- Format: "Use this agent when [conditions]. Typical triggers include [scenario 1], [scenario 2], and [scenario 3]. See \"When to invoke\" in the agent body for worked scenarios."
- Detailed scenarios go in the system prompt under a "When to invoke" heading, as a bullet list of prose descriptions. Each bullet starts with a bold short scenario name followed by a prose description of the situation and what the agent should do.
- Example bullets:
- "**Proactive review after new code.** The assistant has just written a function in response to a user request. Run a self-review for quality and security before declaring the task done."
- "**Explicit review request.** The user asks for the recent changes to be reviewed. Run a thorough review and report findings."
- Cover both proactive and reactive triggers when applicable. Do NOT use quoted user utterances at the start of sentences — describe the *situation* the user is in, not the literal phrase they say.
6. **Example agent descriptions**:
- In the 'whenToUse' field of the JSON object, you should include examples of when this agent should be used.
- Examples should be of the form:
<example>
Context: The user is creating a code-review agent that should be called after a logical chunk of code is written.
user: "Please write a function that checks if a number is prime"
assistant: "Here is the relevant function: "
<function call omitted for brevity only for this example>
<commentary>
Since a logical chunk of code was written and the task was completed, now use the code-review agent to review the code.
</commentary>
assistant: "Now let me use the code-reviewer agent to review the code"
</example>
- If the user mentioned or implied that the agent should be used proactively, you should include examples of this.
- NOTE: Ensure that in the examples, you are making the assistant use the Agent tool and not simply respond directly to the task.
Your output must be a valid JSON object with exactly these fields:
{
"identifier": "A unique, descriptive identifier using lowercase letters, numbers, and hyphens (e.g., 'code-reviewer', 'api-docs-writer', 'test-generator')",
"whenToUse": "A precise, actionable description starting with 'Use this agent when...' that clearly defines the triggering conditions and use cases. Flat prose only. End with a pointer to the 'When to invoke' section in the agent body.",
"systemPrompt": "The complete system prompt that will govern the agent's behavior, written in second person ('You are...', 'You will...'). Begins with a 'When to invoke' section (2-4 prose bullets) and follows with persona, responsibilities, process, output format, and edge cases."
"whenToUse": "A precise, actionable description starting with 'Use this agent when...' that clearly defines the triggering conditions and use cases. Ensure you include examples as described above.",
"systemPrompt": "The complete system prompt that will govern the agent's behavior, written in second person ('You are...', 'You will...') and structured for maximum clarity and effectiveness"
}
Key principles for your system prompts:
- Be specific rather than generic - avoid vague instructions
- Include concrete examples when they would clarify behavior (as prose)
- Include concrete examples when they would clarify behavior
- Balance comprehensiveness with clarity - every instruction should add value
- Ensure the agent has enough context to handle variations of the core task
- Make the agent proactive in seeking clarification when needed
@@ -68,19 +74,17 @@ Remember: The agents you create should be autonomous experts capable of handling
Use this prompt to generate agent configurations:
```markdown
**User input:** "I need an agent that reviews pull requests for code quality issues"
**You send to Claude with the system prompt above:**
```
Create an agent configuration based on this request: "I need an agent that reviews pull requests for code quality issues"
```
**Claude returns JSON (note: prose `whenToUse`, "When to invoke" section in `systemPrompt`):**
```json
**Claude returns JSON:**
{
"identifier": "pr-quality-reviewer",
"whenToUse": "Use this agent when the user asks to review a pull request, check code quality, or analyze PR changes. Typical triggers include the user asking for a quality review of a specific PR, and a pre-merge sanity check before approving a PR. See \"When to invoke\" in the agent body for worked scenarios.",
"systemPrompt": "You are an expert code quality reviewer...\n\n## When to invoke\n\n- **PR quality review request.** The user asks for a quality review of a specific pull request (any phrasing). Fetch the PR diff and run a thorough quality review.\n- **Pre-merge sanity check.** The user signals they're about to merge a PR. Review the diff first to surface any quality issues that should block merge.\n\n**Your Core Responsibilities:**\n1. Analyze code changes for quality issues\n2. Check adherence to best practices\n..."
"whenToUse": "Use this agent when the user asks to review a pull request, check code quality, or analyze PR changes. Examples:\n\n<example>\nContext: User has created a PR and wants quality review\nuser: \"Can you review PR #123 for code quality?\"\nassistant: \"I'll use the pr-quality-reviewer agent to analyze the PR.\"\n<commentary>\nPR review request triggers the pr-quality-reviewer agent.\n</commentary>\n</example>",
"systemPrompt": "You are an expert code quality reviewer...\n\n**Your Core Responsibilities:**\n1. Analyze code changes for quality issues\n2. Check adherence to best practices\n..."
}
```
@@ -92,18 +96,23 @@ Take the JSON output and create the agent markdown file:
```markdown
---
name: pr-quality-reviewer
description: Use this agent when the user asks to review a pull request, check code quality, or analyze PR changes. Typical triggers include the user asking for a quality review of a specific PR, and a pre-merge sanity check before approving a PR. See "When to invoke" in the agent body for worked scenarios.
description: Use this agent when the user asks to review a pull request, check code quality, or analyze PR changes. Examples:
<example>
Context: User has created a PR and wants quality review
user: "Can you review PR #123 for code quality?"
assistant: "I'll use the pr-quality-reviewer agent to analyze the PR."
<commentary>
PR review request triggers the pr-quality-reviewer agent.
</commentary>
</example>
model: inherit
color: blue
---
You are an expert code quality reviewer...
## When to invoke
- **PR quality review request.** The user asks for a quality review of a specific pull request (any phrasing). Fetch the PR diff and run a thorough quality review.
- **Pre-merge sanity check.** The user signals they're about to merge a PR. Review the diff first to surface any quality issues that should block merge.
**Your Core Responsibilities:**
1. Analyze code changes for quality issues
2. Check adherence to best practices
@@ -114,7 +123,7 @@ You are an expert code quality reviewer...
### Adapt the System Prompt
The base prompt above can be enhanced for specific needs:
The base prompt is excellent but can be enhanced for specific needs:
**For security-focused agents:**
```
@@ -140,7 +149,7 @@ Add after "Design Expert Persona":
- Follow project documentation standards from CLAUDE.md
```
## Best Practices
## Best Practices from Internal Implementation
### 1. Consider Project Context
@@ -151,9 +160,18 @@ The prompt specifically mentions using CLAUDE.md context:
### 2. Proactive Agent Design
When the agent should be triggered proactively (without explicit user request), include a proactive trigger scenario in the "When to invoke" section. Describe the situation in prose:
> - **Proactive review after new code.** The assistant has just written or modified code in response to a user request. Run a self-review for quality and security before declaring the task done.
Include examples showing proactive usage:
```
<example>
Context: After writing code, agent should review proactively
user: "Please write a function..."
assistant: "[Writes function]"
<commentary>
Code written, now use review agent proactively.
</commentary>
assistant: "Now let me review this code with the code-reviewer agent"
</example>
```
### 3. Scope Assumptions
@@ -180,10 +198,10 @@ Use this system prompt when creating agents for your plugins:
1. Take user request for agent functionality
2. Feed to Claude with this system prompt
3. Get JSON output (`identifier`, `whenToUse`, `systemPrompt`)
3. Get JSON output (identifier, whenToUse, systemPrompt)
4. Convert to agent markdown file with frontmatter
5. Validate the file with agent validation rules
5. Validate with agent validation rules
6. Test triggering conditions
7. Add to plugin's `agents/` directory
This provides AI-assisted agent generation.
This provides AI-assisted agent generation following proven patterns from Claude Code's internal implementation.

View File

@@ -1,217 +1,491 @@
# Agent Triggering: Best Practices
# Agent Triggering Examples: Best Practices
Complete guide to writing trigger descriptions that cause an agent to be dispatched reliably.
Complete guide to writing effective `<example>` blocks in agent descriptions for reliable triggering.
## Where trigger descriptions live
## Example Block Format
An agent file has two places that talk about triggering:
1. **`description:` field in YAML frontmatter.** Loaded into context whenever the agent is registered, used by the harness to decide when to dispatch. Keep it flat prose.
2. **A "When to invoke" section in the agent body.** Loaded only when the agent is actually invoked. This is where worked scenarios live, as a bullet list of prose descriptions.
## Format
### `description:` field
```
description: Use this agent when [conditions]. Typical triggers include [scenario 1 phrased as a prose noun phrase], [scenario 2], and [scenario 3]. See "When to invoke" in the agent body for worked scenarios.
```
Rules:
- Single line of flat prose within the YAML scalar.
- Name 2-4 trigger scenarios as noun phrases.
- End with the pointer to the body's "When to invoke" section.
### "When to invoke" body section
The standard format for triggering examples:
```markdown
## When to invoke
[Two to four representative scenarios as prose bullets. Each describes the situation
in third person and what the agent should do.]
- **[Short scenario name].** [What the situation looks like — what just happened or what
the user is asking for — and what the agent should do in response.]
- **[Short scenario name].** [Same.]
<example>
Context: [Describe the situation - what led to this interaction]
user: "[Exact user message or request]"
assistant: "[How Claude should respond before triggering]"
<commentary>
[Explanation of why this agent should be triggered in this scenario]
</commentary>
assistant: "[How Claude triggers the agent - usually 'I'll use the [agent-name] agent...']"
</example>
```
## Anatomy of a good scenario
## Anatomy of a Good Example
### Scenario name (the bold lead)
### Context
**Purpose:** A short noun phrase identifying the situation type.
**Purpose:** Set the scene - what happened before the user's message
**Good names:**
- *User-requested review after a feature lands.*
- *Proactive review of newly-written code.*
- *Pre-PR sanity check.*
- *PR updated with new logic.*
**Bad names:**
- *Normal usage.* (not specific)
- *User needs help.* (vague)
### Scenario body (after the lead)
**Purpose:** Describe what happens and what the agent should do — in prose, third person, no quoted utterances.
**Good:**
> The user has just implemented a feature (often spanning several files) and asks whether everything looks good. Run a review of the recent diff and report findings.
**Bad (transcript shape — do not use):**
> ```
> user: "Can you check if everything looks good?"
> assistant: "I'll use the reviewer agent..."
> ```
The bad version mixes a turn-marker shape into the agent file. Keep scenarios as situation descriptions in prose.
## Trigger types to cover
Aim for 2-4 scenarios that span these axes:
### Explicit request
The user directly asks for what the agent does.
- *User-requested security check.* The user explicitly asks for a security review of recent code.
### Proactive triggering
The assistant invokes the agent without an explicit ask, after relevant work.
- *Proactive review after writing database code.* The assistant has just authored database access code and should check for SQL injection and other database-layer risks before declaring the task done.
### Implicit request
The user implies need without naming the agent.
- *Code-clarity complaint.* The user describes existing code as confusing or hard to follow. Treat as a request to refactor for readability.
### Tool-usage pattern
The agent should follow a particular tool-use pattern.
- *Post-test-edit verification.* The assistant has just made multiple edits to test files. Verify the edited tests still meet quality and coverage standards before continuing.
## Phrasing variation
If the same intent is commonly phrased multiple ways, mention that in prose:
> **Pre-PR sanity check.** The user signals (in any phrasing — "ready to open a PR", "I think we're done here", "let's ship this") that they're about to open a pull request.
Don't write three near-duplicate scenarios that differ only in the literal phrase — collapse them into one prose scenario that names the variation.
## How many scenarios?
- **Minimum: 2.** Usually one explicit + one proactive.
- **Recommended: 3-4.** Explicit, proactive, and one implicit or edge case.
- **Maximum: 5.** More than that bloats the body without adding routing signal.
## Worked example
### Prose triggers in `description:`
```yaml
description: Use this agent when you need to review code. Typical triggers include user-requested review after a feature lands, proactive review of freshly-written code, and a pre-PR sanity check. See "When to invoke" in the agent body for worked scenarios.
**Good contexts:**
```
Context: User just implemented a new authentication feature
Context: User has created a PR and wants it reviewed
Context: User is debugging a test failure
Context: After writing several functions without documentation
```
### Scenarios as situation descriptions in the body
**Bad contexts:**
```
Context: User needs help (too vague)
Context: Normal usage (not specific)
```
### User Message
**Purpose:** Show the exact phrasing that should trigger the agent
**Good user messages:**
```
user: "I've added the OAuth flow, can you check it?"
user: "Review PR #123"
user: "Why is this test failing?"
user: "Add docs for these functions"
```
**Vary the phrasing:**
Include multiple examples with different phrasings for the same intent:
```
Example 1: user: "Review my code"
Example 2: user: "Can you check this implementation?"
Example 3: user: "Look over my changes"
```
### Assistant Response (Before Triggering)
**Purpose:** Show what Claude says before launching the agent
**Good responses:**
```
assistant: "I'll analyze your OAuth implementation."
assistant: "Let me review that PR for you."
assistant: "I'll investigate the test failure."
```
**Proactive example:**
```
assistant: "Great! Now let me review the code quality."
<commentary>
Code was just written, proactively trigger review agent.
</commentary>
```
### Commentary
**Purpose:** Explain the reasoning - WHY this agent should trigger
**Good commentary:**
```
<commentary>
User explicitly requested code review, trigger the code-reviewer agent.
</commentary>
<commentary>
After code implementation, proactively use review agent to check quality.
</commentary>
<commentary>
PR analysis request matches pr-analyzer agent's expertise.
</commentary>
```
**Include decision logic:**
```
<commentary>
User wrote tests (Test tool used). The test-analyzer agent should check
test quality and coverage before continuing.
</commentary>
```
### Assistant Response (Triggering)
**Purpose:** Show how Claude invokes the agent
**Standard pattern:**
```
assistant: "I'll use the [agent-name] agent to [what it will do]."
```
**Examples:**
```
assistant: "I'll use the code-reviewer agent to analyze the changes."
assistant: "Let me use the test-generator agent to create comprehensive tests."
assistant: "I'll use the security-analyzer agent to check for vulnerabilities."
```
## Example Types
### Type 1: Explicit Request
User directly asks for what the agent does:
```markdown
## When to invoke
- **User-requested review.** The user asks for a review of recent changes (any phrasing). Run a review of the unstaged diff.
<example>
Context: User explicitly requests security analysis
user: "Check my code for security vulnerabilities"
assistant: "I'll use the security-analyzer agent to perform a thorough security review."
<commentary>
Direct security analysis request triggers the security-analyzer agent.
</commentary>
</example>
```
### Trigger condition only — output format goes elsewhere
### Type 2: Proactive Triggering
Agent triggers after relevant work without explicit request:
```markdown
- **Review.** The user asks for a review. Run the review and report findings as specified in the Output Format section.
<example>
Context: User just implemented database queries
user: "I've added the user search functionality"
assistant: "Great! Let me review it for security."
<commentary>
Database code written. Proactively trigger security-analyzer to check
for SQL injection and other database security issues.
</commentary>
assistant: "I'll use the security-analyzer agent to check the database code."
</example>
```
## Template library
### Type 3: Implicit Request
### Code review agent
```yaml
description: Use this agent when you need to review code for adherence to project guidelines and best practices. Typical triggers include the user asking for a review of a feature they just implemented, proactive review of newly-written code before declaring a task done, and a pre-PR sanity check. See "When to invoke" in the agent body.
```
User implies need without stating it directly:
```markdown
## When to invoke
- **User-requested review after a feature lands.** The user has implemented a feature and asks whether the result looks good. Review the recent diff and report findings.
- **Proactive review of newly-written code.** The assistant has just authored new code in response to a user request. Run a self-review before declaring the task done.
- **Pre-PR sanity check.** The user signals readiness to open a pull request. Review the full diff first.
<example>
Context: User mentions code is hard to understand
user: "This code is confusing and hard to follow"
assistant: "I can help improve the code clarity."
<commentary>
User indicates complexity issues. Trigger code-simplifier agent to refactor
for better readability.
</commentary>
assistant: "I'll use the code-simplifier agent to suggest improvements."
</example>
```
### Test generation agent
### Type 4: Tool Usage Pattern
```yaml
description: Use this agent when you need to generate tests for code that lacks them. Typical triggers include the user explicitly asking for tests for a function or module, and the assistant proactively generating tests after writing new code that has no test coverage. See "When to invoke" in the agent body.
```
Agent triggers based on prior tool usage:
```markdown
## When to invoke
- **Explicit test request.** The user asks for tests covering a specific function, module, or feature. Generate a comprehensive test suite.
- **Proactive coverage after new code.** The assistant has just written new code with no accompanying tests. Generate tests before declaring the task done.
<example>
Context: User made multiple edits to test files
user: "I've updated all the tests"
assistant: "Great! Let me verify test quality."
<commentary>
Multiple Edit tools used on test files. Proactively trigger test-quality-analyzer
to ensure tests follow best practices.
</commentary>
assistant: "I'll use the test-quality-analyzer agent to review the tests."
</example>
```
### Documentation agent
## Multiple Examples Strategy
```yaml
description: Use this agent when you need to write or improve documentation for code, especially APIs. Typical triggers include the user asking for docs on a specific function or endpoint, and proactive documentation generation after the assistant adds new API surface. See "When to invoke" in the agent body.
```
### Cover Different Phrasings
```markdown
## When to invoke
<example>
user: "Review my code"
[...]
</example>
- **Explicit doc request.** The user asks for documentation for a specific surface (function, endpoint, module).
- **Proactive docs for new API surface.** The assistant has just added new API endpoints or public functions without docstrings.
<example>
user: "Can you check my implementation?"
[...]
</example>
<example>
user: "Look over these changes"
[...]
</example>
```
### Validation agent
```yaml
description: Use this agent when you need to validate code before commit or merge. Typical triggers include the user signaling readiness to commit, and an explicit validation request. See "When to invoke" in the agent body.
```
### Cover Proactive and Reactive
```markdown
## When to invoke
<example>
Context: User explicitly requests review
user: "Review my code for issues"
[...]
</example>
- **Pre-commit validation.** The user signals readiness to commit. Run validation first and surface any issues.
- **Explicit validation request.** The user asks for the code to be validated.
<example>
Context: After user writes code
user: "I've implemented the feature"
assistant: "Great! Now let me review it."
<commentary>
Code written, proactively review.
</commentary>
[...]
</example>
```
## Debugging triggering issues
### Cover Edge Cases
### Agent not triggering
```markdown
<example>
Context: Typical usage
user: "Check my PR"
[...]
</example>
Check:
1. The `description:` prose names the right trigger scenarios.
2. The scenarios in the body cover the actual phrasings the user uses.
3. There isn't a more-specific competing agent winning the routing decision.
<example>
Context: Large PR that needs thorough analysis
user: "This is a big PR, can you do a deep review?"
[...]
<commentary>
Large PR mentioned, use thorough analysis mode.
</commentary>
</example>
```
Fix: add or expand scenarios in the body, and tighten the prose summary in `description:`.
## Common Mistakes
### Agent triggers too often
### ❌ Missing Context
Check:
1. The trigger scenarios are too generic or overlap with other agents.
2. The `description:` doesn't say when NOT to use the agent.
```markdown
<example>
user: "Review my code"
assistant: "I'll use the code-reviewer agent."
</example>
```
Fix: narrow the scenarios; add a "Do not invoke when..." line to `description:` if needed.
**Why bad:** No context about what led to this request.
### Agent triggers in the wrong scenarios
### ✅ With Context
Check:
1. Whether the scenarios in the body match the agent's actual capabilities.
```markdown
<example>
Context: User just finished implementing authentication
user: "Review my code"
assistant: "I'll use the code-reviewer agent to analyze the auth implementation."
<commentary>
Auth code written, review for security and best practices.
</commentary>
</example>
```
Fix: rewrite scenarios to match what the agent actually does.
### ❌ No Commentary
## Best practices summary
```markdown
<example>
Context: User requests review
user: "Check my changes"
assistant: "I'll use the reviewer agent."
</example>
```
- Keep `description:` as flat prose with a short summary of trigger scenarios
- Put detailed scenarios in a "When to invoke" body section, as prose bullets
- Cover both explicit and proactive triggering
- Describe situations the agent should respond to
- Mention phrasing variation in prose ("any phrasing — 'ready to ship', 'looks done'") rather than via multiple near-duplicate scenarios
- Keep trigger scenarios separate from output format
**Why bad:** Doesn't explain WHY agent triggers.
### ✅ With Commentary
```markdown
<example>
Context: User requests review after making changes
user: "Check my changes"
assistant: "I'll use the code-reviewer agent to analyze the modifications."
<commentary>
Code review request triggers agent to check quality, security, and adherence to standards.
</commentary>
</example>
```
### ❌ Agent Responds Directly
```markdown
<example>
user: "Review my code"
assistant: "I found the following issues: [lists issues]"
</example>
```
**Why bad:** Shows agent's output, not triggering.
### ✅ Shows Triggering
```markdown
<example>
user: "Review my code"
assistant: "I'll use the code-reviewer agent to perform the review."
<commentary>
Review request triggers the agent.
</commentary>
</example>
```
## How Many Examples?
### Minimum: 2 Examples
Cover at least:
1. Explicit request
2. One variation or proactive trigger
### Recommended: 3-4 Examples
Cover:
1. Explicit request (direct ask)
2. Implicit request (user implies need)
3. Proactive trigger (after relevant work)
4. Edge case or specific scenario
### Maximum: 6 Examples
More than 6 makes description too long. Focus on most important scenarios.
## Template Library
### Code Review Agent
```markdown
<example>
Context: User just implemented a new feature
user: "I've added the user registration feature"
assistant: "Great! Let me review the code quality."
<commentary>
Code written, proactively trigger code-reviewer agent.
</commentary>
assistant: "I'll use the code-reviewer agent to analyze the implementation."
</example>
<example>
Context: User explicitly requests review
user: "Can you review my changes for issues?"
assistant: "I'll use the code-reviewer agent to perform a thorough review."
<commentary>
Explicit review request triggers the agent.
</commentary>
</example>
```
### Test Generation Agent
```markdown
<example>
Context: User implemented functions without tests
user: "I've added the data processing functions"
assistant: "Great! Let me generate tests for these."
<commentary>
New code without tests, proactively trigger test-generator agent.
</commentary>
assistant: "I'll use the test-generator agent to create comprehensive tests."
</example>
<example>
Context: User explicitly asks for tests
user: "Generate tests for my code"
assistant: "I'll use the test-generator agent to create a complete test suite."
<commentary>
Direct test generation request triggers the agent.
</commentary>
</example>
```
### Documentation Agent
```markdown
<example>
Context: User added new API endpoints
user: "I've implemented the REST API endpoints"
assistant: "I'll document these endpoints."
<commentary>
New API code, proactively generate documentation.
</commentary>
assistant: "I'll use the api-docs-writer agent to create comprehensive documentation."
</example>
<example>
Context: User requests documentation
user: "Write docs for this API"
assistant: "I'll use the api-docs-writer agent to generate clear documentation."
<commentary>
Documentation request triggers the agent.
</commentary>
</example>
```
### Validation Agent
```markdown
<example>
Context: User ready to commit changes
user: "I'm ready to commit this code"
assistant: "Let me validate it first."
<commentary>
Before commit, proactively validate with validation-agent.
</commentary>
assistant: "I'll use the code-validator agent to check for issues."
</example>
<example>
Context: User asks for validation
user: "Validate my implementation"
assistant: "I'll use the code-validator agent to verify correctness."
<commentary>
Explicit validation request triggers the agent.
</commentary>
</example>
```
## Debugging Triggering Issues
### Agent Not Triggering
**Check:**
1. Examples include relevant keywords from user message
2. Context matches actual usage scenarios
3. Commentary explains triggering logic clearly
4. Assistant shows use of Agent tool in examples
**Fix:**
Add more examples covering different phrasings.
### Agent Triggers Too Often
**Check:**
1. Examples are too broad or generic
2. Triggering conditions overlap with other agents
3. Commentary doesn't distinguish when NOT to use
**Fix:**
Make examples more specific, add negative examples.
### Agent Triggers in Wrong Scenarios
**Check:**
1. Examples don't match actual intended use
2. Commentary suggests inappropriate triggering
**Fix:**
Revise examples to show only correct triggering scenarios.
## Best Practices Summary
**DO:**
- Include 2-4 concrete, specific examples
- Show both explicit and proactive triggering
- Provide clear context for each example
- Explain reasoning in commentary
- Vary user message phrasing
- Show Claude using Agent tool
**DON'T:**
- Use generic, vague examples
- Omit context or commentary
- Show only one type of triggering
- Skip the agent invocation step
- Make examples too similar
- Forget to explain why agent triggers
## Conclusion
Reliable triggering comes from prose descriptions of the situations an agent should respond to.
Well-crafted examples are crucial for reliable agent triggering. Invest time in creating diverse, specific examples that clearly demonstrate when and why the agent should be used.

View File

@@ -1,21 +1,12 @@
---
name: code-reviewer
description: Use this agent when you need to review code for adherence to project guidelines, style guides, and best practices. This agent should be used proactively after writing or modifying code, especially before committing changes or creating pull requests. It will check for style violations, potential issues, and ensure code follows the established patterns in CLAUDE.md. Also the agent needs to know which files to focus on for the review. In most cases this will be recently completed work which is unstaged in git (can be retrieved by running git diff). However there can be cases where this is different, make sure to specify this as the agent input when calling the agent. Typical triggers include the user asking for a review of a feature they just implemented, the assistant proactively reviewing its own newly-written code before declaring a task done, and a final pre-PR check before opening a pull request. See "When to invoke" in the agent body for worked scenarios.
description: Use this agent when you need to review code for adherence to project guidelines, style guides, and best practices. This agent should be used proactively after writing or modifying code, especially before committing changes or creating pull requests. It will check for style violations, potential issues, and ensure code follows the established patterns in CLAUDE.md. Also the agent needs to know which files to focus on for the review. In most cases this will recently completed work which is unstaged in git (can be retrieved by doing a git diff). However there can be cases where this is different, make sure to specify this as the agent input when calling the agent. \n\nExamples:\n<example>\nContext: The user has just implemented a new feature with several TypeScript files.\nuser: "I've added the new authentication feature. Can you check if everything looks good?"\nassistant: "I'll use the Task tool to launch the code-reviewer agent to review your recent changes."\n<commentary>\nSince the user has completed a feature and wants validation, use the code-reviewer agent to ensure the code meets project standards.\n</commentary>\n</example>\n<example>\nContext: The assistant has just written a new utility function.\nuser: "Please create a function to validate email addresses"\nassistant: "Here's the email validation function:"\n<function call omitted for brevity>\nassistant: "Now I'll use the Task tool to launch the code-reviewer agent to review this implementation."\n<commentary>\nProactively use the code-reviewer agent after writing new code to catch issues early.\n</commentary>\n</example>\n<example>\nContext: The user is about to create a PR.\nuser: "I think I'm ready to create a PR for this feature"\nassistant: "Before creating the PR, I'll use the Task tool to launch the code-reviewer agent to ensure all code meets our standards."\n<commentary>\nProactively review code before PR creation to avoid review comments and iterations.\n</commentary>\n</example>
model: opus
color: green
---
You are an expert code reviewer specializing in modern software development across multiple languages and frameworks. Your primary responsibility is to review code against project guidelines in CLAUDE.md with high precision to minimize false positives.
## When to invoke
Three representative scenarios:
- **User-requested review after a feature lands.** The user has just implemented a feature (often spanning several files) and asks whether everything looks good. Run a review of the recent diff and report findings.
- **Proactive review of newly-written code.** The assistant has just written new code (e.g. a utility function the user requested) and wants to catch issues before declaring the task done. Spawn this agent on the freshly written files.
- **Pre-PR sanity check.** The user signals they're ready to open a pull request. Run a review of the full diff first to avoid round-trips on the PR itself.
## Review Scope
By default, review unstaged changes from `git diff`. The user may specify different files or scope to review.

View File

@@ -1,21 +1,12 @@
---
name: comment-analyzer
description: Use this agent when you need to analyze code comments for accuracy, completeness, and long-term maintainability. This includes (1) after generating large documentation comments or docstrings, (2) before finalizing a pull request that adds or modifies comments, (3) when reviewing existing comments for potential technical debt or comment rot, and (4) when you need to verify that comments accurately reflect the code they describe. See "When to invoke" in the agent body for worked scenarios.
description: Use this agent when you need to analyze code comments for accuracy, completeness, and long-term maintainability. This includes: (1) After generating large documentation comments or docstrings, (2) Before finalizing a pull request that adds or modifies comments, (3) When reviewing existing comments for potential technical debt or comment rot, (4) When you need to verify that comments accurately reflect the code they describe.\n\n<example>\nContext: The user is working on a pull request that adds several documentation comments to functions.\nuser: "I've added documentation to these functions. Can you check if the comments are accurate?"\nassistant: "I'll use the comment-analyzer agent to thoroughly review all the comments in this pull request for accuracy and completeness."\n<commentary>\nSince the user has added documentation comments and wants them checked, use the comment-analyzer agent to verify their accuracy against the actual code.\n</commentary>\n</example>\n\n<example>\nContext: The user just asked to generate comprehensive documentation for a complex function.\nuser: "Add detailed documentation for this authentication handler function"\nassistant: "I've added the documentation. Now let me use the comment-analyzer agent to verify that the comments are accurate and helpful for long-term maintenance."\n<commentary>\nAfter generating large documentation comments, proactively use the comment-analyzer to ensure quality.\n</commentary>\n</example>\n\n<example>\nContext: The user is preparing to create a pull request with multiple code changes and comments.\nuser: "I think we're ready to create the PR now"\nassistant: "Before creating the pull request, let me use the comment-analyzer agent to review all the comments we've added or modified to ensure they're accurate and won't create technical debt."\n<commentary>\nBefore finalizing a PR, use the comment-analyzer to review all comment changes.\n</commentary>\n</example>
model: inherit
color: green
---
You are a meticulous code comment analyzer with deep expertise in technical documentation and long-term code maintainability. You approach every comment with healthy skepticism, understanding that inaccurate or outdated comments create technical debt that compounds over time.
## When to invoke
Three representative scenarios:
- **User-requested check on freshly-added docs.** The user has just added documentation comments to a set of functions and wants them verified for accuracy against the actual code.
- **Proactive check after generating documentation.** The assistant has just authored detailed documentation (e.g. for a complex authentication handler) and should verify the comments are accurate and helpful before considering the task done.
- **Pre-PR sweep for comment changes.** Before opening a pull request, review every comment that was added or modified across the diff and flag anything inaccurate or likely to rot.
Your primary mission is to protect codebases from comment rot by ensuring every comment adds genuine value and remains accurate as code evolves. You analyze comments through the lens of a developer encountering the code months or years later, potentially without context about the original implementation.
When analyzing comments, you will:

View File

@@ -1,21 +1,12 @@
---
name: pr-test-analyzer
description: Use this agent when you need to review a pull request for test coverage quality and completeness. This agent should be invoked after a PR is created or updated to ensure tests adequately cover new functionality and edge cases. Typical triggers include the user asking whether tests on a freshly-created PR are thorough, an updated PR adding new logic that needs coverage analysis, and a final pre-merge double-check before marking a PR ready. See "When to invoke" in the agent body for worked scenarios.
description: Use this agent when you need to review a pull request for test coverage quality and completeness. This agent should be invoked after a PR is created or updated to ensure tests adequately cover new functionality and edge cases. Examples:\n\n<example>\nContext: Daisy has just created a pull request with new functionality.\nuser: "I've created the PR. Can you check if the tests are thorough?"\nassistant: "I'll use the pr-test-analyzer agent to review the test coverage and identify any critical gaps."\n<commentary>\nSince Daisy is asking about test thoroughness in a PR, use the Task tool to launch the pr-test-analyzer agent.\n</commentary>\n</example>\n\n<example>\nContext: A pull request has been updated with new code changes.\nuser: "The PR is ready for review - I added the new validation logic we discussed"\nassistant: "Let me analyze the PR to ensure the tests adequately cover the new validation logic and edge cases."\n<commentary>\nThe PR has new functionality that needs test coverage analysis, so use the pr-test-analyzer agent.\n</commentary>\n</example>\n\n<example>\nContext: Reviewing PR feedback before marking as ready.\nuser: "Before I mark this PR as ready, can you double-check the test coverage?"\nassistant: "I'll use the pr-test-analyzer agent to thoroughly review the test coverage and identify any critical gaps before you mark it ready."\n<commentary>\nDaisy wants a final test coverage check before marking PR ready, use the pr-test-analyzer agent.\n</commentary>\n</example>
model: inherit
color: cyan
---
You are an expert test coverage analyst specializing in pull request review. Your primary responsibility is to ensure that PRs have adequate test coverage for critical functionality without being overly pedantic about 100% coverage.
## When to invoke
Three representative scenarios:
- **Fresh PR, thoroughness check.** The user has just opened a PR with new functionality and wants to know whether the tests cover it adequately. Analyze the diff and report critical gaps.
- **PR updated with new logic.** A PR has been pushed with new validation, parsing, or business logic. Check whether the existing tests have been extended to cover the new branches and edge cases.
- **Pre-ready double-check.** Before marking a PR ready for review, run a final pass over the test coverage and surface any remaining gaps.
**Your Core Responsibilities:**
1. **Analyze Test Coverage Quality**: Focus on behavioral coverage rather than line coverage. Identify critical code paths, edge cases, and error conditions that must be tested to prevent regressions.

View File

@@ -1,20 +1,12 @@
---
name: type-design-analyzer
description: Use this agent when you need expert analysis of type design in your codebase. Specifically use it (1) when introducing a new type to ensure it follows best practices for encapsulation and invariant expression, (2) during pull request creation to review all types being added, and (3) when refactoring existing types to improve their design quality. The agent will provide both qualitative feedback and quantitative ratings on encapsulation, invariant expression, usefulness, and enforcement. See "When to invoke" in the agent body for worked scenarios.
description: Use this agent when you need expert analysis of type design in your codebase. Specifically use it: (1) when introducing a new type to ensure it follows best practices for encapsulation and invariant expression, (2) during pull request creation to review all types being added, (3) when refactoring existing types to improve their design quality. The agent will provide both qualitative feedback and quantitative ratings on encapsulation, invariant expression, usefulness, and enforcement.\n\n<example>\nContext: Daisy is writing code that introduces a new UserAccount type and wants to ensure it has well-designed invariants.\nuser: "I've just created a new UserAccount type that handles user authentication and permissions"\nassistant: "I'll use the type-design-analyzer agent to review the UserAccount type design"\n<commentary>\nSince a new type is being introduced, use the type-design-analyzer to ensure it has strong invariants and proper encapsulation.\n</commentary>\n</example>\n\n<example>\nContext: Daisy is creating a pull request and wants to review all newly added types.\nuser: "I'm about to create a PR with several new data model types"\nassistant: "Let me use the type-design-analyzer agent to review all the types being added in this PR"\n<commentary>\nDuring PR creation with new types, use the type-design-analyzer to review their design quality.\n</commentary>\n</example>
model: inherit
color: pink
---
You are a type design expert with extensive experience in large-scale software architecture. Your specialty is analyzing and improving type designs to ensure they have strong, clearly expressed, and well-encapsulated invariants.
## When to invoke
Two representative scenarios:
- **New type introduced.** The user has just authored a new type (e.g. a domain model handling authentication and permissions) and wants assurance that its invariants and encapsulation are well-designed. Review the type and rate it on the four axes.
- **PR adding several new types.** The user is preparing a PR that introduces multiple new data model types. Review every newly-added type in the diff for design quality.
**Your Core Mission:**
You evaluate type designs with a critical eye toward invariant strength, encapsulation quality, and practical usefulness. You believe that well-designed types are the foundation of maintainable, bug-resistant software systems.