mirror of
https://github.com/anthropics/claude-plugins-official.git
synced 2026-04-30 06:42:40 +00:00
Compare commits
10 Commits
fix/valida
...
add-hyperf
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ed2fd47bf3 | ||
|
|
e73e9a6257 | ||
|
|
ce721c1f1d | ||
|
|
5b643ee82a | ||
|
|
38b5056c44 | ||
|
|
18113ade5c | ||
|
|
99832739a1 | ||
|
|
c5837a2c23 | ||
|
|
f4b5494fb4 | ||
|
|
bdca23e8e4 |
@@ -33,6 +33,21 @@
|
||||
},
|
||||
"homepage": "https://github.com/SalesforceAIResearch/agentforce-adlc"
|
||||
},
|
||||
{
|
||||
"name": "adobe-for-creativity",
|
||||
"description": "Harness Adobe's creative AI-powered tools to edit images, automate design workflows, and bring creative visions to life — from background removal to vectorization and professional retouching.",
|
||||
"author": {
|
||||
"name": "Adobe"
|
||||
},
|
||||
"category": "design",
|
||||
"source": {
|
||||
"source": "git-subdir",
|
||||
"url": "https://github.com/adobe/skills.git",
|
||||
"path": "plugins/creative-cloud/adobe-for-creativity",
|
||||
"ref": "main"
|
||||
},
|
||||
"homepage": "https://github.com/adobe/skills/tree/main/plugins/creative-cloud/adobe-for-creativity"
|
||||
},
|
||||
{
|
||||
"name": "adspirer-ads-agent",
|
||||
"description": "Cross-platform ad management for Google Ads, Meta Ads, TikTok Ads, and LinkedIn Ads. 91 tools for keyword research, campaign creation, performance analysis, and budget optimization.",
|
||||
@@ -253,6 +268,22 @@
|
||||
},
|
||||
"homepage": "https://github.com/awslabs/agent-plugins"
|
||||
},
|
||||
{
|
||||
"name": "aws-dev-toolkit",
|
||||
"description": "AWS development toolkit — 34 skills, 11 agents, and 3 MCP servers for building, migrating, and performing architecture reviews on AWS.",
|
||||
"author": {
|
||||
"name": "aws-samples"
|
||||
},
|
||||
"category": "development",
|
||||
"source": {
|
||||
"source": "git-subdir",
|
||||
"url": "https://github.com/aws-samples/sample-claude-code-plugins-for-startups.git",
|
||||
"path": "plugins/aws-dev-toolkit",
|
||||
"ref": "main",
|
||||
"sha": "ddea7fdd605b42ed3900374815f358a2d4600db5"
|
||||
},
|
||||
"homepage": "https://github.com/aws-samples/sample-claude-code-plugins-for-startups"
|
||||
},
|
||||
{
|
||||
"name": "aws-serverless",
|
||||
"description": "Design, build, deploy, test, and debug serverless applications with AWS Serverless services.",
|
||||
@@ -465,6 +496,17 @@
|
||||
},
|
||||
"homepage": "https://github.com/cockroachdb/claude-plugin"
|
||||
},
|
||||
{
|
||||
"name": "code-modernization",
|
||||
"description": "Modernize legacy codebases (COBOL, legacy Java/C++, monolith web apps) with a structured assess / map / extract-rules / reimagine / transform / harden workflow and specialist review agents",
|
||||
"author": {
|
||||
"name": "Anthropic",
|
||||
"email": "support@anthropic.com"
|
||||
},
|
||||
"source": "./plugins/code-modernization",
|
||||
"category": "development",
|
||||
"homepage": "https://github.com/anthropics/claude-plugins-official/tree/main/plugins/code-modernization"
|
||||
},
|
||||
{
|
||||
"name": "code-review",
|
||||
"description": "Automated code review for pull requests using multiple specialized agents with confidence-based scoring to filter false positives",
|
||||
@@ -891,6 +933,20 @@
|
||||
},
|
||||
"homepage": "https://github.com/huggingface/skills.git"
|
||||
},
|
||||
{
|
||||
"name": "hyperframes",
|
||||
"description": "HyperFrames by HeyGen. Write HTML, render video. Compositions, GSAP animations, captions, voiceovers, audio-reactive visuals, and website-to-video capture.",
|
||||
"author": {
|
||||
"name": "HeyGen"
|
||||
},
|
||||
"category": "development",
|
||||
"source": {
|
||||
"source": "url",
|
||||
"url": "https://github.com/heygen-com/hyperframes.git",
|
||||
"sha": "eb065260dc23cbd5d87904b4fcf3e55fbc2f2647"
|
||||
},
|
||||
"homepage": "https://hyperframes.heygen.com"
|
||||
},
|
||||
{
|
||||
"name": "imessage",
|
||||
"description": "iMessage messaging bridge with built-in access control. Reads chat.db directly, sends via AppleScript. Manage pairing, allowlists, and policy via /imessage:access.",
|
||||
@@ -1921,8 +1977,7 @@
|
||||
"category": "security",
|
||||
"source": {
|
||||
"source": "url",
|
||||
"url": "https://github.com/VantaInc/vanta-mcp-plugin.git",
|
||||
"sha": "46e5bebf0484f08fc4a3c4054437cf5ec06298c9"
|
||||
"url": "https://github.com/VantaInc/vanta-mcp-plugin.git"
|
||||
},
|
||||
"homepage": "https://help.vanta.com/en/articles/14094979-connecting-to-vanta-mcp#h_887ce3f337"
|
||||
},
|
||||
|
||||
8
plugins/code-modernization/.claude-plugin/plugin.json
Normal file
8
plugins/code-modernization/.claude-plugin/plugin.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"name": "code-modernization",
|
||||
"description": "Modernize legacy codebases (COBOL, legacy Java/C++, monolith web apps) with a structured assess → map → extract-rules → reimagine → transform → harden workflow and specialist review agents",
|
||||
"author": {
|
||||
"name": "Anthropic",
|
||||
"email": "support@anthropic.com"
|
||||
}
|
||||
}
|
||||
202
plugins/code-modernization/LICENSE
Normal file
202
plugins/code-modernization/LICENSE
Normal file
@@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
107
plugins/code-modernization/README.md
Normal file
107
plugins/code-modernization/README.md
Normal file
@@ -0,0 +1,107 @@
|
||||
# Code Modernization Plugin
|
||||
|
||||
A structured workflow and set of specialist agents for modernizing legacy codebases — COBOL, legacy Java/C++, monolith web apps — into current stacks while preserving behavior.
|
||||
|
||||
## Overview
|
||||
|
||||
Legacy modernization fails most often not because the target technology is wrong, but because teams skip steps: they transform code before understanding it, reimagine architecture before extracting business rules, or ship without a harness that would catch behavior drift. This plugin enforces a sequence:
|
||||
|
||||
```
|
||||
assess → map → extract-rules → reimagine → transform → harden
|
||||
```
|
||||
|
||||
Each step has a dedicated slash command. Specialist agents (legacy analyst, business rules extractor, architecture critic, security auditor, test engineer) are invoked from within those commands — or directly — to keep the work honest.
|
||||
|
||||
## Commands
|
||||
|
||||
The commands are designed to be run in order, but each produces a standalone artifact so you can stop, review, and resume.
|
||||
|
||||
### `/modernize-brief`
|
||||
Capture the modernization brief: what's being modernized, why now, constraints (regulatory, data, runtime), non-goals, and success criteria. Produces `analysis/brief.md`. Run this first.
|
||||
|
||||
### `/modernize-assess`
|
||||
Inventory the legacy codebase: languages, line counts, module boundaries, external integrations, build system, test coverage, known pain points. Produces `analysis/assessment.md`. Uses the `legacy-analyst` agent for deep reads on unfamiliar dialects.
|
||||
|
||||
### `/modernize-map`
|
||||
Map the legacy structure onto a target architecture: which legacy modules become which target services/packages, data-flow diagrams, migration sequencing. Produces `analysis/map.md`. Uses the `architecture-critic` agent to pressure-test the design.
|
||||
|
||||
### `/modernize-extract-rules`
|
||||
Extract business rules from the legacy code — the rules that are encoded in procedural logic, COBOL copybooks, stored procedures, or config files — into human-readable form with citations back to source. Produces `analysis/rules.md`. Uses the `business-rules-extractor` agent.
|
||||
|
||||
### `/modernize-reimagine`
|
||||
Propose the target design: APIs, data model, runtime. Explicitly list what changes from legacy and what stays identical. Produces `analysis/design.md`. Uses the `architecture-critic` agent to challenge over-engineering.
|
||||
|
||||
### `/modernize-transform`
|
||||
Do the actual code transformation — module by module. Writes to `modernized/`. Pairs each transformed module with a test suite that pins the pre-transform behavior.
|
||||
|
||||
### `/modernize-harden`
|
||||
Post-transform review pass: security audit, test coverage, error handling, observability. Uses `security-auditor` and `test-engineer` agents. Produces a findings report ranked Blocker / High / Medium / Nit.
|
||||
|
||||
## Agents
|
||||
|
||||
- **`legacy-analyst`** — Reads legacy code (COBOL, legacy Java/C++, procedural PHP, classic ASP) and produces structured summaries. Good at spotting implicit dependencies, copybook inheritance, and "JOBOL" patterns (procedural code wearing a modern syntax).
|
||||
- **`business-rules-extractor`** — Extracts business rules from procedural code with source citations. Each rule includes: what, where it's implemented, which conditions fire it, and any corner cases hidden in data.
|
||||
- **`architecture-critic`** — Adversarial reviewer for target architectures and transformed code. Default stance is skeptical: asks "do we actually need this?" Flags microservices-for-the-resume, ceremonial error handling, abstractions with one implementation.
|
||||
- **`security-auditor`** — Reviews transformed code for auth, input validation, secret handling, and dependency CVEs. Tuned for the kinds of issues that appear when translating security primitives across stacks (e.g., session handling from servlet to stateless JWT).
|
||||
- **`test-engineer`** — Audits test suites for behavior-pinning vs. coverage-theater. Flags tests that exercise code paths without asserting outcomes.
|
||||
|
||||
## Installation
|
||||
|
||||
```
|
||||
/plugin install code-modernization@claude-plugins-official
|
||||
```
|
||||
|
||||
## Recommended Workspace Setup
|
||||
|
||||
This plugin ships commands and agents, but modernization projects benefit from a workspace permission layout that enforces the "never touch legacy, freely edit modernized" rule. A starting-point `.claude/settings.json` for the project directory you're modernizing:
|
||||
|
||||
```json
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(git diff:*)",
|
||||
"Bash(git log:*)",
|
||||
"Bash(git status:*)",
|
||||
"Read(**)",
|
||||
"Write(analysis/**)",
|
||||
"Write(modernized/**)",
|
||||
"Edit(analysis/**)",
|
||||
"Edit(modernized/**)"
|
||||
],
|
||||
"deny": [
|
||||
"Edit(legacy/**)"
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Adjust `legacy/` and `modernized/` to match your actual layout. The key invariants: `Edit` under `legacy/` is denied, and writes are scoped to `analysis/` (for documents) and `modernized/` (for the new code).
|
||||
|
||||
## Typical Workflow
|
||||
|
||||
```bash
|
||||
# 1. Write the brief — what are we modernizing and why?
|
||||
/modernize-brief
|
||||
|
||||
# 2. Inventory the legacy code
|
||||
/modernize-assess
|
||||
|
||||
# 3. Map legacy structure to target
|
||||
/modernize-map
|
||||
|
||||
# 4. Extract business rules before touching the code
|
||||
/modernize-extract-rules
|
||||
|
||||
# 5. Propose the target design and review it
|
||||
/modernize-reimagine
|
||||
|
||||
# 6. Transform module by module
|
||||
/modernize-transform
|
||||
|
||||
# 7. Harden: security, tests, observability
|
||||
/modernize-harden
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
Apache 2.0. See `LICENSE`.
|
||||
36
plugins/code-modernization/agents/architecture-critic.md
Normal file
36
plugins/code-modernization/agents/architecture-critic.md
Normal file
@@ -0,0 +1,36 @@
|
||||
---
|
||||
name: architecture-critic
|
||||
description: Reviews proposed target architectures and transformed code against modern best practice. Adversarial — looks for over-engineering, missed requirements, and simpler alternatives.
|
||||
tools: Read, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are a principal engineer reviewing a modernization design or a freshly
|
||||
transformed module. Your default stance is **skeptical**. The team is excited
|
||||
about the new shiny; your job is to ask "do we actually need this?"
|
||||
|
||||
## Review lens
|
||||
|
||||
For **architecture proposals**:
|
||||
- Does every service boundary correspond to a real domain seam, or is this
|
||||
microservices-for-the-resume?
|
||||
- What's the simplest design that meets the stated requirements? How does
|
||||
the proposal compare?
|
||||
- Which non-functional requirements (latency, throughput, consistency) are
|
||||
unstated, and does the design accidentally violate them?
|
||||
- What's the data migration story? "We'll figure it out" is a finding.
|
||||
- What happens when service X is down? Trace one failure mode end-to-end.
|
||||
|
||||
For **transformed code**:
|
||||
- Is this idiomatic for the target stack, or is legacy structure leaking
|
||||
through? (Flag "JOBOL" — procedural Java with COBOL variable names.)
|
||||
- Is error handling meaningful or ceremonial?
|
||||
- Are there abstractions with exactly one implementation and no second use
|
||||
case in sight?
|
||||
- Does the test suite actually pin behavior, or just exercise code paths?
|
||||
- What would the on-call engineer need at 3am that isn't here?
|
||||
|
||||
## Output
|
||||
|
||||
Findings ranked **Blocker / High / Medium / Nit**. Each with: what, where,
|
||||
why it matters, and a concrete suggested change. End with one paragraph:
|
||||
"If I could only change one thing, it would be ___."
|
||||
@@ -0,0 +1,46 @@
|
||||
---
|
||||
name: business-rules-extractor
|
||||
description: Mines domain logic, calculations, validations, and policies from legacy code into testable Given/When/Then specifications. Use when you need to separate "what the business requires" from "how the old code happened to implement it."
|
||||
tools: Read, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are a business analyst who reads code. Your job is to find the **rules**
|
||||
hidden inside legacy systems — the calculations, thresholds, eligibility
|
||||
checks, and policies that define how the business actually operates — and
|
||||
express them in a form that survives the rewrite.
|
||||
|
||||
## What counts as a business rule
|
||||
|
||||
- **Calculations**: interest, fees, taxes, discounts, scores, aggregates
|
||||
- **Validations**: required fields, format checks, range limits, cross-field consistency checks
|
||||
- **Eligibility / authorization**: who can do what, when, under which conditions
|
||||
- **State transitions**: status lifecycles, what triggers each transition
|
||||
- **Policies**: retention periods, retry limits, cutoff times, rounding rules
|
||||
|
||||
## What does NOT count
|
||||
|
||||
Infrastructure, logging, error handling, UI layout, technical retries,
|
||||
connection pooling. If a rule would be the same regardless of what language
|
||||
the system was written in, it's a business rule. If it only exists because
|
||||
of the technology, skip it.
|
||||
|
||||
## Extraction discipline
|
||||
|
||||
1. Find the rule in code. Record exact `file:line-line`.
|
||||
2. State it in plain English a non-engineer would recognize.
|
||||
3. Encode it as Given/When/Then with **concrete values**:
|
||||
```
|
||||
Given an account with balance $1,250.00 and APR 18.5%
|
||||
When the monthly interest batch runs
|
||||
Then the interest charged is $19.27 (balance × APR ÷ 12, rounded half-up to cents)
|
||||
```
|
||||
4. List the parameters (rates, limits, magic numbers) with their current
|
||||
hardcoded values — these often need to become configuration.
|
||||
5. Rate your confidence: **High** (logic is explicit), **Medium** (inferred
|
||||
from structure/names), **Low** (ambiguous; needs SME).
|
||||
6. If confidence < High, write the exact question an SME must answer.
|
||||
|
||||
## Output format
|
||||
|
||||
One "Rule Card" per rule (see the format in the `/modernize-extract-rules`
|
||||
command). Group by category. Lead with a summary table.
|
||||
39
plugins/code-modernization/agents/legacy-analyst.md
Normal file
39
plugins/code-modernization/agents/legacy-analyst.md
Normal file
@@ -0,0 +1,39 @@
|
||||
---
|
||||
name: legacy-analyst
|
||||
description: Deep-reads legacy codebases (COBOL, Java, .NET, Node, anything) to build structural and behavioral understanding. Use for discovery, dependency mapping, dead-code detection, and "what does this system actually do" questions.
|
||||
tools: Read, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are a senior legacy systems analyst with 20 years of experience reading
|
||||
code nobody else wants to read — COBOL, JCL, RPG, classic ASP, EJB 2,
|
||||
Struts 1, raw servlets, Perl CGI.
|
||||
|
||||
Your job is **understanding, not judgment**. The code in front of you kept a
|
||||
business running for decades. Treat it with respect, figure out what it does,
|
||||
and explain it in terms a modern engineer can act on.
|
||||
|
||||
## How you work
|
||||
|
||||
- **Read before you grep.** Open the entry points (main programs, JCL jobs,
|
||||
controllers, routes) and trace the actual flow. Pattern-matching on names
|
||||
lies; control flow doesn't.
|
||||
- **Cite everything.** Every claim gets a `path/to/file:line` reference.
|
||||
If you can't point to a line, you don't know it — say so.
|
||||
- **Distinguish "is" from "appears to be."** When you're inferring intent
|
||||
from structure, flag it: "appears to handle X (inferred from variable
|
||||
names; no comments confirm)."
|
||||
- **Use the right vocabulary for the stack.** COBOL has paragraphs,
|
||||
copybooks, and FD entries. CICS has transactions and BMS maps. JCL has
|
||||
steps and DD statements. Java has packages and beans. Use the native
|
||||
terms so SMEs trust your output.
|
||||
- **Find the data first.** In legacy systems, the data structures (copybooks,
|
||||
DDL, schemas) are usually more stable and truthful than the procedural
|
||||
code. Map the data, then map who touches it.
|
||||
- **Note what's missing.** Unhandled error paths, TODO comments, commented-out
|
||||
blocks, magic numbers — these are signals about history and risk.
|
||||
|
||||
## Output format
|
||||
|
||||
Default to structured markdown: tables for inventories, Mermaid for graphs,
|
||||
bullet lists for findings. Always include a "Confidence & Gaps" footer
|
||||
listing what you couldn't determine and what you'd ask an SME.
|
||||
47
plugins/code-modernization/agents/security-auditor.md
Normal file
47
plugins/code-modernization/agents/security-auditor.md
Normal file
@@ -0,0 +1,47 @@
|
||||
---
|
||||
name: security-auditor
|
||||
description: Adversarial security reviewer — OWASP Top 10, CWE, dependency CVEs, secrets, injection. Use for security debt scanning and pre-modernization hardening.
|
||||
tools: Read, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are an application security engineer performing an adversarial review.
|
||||
Assume the code is hostile until proven otherwise. Your job is to find
|
||||
vulnerabilities a real attacker would find — and explain them in terms an
|
||||
engineer can fix.
|
||||
|
||||
## Coverage checklist
|
||||
|
||||
Work through systematically:
|
||||
- **Injection** (SQL, NoSQL, OS command, LDAP, XPath, template) — trace every
|
||||
user-controlled input to every sink
|
||||
- **Authentication / session** — hardcoded creds, weak session handling,
|
||||
missing auth checks on sensitive routes
|
||||
- **Sensitive data exposure** — secrets in source, weak crypto, PII in logs
|
||||
- **Access control** — IDOR, missing ownership checks, privilege escalation paths
|
||||
- **XSS / CSRF** — unescaped output, missing tokens
|
||||
- **Insecure deserialization** — pickle/yaml.load/ObjectInputStream on
|
||||
untrusted data
|
||||
- **Vulnerable dependencies** — run `npm audit` / `pip-audit`, or
|
||||
read manifests and flag versions with known CVEs
|
||||
- **SSRF / path traversal / open redirect**
|
||||
- **Security misconfiguration** — debug mode, verbose errors, default creds
|
||||
|
||||
## Tooling
|
||||
|
||||
Use available automated scanners where they help (npm audit, pip-audit, grep for known-bad
|
||||
patterns) but **read the code** — tools miss logic flaws. Show tool output
|
||||
verbatim, then add your manual findings.
|
||||
|
||||
## Reporting standard
|
||||
|
||||
For each finding:
|
||||
| Field | Content |
|
||||
|---|---|
|
||||
| **ID** | SEC-NNN |
|
||||
| **CWE** | CWE-XXX with name |
|
||||
| **Severity** | Critical / High / Medium / Low (CVSS-ish reasoning) |
|
||||
| **Location** | `file:line` |
|
||||
| **Exploit scenario** | One sentence: how an attacker uses this |
|
||||
| **Fix** | Concrete code-level remediation |
|
||||
|
||||
No hand-waving. If you can't write the exploit scenario, downgrade severity.
|
||||
36
plugins/code-modernization/agents/test-engineer.md
Normal file
36
plugins/code-modernization/agents/test-engineer.md
Normal file
@@ -0,0 +1,36 @@
|
||||
---
|
||||
name: test-engineer
|
||||
description: Writes characterization, contract, and equivalence tests that pin down legacy behavior so transformation can be proven correct. Use before any rewrite.
|
||||
tools: Read, Write, Edit, Glob, Grep, Bash
|
||||
---
|
||||
|
||||
You are a test engineer specializing in **characterization testing** —
|
||||
writing tests that capture what legacy code *actually does* (not what
|
||||
someone thinks it should do) so that a rewrite can be proven equivalent.
|
||||
|
||||
## Principles
|
||||
|
||||
- **The legacy code is the oracle.** If the legacy computes 19.27 and the
|
||||
spec says 19.28, the test asserts 19.27 and you flag the discrepancy
|
||||
separately. We're proving equivalence first; fixing bugs is a separate
|
||||
decision.
|
||||
- **Concrete over abstract.** Every test has literal input values and literal
|
||||
expected outputs. No "should calculate correctly" — instead "given balance
|
||||
1250.00 and APR 18.5%, returns 19.27".
|
||||
- **Cover the edges the legacy covers.** Read the legacy code's branches.
|
||||
Every IF/EVALUATE/switch arm gets at least one test case. Boundary values
|
||||
(zero, negative, max, empty) get explicit cases.
|
||||
- **Tests must run against BOTH.** Structure tests so the same inputs can be
|
||||
fed to the legacy implementation (or a recorded trace of it) and the modern
|
||||
one. The test harness compares.
|
||||
- **Executable, not aspirational.** Tests compile and run from day one.
|
||||
Behaviors not yet implemented in the target are marked
|
||||
`@Disabled("pending RULE-NNN")` / `@pytest.mark.skip` / `it.todo()` — never
|
||||
deleted.
|
||||
|
||||
## Output
|
||||
|
||||
Idiomatic tests for the requested target stack (JUnit 5 / pytest / Vitest /
|
||||
xUnit), one test class/file per legacy module, test method names that read
|
||||
as specifications. Include a `README.md` in the test directory explaining
|
||||
how to run them and how to add a new case.
|
||||
142
plugins/code-modernization/commands/modernize-assess.md
Normal file
142
plugins/code-modernization/commands/modernize-assess.md
Normal file
@@ -0,0 +1,142 @@
|
||||
---
|
||||
description: Full discovery & portfolio analysis of a legacy system — inventory, complexity, debt, effort estimation
|
||||
argument-hint: <system-dir> | --portfolio <parent-dir>
|
||||
---
|
||||
|
||||
**Mode select.** If `$ARGUMENTS` starts with `--portfolio`, run **Portfolio
|
||||
mode** against the directory that follows. Otherwise run **Single-system
|
||||
mode** against `legacy/$1`.
|
||||
|
||||
---
|
||||
|
||||
# Portfolio mode (`--portfolio <parent-dir>`)
|
||||
|
||||
Sweep every immediate subdirectory of the parent dir and produce a
|
||||
heat-map a steering committee can use to sequence a multi-year program.
|
||||
|
||||
## Step P1 — Per-system metrics
|
||||
|
||||
For each subdirectory `<sys>`:
|
||||
|
||||
```bash
|
||||
cloc --quiet --csv <parent>/<sys> # LOC by language
|
||||
lizard -s cyclomatic_complexity <parent>/<sys> 2>/dev/null | tail -1
|
||||
```
|
||||
|
||||
Capture: total SLOC, dominant language, file count, mean & max
|
||||
cyclomatic complexity (CCN). For dependency freshness, locate the
|
||||
manifest (`package.json`, `pom.xml`, `*.csproj`, `requirements*.txt`,
|
||||
copybook dir) and note its age / pinned-version count.
|
||||
|
||||
## Step P2 — COCOMO-II effort
|
||||
|
||||
Compute person-months per system using the COCOMO II nominal effort equation:
|
||||
`PM = 2.94 × (KSLOC)^1.10` (nominal scale factors). Show the formula and
|
||||
inputs so the figure is defensible, not a guess.
|
||||
|
||||
## Step P3 — Documentation coverage
|
||||
|
||||
For each system, count source files with vs without a header comment
|
||||
block, and list architecture docs present (`README`, `docs/`, ADRs).
|
||||
Report coverage % and the top undocumented subsystems.
|
||||
|
||||
## Step P4 — Render the heat-map
|
||||
|
||||
Write `analysis/portfolio.html` (dark `#1e1e1e` bg, `#d4d4d4` text,
|
||||
`#cc785c` accent, system-ui font, all CSS inline). One row per system;
|
||||
columns: **System · Lang · KSLOC · Files · Mean CCN · Max CCN · Dep
|
||||
Freshness · Doc Coverage % · COCOMO PM · Risk**. Color-grade the PM and
|
||||
Risk cells (green→amber→red). Below the table, a 2-3 sentence
|
||||
sequencing recommendation: which system first and why.
|
||||
|
||||
Then stop. Tell the user to open `analysis/portfolio.html`.
|
||||
|
||||
---
|
||||
|
||||
# Single-system mode
|
||||
|
||||
Perform a complete **modernization assessment** of `legacy/$1`.
|
||||
|
||||
This is the discovery phase — the goal is a fact-grounded executive brief that
|
||||
a VP of Engineering could take into a budget meeting. Work in this order:
|
||||
|
||||
## Step 1 — Quantitative inventory
|
||||
|
||||
Run and show the output of:
|
||||
```bash
|
||||
scc legacy/$1
|
||||
```
|
||||
Then run `scc --by-file -s complexity legacy/$1 | head -25` to identify the
|
||||
highest-complexity files. Capture the COCOMO effort/cost estimate scc provides.
|
||||
|
||||
## Step 2 — Technology fingerprint
|
||||
|
||||
Identify, with file evidence:
|
||||
- Languages, frameworks, and runtime versions in use
|
||||
- Build system and dependency manifest locations
|
||||
- Data stores (schemas, copybooks, DDL, ORM configs)
|
||||
- Integration points (queues, APIs, batch interfaces, screen maps)
|
||||
- Test presence and approximate coverage signal
|
||||
|
||||
## Step 3 — Parallel deep analysis
|
||||
|
||||
Spawn three subagents **concurrently** using the Task tool:
|
||||
|
||||
1. **legacy-analyst** — "Build a structural map of legacy/$1: what are the
|
||||
5-10 major functional domains, which source files belong to each, and how
|
||||
do they depend on each other? Return a markdown table + a Mermaid
|
||||
`graph TD` of domain-level dependencies. Cite file paths."
|
||||
|
||||
2. **legacy-analyst** — "Identify technical debt in legacy/$1: dead code,
|
||||
deprecated APIs, copy-paste duplication, god objects/programs, missing
|
||||
error handling, hardcoded config. Return the top 10 findings ranked by
|
||||
remediation value, each with file:line evidence."
|
||||
|
||||
3. **security-auditor** — "Scan legacy/$1 for security vulnerabilities:
|
||||
injection, auth weaknesses, hardcoded secrets, vulnerable dependencies,
|
||||
missing input validation. Return findings in CWE-tagged table form with
|
||||
file:line evidence and severity."
|
||||
|
||||
Wait for all three. Synthesize their findings.
|
||||
|
||||
## Step 4 — Production runtime overlay (observability)
|
||||
|
||||
If the system has batch jobs (e.g. JCL members under `app/jcl/`), call the
|
||||
`observability` MCP tool `get_batch_runtimes` for each business-relevant
|
||||
job name (interest, posting, statement, reporting). Use the returned
|
||||
p50/p95/p99 and 90-day series to:
|
||||
|
||||
- Tag each functional domain from Step 3 with its production wall-clock
|
||||
cost and **p99 variance** (here meaning the p99/p50 ratio, not statistical variance).
|
||||
- Flag the highest-variance domain as the highest operational risk —
|
||||
this is telemetry-grounded, not a static-analysis opinion.
|
||||
|
||||
Include a small **Batch Runtime** table (Job · Domain · p50 · p95 · p99 ·
|
||||
p99/p50) in the assessment.
|
||||
|
||||
## Step 5 — Documentation gap analysis
|
||||
|
||||
Compare what the code *does* against what README/docs/comments *say*. List
|
||||
the top 5 undocumented behaviors or subsystems that a new engineer would
|
||||
need explained.
|
||||
|
||||
## Step 6 — Write the assessment
|
||||
|
||||
Create `analysis/$1/ASSESSMENT.md` with these sections:
|
||||
- **Executive Summary** (3-4 sentences: what it is, how big, how risky, headline recommendation)
|
||||
- **System Inventory** (the scc table + tech fingerprint)
|
||||
- **Architecture-at-a-Glance** (the domain table; reference the diagram)
|
||||
- **Production Runtime Profile** (the batch-runtime table from Step 4, with the highest-variance domain called out)
|
||||
- **Technical Debt** (top 10, ranked)
|
||||
- **Security Findings** (CWE table)
|
||||
- **Documentation Gaps** (top 5)
|
||||
- **Effort Estimation** (COCOMO-derived person-months, ±range, key cost drivers)
|
||||
- **Recommended Modernization Pattern** (one of: Rehost / Replatform / Refactor / Rearchitect / Rebuild / Replace — with one-paragraph rationale)
|
||||
|
||||
Also create `analysis/$1/ARCHITECTURE.mmd` containing the Mermaid domain
|
||||
dependency diagram from the legacy-analyst.
|
||||
|
||||
## Step 7 — Present
|
||||
|
||||
Tell the user the assessment is ready and suggest:
|
||||
`glow -p analysis/$1/ASSESSMENT.md`
|
||||
60
plugins/code-modernization/commands/modernize-brief.md
Normal file
60
plugins/code-modernization/commands/modernize-brief.md
Normal file
@@ -0,0 +1,60 @@
|
||||
---
|
||||
description: Generate a phased Modernization Brief — the approved plan that transformation agents will execute against
|
||||
argument-hint: <system-dir> [target-stack]
|
||||
---
|
||||
|
||||
Synthesize everything in `analysis/$1/` into a **Modernization Brief** — the
|
||||
single document a steering committee approves and engineering executes.
|
||||
|
||||
Target stack: `$2` (if blank, recommend one based on the assessment findings).
|
||||
|
||||
Read `analysis/$1/ASSESSMENT.md`, `TOPOLOGY.html`, and `BUSINESS_RULES.md` first.
|
||||
If any are missing, say so and stop.
|
||||
|
||||
## The Brief
|
||||
|
||||
Write `analysis/$1/MODERNIZATION_BRIEF.md`:
|
||||
|
||||
### 1. Objective
|
||||
One paragraph: from what, to what, why now.
|
||||
|
||||
### 2. Target Architecture
|
||||
Mermaid C4 Container diagram of the *end state*. Name every service, data
|
||||
store, and integration. Below it, a table mapping legacy component → target
|
||||
component(s).
|
||||
|
||||
### 3. Phased Sequence
|
||||
Break the work into 3-6 phases using **strangler-fig ordering** — lowest-risk,
|
||||
fewest-dependencies first. For each phase:
|
||||
- Scope (which legacy modules, which target services)
|
||||
- Entry criteria (what must be true to start)
|
||||
- Exit criteria (what tests/metrics prove it's done)
|
||||
- Estimated effort (person-weeks, derived from COCOMO + complexity data)
|
||||
- Risk level + top 2 risks + mitigation
|
||||
|
||||
Render the phases as a Mermaid `gantt` chart.
|
||||
|
||||
### 4. Behavior Contract
|
||||
List the **P0 behaviors** from BUSINESS_RULES.md that MUST be proven
|
||||
equivalent before any phase ships. These become the regression suite.
|
||||
|
||||
### 5. Validation Strategy
|
||||
State which combination applies: characterization tests, contract tests,
|
||||
parallel-run / dual-execution diff, property-based tests, manual UAT.
|
||||
Justify per phase.
|
||||
|
||||
### 6. Open Questions
|
||||
Anything requiring human/SME decision before Phase 1 starts. Each as a
|
||||
checkbox the approver must tick.
|
||||
|
||||
### 7. Approval Block
|
||||
```
|
||||
Approved by: ________________ Date: __________
|
||||
Approval covers: Phase 1 only | Full plan
|
||||
```
|
||||
|
||||
## Present
|
||||
|
||||
Enter **plan mode** and present a summary of the brief. Do NOT proceed to any
|
||||
transformation until the user explicitly approves. This gate is the
|
||||
human-in-the-loop control point.
|
||||
@@ -0,0 +1,68 @@
|
||||
---
|
||||
description: Mine business logic from legacy code into testable, human-readable rule specifications
|
||||
argument-hint: <system-dir> [module-pattern]
|
||||
---
|
||||
|
||||
Extract the **business rules** embedded in `legacy/$1` into a structured,
|
||||
testable specification — the institutional knowledge that's currently locked
|
||||
in code and in the heads of engineers who are about to retire.
|
||||
|
||||
Scope: if a module pattern was given (`$2`), focus there; otherwise cover the
|
||||
entire system. Either way, prioritize calculation, validation, eligibility,
|
||||
and state-transition logic over plumbing.
|
||||
|
||||
## Method
|
||||
|
||||
Spawn **three business-rules-extractor subagents in parallel**, each assigned
|
||||
a different lens. If `$2` is non-empty, include "focusing on files matching
|
||||
$2" in each prompt.
|
||||
|
||||
1. **Calculations** — "Find every formula, rate, threshold, and computed value
|
||||
in legacy/$1. For each: what does it compute, what are the inputs, what is
|
||||
the exact formula/algorithm, where is it implemented (file:line), and what
|
||||
edge cases does the code handle?"
|
||||
|
||||
2. **Validations & eligibility** — "Find every business validation, eligibility
|
||||
check, and guard condition in legacy/$1. For each: what is being checked,
|
||||
what happens on pass/fail, where is it (file:line)?"
|
||||
|
||||
3. **State & lifecycle** — "Find every status field, state machine, and
|
||||
lifecycle transition in legacy/$1. For each entity: what states exist,
|
||||
what triggers transitions, what side-effects fire?"
|
||||
|
||||
## Synthesize
|
||||
|
||||
Merge the three result sets. Deduplicate. For each distinct rule, write a
|
||||
**Rule Card** in this exact format:
|
||||
|
||||
```
|
||||
### RULE-NNN: <plain-English name>
|
||||
**Category:** Calculation | Validation | Lifecycle | Policy
|
||||
**Source:** `path/to/file.ext:line-line`
|
||||
**Plain English:** One sentence a business analyst would recognize.
|
||||
**Specification:**
|
||||
Given <precondition>
|
||||
When <trigger>
|
||||
Then <outcome>
|
||||
[And <additional outcome>]
|
||||
**Parameters:** <constants, rates, thresholds with their current values>
|
||||
**Edge cases handled:** <list>
|
||||
**Confidence:** High | Medium | Low — <why>
|
||||
```
|
||||
|
||||
Write all rule cards to `analysis/$1/BUSINESS_RULES.md` with:
|
||||
- A summary table at top (ID, name, category, source, confidence)
|
||||
- Rule cards grouped by category
|
||||
- A final **"Rules requiring SME confirmation"** section listing every
|
||||
Medium/Low confidence rule with the specific question a human needs to answer
|
||||
|
||||
## Generate the DTO catalog
|
||||
|
||||
As a companion, create `analysis/$1/DATA_OBJECTS.md` cataloging the core
|
||||
data transfer objects / records / entities: name, fields with types, which
|
||||
rules consume/produce them, source location.
|
||||
|
||||
## Present
|
||||
|
||||
Report: total rules found, breakdown by category, count needing SME review.
|
||||
Suggest: `glow -p analysis/$1/BUSINESS_RULES.md`
|
||||
46
plugins/code-modernization/commands/modernize-harden.md
Normal file
46
plugins/code-modernization/commands/modernize-harden.md
Normal file
@@ -0,0 +1,46 @@
|
||||
---
|
||||
description: Security vulnerability scan + remediation — OWASP, CVE, secrets, injection
|
||||
argument-hint: <system-dir>
|
||||
---
|
||||
|
||||
Run a **security hardening pass** on `legacy/$1`: find vulnerabilities, rank
|
||||
them, and fix the critical ones.
|
||||
|
||||
## Scan
|
||||
|
||||
Spawn the **security-auditor** subagent:
|
||||
|
||||
"Adversarially audit legacy/$1 for security vulnerabilities. Cover:
|
||||
OWASP Top 10 (injection, broken auth, XSS, SSRF, etc.), hardcoded secrets,
|
||||
vulnerable dependency versions (check package manifests against known CVEs),
|
||||
missing input validation, insecure deserialization, path traversal.
|
||||
For each finding return: CWE ID, severity (Critical/High/Medium/Low), file:line,
|
||||
one-sentence exploit scenario, and recommended fix. Also run any available
|
||||
SAST tooling (npm audit, pip-audit, OWASP dependency-check) and include
|
||||
its raw output."
|
||||
|
||||
## Triage
|
||||
|
||||
Write `analysis/$1/SECURITY_FINDINGS.md`:
|
||||
- Summary scorecard (count by severity, top CWE categories)
|
||||
- Findings table sorted by severity
|
||||
- Dependency CVE table (package, installed version, CVE, fixed version)
|
||||
|
||||
## Remediate
|
||||
|
||||
For each **Critical** and **High** finding, fix it directly in the source.
|
||||
Make minimal, targeted changes. After each fix, add a one-line entry under
|
||||
"Remediation Log" in SECURITY_FINDINGS.md: finding ID → commit-style summary
|
||||
of what changed.
|
||||
|
||||
Show the cumulative diff:
|
||||
```bash
|
||||
git -C legacy/$1 diff
|
||||
```
|
||||
|
||||
## Verify
|
||||
|
||||
Re-run the security-auditor against the patched code to confirm the
|
||||
Critical/High findings are resolved. Update the scorecard with before/after.
|
||||
|
||||
Suggest: `glow -p analysis/$1/SECURITY_FINDINGS.md`
|
||||
66
plugins/code-modernization/commands/modernize-map.md
Normal file
66
plugins/code-modernization/commands/modernize-map.md
Normal file
@@ -0,0 +1,66 @@
|
||||
---
|
||||
description: Dependency & topology mapping — call graphs, data lineage, batch flows, rendered as navigable diagrams
|
||||
argument-hint: <system-dir>
|
||||
---
|
||||
|
||||
Build a **dependency and topology map** of `legacy/$1` and render it visually.
|
||||
|
||||
The assessment gave us domains. Now go one level deeper: how do the *pieces*
|
||||
connect? This is the map an engineer needs before touching anything.
|
||||
|
||||
## What to produce
|
||||
|
||||
Write a one-off analysis script (Python or shell — your choice) that parses
|
||||
the source under `legacy/$1` and extracts:
|
||||
|
||||
- **Program/module call graph** — who calls whom (for COBOL: `CALL` statements
|
||||
and CICS `LINK`/`XCTL`; for Java: class-level imports/invocations; for Node:
|
||||
`require`/`import`)
|
||||
- **Data dependency graph** — which programs read/write which data stores
|
||||
(COBOL: copybooks + VSAM/DB2 in JCL DD statements; Java: JPA entities/tables;
|
||||
Node: model files)
|
||||
- **Entry points** — batch jobs, transaction IDs, HTTP routes, CLI commands
|
||||
- **Dead-end candidates** — modules with no inbound edges (potential dead code)
|
||||
|
||||
Save the script as `analysis/$1/extract_topology.py` (or `.sh`) so it can be
|
||||
re-run and audited. Run it. Show the raw output.
|
||||
|
||||
## Render
|
||||
|
||||
From the extracted data, generate **three Mermaid diagrams** and write them
|
||||
to `analysis/$1/TOPOLOGY.html` so the artifact pane renders them live.
|
||||
|
||||
The HTML page must use: dark `#1e1e1e` background, `#d4d4d4` text,
|
||||
`#cc785c` for `<h2>`/accents, `system-ui` font, all CSS **inline** (no
|
||||
external stylesheets). Each diagram goes in a
|
||||
`<pre class="mermaid">...</pre>` block — the artifact server loads
|
||||
mermaid.js and renders client-side. Do **not** wrap diagrams in
|
||||
markdown ` ``` ` fences inside the HTML.
|
||||
|
||||
1. **`graph TD` — Module call graph.** Cluster by domain (use `subgraph`).
|
||||
Highlight entry points in a distinct style. Cap at ~40 nodes — if larger,
|
||||
show domain-level with one expanded domain.
|
||||
|
||||
2. **`graph LR` — Data lineage.** Programs → data stores.
|
||||
Mark read vs write edges.
|
||||
|
||||
3. **`flowchart TD` — Critical path.** Trace ONE end-to-end business flow
|
||||
(e.g., "monthly billing run" or "process payment") through every program
|
||||
and data store it touches, in execution order. If the `observability`
|
||||
MCP server is connected, annotate each batch step with its p50/p99
|
||||
wall-clock from `get_batch_runtimes`.
|
||||
|
||||
Also export the three diagrams as standalone `.mmd` files for re-use:
|
||||
`analysis/$1/call-graph.mmd`, `analysis/$1/data-lineage.mmd`,
|
||||
`analysis/$1/critical-path.mmd`.
|
||||
|
||||
## Annotate
|
||||
|
||||
Below each `<pre class="mermaid">` block in TOPOLOGY.html, add a `<ul>`
|
||||
with 3-5 **architect observations**: tight coupling clusters, single
|
||||
points of failure, candidates for service extraction, data stores
|
||||
touched by too many writers.
|
||||
|
||||
## Present
|
||||
|
||||
Tell the user to open `analysis/$1/TOPOLOGY.html` in the artifact pane.
|
||||
82
plugins/code-modernization/commands/modernize-reimagine.md
Normal file
82
plugins/code-modernization/commands/modernize-reimagine.md
Normal file
@@ -0,0 +1,82 @@
|
||||
---
|
||||
description: Multi-agent greenfield rebuild — extract specs from legacy, design AI-native, scaffold & validate with HITL
|
||||
argument-hint: <system-dir> <target-vision>
|
||||
---
|
||||
|
||||
**Reimagine** `legacy/$1` as: $2
|
||||
|
||||
This is not a port — it's a rebuild from extracted intent. The legacy system
|
||||
becomes the *specification source*, not the structural template. This command
|
||||
orchestrates a multi-agent team with explicit human checkpoints.
|
||||
|
||||
## Phase A — Specification mining (parallel agents)
|
||||
|
||||
Spawn concurrently and show the user that all three are running:
|
||||
|
||||
1. **business-rules-extractor** — "Extract every business rule from legacy/$1
|
||||
into Given/When/Then form. Output to a structured list I can parse."
|
||||
|
||||
2. **legacy-analyst** — "Catalog every external interface of legacy/$1:
|
||||
inbound (screens, APIs, batch triggers, queues) and outbound (reports,
|
||||
files, downstream calls, DB writes). For each: name, direction, payload
|
||||
shape, frequency/SLA if discernible."
|
||||
|
||||
3. **legacy-analyst** — "Identify the core domain entities in legacy/$1 and
|
||||
their relationships. Return as an entity list + Mermaid erDiagram."
|
||||
|
||||
Collect results. Write `analysis/$1/AI_NATIVE_SPEC.md` containing:
|
||||
- **Capabilities** (what the system must do — derived from rules + interfaces)
|
||||
- **Domain Model** (entities + erDiagram)
|
||||
- **Interface Contracts** (each external interface as an OpenAPI fragment or
|
||||
AsyncAPI fragment)
|
||||
- **Non-functional requirements** inferred from legacy (batch windows, volumes)
|
||||
- **Behavior Contract** (the Given/When/Then rules — these are the acceptance tests)
|
||||
|
||||
## Phase B — HITL checkpoint #1
|
||||
|
||||
Present the spec summary. Ask the user **one focused question**: "Which of
|
||||
these capabilities are P0 for the reimagined system, and are there any we
|
||||
should deliberately drop?" Wait for the answer. Record it in the spec.
|
||||
|
||||
## Phase C — Architecture (single agent, then critique)
|
||||
|
||||
Design the target architecture for "$2":
|
||||
- Mermaid C4 Container diagram
|
||||
- Service boundaries with rationale (which rules/entities live where)
|
||||
- Technology choices with one-line justification each
|
||||
- Data migration approach from legacy stores
|
||||
|
||||
Then spawn **architecture-critic**: "Review this proposed architecture for
|
||||
$2 against the spec in analysis/$1/AI_NATIVE_SPEC.md. Identify over-engineering,
|
||||
missed requirements, scaling risks, and simpler alternatives." Incorporate
|
||||
the critique. Write the result to `analysis/$1/REIMAGINED_ARCHITECTURE.md`.
|
||||
|
||||
## Phase D — HITL checkpoint #2
|
||||
|
||||
Enter plan mode. Present the architecture. Wait for approval.
|
||||
|
||||
## Phase E — Parallel scaffolding
|
||||
|
||||
For each service in the approved architecture (cap at 3 for the demo), spawn
|
||||
a **general-purpose agent in parallel**:
|
||||
|
||||
"Scaffold the <service-name> service per analysis/$1/REIMAGINED_ARCHITECTURE.md
|
||||
and AI_NATIVE_SPEC.md. Create: project skeleton, domain model, API stubs
|
||||
matching the interface contracts, and **executable acceptance tests** for every
|
||||
behavior-contract rule assigned to this service (mark unimplemented ones as
|
||||
expected-failure/skip with the rule ID). Write to modernized/$1-reimagined/<service-name>/."
|
||||
|
||||
Show the agents' progress. When all complete, run the acceptance test suites
|
||||
and report: total tests, passing (scaffolded behavior), pending (rule IDs
|
||||
awaiting implementation).
|
||||
|
||||
## Phase F — Knowledge graph handoff
|
||||
|
||||
Write `modernized/$1-reimagined/CLAUDE.md` — the persistent context file for
|
||||
the new system, containing: architecture summary, service responsibilities,
|
||||
where the spec lives, how to run tests, and the legacy→modern traceability
|
||||
map. This file IS the knowledge graph that future agents and engineers will
|
||||
load.
|
||||
|
||||
Report: services scaffolded, acceptance tests defined, % behaviors with a
|
||||
home, location of all artifacts.
|
||||
78
plugins/code-modernization/commands/modernize-transform.md
Normal file
78
plugins/code-modernization/commands/modernize-transform.md
Normal file
@@ -0,0 +1,78 @@
|
||||
---
|
||||
description: Transform one legacy module to the target stack — idiomatic rewrite with behavior-equivalence tests
|
||||
argument-hint: <system-dir> <module> <target-stack>
|
||||
---
|
||||
|
||||
Transform `legacy/$1` module **`$2`** into **$3**, with proof of behavioral
|
||||
equivalence.
|
||||
|
||||
This is a surgical, single-module transformation — one vertical slice of the
|
||||
strangler fig. Output goes to `modernized/$1/$2/`.
|
||||
|
||||
## Step 0 — Plan (HITL gate)
|
||||
|
||||
Read the source module and any business rules in `analysis/$1/BUSINESS_RULES.md`
|
||||
that reference it. Then **enter plan mode** and present:
|
||||
- Which source files are in scope
|
||||
- The target module structure (packages/classes/files you'll create)
|
||||
- Which business rules / behaviors this module implements
|
||||
- How you'll prove equivalence (test strategy)
|
||||
- Anything ambiguous that needs a human decision NOW
|
||||
|
||||
Wait for approval before writing any code.
|
||||
|
||||
## Step 1 — Characterization tests FIRST
|
||||
|
||||
Before writing target code, spawn the **test-engineer** subagent:
|
||||
|
||||
"Write characterization tests for legacy/$1 module $2. Read the source,
|
||||
identify every observable behavior, and encode each as a test case with
|
||||
concrete input → expected output pairs derived from the legacy logic.
|
||||
Target framework: <appropriate for $3>. Write to
|
||||
`modernized/$1/$2/src/test/`. These tests define 'done' — the new code
|
||||
must pass all of them."
|
||||
|
||||
Show the user the test file. Get a 👍 before proceeding.
|
||||
|
||||
## Step 2 — Idiomatic transformation
|
||||
|
||||
Write the target implementation in `modernized/$1/$2/src/main/`.
|
||||
|
||||
**Critical:** Write code a senior $3 engineer would write from the
|
||||
*specification*, not from the legacy structure. Do NOT mirror COBOL paragraphs
|
||||
as methods, do NOT preserve legacy variable names like `WS-TEMP-AMT-X`.
|
||||
Use the target language's idioms: records/dataclasses, streams, dependency
|
||||
injection, proper error types, etc.
|
||||
|
||||
Include: domain model, service logic, API surface (REST controller or
|
||||
equivalent), and configuration. Add concise Javadoc/docstrings linking each
|
||||
class back to the rule IDs it implements.
|
||||
|
||||
## Step 3 — Prove it
|
||||
|
||||
Run the characterization tests:
|
||||
```bash
|
||||
cd modernized/$1/$2 && <appropriate test command for $3>
|
||||
```
|
||||
Show the output. If anything fails, fix and re-run until green.
|
||||
|
||||
## Step 4 — Side-by-side review
|
||||
|
||||
Generate `modernized/$1/$2/TRANSFORMATION_NOTES.md`:
|
||||
- Mapping table: legacy file:lines → target file:lines, per behavior
|
||||
- Deliberate deviations from legacy behavior (with rationale)
|
||||
- What was NOT migrated (dead code, unreachable branches) and why
|
||||
- Follow-ups for the next module that depends on this one
|
||||
|
||||
Then show a visual diff of one representative behavior, legacy vs modern:
|
||||
```bash
|
||||
delta --side-by-side <(sed -n '<lines>p' legacy/$1/<file>) modernized/$1/$2/src/main/<file>
|
||||
```
|
||||
|
||||
## Step 5 — Architecture review
|
||||
|
||||
Spawn the **architecture-critic** subagent to review the transformed code
|
||||
against $3 best practices. Apply any HIGH-severity feedback; list the rest
|
||||
in TRANSFORMATION_NOTES.md.
|
||||
|
||||
Report: tests passing, lines of legacy retired, location of artifacts.
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: conversation-analyzer
|
||||
description: Use this agent when analyzing conversation transcripts to find behaviors worth preventing with hooks. Examples: <example>Context: User is running /hookify command without arguments\nuser: "/hookify"\nassistant: "I'll analyze the conversation to find behaviors you want to prevent"\n<commentary>The /hookify command without arguments triggers conversation analysis to find unwanted behaviors.</commentary></example><example>Context: User wants to create hooks from recent frustrations\nuser: "Can you look back at this conversation and help me create hooks for the mistakes you made?"\nassistant: "I'll use the conversation-analyzer agent to identify the issues and suggest hooks."\n<commentary>User explicitly asks to analyze conversation for mistakes that should be prevented.</commentary></example>
|
||||
description: Use this agent when analyzing conversation transcripts to find behaviors worth preventing with hooks. Typical triggers include the /hookify command being invoked without arguments, or the user explicitly asking to look back at the current conversation and surface mistakes that should be prevented in the future. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: inherit
|
||||
color: yellow
|
||||
tools: ["Read", "Grep"]
|
||||
@@ -8,6 +8,15 @@ tools: ["Read", "Grep"]
|
||||
|
||||
You are a conversation analysis specialist that identifies problematic behaviors in Claude Code sessions that could be prevented with hooks.
|
||||
|
||||
## When to invoke
|
||||
|
||||
Two representative scenarios:
|
||||
|
||||
- **Scenario A — `/hookify` invoked with no arguments.** Treat the bare `/hookify` invocation as a request to analyze the current conversation and surface unwanted behaviors. Respond by saying you'll analyze the conversation, then run the analysis described below.
|
||||
- **Scenario B — User asks to learn from recent frustrations.** When the user asks (in their own words) to look back over the conversation and create hooks for mistakes that were made, run the same analysis and propose hook rules for the issues found.
|
||||
|
||||
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. Read and analyze user messages to find frustration signals
|
||||
2. Identify specific tool usage patterns that caused issues
|
||||
|
||||
@@ -14,10 +14,15 @@ The UI layer is **additive**. Under the hood it's still tools, resources, and th
|
||||
|
||||
## Claude host specifics
|
||||
|
||||
- `_meta.ui.prefersBorder: false` on a `ui://` resource removes the outer card border (mobile).
|
||||
| `_meta.ui.*` key | Where | Effect |
|
||||
|---|---|---|
|
||||
| `resourceUri` | tool | Which `ui://` resource the host renders for this tool's results. |
|
||||
| `visibility: ["app"]` | tool | Hide a widget-only helper tool (e.g. geometry/image fetcher called via `callServerTool`) from Claude's tool list. |
|
||||
| `prefersBorder: false` | resource | Drop the host's outer card border (mobile). |
|
||||
| `csp.{connectDomains, resourceDomains, baseUriDomains}` | resource | Declare external origins; default is block-all. `frameDomains` is currently restricted in Claude. |
|
||||
|
||||
- `hostContext.safeAreaInsets: {top, right, bottom, left}` (px) — honor these for notches and the composer overlay.
|
||||
- `_meta.ui.csp.{connectDomains, resourceDomains, baseUriDomains}` — declare external origins per resource; default is block-all. `frameDomains` is currently restricted in Claude.
|
||||
- Directory submission for MCP Apps requires 3–5 PNG screenshots, ≥1000px wide, cropped to the app response only (no prompt in the image). See https://claude.com/docs/connectors/building/submission#asset-specifications.
|
||||
- Directory submission requires OAuth or **authless** (`none`) — static bearer is private-deploy only and blocks listing — plus tool `annotations` and 3–5 PNG screenshots; see `references/directory-checklist.md`.
|
||||
|
||||
---
|
||||
|
||||
@@ -104,6 +109,7 @@ const server = new McpServer({ name: "contacts", version: "1.0.0" });
|
||||
// 1. The tool — returns DATA, declares which UI to show
|
||||
registerAppTool(server, "pick_contact", {
|
||||
description: "Open an interactive contact picker",
|
||||
annotations: { title: "Pick Contact", readOnlyHint: true },
|
||||
inputSchema: { filter: z.string().optional() },
|
||||
_meta: { ui: { resourceUri: "ui://widgets/contact-picker.html" } },
|
||||
}, async ({ filter }) => {
|
||||
@@ -172,7 +178,10 @@ The `/*__EXT_APPS_BUNDLE__*/` placeholder gets replaced by the server at startup
|
||||
| `app.updateModelContext({...})` | Widget → host | Update context silently (no visible message) |
|
||||
| `app.callServerTool({name, arguments})` | Widget → server | Call another tool on your server |
|
||||
| `app.openLink({url})` | Widget → host | Open a URL in a new tab (sandbox blocks `window.open`) |
|
||||
| `app.getHostContext()` / `app.onhostcontextchanged` | Host → widget | Theme (`light`/`dark`), locale, etc. |
|
||||
| `app.getHostContext()` / `app.onhostcontextchanged` | Host → widget | Theme, host CSS vars, `containerDimensions`, `displayMode`, `deviceCapabilities` |
|
||||
| `app.requestDisplayMode({mode})` | Widget → host | Ask for `inline` / `pip` / `fullscreen` |
|
||||
| `app.downloadFile({name, mimeType, content})` | Widget → host | Host-mediated download (base64 content) |
|
||||
| `new App(info, caps, {autoResize: true})` | — | Iframe height tracks rendered content |
|
||||
|
||||
`sendMessage` is the typical "user picked something, tell Claude" path. `updateModelContext` is for state that Claude should know about but shouldn't clutter the chat. `openLink` is **required** for any outbound navigation — `window.open` and `<a target="_blank">` are blocked by the sandbox attribute.
|
||||
|
||||
@@ -225,6 +234,7 @@ const pickerHtml = readFileSync("./widgets/picker.html", "utf8")
|
||||
|
||||
registerAppTool(server, "pick_contact", {
|
||||
description: "Open an interactive contact picker. User selects one contact.",
|
||||
annotations: { title: "Pick Contact", readOnlyHint: true },
|
||||
inputSchema: { filter: z.string().optional().describe("Name/email prefix filter") },
|
||||
_meta: { ui: { resourceUri: "ui://widgets/picker.html" } },
|
||||
}, async ({ filter }) => {
|
||||
@@ -348,6 +358,24 @@ Desktop caches UI resources aggressively. After editing widget HTML, **fully qui
|
||||
|
||||
The `sleep` keeps stdin open long enough to collect all responses. Parse the jsonl output with `jq` or a Python one-liner.
|
||||
|
||||
**Widget dev loop** — avoid the ⌘Q-relaunch cycle entirely by serving the inlined widget HTML at a plain GET route with a fake `ExtApps` shim that fires `ontoolresult` from a query param:
|
||||
|
||||
```ts
|
||||
app.get("/widget-preview", (_req, res) => {
|
||||
const shim = `globalThis.ExtApps={applyHostStyleVariables:()=>{},App:class{
|
||||
constructor(){this.h={}} ontoolresult;onhostcontextchanged;
|
||||
async connect(){const p=new URLSearchParams(location.search).get("payload");
|
||||
if(p)this.ontoolresult?.({content:[{type:"text",text:p}]});}
|
||||
getHostContext(){return{theme:"light"}}
|
||||
sendMessage(m){console.log("sendMessage",m)} updateModelContext(){}
|
||||
callServerTool(){return Promise.resolve({content:[]})} openLink(){} downloadFile(){}
|
||||
}};`;
|
||||
res.type("html").send(widgetHtml.replace("/*__EXT_APPS_BUNDLE__*/", shim));
|
||||
});
|
||||
```
|
||||
|
||||
Open `http://localhost:3000/widget-preview?payload={"rows":[...]}` in a normal browser tab and iterate with ordinary devtools.
|
||||
|
||||
**Host fallback** — use a host without the apps surface (or MCP Inspector) and confirm the tool's text content degrades gracefully.
|
||||
|
||||
**CSP debugging** — open the iframe's own devtools console. CSP violations are the #1 reason widgets silently fail (blank rectangle, no error in the main console). See `references/iframe-sandbox.md`.
|
||||
@@ -356,6 +384,9 @@ The `sleep` keeps stdin open long enough to collect all responses. Parse the jso
|
||||
|
||||
## Reference files
|
||||
|
||||
- `references/iframe-sandbox.md` — CSP/sandbox constraints, the bundle-inlining pattern, image handling
|
||||
- `references/iframe-sandbox.md` — CSP/sandbox constraints, the bundle-inlining pattern, image handling, host theming
|
||||
- `references/widget-templates.md` — reusable HTML scaffolds for picker / confirm / progress / display
|
||||
- `references/apps-sdk-messages.md` — the `App` class API: widget ↔ host ↔ server messaging
|
||||
- `references/apps-sdk-messages.md` — the `App` class API: widget ↔ host ↔ server messaging, lifecycle & supersession
|
||||
- `references/payload-budgeting.md` — host tool-result size caps, prune-then-truncate, heavy assets via `callServerTool`
|
||||
- `references/abuse-protection.md` — Anthropic egress CIDRs, tiered rate limiting, `trust proxy`, response caching
|
||||
- `references/directory-checklist.md` — pre-flight for connector-directory submission
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
# Abuse protection for authless hosted servers
|
||||
|
||||
An authless StreamableHTTP server is reachable by anything on the internet.
|
||||
There are three resources to protect: your compute, any upstream API quota
|
||||
your tools consume, and egress bandwidth for large `callServerTool` payloads.
|
||||
|
||||
## You don't get a per-user identity
|
||||
|
||||
In authless mode there is no token and stateless transport gives no session
|
||||
ID. Traffic from claude.ai is proxied through Anthropic's egress — every web
|
||||
user arrives from the same small set of IPs:
|
||||
|
||||
```
|
||||
160.79.104.0/21
|
||||
2607:6bc0::/48
|
||||
```
|
||||
|
||||
(See https://platform.claude.com/docs/en/api/ip-addresses.)
|
||||
|
||||
Claude Desktop, Claude Code, and other hosts connect **directly from the
|
||||
user's machine**, so those *do* have distinct per-user IPs. Per-IP limiting
|
||||
therefore works for direct-connect clients; for claude.ai you can only limit
|
||||
the aggregate Anthropic pool. If true per-user limits matter, that's the
|
||||
trigger to add OAuth.
|
||||
|
||||
## Tiered token-bucket (per-replica backstop)
|
||||
|
||||
```ts
|
||||
const ANTHROPIC_CIDRS = ["160.79.104.0/21", "2607:6bc0::/48"];
|
||||
const TIERS = {
|
||||
anthropic: { capacity: 600, refillPerSec: 100 }, // shared pool
|
||||
other: { capacity: 30, refillPerSec: 2 }, // per-IP
|
||||
};
|
||||
```
|
||||
|
||||
Match `req.ip` against the CIDRs, pick a bucket (`"anthropic"` or
|
||||
`"ip:<addr>"`), 429 + `Retry-After` on exhaust. This is a per-replica
|
||||
backstop — cross-replica enforcement belongs at the edge (Cloudflare, Cloud
|
||||
Armor), which keeps the containers stateless.
|
||||
|
||||
## `trust proxy` must match your topology
|
||||
|
||||
`req.ip` only honours `X-Forwarded-For` if `app.set('trust proxy', N)` is
|
||||
set. `true` trusts every hop, which lets a direct client send
|
||||
`X-Forwarded-For: 160.79.108.42` and claim the Anthropic tier. Set it to the
|
||||
exact number of trusted hops (e.g. `1` behind a single LB, `2` behind
|
||||
Cloudflare → origin LB) and **never `true` in production**.
|
||||
|
||||
## Hard-allowlisting Anthropic IPs is a product decision
|
||||
|
||||
Blocking everything outside the Anthropic ranges (`160.79.104.0/21`, `2607:6bc0::/48`) locks out Desktop, Claude Code,
|
||||
and every other MCP host. Use the CIDRs to **tier** rate limits, not to gate
|
||||
access, unless claude.ai-only is an explicit goal.
|
||||
|
||||
## Cache upstream responses
|
||||
|
||||
For tools that wrap a third-party API, an in-process LRU keyed on the
|
||||
normalized query (TTL hours, no secrets in the key) is the primary cost
|
||||
control — repeat queries become free and the cache absorbs thundering-herd bursts. Rate limits
|
||||
are the safety net, not the first line.
|
||||
@@ -2,6 +2,18 @@
|
||||
|
||||
The `@modelcontextprotocol/ext-apps` package provides the `App` class (browser side) and `registerAppTool`/`registerAppResource` helpers (server side). Messaging is bidirectional and persistent.
|
||||
|
||||
## Construction
|
||||
|
||||
```js
|
||||
const app = new App(
|
||||
{ name: "MyWidget", version: "1.0.0" },
|
||||
{}, // capabilities
|
||||
{ autoResize: true }, // options
|
||||
);
|
||||
```
|
||||
|
||||
`autoResize: true` wires a `ResizeObserver` that emits `ui/notifications/size-changed` so the host iframe height tracks your rendered content. Without it the frame is fixed-height and tall renders get clipped — set it for any widget whose height depends on data.
|
||||
|
||||
---
|
||||
|
||||
## Widget → Host
|
||||
@@ -63,6 +75,26 @@ card.querySelector("a").addEventListener("click", (e) => {
|
||||
|
||||
Host-mediated download (sandbox blocks direct `<a download>`). `content` is a base64 string.
|
||||
|
||||
```js
|
||||
const csv = rows.map((r) => Object.values(r).join(",")).join("\n");
|
||||
app.downloadFile({
|
||||
name: "export.csv",
|
||||
mimeType: "text/csv",
|
||||
content: btoa(unescape(encodeURIComponent(csv))),
|
||||
});
|
||||
```
|
||||
|
||||
### `app.requestDisplayMode({ mode })`
|
||||
|
||||
Ask the host to switch the widget between `"inline"`, `"pip"`, or `"fullscreen"`. Check `getHostContext().availableDisplayModes` first; hide the control if the mode isn't offered. The host responds by firing `onhostcontextchanged` with new `displayMode` and `containerDimensions` — re-render at the new size.
|
||||
|
||||
```js
|
||||
if (app.getHostContext()?.availableDisplayModes?.includes("fullscreen")) {
|
||||
expandBtn.hidden = false;
|
||||
expandBtn.onclick = () => app.requestDisplayMode({ mode: "fullscreen" });
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Host → Widget
|
||||
@@ -84,9 +116,22 @@ app.ontoolresult = ({ content }) => {
|
||||
|
||||
Fires with the arguments Claude passed to the tool. Useful if the widget needs to know what was asked for (e.g., highlight the search term).
|
||||
|
||||
### `app.ontoolinputpartial = ({ arguments }) => {...}` / `app.ontoolcancelled = () => {...}`
|
||||
|
||||
`ontoolinputpartial` fires while Claude is still streaming arguments — use it to show a skeleton (e.g. `"Preparing: <title>…"`) before the result lands. `ontoolcancelled` fires if the call is aborted; clear the skeleton.
|
||||
|
||||
### `app.getHostContext()` / `app.onhostcontextchanged = (ctx) => {...}`
|
||||
|
||||
Read and subscribe to host context — `theme` (`"light"` / `"dark"`), locale, etc. Call `getHostContext()` **after** `connect()`. Subscribe for live updates (user toggles dark mode mid-conversation).
|
||||
Read and subscribe to host context. Call `getHostContext()` **after** `connect()`. Subscribe for live updates (user toggles dark mode, expands to fullscreen).
|
||||
|
||||
| `ctx.` field | Use |
|
||||
|---|---|
|
||||
| `theme` | `"light"` / `"dark"` — toggle a `.dark` class |
|
||||
| `styles.variables` | Host CSS tokens — pass to `applyHostStyleVariables()` so colors/fonts match host chrome |
|
||||
| `displayMode` / `availableDisplayModes` | Current mode and which `requestDisplayMode` targets are valid |
|
||||
| `containerDimensions.{maxHeight,width}` | Size your render to this instead of hard-coded px |
|
||||
| `deviceCapabilities.touch` | Switch hover-only affordances to tap (`pointerdown`) |
|
||||
| `safeAreaInsets` | Padding for notches / composer overlay |
|
||||
|
||||
```js
|
||||
const applyTheme = (t) =>
|
||||
@@ -129,14 +174,36 @@ No `{ notify }` destructure — `extra` is `RequestHandlerExtra`; progress goes
|
||||
## Lifecycle
|
||||
|
||||
1. Claude calls a tool with `_meta.ui.resourceUri` declared
|
||||
2. Host fetches the resource (your HTML) and renders it in an iframe
|
||||
2. Host fetches the resource (your HTML) and mounts a **fresh iframe** for this call
|
||||
3. Widget script runs, sets handlers, calls `await app.connect()`
|
||||
4. Host pipes the tool's return value → `ontoolresult` fires
|
||||
5. Widget renders, user interacts
|
||||
6. Widget calls `sendMessage` / `updateModelContext` / `callServerTool` as needed
|
||||
7. Widget persists until conversation context moves on — subsequent calls to the same tool reuse the iframe and fire `ontoolresult` again
|
||||
7. Iframe persists in the transcript; **the next call to the same tool mounts another iframe** alongside it
|
||||
|
||||
There's no explicit "submit and close" — the widget is a long-lived surface.
|
||||
There's no explicit "submit and close" — each instance is long-lived, but instances are not reused across calls.
|
||||
|
||||
### Supersession
|
||||
|
||||
Because earlier instances stay mounted, a click on a stale widget can `sendMessage` after a newer one has rendered. Detect this with a `BroadcastChannel` and make older instances inert:
|
||||
|
||||
```js
|
||||
let superseded = false;
|
||||
const seq = Date.now() + Math.random();
|
||||
const bc = new BroadcastChannel("my-widget");
|
||||
bc.onmessage = (e) => {
|
||||
if (e.data?.seq > seq) {
|
||||
superseded = true;
|
||||
document.body.classList.add("superseded"); // opacity:.45; pointer-events:none
|
||||
}
|
||||
};
|
||||
bc.postMessage({ seq });
|
||||
|
||||
// Guard outbound calls:
|
||||
function safeSend(msg) {
|
||||
if (!superseded) app.sendMessage(msg);
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
# Connector-directory submission checklist
|
||||
|
||||
Pre-flight before submitting a remote MCP app to the Claude connector
|
||||
directory. Each item is a hard review criterion.
|
||||
|
||||
| Area | Requirement |
|
||||
|---|---|
|
||||
| **Auth** | OAuth (DCR or CIMD) or **`none`** (authless). Static bearer tokens are private-deploy only and block listing. Authless is valid for public-data servers — the server holds any upstream API keys. |
|
||||
| **Tool annotations** | Every tool sets `annotations.title` plus the relevant hints: `readOnlyHint: true` for fetch/search tools, `destructiveHint` / `idempotentHint` for writes, `openWorldHint: true` if the tool reaches an external system. |
|
||||
| **Tool names** | ≤ 64 characters, snake/kebab case. |
|
||||
| **Widget layout** | Inline height ≤ 500px, no nested scroll containers, 44pt minimum touch targets, WCAG-AA contrast in both themes. |
|
||||
| **Theming** | `html, body { background: transparent }`, `<meta name="color-scheme" content="light dark">`, adopt host CSS tokens via `applyHostStyleVariables`. |
|
||||
| **External links** | Use `app.openLink`. Declare each origin (e.g. `https://api.example.com`) in the connector's *Allowed link URIs* so the link skips the confirm modal. |
|
||||
| **Helper tools** | Widget-only tools (geometry/image fetchers) carry `_meta.ui.visibility: ["app"]` so they don't appear in Claude's tool list. |
|
||||
| **Screenshots** | 3–5 PNGs, ≥ 1000px wide, cropped to the app response only — no prompt text in frame. |
|
||||
|
||||
See `abuse-protection.md` for rate-limit and IP-tiering guidance once the
|
||||
authless endpoint is public.
|
||||
@@ -122,23 +122,38 @@ that survives un-inlined.
|
||||
|
||||
---
|
||||
|
||||
## Dark mode
|
||||
## Theme & host styles
|
||||
|
||||
```js
|
||||
const applyTheme = (theme) =>
|
||||
document.documentElement.classList.toggle("dark", theme === "dark");
|
||||
The host renders the iframe inside its own card chrome — paint a **transparent** background and adopt host CSS tokens so the widget blends in across light/dark and across hosts.
|
||||
|
||||
app.onhostcontextchanged = (ctx) => applyTheme(ctx.theme);
|
||||
await app.connect();
|
||||
applyTheme(app.getHostContext()?.theme);
|
||||
```html
|
||||
<meta name="color-scheme" content="light dark" />
|
||||
```
|
||||
|
||||
```css
|
||||
:root { --ink:#0f1111; --bg:#fff; color-scheme:light; }
|
||||
:root.dark { --ink:#e6e6e6; --bg:#1f2428; color-scheme:dark; }
|
||||
:root {
|
||||
--ink: var(--color-text-primary, #0f1111);
|
||||
--sub: var(--color-text-secondary, #5a6270);
|
||||
--line: var(--color-border-default, #e3e6ea);
|
||||
}
|
||||
html, body { background: transparent; color: var(--ink); }
|
||||
:root.dark .thumb { mix-blend-mode: normal; } /* multiply → images vanish in dark */
|
||||
```
|
||||
|
||||
```js
|
||||
const { App, applyHostStyleVariables } = globalThis.ExtApps;
|
||||
|
||||
function applyHostContext(ctx) {
|
||||
document.documentElement.classList.toggle("dark", ctx?.theme === "dark");
|
||||
if (ctx?.styles?.variables) applyHostStyleVariables(ctx.styles.variables);
|
||||
}
|
||||
app.onhostcontextchanged = applyHostContext;
|
||||
await app.connect();
|
||||
applyHostContext(app.getHostContext());
|
||||
```
|
||||
|
||||
`applyHostStyleVariables` writes the host's `--color-*` / `--font-*` / `--border-radius-*` tokens onto `:root`; the hex values above are fallbacks for hosts that don't supply them.
|
||||
|
||||
---
|
||||
|
||||
## Debugging
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
# Payload budgeting
|
||||
|
||||
Hosts cap tool-result text. claude.ai and Claude Desktop truncate at roughly
|
||||
**150,000 characters**; Claude Code at ~25k tokens. When a tool result exceeds
|
||||
the cap, the host substitutes a file-pointer string in place of your JSON. The
|
||||
widget then receives non-JSON in `ontoolresult`, `JSON.parse` throws, and the
|
||||
user sees something like *"Bad payload: SyntaxError: Unexpected token 'E'"* —
|
||||
with no hint that size was the cause.
|
||||
|
||||
## Symptom → cause
|
||||
|
||||
| Symptom | Likely cause |
|
||||
|---|---|
|
||||
| Widget shows a JSON parse error on `content[0].text` | Result over the host cap; host swapped in a file-pointer string |
|
||||
| Works for one query, breaks for "all of X" | Row count × column count crossed the cap |
|
||||
| Works in MCP Inspector, breaks in Desktop | Inspector has no cap; Desktop does |
|
||||
|
||||
## Strategy
|
||||
|
||||
Cap your own payload at ~130,000 characters (comfortably under the ~150,000-character host cap) and degrade in order:
|
||||
|
||||
1. **Ship full rows** when `JSON.stringify(rows).length` is under the cap.
|
||||
2. **Prune columns** to those the rendering spec actually references. Walk the
|
||||
spec for both `field: "..."` keys *and* `datum.X` / `datum['X']` inside
|
||||
expression strings — if the spec aliases a column via a `calculate`
|
||||
transform, the alias appears as `field:` but the source column only appears
|
||||
as `datum.X`, and dropping it leaves the widget with NaN.
|
||||
3. **Truncate rows** as a last resort and include `{ truncated: N }` in the
|
||||
payload so the widget can label it.
|
||||
|
||||
```ts
|
||||
const MAX = 130_000;
|
||||
let out = rows;
|
||||
if (JSON.stringify(out).length > MAX) {
|
||||
const keep = referencedFields(spec); // field: + datum.X refs
|
||||
out = rows.map((r) => pick(r, keep));
|
||||
if (JSON.stringify(out).length > MAX) {
|
||||
const per = JSON.stringify(out[0] ?? {}).length || 1;
|
||||
out = out.slice(0, Math.floor(MAX / per));
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Heavy assets go via `callServerTool`, not the result
|
||||
|
||||
Geometry, image bytes, or any blob the widget needs but Claude doesn't should
|
||||
be served by a separate tool the widget calls after mount:
|
||||
|
||||
```js
|
||||
const topo = await app.callServerTool({ name: "get-topojson", arguments: { level } });
|
||||
```
|
||||
|
||||
Mark that helper tool with `_meta.ui.visibility: ["app"]` so it doesn't appear
|
||||
in Claude's tool list.
|
||||
@@ -24,21 +24,7 @@ Agents are autonomous subprocesses that handle complex, multi-step tasks indepen
|
||||
```markdown
|
||||
---
|
||||
name: agent-identifier
|
||||
description: Use this agent when [triggering conditions]. Examples:
|
||||
|
||||
<example>
|
||||
Context: [Situation description]
|
||||
user: "[User request]"
|
||||
assistant: "[How assistant should respond and use this agent]"
|
||||
<commentary>
|
||||
[Why this agent should be triggered]
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
<example>
|
||||
[Additional example...]
|
||||
</example>
|
||||
|
||||
description: Use this agent when [triggering conditions]. Typical triggers include [scenario 1 in prose], [scenario 2 in prose], and [scenario 3 in prose]. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: inherit
|
||||
color: blue
|
||||
tools: ["Read", "Write", "Grep"]
|
||||
@@ -46,6 +32,12 @@ tools: ["Read", "Write", "Grep"]
|
||||
|
||||
You are [agent role description]...
|
||||
|
||||
## When to invoke
|
||||
|
||||
[Two to four representative scenarios written as prose, e.g.:]
|
||||
- **[Scenario name].** [What the situation looks like and what the agent should do.]
|
||||
- **[Scenario name].** [Same.]
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. [Responsibility 1]
|
||||
2. [Responsibility 2]
|
||||
@@ -81,36 +73,24 @@ Agent identifier used for namespacing and invocation.
|
||||
|
||||
### description (required)
|
||||
|
||||
Defines when Claude should trigger this agent. **This is the most critical field.**
|
||||
Defines when Claude should trigger this agent. **This is the most critical field** — it is loaded into context whenever the agent is registered, so the harness can decide when to dispatch.
|
||||
|
||||
**Must include:**
|
||||
1. Triggering conditions ("Use this agent when...")
|
||||
2. Multiple `<example>` blocks showing usage
|
||||
3. Context, user request, and assistant response in each example
|
||||
4. `<commentary>` explaining why agent triggers
|
||||
2. A short prose summary of the typical trigger scenarios
|
||||
3. A pointer to a "When to invoke" section in the agent body for the detailed worked scenarios
|
||||
|
||||
**Format:**
|
||||
```
|
||||
Use this agent when [conditions]. Examples:
|
||||
|
||||
<example>
|
||||
Context: [Scenario description]
|
||||
user: "[What user says]"
|
||||
assistant: "[How Claude should respond]"
|
||||
<commentary>
|
||||
[Why this agent is appropriate]
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
[More examples...]
|
||||
Use this agent when [conditions]. Typical triggers include [scenario 1 in prose], [scenario 2 in prose], and [scenario 3 in prose]. See "When to invoke" in the agent body for worked scenarios.
|
||||
```
|
||||
|
||||
**Best practices:**
|
||||
- Include 2-4 concrete examples
|
||||
- Show proactive and reactive triggering
|
||||
- Cover different phrasings of same intent
|
||||
- Explain reasoning in commentary
|
||||
- Name 2-4 trigger scenarios in the prose summary
|
||||
- Cover both proactive (assistant invokes itself) and reactive (user requests) triggering
|
||||
- Cover different phrasings of the same intent
|
||||
- Be specific about when NOT to use the agent
|
||||
- Put detailed scenarios in the body under "When to invoke" as a bullet list of prose descriptions
|
||||
|
||||
### model (required)
|
||||
|
||||
@@ -231,14 +211,14 @@ Requirements:
|
||||
- Specific methodologies
|
||||
- Edge case handling
|
||||
- Output format
|
||||
- A "When to invoke" section listing 2-4 trigger scenarios as prose bullets
|
||||
4. Create identifier (lowercase, hyphens, 3-50 chars)
|
||||
5. Write description with triggering conditions
|
||||
6. Include 2-3 <example> blocks showing when to use
|
||||
5. Write description with triggering conditions and a short prose summary of trigger scenarios
|
||||
|
||||
Return JSON with:
|
||||
{
|
||||
"identifier": "agent-name",
|
||||
"whenToUse": "Use this agent when... Examples: <example>...</example>",
|
||||
"whenToUse": "Use this agent when... Typical triggers include [...]. See \"When to invoke\" in the agent body.",
|
||||
"systemPrompt": "You are..."
|
||||
}
|
||||
```
|
||||
@@ -332,13 +312,18 @@ Ensure system prompt is complete:
|
||||
```markdown
|
||||
---
|
||||
name: simple-agent
|
||||
description: Use this agent when... Examples: <example>...</example>
|
||||
description: Use this agent when [condition]. Typical triggers include [trigger 1] and [trigger 2]. See "When to invoke" in the agent body.
|
||||
model: inherit
|
||||
color: blue
|
||||
---
|
||||
|
||||
You are an agent that [does X].
|
||||
|
||||
## When to invoke
|
||||
|
||||
- **[Scenario A].** [Description.]
|
||||
- **[Scenario B].** [Description.]
|
||||
|
||||
Process:
|
||||
1. [Step 1]
|
||||
2. [Step 2]
|
||||
@@ -351,7 +336,7 @@ Output: [What to provide]
|
||||
| Field | Required | Format | Example |
|
||||
|-------|----------|--------|---------|
|
||||
| name | Yes | lowercase-hyphens | code-reviewer |
|
||||
| description | Yes | Text + examples | Use when... <example>... |
|
||||
| description | Yes | Prose triggers | Use when... Typical triggers include... |
|
||||
| model | Yes | inherit/sonnet/opus/haiku | inherit |
|
||||
| color | Yes | Color name | blue |
|
||||
| tools | No | Array of tool names | ["Read", "Grep"] |
|
||||
@@ -359,7 +344,8 @@ Output: [What to provide]
|
||||
### Best Practices
|
||||
|
||||
**DO:**
|
||||
- ✅ Include 2-4 concrete examples in description
|
||||
- ✅ Name 2-4 trigger scenarios in the description (as prose)
|
||||
- ✅ Put detailed worked scenarios in a "When to invoke" body section, as prose bullets
|
||||
- ✅ Write specific triggering conditions
|
||||
- ✅ Use `inherit` for model unless specific need
|
||||
- ✅ Choose appropriate tools (least privilege)
|
||||
@@ -367,7 +353,7 @@ Output: [What to provide]
|
||||
- ✅ Test agent triggering thoroughly
|
||||
|
||||
**DON'T:**
|
||||
- ❌ Use generic descriptions without examples
|
||||
- ❌ Use generic descriptions without trigger scenarios
|
||||
- ❌ Omit triggering conditions
|
||||
- ❌ Give all agents same color
|
||||
- ❌ Grant unnecessary tool access
|
||||
@@ -407,7 +393,7 @@ To create an agent for a plugin:
|
||||
3. Create `agents/agent-name.md` file
|
||||
4. Write frontmatter with all required fields
|
||||
5. Write system prompt following best practices
|
||||
6. Include 2-4 triggering examples in description
|
||||
6. Name 2-4 trigger scenarios in description (prose) and detail them in a "When to invoke" body section
|
||||
7. Validate with `scripts/validate-agent.sh`
|
||||
8. Test triggering with real scenarios
|
||||
9. Document agent in plugin README
|
||||
|
||||
@@ -31,11 +31,13 @@ Claude will return:
|
||||
```json
|
||||
{
|
||||
"identifier": "agent-name",
|
||||
"whenToUse": "Use this agent when... Examples: <example>...</example>",
|
||||
"systemPrompt": "You are... **Your Core Responsibilities:**..."
|
||||
"whenToUse": "Use this agent when... Typical triggers include [scenario 1], [scenario 2], and [scenario 3]. See \"When to invoke\" in the agent body for worked scenarios.",
|
||||
"systemPrompt": "You are...\n\n## When to invoke\n\n- **[Scenario A].** [Description]\n- **[Scenario B].** [Description]\n\n**Your Core Responsibilities:**..."
|
||||
}
|
||||
```
|
||||
|
||||
`whenToUse` is flat prose. `systemPrompt` includes a "When to invoke" section with prose bullets.
|
||||
|
||||
### Step 4: Convert to Agent File
|
||||
|
||||
Create `agents/[identifier].md`:
|
||||
@@ -63,8 +65,8 @@ I need an agent that reviews code changes for quality issues, security vulnerabi
|
||||
```json
|
||||
{
|
||||
"identifier": "code-quality-reviewer",
|
||||
"whenToUse": "Use this agent when the user has written code and needs quality review, or explicitly asks to review code changes. Examples:\n\n<example>\nContext: User just implemented a new feature\nuser: \"I've added the authentication feature\"\nassistant: \"Great! Let me review the code quality.\"\n<commentary>\nCode was written, trigger code-quality-reviewer agent for review.\n</commentary>\nassistant: \"I'll use the code-quality-reviewer agent to analyze the changes.\"\n</example>\n\n<example>\nContext: User explicitly requests review\nuser: \"Can you review my code for issues?\"\nassistant: \"I'll use the code-quality-reviewer agent to perform a thorough review.\"\n<commentary>\nExplicit review request triggers the agent.\n</commentary>\n</example>",
|
||||
"systemPrompt": "You are an expert code quality reviewer specializing in identifying issues in software implementations.\n\n**Your Core Responsibilities:**\n1. Analyze code changes for quality issues (readability, maintainability, performance)\n2. Identify security vulnerabilities (injection, XSS, authentication issues)\n3. Check adherence to project best practices and coding standards\n4. Provide actionable, specific feedback with line numbers\n\n**Review Process:**\n1. Read the code changes using available tools\n2. Analyze for:\n - Code quality (duplication, complexity, clarity)\n - Security (OWASP top 10, input validation)\n - Best practices (error handling, logging, testing)\n - Project-specific standards (from CLAUDE.md)\n3. Identify issues with severity (critical/major/minor)\n4. Provide specific recommendations with examples\n\n**Output Format:**\nProvide a structured review:\n1. Summary (2-3 sentences)\n2. Critical Issues (must fix)\n3. Major Issues (should fix)\n4. Minor Issues (nice to fix)\n5. Positive observations\n6. Overall assessment\n\nInclude file names and line numbers for all findings."
|
||||
"whenToUse": "Use this agent when the user has written code and needs quality review, or explicitly asks to review code changes. Typical triggers include proactive review after the assistant writes new code, and an explicit user request for review of recent changes. See \"When to invoke\" in the agent body for worked scenarios.",
|
||||
"systemPrompt": "You are an expert code quality reviewer specializing in identifying issues in software implementations.\n\n## When to invoke\n\n- **Proactive review after new code.** The assistant has just written or modified code (e.g. an authentication feature). Run a review for quality, security, and best practices before declaring the task done.\n- **Explicit review request.** The user asks for the recent changes to be reviewed for issues. Run a thorough review and report findings.\n\n**Your Core Responsibilities:**\n1. Analyze code changes for quality issues (readability, maintainability, performance)\n2. Identify security vulnerabilities (injection, XSS, authentication issues)\n3. Check adherence to project best practices and coding standards\n4. Provide actionable, specific feedback with line numbers\n\n**Review Process:**\n1. Read the code changes using available tools\n2. Analyze for:\n - Code quality (duplication, complexity, clarity)\n - Security (OWASP top 10, input validation)\n - Best practices (error handling, logging, testing)\n - Project-specific standards (from CLAUDE.md)\n3. Identify issues with severity (critical/major/minor)\n4. Provide specific recommendations with examples\n\n**Output Format:**\nProvide a structured review:\n1. Summary (2-3 sentences)\n2. Critical Issues (must fix)\n3. Major Issues (should fix)\n4. Minor Issues (nice to fix)\n5. Positive observations\n6. Overall assessment\n\nInclude file names and line numbers for all findings."
|
||||
}
|
||||
```
|
||||
|
||||
@@ -75,27 +77,7 @@ File: `agents/code-quality-reviewer.md`
|
||||
```markdown
|
||||
---
|
||||
name: code-quality-reviewer
|
||||
description: Use this agent when the user has written code and needs quality review, or explicitly asks to review code changes. Examples:
|
||||
|
||||
<example>
|
||||
Context: User just implemented a new feature
|
||||
user: "I've added the authentication feature"
|
||||
assistant: "Great! Let me review the code quality."
|
||||
<commentary>
|
||||
Code was written, trigger code-quality-reviewer agent for review.
|
||||
</commentary>
|
||||
assistant: "I'll use the code-quality-reviewer agent to analyze the changes."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User explicitly requests review
|
||||
user: "Can you review my code for issues?"
|
||||
assistant: "I'll use the code-quality-reviewer agent to perform a thorough review."
|
||||
<commentary>
|
||||
Explicit review request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
description: Use this agent when the user has written code and needs quality review, or explicitly asks to review code changes. Typical triggers include proactive review after the assistant writes new code, and an explicit user request for review of recent changes. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: inherit
|
||||
color: blue
|
||||
tools: ["Read", "Grep", "Glob"]
|
||||
@@ -103,6 +85,11 @@ tools: ["Read", "Grep", "Glob"]
|
||||
|
||||
You are an expert code quality reviewer specializing in identifying issues in software implementations.
|
||||
|
||||
## When to invoke
|
||||
|
||||
- **Proactive review after new code.** The assistant has just written or modified code (e.g. an authentication feature). Run a review for quality, security, and best practices before declaring the task done.
|
||||
- **Explicit review request.** The user asks for the recent changes to be reviewed for issues. Run a thorough review and report findings.
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. Analyze code changes for quality issues (readability, maintainability, performance)
|
||||
2. Identify security vulnerabilities (injection, XSS, authentication issues)
|
||||
@@ -142,8 +129,8 @@ Create an agent that generates unit tests for code. It should analyze existing c
|
||||
```json
|
||||
{
|
||||
"identifier": "test-generator",
|
||||
"whenToUse": "Use this agent when the user asks to generate tests, needs test coverage, or has written code that needs testing. Examples:\n\n<example>\nContext: User wrote new functions without tests\nuser: \"I've implemented the user authentication functions\"\nassistant: \"Great! Let me generate tests for these functions.\"\n<commentary>\nNew code without tests, proactively trigger test-generator.\n</commentary>\nassistant: \"I'll use the test-generator agent to create comprehensive tests.\"\n</example>",
|
||||
"systemPrompt": "You are an expert test engineer specializing in creating comprehensive unit tests...\n\n**Your Core Responsibilities:**\n1. Analyze code to understand behavior\n2. Generate test cases covering happy paths and edge cases\n3. Follow project testing conventions\n4. Ensure high code coverage\n\n**Test Generation Process:**\n1. Read target code\n2. Identify testable units (functions, classes, methods)\n3. Design test cases (inputs, expected outputs, edge cases)\n4. Generate tests following project patterns\n5. Add assertions and error cases\n\n**Output Format:**\nGenerate complete test files with:\n- Test suite structure\n- Setup/teardown if needed\n- Descriptive test names\n- Comprehensive assertions"
|
||||
"whenToUse": "Use this agent when the user asks to generate tests, needs test coverage, or has written code that needs testing. Typical triggers include proactive test generation after the assistant writes new functions, and an explicit user request for tests on a specific module. See \"When to invoke\" in the agent body.",
|
||||
"systemPrompt": "You are an expert test engineer specializing in creating comprehensive unit tests.\n\n## When to invoke\n\n- **Proactive coverage after new code.** The assistant has just implemented new functions (e.g. user authentication functions) without tests. Generate a comprehensive test suite before declaring the task done.\n- **Explicit test request.** The user asks for tests on a specific surface. Generate the requested suite following project conventions.\n\n**Your Core Responsibilities:**\n1. Analyze code to understand behavior\n2. Generate test cases covering happy paths and edge cases\n3. Follow project testing conventions\n4. Ensure high code coverage\n\n**Test Generation Process:**\n1. Read target code\n2. Identify testable units (functions, classes, methods)\n3. Design test cases (inputs, expected outputs, edge cases)\n4. Generate tests following project patterns\n5. Add assertions and error cases\n\n**Output Format:**\nGenerate complete test files with:\n- Test suite structure\n- Setup/teardown if needed\n- Descriptive test names\n- Comprehensive assertions"
|
||||
}
|
||||
```
|
||||
|
||||
@@ -156,7 +143,7 @@ Create an agent that generates unit tests for code. It should analyze existing c
|
||||
Build an agent that writes and updates API documentation. It should analyze code and generate clear, comprehensive docs.
|
||||
```
|
||||
|
||||
**Result:** Agent file with identifier `api-docs-writer`, appropriate examples, and system prompt for documentation generation.
|
||||
**Result:** Agent file with identifier `api-docs-writer`, a prose-style trigger description, and a "When to invoke" body section covering proactive doc generation after new API surface is added and explicit doc requests.
|
||||
|
||||
## Tips for Effective Agent Generation
|
||||
|
||||
@@ -201,7 +188,7 @@ Always validate generated agents:
|
||||
./scripts/validate-agent.sh agents/your-agent.md
|
||||
|
||||
# Check triggering works
|
||||
# Test with scenarios from examples
|
||||
# Test with realistic invocation phrasings
|
||||
```
|
||||
|
||||
## Iterating on Generated Agents
|
||||
@@ -211,7 +198,7 @@ If generated agent needs improvement:
|
||||
1. Identify what's missing or wrong
|
||||
2. Manually edit the agent file
|
||||
3. Focus on:
|
||||
- Better examples in description
|
||||
- Better-named trigger scenarios in `description:` and "When to invoke"
|
||||
- More specific system prompt
|
||||
- Clearer process steps
|
||||
- Better output format definition
|
||||
@@ -223,7 +210,6 @@ If generated agent needs improvement:
|
||||
- **Comprehensive**: Claude includes edge cases and quality checks
|
||||
- **Consistent**: Follows proven patterns
|
||||
- **Fast**: Seconds vs manual writing
|
||||
- **Examples**: Auto-generates triggering examples
|
||||
- **Complete**: Provides full system prompt structure
|
||||
|
||||
## When to Edit Manually
|
||||
|
||||
@@ -9,38 +9,7 @@ Full, production-ready agent examples for common use cases. Use these as templat
|
||||
```markdown
|
||||
---
|
||||
name: code-reviewer
|
||||
description: Use this agent when the user has written code and needs quality review, security analysis, or best practices validation. Examples:
|
||||
|
||||
<example>
|
||||
Context: User just implemented a new feature
|
||||
user: "I've added the payment processing feature"
|
||||
assistant: "Great! Let me review the implementation."
|
||||
<commentary>
|
||||
Code written for payment processing (security-critical). Proactively trigger
|
||||
code-reviewer agent to check for security issues and best practices.
|
||||
</commentary>
|
||||
assistant: "I'll use the code-reviewer agent to analyze the payment code."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User explicitly requests code review
|
||||
user: "Can you review my code for issues?"
|
||||
assistant: "I'll use the code-reviewer agent to perform a comprehensive review."
|
||||
<commentary>
|
||||
Explicit code review request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: Before committing code
|
||||
user: "I'm ready to commit these changes"
|
||||
assistant: "Let me review them first."
|
||||
<commentary>
|
||||
Before commit, proactively review code quality.
|
||||
</commentary>
|
||||
assistant: "I'll use the code-reviewer agent to validate the changes."
|
||||
</example>
|
||||
|
||||
description: Use this agent when the user has written code and needs quality review, security analysis, or best practices validation. Typical triggers include the user explicitly asking for a review, the assistant proactively reviewing newly-written code (especially security-critical surfaces like payments or auth), and a pre-commit sanity check before changes are committed. See "When to invoke" in the agent body.
|
||||
model: inherit
|
||||
color: blue
|
||||
tools: ["Read", "Grep", "Glob"]
|
||||
@@ -48,6 +17,12 @@ tools: ["Read", "Grep", "Glob"]
|
||||
|
||||
You are an expert code quality reviewer specializing in identifying issues, security vulnerabilities, and opportunities for improvement in software implementations.
|
||||
|
||||
## When to invoke
|
||||
|
||||
- **Proactive review of security-critical code.** The assistant has just authored code in a sensitive area (payments, authentication, data handling). Run a review focused on security and best practices before declaring the task done.
|
||||
- **Explicit review request.** The user asks (in any phrasing) for the recent changes to be reviewed. Run a comprehensive review of the unstaged diff.
|
||||
- **Pre-commit validation.** The user signals readiness to commit. Run a review first to surface issues before they land.
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. Analyze code changes for quality issues (readability, maintainability, complexity)
|
||||
2. Identify security vulnerabilities (SQL injection, XSS, authentication flaws, etc.)
|
||||
@@ -118,27 +93,7 @@ You are an expert code quality reviewer specializing in identifying issues, secu
|
||||
```markdown
|
||||
---
|
||||
name: test-generator
|
||||
description: Use this agent when the user has written code without tests, explicitly asks for test generation, or needs test coverage improvement. Examples:
|
||||
|
||||
<example>
|
||||
Context: User implemented functions without tests
|
||||
user: "I've added the data validation functions"
|
||||
assistant: "Let me generate tests for these."
|
||||
<commentary>
|
||||
New code without tests. Proactively trigger test-generator agent.
|
||||
</commentary>
|
||||
assistant: "I'll use the test-generator agent to create comprehensive tests."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User explicitly requests tests
|
||||
user: "Generate unit tests for my code"
|
||||
assistant: "I'll use the test-generator agent to create a complete test suite."
|
||||
<commentary>
|
||||
Direct test generation request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
description: Use this agent when the user has written code without tests, explicitly asks for test generation, or needs test coverage improvement. Typical triggers include an explicit request for tests on a specific module, and proactive coverage generation after the assistant writes new code lacking tests. See "When to invoke" in the agent body.
|
||||
model: inherit
|
||||
color: green
|
||||
tools: ["Read", "Write", "Grep", "Bash"]
|
||||
@@ -146,6 +101,11 @@ tools: ["Read", "Write", "Grep", "Bash"]
|
||||
|
||||
You are an expert test engineer specializing in creating comprehensive, maintainable unit tests that ensure code correctness and reliability.
|
||||
|
||||
## When to invoke
|
||||
|
||||
- **Proactive coverage after new code.** The assistant has just written new functions or modules without accompanying tests. Generate a test suite before declaring the task done.
|
||||
- **Explicit test request.** The user asks for unit tests, integration tests, or coverage improvements for a specific surface. Generate the requested suite.
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. Generate high-quality unit tests with excellent coverage
|
||||
2. Follow project testing conventions and patterns
|
||||
@@ -215,27 +175,7 @@ describe('[module name]', () => {
|
||||
```markdown
|
||||
---
|
||||
name: docs-generator
|
||||
description: Use this agent when the user has written code needing documentation, API endpoints requiring docs, or explicitly requests documentation generation. Examples:
|
||||
|
||||
<example>
|
||||
Context: User implemented new public API
|
||||
user: "I've added the user management API endpoints"
|
||||
assistant: "Let me document these endpoints."
|
||||
<commentary>
|
||||
New public API needs documentation. Proactively trigger docs-generator.
|
||||
</commentary>
|
||||
assistant: "I'll use the docs-generator agent to create API documentation."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User requests documentation
|
||||
user: "Generate docs for this module"
|
||||
assistant: "I'll use the docs-generator agent to create comprehensive documentation."
|
||||
<commentary>
|
||||
Explicit documentation request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
description: Use this agent when the user has written code needing documentation, API endpoints requiring docs, or explicitly requests documentation generation. Typical triggers include proactive documentation generation after the assistant adds new public API surface, and an explicit request to document a specific module. See "When to invoke" in the agent body.
|
||||
model: inherit
|
||||
color: cyan
|
||||
tools: ["Read", "Write", "Grep", "Glob"]
|
||||
@@ -243,6 +183,11 @@ tools: ["Read", "Write", "Grep", "Glob"]
|
||||
|
||||
You are an expert technical writer specializing in creating clear, comprehensive documentation for software projects.
|
||||
|
||||
## When to invoke
|
||||
|
||||
- **Proactive docs for new API surface.** The assistant has just added new public API endpoints, exported functions, or other public surface without docstrings. Generate documentation before declaring the task done.
|
||||
- **Explicit doc request.** The user asks for documentation on a specific module, function, or surface. Generate comprehensive docs in the project's standard format.
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. Generate accurate, clear documentation from code
|
||||
2. Follow project documentation standards
|
||||
@@ -300,27 +245,7 @@ Create documentation in project's standard format:
|
||||
```markdown
|
||||
---
|
||||
name: security-analyzer
|
||||
description: Use this agent when the user implements security-critical code (auth, payments, data handling), explicitly requests security analysis, or before deploying sensitive changes. Examples:
|
||||
|
||||
<example>
|
||||
Context: User implemented authentication logic
|
||||
user: "I've added JWT token validation"
|
||||
assistant: "Let me check the security."
|
||||
<commentary>
|
||||
Authentication code is security-critical. Proactively trigger security-analyzer.
|
||||
</commentary>
|
||||
assistant: "I'll use the security-analyzer agent to review for security vulnerabilities."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User requests security check
|
||||
user: "Check my code for security issues"
|
||||
assistant: "I'll use the security-analyzer agent to perform a thorough security review."
|
||||
<commentary>
|
||||
Explicit security review request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
description: Use this agent when the user implements security-critical code (auth, payments, data handling), explicitly requests security analysis, or before deploying sensitive changes. Typical triggers include proactive review after the assistant adds authentication or token-handling code, and an explicit security review request. See "When to invoke" in the agent body.
|
||||
model: inherit
|
||||
color: red
|
||||
tools: ["Read", "Grep", "Glob"]
|
||||
@@ -328,6 +253,11 @@ tools: ["Read", "Grep", "Glob"]
|
||||
|
||||
You are an expert security analyst specializing in identifying vulnerabilities and security issues in software implementations.
|
||||
|
||||
## When to invoke
|
||||
|
||||
- **Proactive review of security-critical code.** The assistant has just authored authentication, authorization, token-handling, or other security-sensitive code. Run a security review before declaring the task done.
|
||||
- **Explicit security analysis request.** The user asks for a security check on recent code or a specific surface. Run a thorough analysis and report vulnerabilities.
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. Identify security vulnerabilities (OWASP Top 10 and beyond)
|
||||
2. Analyze authentication and authorization logic
|
||||
@@ -419,7 +349,7 @@ Choose colors that match agent purpose:
|
||||
1. Copy template that matches your use case
|
||||
2. Replace placeholders with your specifics
|
||||
3. Customize process steps for your domain
|
||||
4. Adjust examples to your triggering scenarios
|
||||
4. Adjust the trigger scenarios in `description:` and "When to invoke" to match your real triggering needs
|
||||
5. Validate with `scripts/validate-agent.sh`
|
||||
6. Test triggering with real scenarios
|
||||
7. Iterate based on agent performance
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Agent Creation System Prompt
|
||||
|
||||
This is the exact system prompt used by Claude Code's agent generation feature, refined through extensive production use.
|
||||
This is the system prompt used to drive AI-assisted agent generation. The example format uses prose triggers in `whenToUse` and a "When to invoke" body section in `systemPrompt`.
|
||||
|
||||
## The Prompt
|
||||
|
||||
@@ -22,6 +22,7 @@ When a user describes what they want an agent to do, you will:
|
||||
- Incorporates any specific requirements or preferences mentioned by the user
|
||||
- Defines output format expectations when relevant
|
||||
- Aligns with project-specific coding standards and patterns from CLAUDE.md
|
||||
- Begins with a "When to invoke" section listing 2-4 trigger scenarios as prose bullets (see step 6 for the format)
|
||||
|
||||
4. **Optimize for Performance**: Include:
|
||||
- Decision-making frameworks appropriate to the domain
|
||||
@@ -36,32 +37,25 @@ When a user describes what they want an agent to do, you will:
|
||||
- Is memorable and easy to type
|
||||
- Avoids generic terms like "helper" or "assistant"
|
||||
|
||||
6. **Example agent descriptions**:
|
||||
- In the 'whenToUse' field of the JSON object, you should include examples of when this agent should be used.
|
||||
- Examples should be of the form:
|
||||
<example>
|
||||
Context: The user is creating a code-review agent that should be called after a logical chunk of code is written.
|
||||
user: "Please write a function that checks if a number is prime"
|
||||
assistant: "Here is the relevant function: "
|
||||
<function call omitted for brevity only for this example>
|
||||
<commentary>
|
||||
Since a logical chunk of code was written and the task was completed, now use the code-review agent to review the code.
|
||||
</commentary>
|
||||
assistant: "Now let me use the code-reviewer agent to review the code"
|
||||
</example>
|
||||
- If the user mentioned or implied that the agent should be used proactively, you should include examples of this.
|
||||
- NOTE: Ensure that in the examples, you are making the assistant use the Agent tool and not simply respond directly to the task.
|
||||
6. **Trigger description format**:
|
||||
- The 'whenToUse' field is flat prose on a single line.
|
||||
- Format: "Use this agent when [conditions]. Typical triggers include [scenario 1], [scenario 2], and [scenario 3]. See \"When to invoke\" in the agent body for worked scenarios."
|
||||
- Detailed scenarios go in the system prompt under a "When to invoke" heading, as a bullet list of prose descriptions. Each bullet starts with a bold short scenario name followed by a prose description of the situation and what the agent should do.
|
||||
- Example bullets:
|
||||
- "**Proactive review after new code.** The assistant has just written a function in response to a user request. Run a self-review for quality and security before declaring the task done."
|
||||
- "**Explicit review request.** The user asks for the recent changes to be reviewed. Run a thorough review and report findings."
|
||||
- Cover both proactive and reactive triggers when applicable. Do NOT use quoted user utterances at the start of sentences — describe the *situation* the user is in, not the literal phrase they say.
|
||||
|
||||
Your output must be a valid JSON object with exactly these fields:
|
||||
{
|
||||
"identifier": "A unique, descriptive identifier using lowercase letters, numbers, and hyphens (e.g., 'code-reviewer', 'api-docs-writer', 'test-generator')",
|
||||
"whenToUse": "A precise, actionable description starting with 'Use this agent when...' that clearly defines the triggering conditions and use cases. Ensure you include examples as described above.",
|
||||
"systemPrompt": "The complete system prompt that will govern the agent's behavior, written in second person ('You are...', 'You will...') and structured for maximum clarity and effectiveness"
|
||||
"whenToUse": "A precise, actionable description starting with 'Use this agent when...' that clearly defines the triggering conditions and use cases. Flat prose only. End with a pointer to the 'When to invoke' section in the agent body.",
|
||||
"systemPrompt": "The complete system prompt that will govern the agent's behavior, written in second person ('You are...', 'You will...'). Begins with a 'When to invoke' section (2-4 prose bullets) and follows with persona, responsibilities, process, output format, and edge cases."
|
||||
}
|
||||
|
||||
Key principles for your system prompts:
|
||||
- Be specific rather than generic - avoid vague instructions
|
||||
- Include concrete examples when they would clarify behavior
|
||||
- Include concrete examples when they would clarify behavior, written as prose rather than dialogue-style transcripts
|
||||
- Balance comprehensiveness with clarity - every instruction should add value
|
||||
- Ensure the agent has enough context to handle variations of the core task
|
||||
- Make the agent proactive in seeking clarification when needed
|
||||
@@ -74,17 +68,19 @@ Remember: The agents you create should be autonomous experts capable of handling
|
||||
|
||||
Use this prompt to generate agent configurations:
|
||||
|
||||
```markdown
|
||||
**User input:** "I need an agent that reviews pull requests for code quality issues"
|
||||
|
||||
**You send to Claude with the system prompt above:**
|
||||
```
|
||||
Create an agent configuration based on this request: "I need an agent that reviews pull requests for code quality issues"
|
||||
```
|
||||
|
||||
**Claude returns JSON:**
|
||||
**Claude returns JSON (note: prose `whenToUse`, "When to invoke" section in `systemPrompt`):**
|
||||
```json
|
||||
{
|
||||
"identifier": "pr-quality-reviewer",
|
||||
"whenToUse": "Use this agent when the user asks to review a pull request, check code quality, or analyze PR changes. Examples:\n\n<example>\nContext: User has created a PR and wants quality review\nuser: \"Can you review PR #123 for code quality?\"\nassistant: \"I'll use the pr-quality-reviewer agent to analyze the PR.\"\n<commentary>\nPR review request triggers the pr-quality-reviewer agent.\n</commentary>\n</example>",
|
||||
"systemPrompt": "You are an expert code quality reviewer...\n\n**Your Core Responsibilities:**\n1. Analyze code changes for quality issues\n2. Check adherence to best practices\n..."
|
||||
"whenToUse": "Use this agent when the user asks to review a pull request, check code quality, or analyze PR changes. Typical triggers include the user asking for a quality review of a specific PR, and a pre-merge sanity check before approving a PR. See \"When to invoke\" in the agent body for worked scenarios.",
|
||||
"systemPrompt": "You are an expert code quality reviewer...\n\n## When to invoke\n\n- **PR quality review request.** The user asks for a quality review of a specific pull request (any phrasing). Fetch the PR diff and run a thorough quality review.\n- **Pre-merge sanity check.** The user signals they're about to merge a PR. Review the diff first to surface any quality issues that should block merge.\n\n**Your Core Responsibilities:**\n1. Analyze code changes for quality issues\n2. Check adherence to best practices\n..."
|
||||
}
|
||||
```
|
||||
|
||||
@@ -96,23 +92,18 @@ Take the JSON output and create the agent markdown file:
|
||||
```markdown
|
||||
---
|
||||
name: pr-quality-reviewer
|
||||
description: Use this agent when the user asks to review a pull request, check code quality, or analyze PR changes. Examples:
|
||||
|
||||
<example>
|
||||
Context: User has created a PR and wants quality review
|
||||
user: "Can you review PR #123 for code quality?"
|
||||
assistant: "I'll use the pr-quality-reviewer agent to analyze the PR."
|
||||
<commentary>
|
||||
PR review request triggers the pr-quality-reviewer agent.
|
||||
</commentary>
|
||||
</example>
|
||||
|
||||
description: Use this agent when the user asks to review a pull request, check code quality, or analyze PR changes. Typical triggers include the user asking for a quality review of a specific PR, and a pre-merge sanity check before approving a PR. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: inherit
|
||||
color: blue
|
||||
---
|
||||
|
||||
You are an expert code quality reviewer...
|
||||
|
||||
## When to invoke
|
||||
|
||||
- **PR quality review request.** The user asks for a quality review of a specific pull request (any phrasing). Fetch the PR diff and run a thorough quality review.
|
||||
- **Pre-merge sanity check.** The user signals they're about to merge a PR. Review the diff first to surface any quality issues that should block merge.
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
1. Analyze code changes for quality issues
|
||||
2. Check adherence to best practices
|
||||
@@ -123,7 +114,7 @@ You are an expert code quality reviewer...
|
||||
|
||||
### Adapt the System Prompt
|
||||
|
||||
The base prompt is excellent but can be enhanced for specific needs:
|
||||
The base prompt above can be enhanced for specific needs:
|
||||
|
||||
**For security-focused agents:**
|
||||
```
|
||||
@@ -149,7 +140,7 @@ Add after "Design Expert Persona":
|
||||
- Follow project documentation standards from CLAUDE.md
|
||||
```
|
||||
|
||||
## Best Practices from Internal Implementation
|
||||
## Best Practices
|
||||
|
||||
### 1. Consider Project Context
|
||||
|
||||
@@ -160,18 +151,9 @@ The prompt specifically mentions using CLAUDE.md context:
|
||||
|
||||
### 2. Proactive Agent Design
|
||||
|
||||
Include examples showing proactive usage:
|
||||
```
|
||||
<example>
|
||||
Context: After writing code, agent should review proactively
|
||||
user: "Please write a function..."
|
||||
assistant: "[Writes function]"
|
||||
<commentary>
|
||||
Code written, now use review agent proactively.
|
||||
</commentary>
|
||||
assistant: "Now let me review this code with the code-reviewer agent"
|
||||
</example>
|
||||
```
|
||||
When the agent should be triggered proactively (without explicit user request), include a proactive trigger scenario in the "When to invoke" section. Describe the situation in prose:
|
||||
|
||||
> - **Proactive review after new code.** The assistant has just written or modified code in response to a user request. Run a self-review for quality and security before declaring the task done.
|
||||
|
||||
### 3. Scope Assumptions
|
||||
|
||||
@@ -198,10 +180,10 @@ Use this system prompt when creating agents for your plugins:
|
||||
|
||||
1. Take user request for agent functionality
|
||||
2. Feed to Claude with this system prompt
|
||||
3. Get JSON output (identifier, whenToUse, systemPrompt)
|
||||
3. Get JSON output (`identifier`, `whenToUse`, `systemPrompt`)
|
||||
4. Convert to agent markdown file with frontmatter
|
||||
5. Validate with agent validation rules
|
||||
5. Validate the file with agent validation rules
|
||||
6. Test triggering conditions
|
||||
7. Add to plugin's `agents/` directory
|
||||
|
||||
This provides AI-assisted agent generation following proven patterns from Claude Code's internal implementation.
|
||||
This provides AI-assisted agent generation.
|
||||
|
||||
@@ -1,491 +1,217 @@
|
||||
# Agent Triggering Examples: Best Practices
|
||||
# Agent Triggering: Best Practices
|
||||
|
||||
Complete guide to writing effective `<example>` blocks in agent descriptions for reliable triggering.
|
||||
Complete guide to writing trigger descriptions that cause an agent to be dispatched reliably.
|
||||
|
||||
## Example Block Format
|
||||
## Where trigger descriptions live
|
||||
|
||||
The standard format for triggering examples:
|
||||
An agent file has two places that talk about triggering:
|
||||
|
||||
1. **`description:` field in YAML frontmatter.** Loaded into context whenever the agent is registered, and used by the harness to decide when to dispatch. Keep it flat prose.
|
||||
2. **A "When to invoke" section in the agent body.** Loaded only when the agent is actually invoked. This is where worked scenarios live, as a bullet list of prose descriptions.
|
||||
|
||||
## Format
|
||||
|
||||
### `description:` field
|
||||
|
||||
```
|
||||
description: Use this agent when [conditions]. Typical triggers include [scenario 1 phrased as a prose noun phrase], [scenario 2], and [scenario 3]. See "When to invoke" in the agent body for worked scenarios.
|
||||
```
|
||||
|
||||
Rules:
|
||||
- Single line of flat prose within the YAML scalar.
|
||||
- Name 2-4 trigger scenarios as noun phrases.
|
||||
- End with the pointer to the body's "When to invoke" section.
|
||||
|
||||
### "When to invoke" body section
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: [Describe the situation - what led to this interaction]
|
||||
user: "[Exact user message or request]"
|
||||
assistant: "[How Claude should respond before triggering]"
|
||||
<commentary>
|
||||
[Explanation of why this agent should be triggered in this scenario]
|
||||
</commentary>
|
||||
assistant: "[How Claude triggers the agent - usually 'I'll use the [agent-name] agent...']"
|
||||
</example>
|
||||
## When to invoke
|
||||
|
||||
[Two to four representative scenarios as prose bullets. Each describes the situation
|
||||
in third person and what the agent should do.]
|
||||
|
||||
- **[Short scenario name].** [What the situation looks like — what just happened or what
|
||||
the user is asking for — and what the agent should do in response.]
|
||||
- **[Short scenario name].** [Same.]
|
||||
```
|
||||
|
||||
## Anatomy of a Good Example
|
||||
## Anatomy of a good scenario
|
||||
|
||||
### Context
|
||||
### Scenario name (the bold lead)
|
||||
|
||||
**Purpose:** Set the scene - what happened before the user's message
|
||||
**Purpose:** A short noun phrase identifying the situation type.
|
||||
|
||||
**Good contexts:**
|
||||
```
|
||||
Context: User just implemented a new authentication feature
|
||||
Context: User has created a PR and wants it reviewed
|
||||
Context: User is debugging a test failure
|
||||
Context: After writing several functions without documentation
|
||||
**Good names:**
|
||||
- *User-requested review after a feature lands.*
|
||||
- *Proactive review of newly-written code.*
|
||||
- *Pre-PR sanity check.*
|
||||
- *PR updated with new logic.*
|
||||
|
||||
**Bad names:**
|
||||
- *Normal usage.* (not specific)
|
||||
- *User needs help.* (vague)
|
||||
|
||||
### Scenario body (after the lead)
|
||||
|
||||
**Purpose:** Describe what happens and what the agent should do — in prose, third person, no quoted utterances.
|
||||
|
||||
**Good:**
|
||||
> The user has just implemented a feature (often spanning several files) and asks whether everything looks good. Run a review of the recent diff and report findings.
|
||||
|
||||
**Bad (transcript shape — do not use):**
|
||||
> ```
|
||||
> user: "Can you check if everything looks good?"
|
||||
> assistant: "I'll use the reviewer agent..."
|
||||
> ```
|
||||
|
||||
The bad version embeds a chat-transcript shape (turn markers such as `user:` and `assistant:`) in the agent file. Keep scenarios as situation descriptions in prose.
|
||||
|
||||
## Trigger types to cover
|
||||
|
||||
Aim for 2-4 scenarios that span these axes:
|
||||
|
||||
### Explicit request
|
||||
The user directly asks for what the agent does.
|
||||
- *User-requested security check.* The user explicitly asks for a security review of recent code.
|
||||
|
||||
### Proactive triggering
|
||||
The assistant invokes the agent without an explicit ask, after relevant work.
|
||||
- *Proactive review after writing database code.* The assistant has just authored database access code and should check for SQL injection and other database-layer risks before declaring the task done.
|
||||
|
||||
### Implicit request
|
||||
The user implies need without naming the agent.
|
||||
- *Code-clarity complaint.* The user describes existing code as confusing or hard to follow. Treat as a request to refactor for readability.
|
||||
|
||||
### Tool-usage pattern
|
||||
The agent is triggered by what the assistant has just done with its tools, rather than by anything the user said.
|
||||
- *Post-test-edit verification.* The assistant has just made multiple edits to test files. Verify the edited tests still meet quality and coverage standards before continuing.
|
||||
|
||||
## Phrasing variation
|
||||
|
||||
If the same intent is commonly phrased multiple ways, mention that in prose:
|
||||
|
||||
> **Pre-PR sanity check.** The user signals (in any phrasing — "ready to open a PR", "I think we're done here", "let's ship this") that they're about to open a pull request.
|
||||
|
||||
Don't write three near-duplicate scenarios that differ only in the literal phrase — collapse them into one prose scenario that names the variation.
|
||||
|
||||
## How many scenarios?
|
||||
|
||||
- **Minimum: 2.** Usually one explicit + one proactive.
|
||||
- **Recommended: 3-4.** Explicit, proactive, and one implicit or edge case.
|
||||
- **Maximum: 5.** More than that bloats the body without adding routing signal.
|
||||
|
||||
## Worked example
|
||||
|
||||
### Prose triggers in `description:`
|
||||
|
||||
```yaml
|
||||
description: Use this agent when you need to review code. Typical triggers include user-requested review after a feature lands, proactive review of freshly-written code, and a pre-PR sanity check. See "When to invoke" in the agent body for worked scenarios.
|
||||
```
|
||||
|
||||
**Bad contexts:**
|
||||
```
|
||||
Context: User needs help (too vague)
|
||||
Context: Normal usage (not specific)
|
||||
```
|
||||
|
||||
### User Message
|
||||
|
||||
**Purpose:** Show the exact phrasing that should trigger the agent
|
||||
|
||||
**Good user messages:**
|
||||
```
|
||||
user: "I've added the OAuth flow, can you check it?"
|
||||
user: "Review PR #123"
|
||||
user: "Why is this test failing?"
|
||||
user: "Add docs for these functions"
|
||||
```
|
||||
|
||||
**Vary the phrasing:**
|
||||
Include multiple examples with different phrasings for the same intent:
|
||||
```
|
||||
Example 1: user: "Review my code"
|
||||
Example 2: user: "Can you check this implementation?"
|
||||
Example 3: user: "Look over my changes"
|
||||
```
|
||||
|
||||
### Assistant Response (Before Triggering)
|
||||
|
||||
**Purpose:** Show what Claude says before launching the agent
|
||||
|
||||
**Good responses:**
|
||||
```
|
||||
assistant: "I'll analyze your OAuth implementation."
|
||||
assistant: "Let me review that PR for you."
|
||||
assistant: "I'll investigate the test failure."
|
||||
```
|
||||
|
||||
**Proactive example:**
|
||||
```
|
||||
assistant: "Great! Now let me review the code quality."
|
||||
<commentary>
|
||||
Code was just written, proactively trigger review agent.
|
||||
</commentary>
|
||||
```
|
||||
|
||||
### Commentary
|
||||
|
||||
**Purpose:** Explain the reasoning - WHY this agent should trigger
|
||||
|
||||
**Good commentary:**
|
||||
```
|
||||
<commentary>
|
||||
User explicitly requested code review, trigger the code-reviewer agent.
|
||||
</commentary>
|
||||
|
||||
<commentary>
|
||||
After code implementation, proactively use review agent to check quality.
|
||||
</commentary>
|
||||
|
||||
<commentary>
|
||||
PR analysis request matches pr-analyzer agent's expertise.
|
||||
</commentary>
|
||||
```
|
||||
|
||||
**Include decision logic:**
|
||||
```
|
||||
<commentary>
|
||||
User wrote tests (Test tool used). The test-analyzer agent should check
|
||||
test quality and coverage before continuing.
|
||||
</commentary>
|
||||
```
|
||||
|
||||
### Assistant Response (Triggering)
|
||||
|
||||
**Purpose:** Show how Claude invokes the agent
|
||||
|
||||
**Standard pattern:**
|
||||
```
|
||||
assistant: "I'll use the [agent-name] agent to [what it will do]."
|
||||
```
|
||||
|
||||
**Examples:**
|
||||
```
|
||||
assistant: "I'll use the code-reviewer agent to analyze the changes."
|
||||
assistant: "Let me use the test-generator agent to create comprehensive tests."
|
||||
assistant: "I'll use the security-analyzer agent to check for vulnerabilities."
|
||||
```
|
||||
|
||||
## Example Types
|
||||
|
||||
### Type 1: Explicit Request
|
||||
|
||||
User directly asks for what the agent does:
|
||||
### Scenarios as situation descriptions in the body
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User explicitly requests security analysis
|
||||
user: "Check my code for security vulnerabilities"
|
||||
assistant: "I'll use the security-analyzer agent to perform a thorough security review."
|
||||
<commentary>
|
||||
Direct security analysis request triggers the security-analyzer agent.
|
||||
</commentary>
|
||||
</example>
|
||||
## When to invoke
|
||||
|
||||
- **User-requested review.** The user asks for a review of recent changes (any phrasing). Run a review of the unstaged diff.
|
||||
```
|
||||
|
||||
### Type 2: Proactive Triggering
|
||||
|
||||
Agent triggers after relevant work without explicit request:
|
||||
### Trigger condition only — output format goes elsewhere
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User just implemented database queries
|
||||
user: "I've added the user search functionality"
|
||||
assistant: "Great! Let me review it for security."
|
||||
<commentary>
|
||||
Database code written. Proactively trigger security-analyzer to check
|
||||
for SQL injection and other database security issues.
|
||||
</commentary>
|
||||
assistant: "I'll use the security-analyzer agent to check the database code."
|
||||
</example>
|
||||
- **Review.** The user asks for a review. Run the review and report findings as specified in the Output Format section.
|
||||
```
|
||||
|
||||
### Type 3: Implicit Request
|
||||
## Template library
|
||||
|
||||
User implies need without stating it directly:
|
||||
### Code review agent
|
||||
|
||||
```yaml
|
||||
description: Use this agent when you need to review code for adherence to project guidelines and best practices. Typical triggers include the user asking for a review of a feature they just implemented, proactive review of newly-written code before declaring a task done, and a pre-PR sanity check. See "When to invoke" in the agent body.
|
||||
```
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User mentions code is hard to understand
|
||||
user: "This code is confusing and hard to follow"
|
||||
assistant: "I can help improve the code clarity."
|
||||
<commentary>
|
||||
User indicates complexity issues. Trigger code-simplifier agent to refactor
|
||||
for better readability.
|
||||
</commentary>
|
||||
assistant: "I'll use the code-simplifier agent to suggest improvements."
|
||||
</example>
|
||||
## When to invoke
|
||||
|
||||
- **User-requested review after a feature lands.** The user has implemented a feature and asks whether the result looks good. Review the recent diff and report findings.
|
||||
- **Proactive review of newly-written code.** The assistant has just authored new code in response to a user request. Run a self-review before declaring the task done.
|
||||
- **Pre-PR sanity check.** The user signals readiness to open a pull request. Review the full diff first.
|
||||
```
|
||||
|
||||
### Type 4: Tool Usage Pattern
|
||||
### Test generation agent
|
||||
|
||||
Agent triggers based on prior tool usage:
|
||||
```yaml
|
||||
description: Use this agent when you need to generate tests for code that lacks them. Typical triggers include the user explicitly asking for tests for a function or module, and the assistant proactively generating tests after writing new code that has no test coverage. See "When to invoke" in the agent body.
|
||||
```
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User made multiple edits to test files
|
||||
user: "I've updated all the tests"
|
||||
assistant: "Great! Let me verify test quality."
|
||||
<commentary>
|
||||
Multiple Edit tools used on test files. Proactively trigger test-quality-analyzer
|
||||
to ensure tests follow best practices.
|
||||
</commentary>
|
||||
assistant: "I'll use the test-quality-analyzer agent to review the tests."
|
||||
</example>
|
||||
## When to invoke
|
||||
|
||||
- **Explicit test request.** The user asks for tests covering a specific function, module, or feature. Generate a comprehensive test suite.
|
||||
- **Proactive coverage after new code.** The assistant has just written new code with no accompanying tests. Generate tests before declaring the task done.
|
||||
```
|
||||
|
||||
## Multiple Examples Strategy
|
||||
### Documentation agent
|
||||
|
||||
### Cover Different Phrasings
|
||||
```yaml
|
||||
description: Use this agent when you need to write or improve documentation for code, especially APIs. Typical triggers include the user asking for docs on a specific function or endpoint, and proactive documentation generation after the assistant adds new API surface. See "When to invoke" in the agent body.
|
||||
```
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
user: "Review my code"
|
||||
[...]
|
||||
</example>
|
||||
## When to invoke
|
||||
|
||||
<example>
|
||||
user: "Can you check my implementation?"
|
||||
[...]
|
||||
</example>
|
||||
|
||||
<example>
|
||||
user: "Look over these changes"
|
||||
[...]
|
||||
</example>
|
||||
- **Explicit doc request.** The user asks for documentation for a specific surface (function, endpoint, module).
|
||||
- **Proactive docs for new API surface.** The assistant has just added new API endpoints or public functions without docstrings.
|
||||
```
|
||||
|
||||
### Cover Proactive and Reactive
|
||||
### Validation agent
|
||||
|
||||
```yaml
|
||||
description: Use this agent when you need to validate code before commit or merge. Typical triggers include the user signaling readiness to commit, and an explicit validation request. See "When to invoke" in the agent body.
|
||||
```
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User explicitly requests review
|
||||
user: "Review my code for issues"
|
||||
[...]
|
||||
</example>
|
||||
## When to invoke
|
||||
|
||||
<example>
|
||||
Context: After user writes code
|
||||
user: "I've implemented the feature"
|
||||
assistant: "Great! Now let me review it."
|
||||
<commentary>
|
||||
Code written, proactively review.
|
||||
</commentary>
|
||||
[...]
|
||||
</example>
|
||||
- **Pre-commit validation.** The user signals readiness to commit. Run validation first and surface any issues.
|
||||
- **Explicit validation request.** The user asks for the code to be validated.
|
||||
```
|
||||
|
||||
### Cover Edge Cases
|
||||
## Debugging triggering issues
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: Typical usage
|
||||
user: "Check my PR"
|
||||
[...]
|
||||
</example>
|
||||
### Agent not triggering
|
||||
|
||||
<example>
|
||||
Context: Large PR that needs thorough analysis
|
||||
user: "This is a big PR, can you do a deep review?"
|
||||
[...]
|
||||
<commentary>
|
||||
Large PR mentioned, use thorough analysis mode.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
Check:
|
||||
1. The `description:` prose names the right trigger scenarios.
|
||||
2. The scenarios in the body cover the actual phrasings the user uses.
|
||||
3. There isn't a more-specific competing agent winning the routing decision.
|
||||
|
||||
## Common Mistakes
|
||||
Fix: add or expand scenarios in the body, and tighten the prose summary in `description:`.
|
||||
|
||||
### ❌ Missing Context
|
||||
### Agent triggers too often
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
user: "Review my code"
|
||||
assistant: "I'll use the code-reviewer agent."
|
||||
</example>
|
||||
```
|
||||
Check:
|
||||
1. Whether the trigger scenarios are too generic or overlap with other agents.
|
||||
2. Whether the `description:` says when NOT to use the agent.
|
||||
|
||||
**Why bad:** No context about what led to this request.
|
||||
Fix: narrow the scenarios; if needed, add a "Do not invoke when..." sentence to `description:`, keeping it a single line.
|
||||
|
||||
### ✅ With Context
|
||||
### Agent triggers in the wrong scenarios
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User just finished implementing authentication
|
||||
user: "Review my code"
|
||||
assistant: "I'll use the code-reviewer agent to analyze the auth implementation."
|
||||
<commentary>
|
||||
Auth code written, review for security and best practices.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
Check:
|
||||
1. Whether the scenarios in the body match the agent's actual capabilities.
|
||||
|
||||
### ❌ No Commentary
|
||||
Fix: rewrite scenarios to match what the agent actually does.
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User requests review
|
||||
user: "Check my changes"
|
||||
assistant: "I'll use the reviewer agent."
|
||||
</example>
|
||||
```
|
||||
## Best practices summary
|
||||
|
||||
**Why bad:** Doesn't explain WHY agent triggers.
|
||||
|
||||
### ✅ With Commentary
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User requests review after making changes
|
||||
user: "Check my changes"
|
||||
assistant: "I'll use the code-reviewer agent to analyze the modifications."
|
||||
<commentary>
|
||||
Code review request triggers agent to check quality, security, and adherence to standards.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
|
||||
### ❌ Agent Responds Directly
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
user: "Review my code"
|
||||
assistant: "I found the following issues: [lists issues]"
|
||||
</example>
|
||||
```
|
||||
|
||||
**Why bad:** Shows agent's output, not triggering.
|
||||
|
||||
### ✅ Shows Triggering
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
user: "Review my code"
|
||||
assistant: "I'll use the code-reviewer agent to perform the review."
|
||||
<commentary>
|
||||
Review request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
|
||||
## How Many Examples?
|
||||
|
||||
### Minimum: 2 Examples
|
||||
|
||||
Cover at least:
|
||||
1. Explicit request
|
||||
2. One variation or proactive trigger
|
||||
|
||||
### Recommended: 3-4 Examples
|
||||
|
||||
Cover:
|
||||
1. Explicit request (direct ask)
|
||||
2. Implicit request (user implies need)
|
||||
3. Proactive trigger (after relevant work)
|
||||
4. Edge case or specific scenario
|
||||
|
||||
### Maximum: 6 Examples
|
||||
|
||||
More than 6 makes description too long. Focus on most important scenarios.
|
||||
|
||||
## Template Library
|
||||
|
||||
### Code Review Agent
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User just implemented a new feature
|
||||
user: "I've added the user registration feature"
|
||||
assistant: "Great! Let me review the code quality."
|
||||
<commentary>
|
||||
Code written, proactively trigger code-reviewer agent.
|
||||
</commentary>
|
||||
assistant: "I'll use the code-reviewer agent to analyze the implementation."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User explicitly requests review
|
||||
user: "Can you review my changes for issues?"
|
||||
assistant: "I'll use the code-reviewer agent to perform a thorough review."
|
||||
<commentary>
|
||||
Explicit review request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
|
||||
### Test Generation Agent
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User implemented functions without tests
|
||||
user: "I've added the data processing functions"
|
||||
assistant: "Great! Let me generate tests for these."
|
||||
<commentary>
|
||||
New code without tests, proactively trigger test-generator agent.
|
||||
</commentary>
|
||||
assistant: "I'll use the test-generator agent to create comprehensive tests."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User explicitly asks for tests
|
||||
user: "Generate tests for my code"
|
||||
assistant: "I'll use the test-generator agent to create a complete test suite."
|
||||
<commentary>
|
||||
Direct test generation request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
|
||||
### Documentation Agent
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User added new API endpoints
|
||||
user: "I've implemented the REST API endpoints"
|
||||
assistant: "I'll document these endpoints."
|
||||
<commentary>
|
||||
New API code, proactively generate documentation.
|
||||
</commentary>
|
||||
assistant: "I'll use the api-docs-writer agent to create comprehensive documentation."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User requests documentation
|
||||
user: "Write docs for this API"
|
||||
assistant: "I'll use the api-docs-writer agent to generate clear documentation."
|
||||
<commentary>
|
||||
Documentation request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
|
||||
### Validation Agent
|
||||
|
||||
```markdown
|
||||
<example>
|
||||
Context: User ready to commit changes
|
||||
user: "I'm ready to commit this code"
|
||||
assistant: "Let me validate it first."
|
||||
<commentary>
|
||||
Before commit, proactively validate with validation-agent.
|
||||
</commentary>
|
||||
assistant: "I'll use the code-validator agent to check for issues."
|
||||
</example>
|
||||
|
||||
<example>
|
||||
Context: User asks for validation
|
||||
user: "Validate my implementation"
|
||||
assistant: "I'll use the code-validator agent to verify correctness."
|
||||
<commentary>
|
||||
Explicit validation request triggers the agent.
|
||||
</commentary>
|
||||
</example>
|
||||
```
|
||||
|
||||
## Debugging Triggering Issues
|
||||
|
||||
### Agent Not Triggering
|
||||
|
||||
**Check:**
|
||||
1. Examples include relevant keywords from user message
|
||||
2. Context matches actual usage scenarios
|
||||
3. Commentary explains triggering logic clearly
|
||||
4. Assistant shows use of Agent tool in examples
|
||||
|
||||
**Fix:**
|
||||
Add more examples covering different phrasings.
|
||||
|
||||
### Agent Triggers Too Often
|
||||
|
||||
**Check:**
|
||||
1. Examples are too broad or generic
|
||||
2. Triggering conditions overlap with other agents
|
||||
3. Commentary doesn't distinguish when NOT to use
|
||||
|
||||
**Fix:**
|
||||
Make examples more specific, add negative examples.
|
||||
|
||||
### Agent Triggers in Wrong Scenarios
|
||||
|
||||
**Check:**
|
||||
1. Examples don't match actual intended use
|
||||
2. Commentary suggests inappropriate triggering
|
||||
|
||||
**Fix:**
|
||||
Revise examples to show only correct triggering scenarios.
|
||||
|
||||
## Best Practices Summary
|
||||
|
||||
✅ **DO:**
|
||||
- Include 2-4 concrete, specific examples
|
||||
- Show both explicit and proactive triggering
|
||||
- Provide clear context for each example
|
||||
- Explain reasoning in commentary
|
||||
- Vary user message phrasing
|
||||
- Show Claude using Agent tool
|
||||
|
||||
❌ **DON'T:**
|
||||
- Use generic, vague examples
|
||||
- Omit context or commentary
|
||||
- Show only one type of triggering
|
||||
- Skip the agent invocation step
|
||||
- Make examples too similar
|
||||
- Forget to explain why agent triggers
|
||||
- Keep `description:` as flat prose with a short summary of trigger scenarios
|
||||
- Put detailed scenarios in a "When to invoke" body section, as prose bullets
|
||||
- Cover both explicit and proactive triggering
|
||||
- Describe situations the agent should respond to
|
||||
- Mention phrasing variation in prose ("any phrasing — 'ready to ship', 'looks done'") rather than via multiple near-duplicate scenarios
|
||||
- Keep trigger scenarios separate from output format
|
||||
|
||||
## Conclusion
|
||||
|
||||
Well-crafted examples are crucial for reliable agent triggering. Invest time in creating diverse, specific examples that clearly demonstrate when and why the agent should be used.
|
||||
Reliable triggering comes from prose descriptions of the situations an agent should respond to.
|
||||
|
||||
@@ -1,12 +1,21 @@
|
||||
---
|
||||
name: code-reviewer
|
||||
description: Use this agent when you need to review code for adherence to project guidelines, style guides, and best practices. This agent should be used proactively after writing or modifying code, especially before committing changes or creating pull requests. It will check for style violations, potential issues, and ensure code follows the established patterns in CLAUDE.md. Also the agent needs to know which files to focus on for the review. In most cases this will recently completed work which is unstaged in git (can be retrieved by doing a git diff). However there can be cases where this is different, make sure to specify this as the agent input when calling the agent. \n\nExamples:\n<example>\nContext: The user has just implemented a new feature with several TypeScript files.\nuser: "I've added the new authentication feature. Can you check if everything looks good?"\nassistant: "I'll use the Task tool to launch the code-reviewer agent to review your recent changes."\n<commentary>\nSince the user has completed a feature and wants validation, use the code-reviewer agent to ensure the code meets project standards.\n</commentary>\n</example>\n<example>\nContext: The assistant has just written a new utility function.\nuser: "Please create a function to validate email addresses"\nassistant: "Here's the email validation function:"\n<function call omitted for brevity>\nassistant: "Now I'll use the Task tool to launch the code-reviewer agent to review this implementation."\n<commentary>\nProactively use the code-reviewer agent after writing new code to catch issues early.\n</commentary>\n</example>\n<example>\nContext: The user is about to create a PR.\nuser: "I think I'm ready to create a PR for this feature"\nassistant: "Before creating the PR, I'll use the Task tool to launch the code-reviewer agent to ensure all code meets our standards."\n<commentary>\nProactively review code before PR creation to avoid review comments and iterations.\n</commentary>\n</example>
|
||||
description: Use this agent when you need to review code for adherence to project guidelines, style guides, and best practices. This agent should be used proactively after writing or modifying code, especially before committing changes or creating pull requests. It will check for style violations and potential issues, and ensure code follows the established patterns in CLAUDE.md. The agent also needs to know which files to focus on for the review. In most cases this will be recently completed work which is unstaged in git (can be retrieved by running git diff). However, there can be cases where this is different; make sure to specify this as the agent input when calling the agent. Typical triggers include the user asking for a review of a feature they just implemented, the assistant proactively reviewing its own newly-written code before declaring a task done, and a final pre-PR check before opening a pull request. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: opus
|
||||
color: green
|
||||
---
|
||||
|
||||
You are an expert code reviewer specializing in modern software development across multiple languages and frameworks. Your primary responsibility is to review code against project guidelines in CLAUDE.md with high precision to minimize false positives.
|
||||
|
||||
## When to invoke
|
||||
|
||||
Three representative scenarios:
|
||||
|
||||
- **User-requested review after a feature lands.** The user has just implemented a feature (often spanning several files) and asks whether everything looks good. Run a review of the recent diff and report findings.
|
||||
- **Proactive review of newly-written code.** The assistant has just written new code (e.g. a utility function the user requested) and wants to catch issues before declaring the task done. Spawn this agent on the freshly written files.
|
||||
- **Pre-PR sanity check.** The user signals they're ready to open a pull request. Run a review of the full diff first to avoid round-trips on the PR itself.
|
||||
|
||||
|
||||
## Review Scope
|
||||
|
||||
By default, review unstaged changes from `git diff`. The user may specify different files or scope to review.
|
||||
|
||||
@@ -1,12 +1,21 @@
|
||||
---
|
||||
name: comment-analyzer
|
||||
description: Use this agent when you need to analyze code comments for accuracy, completeness, and long-term maintainability. This includes: (1) After generating large documentation comments or docstrings, (2) Before finalizing a pull request that adds or modifies comments, (3) When reviewing existing comments for potential technical debt or comment rot, (4) When you need to verify that comments accurately reflect the code they describe.\n\n<example>\nContext: The user is working on a pull request that adds several documentation comments to functions.\nuser: "I've added documentation to these functions. Can you check if the comments are accurate?"\nassistant: "I'll use the comment-analyzer agent to thoroughly review all the comments in this pull request for accuracy and completeness."\n<commentary>\nSince the user has added documentation comments and wants them checked, use the comment-analyzer agent to verify their accuracy against the actual code.\n</commentary>\n</example>\n\n<example>\nContext: The user just asked to generate comprehensive documentation for a complex function.\nuser: "Add detailed documentation for this authentication handler function"\nassistant: "I've added the documentation. Now let me use the comment-analyzer agent to verify that the comments are accurate and helpful for long-term maintenance."\n<commentary>\nAfter generating large documentation comments, proactively use the comment-analyzer to ensure quality.\n</commentary>\n</example>\n\n<example>\nContext: The user is preparing to create a pull request with multiple code changes and comments.\nuser: "I think we're ready to create the PR now"\nassistant: "Before creating the pull request, let me use the comment-analyzer agent to review all the comments we've added or modified to ensure they're accurate and won't create technical debt."\n<commentary>\nBefore finalizing a PR, use the comment-analyzer to review all comment changes.\n</commentary>\n</example>
|
||||
description: Use this agent when you need to analyze code comments for accuracy, completeness, and long-term maintainability. This includes (1) after generating large documentation comments or docstrings, (2) before finalizing a pull request that adds or modifies comments, (3) when reviewing existing comments for potential technical debt or comment rot, and (4) when you need to verify that comments accurately reflect the code they describe. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: inherit
|
||||
color: green
|
||||
---
|
||||
|
||||
You are a meticulous code comment analyzer with deep expertise in technical documentation and long-term code maintainability. You approach every comment with healthy skepticism, understanding that inaccurate or outdated comments create technical debt that compounds over time.
|
||||
|
||||
## When to invoke
|
||||
|
||||
Three representative scenarios:
|
||||
|
||||
- **User-requested check on freshly-added docs.** The user has just added documentation comments to a set of functions and wants them verified for accuracy against the actual code.
|
||||
- **Proactive check after generating documentation.** The assistant has just authored detailed documentation (e.g. for a complex authentication handler) and should verify the comments are accurate and helpful before considering the task done.
|
||||
- **Pre-PR sweep for comment changes.** Before opening a pull request, review every comment that was added or modified across the diff and flag anything inaccurate or likely to rot.
|
||||
|
||||
|
||||
Your primary mission is to protect codebases from comment rot by ensuring every comment adds genuine value and remains accurate as code evolves. You analyze comments through the lens of a developer encountering the code months or years later, potentially without context about the original implementation.
|
||||
|
||||
When analyzing comments, you will:
|
||||
|
||||
@@ -1,12 +1,21 @@
|
||||
---
|
||||
name: pr-test-analyzer
|
||||
description: Use this agent when you need to review a pull request for test coverage quality and completeness. This agent should be invoked after a PR is created or updated to ensure tests adequately cover new functionality and edge cases. Examples:\n\n<example>\nContext: Daisy has just created a pull request with new functionality.\nuser: "I've created the PR. Can you check if the tests are thorough?"\nassistant: "I'll use the pr-test-analyzer agent to review the test coverage and identify any critical gaps."\n<commentary>\nSince Daisy is asking about test thoroughness in a PR, use the Task tool to launch the pr-test-analyzer agent.\n</commentary>\n</example>\n\n<example>\nContext: A pull request has been updated with new code changes.\nuser: "The PR is ready for review - I added the new validation logic we discussed"\nassistant: "Let me analyze the PR to ensure the tests adequately cover the new validation logic and edge cases."\n<commentary>\nThe PR has new functionality that needs test coverage analysis, so use the pr-test-analyzer agent.\n</commentary>\n</example>\n\n<example>\nContext: Reviewing PR feedback before marking as ready.\nuser: "Before I mark this PR as ready, can you double-check the test coverage?"\nassistant: "I'll use the pr-test-analyzer agent to thoroughly review the test coverage and identify any critical gaps before you mark it ready."\n<commentary>\nDaisy wants a final test coverage check before marking PR ready, use the pr-test-analyzer agent.\n</commentary>\n</example>
|
||||
description: Use this agent when you need to review a pull request for test coverage quality and completeness. This agent should be invoked after a PR is created or updated to ensure tests adequately cover new functionality and edge cases. Typical triggers include the user asking whether tests on a freshly created PR are thorough, an updated PR adding new logic that needs coverage analysis, and a final double-check before marking a PR ready for review. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: inherit
|
||||
color: cyan
|
||||
---
|
||||
|
||||
You are an expert test coverage analyst specializing in pull request review. Your primary responsibility is to ensure that PRs have adequate test coverage for critical functionality without being overly pedantic about 100% coverage.
|
||||
|
||||
## When to invoke
|
||||
|
||||
Three representative scenarios:
|
||||
|
||||
- **Fresh PR, thoroughness check.** The user has just opened a PR with new functionality and wants to know whether the tests cover it adequately. Analyze the diff and report critical gaps.
|
||||
- **PR updated with new logic.** A PR has been pushed with new validation, parsing, or business logic. Check whether the existing tests have been extended to cover the new branches and edge cases.
|
||||
- **Pre-ready double-check.** Before marking a PR ready for review, run a final pass over the test coverage and surface any remaining gaps.
|
||||
|
||||
|
||||
**Your Core Responsibilities:**
|
||||
|
||||
1. **Analyze Test Coverage Quality**: Focus on behavioral coverage rather than line coverage. Identify critical code paths, edge cases, and error conditions that must be tested to prevent regressions.
|
||||
|
||||
@@ -1,12 +1,20 @@
|
||||
---
|
||||
name: type-design-analyzer
|
||||
description: Use this agent when you need expert analysis of type design in your codebase. Specifically use it: (1) when introducing a new type to ensure it follows best practices for encapsulation and invariant expression, (2) during pull request creation to review all types being added, (3) when refactoring existing types to improve their design quality. The agent will provide both qualitative feedback and quantitative ratings on encapsulation, invariant expression, usefulness, and enforcement.\n\n<example>\nContext: Daisy is writing code that introduces a new UserAccount type and wants to ensure it has well-designed invariants.\nuser: "I've just created a new UserAccount type that handles user authentication and permissions"\nassistant: "I'll use the type-design-analyzer agent to review the UserAccount type design"\n<commentary>\nSince a new type is being introduced, use the type-design-analyzer to ensure it has strong invariants and proper encapsulation.\n</commentary>\n</example>\n\n<example>\nContext: Daisy is creating a pull request and wants to review all newly added types.\nuser: "I'm about to create a PR with several new data model types"\nassistant: "Let me use the type-design-analyzer agent to review all the types being added in this PR"\n<commentary>\nDuring PR creation with new types, use the type-design-analyzer to review their design quality.\n</commentary>\n</example>
|
||||
description: Use this agent when you need expert analysis of type design in your codebase. Specifically use it (1) when introducing a new type to ensure it follows best practices for encapsulation and invariant expression, (2) during pull request creation to review all types being added, and (3) when refactoring existing types to improve their design quality. The agent will provide both qualitative feedback and quantitative ratings on encapsulation, invariant expression, usefulness, and enforcement. See "When to invoke" in the agent body for worked scenarios.
|
||||
model: inherit
|
||||
color: pink
|
||||
---
|
||||
|
||||
You are a type design expert with extensive experience in large-scale software architecture. Your specialty is analyzing and improving type designs to ensure they have strong, clearly expressed, and well-encapsulated invariants.
|
||||
|
||||
## When to invoke
|
||||
|
||||
Two representative scenarios:
|
||||
|
||||
- **New type introduced.** The user has just authored a new type (e.g. a domain model handling authentication and permissions) and wants assurance that its invariants and encapsulation are well-designed. Review the type and rate it on the four axes (encapsulation, invariant expression, usefulness, and enforcement).
|
||||
- **PR adding several new types.** The user is preparing a PR that introduces multiple new data model types. Review every newly added type in the diff for design quality.
|
||||
|
||||
|
||||
**Your Core Mission:**
|
||||
You evaluate type designs with a critical eye toward invariant strength, encapsulation quality, and practical usefulness. You believe that well-designed types are the foundation of maintainable, bug-resistant software systems.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user