Compare commits

...

20 Commits

Author SHA1 Message Date
Morgan Westlee Lunt
bdca23e8e4 Add code-modernization plugin
Structured workflow (assess → map → extract-rules → reimagine → transform →
harden) and specialist agents (legacy-analyst, business-rules-extractor,
architecture-critic, security-auditor, test-engineer) for modernizing legacy
codebases into current stacks.
2026-04-24 19:52:02 +00:00
Bryan Thompson
020446a429 Add quarkus-agent plugin (#1534) 2026-04-23 22:45:48 +01:00
Bryan Thompson
740e9d5513 Add vanta-mcp-plugin (#1563) 2026-04-23 22:29:25 +01:00
Noah Zweben
5a71459c03 telegram: gate /start, /help, /status behind dmPolicy (#894)
The bot command handlers bypassed access control — they responded to
any DM user regardless of dmPolicy, leaking bot presence and
contradicting ACCESS.md's "Drop silently. No reply." contract for
allowlist mode.

Add dmCommandGate() that applies the same disabled/allowlist checks
as gate() without the pairing side effects, and route all three
handlers through it. Also prune expired pending codes before /status
iterates them.

Fixes #854

Co-authored-by: Claude <noreply@anthropic.com>
2026-04-23 12:02:34 -07:00
Noah Zweben
ae54b113c4 Add Apache 2.0 LICENSE to math-olympiad plugin (#868)
Co-authored-by: Claude <noreply@anthropic.com>
2026-04-23 12:02:30 -07:00
jschwar2552
2a40fd2e7c skill-creator: sync from anthropics/skills (drop ANTHROPIC_API_KEY requirement) (#1523)
Ports anthropics/skills#547 (b0cbd3d) so this repo matches the upstream
skills repo.

improve_description.py and run_loop.py now shell out to `claude -p` instead
of using the Anthropic SDK directly, so the description optimizer uses the
session's existing Claude Code auth and no longer requires a separate
ANTHROPIC_API_KEY. SKILL.md drops the stale extended-thinking reference and
adds guidance for updating an existing skill.

Several enterprise customers sync exclusively from this repo (not
anthropics/skills, whose README disclaims production use), so they have been
stuck on the old SDK-based path.
2026-04-23 12:02:26 -07:00
Bryan Thompson
95f6172405 Add zscaler plugin (#1552) 2026-04-23 12:01:19 -07:00
Bryan Thompson
7bbdb8434e Add data-agent-kit-starter-pack plugin (#1551) 2026-04-23 12:01:12 -07:00
Bryan Thompson
4bbf944de1 Add atlassian-forge-skills plugin (#1539) 2026-04-23 12:01:06 -07:00
Bryan Thompson
06830b2ccd Add apollo plugin (#1538) 2026-04-23 12:01:00 -07:00
Bryan Thompson
bd6f1d7f48 Add windsor-ai plugin (#1536) 2026-04-23 12:00:53 -07:00
Bryan Thompson
808e70ffb9 Add auth0 plugin (#1535) 2026-04-23 12:00:47 -07:00
Bryan Thompson
187a267738 Add cloud-sql-postgresql plugin (#1533)
* Add cloud-sql-postgresql plugin

* Remove SHA pin from cloud-sql-postgresql entry
2026-04-23 12:00:37 -07:00
Bryan Thompson
42e980340d Add alloydb plugin (#1532)
* Add alloydb plugin

* Remove SHA pin from alloydb entry
2026-04-23 12:00:31 -07:00
Bryan Thompson
c15eada2e9 Add qt-development-skills plugin (#1519) 2026-04-23 12:00:25 -07:00
Bryan Thompson
f9f07aa2d3 Add versori-skills plugin (#1501) 2026-04-23 12:00:18 -07:00
Bryan Thompson
81952cabc5 Merge pull request #1499 from anthropics/add-exa
Add exa plugin
2026-04-23 13:59:15 -05:00
Bryan Thompson
0852f6647a Merge pull request #1437 from anthropics/rename-azure-skills-to-azure
Rename azure-skills to azure
2026-04-23 13:58:58 -05:00
Bryan Thompson
b0724d7a16 Rename azure-skills to azure per developer request 2026-04-22 06:42:35 -05:00
Bryan Thompson
f1938a2dc2 Add exa plugin 2026-04-20 08:03:24 -05:00
20 changed files with 1338 additions and 81 deletions

View File

@@ -71,6 +71,19 @@
},
"homepage": "https://github.com/AikidoSec/aikido-claude-plugin"
},
{
"name": "alloydb",
"description": "Create, connect, and interact with an AlloyDB for PostgreSQL database and data.",
"author": {
"name": "Google LLC"
},
"category": "database",
"source": {
"source": "url",
"url": "https://github.com/gemini-cli-extensions/alloydb.git"
},
"homepage": "https://cloud.google.com/alloydb"
},
{
"name": "amazon-location-service",
"description": "Guide developers through adding maps, places search, geocoding, routing, and other geospatial features with Amazon Location Service, including authentication setup, SDK integration, and best practices.",
@@ -95,6 +108,19 @@
"category": "monitoring",
"homepage": "https://github.com/amplitude/mcp-marketplace"
},
{
"name": "apollo",
"description": "Prospect, enrich leads, load outreach sequences, and query sales analytics with Apollo.io — one-click MCP server integration for Claude Code and Cowork.",
"author": {
"name": "Apollo.io"
},
"category": "productivity",
"source": {
"source": "url",
"url": "https://github.com/apolloio/apollo-mcp-plugin.git"
},
"homepage": "https://www.apollo.io/"
},
{
"name": "asana",
"description": "Asana project management integration. Create and manage tasks, search projects, update assignments, track progress, and integrate your development workflow with Asana's work management platform.",
@@ -133,6 +159,19 @@
},
"homepage": "https://github.com/atlassian/atlassian-mcp-server"
},
{
"name": "atlassian-forge-skills",
"description": "Forge-focused skill bundle and MCP tooling for Atlassian Forge: scaffold apps, review before deploy, debug production issues, and stay current on Forge APIs and the Atlassian Design System.",
"author": {
"name": "Atlassian Labs"
},
"category": "development",
"source": {
"source": "url",
"url": "https://github.com/atlassian/forge-skills.git"
},
"homepage": "https://developer.atlassian.com"
},
{
"name": "atomic-agents",
"description": "Comprehensive development workflow for building AI agents with the Atomic Agents framework. Includes specialized agents for schema design, architecture planning, code review, and tool development. Features guided workflows, progressive-disclosure skills, and best practice validation.",
@@ -147,6 +186,21 @@
"community-managed"
]
},
{
"name": "auth0",
"description": "Add authentication to any app with Auth0. This plugin detects your framework, scaffolds the right Auth0 SDK integration, and guides you through login, logout, sessions, and protected routes — using current SDK patterns.",
"author": {
"name": "Auth0"
},
"category": "security",
"source": {
"source": "git-subdir",
"url": "https://github.com/auth0/agent-skills.git",
"path": "plugins/auth0",
"ref": "main"
},
"homepage": "https://auth0.com/docs/quickstart/agent-skills"
},
{
"name": "autofix-bot",
"description": "Code review agent that detects security vulnerabilities, code quality issues, and hardcoded secrets. Combines 5,000+ static analyzers to scan your code and dependencies for CVEs.",
@@ -181,6 +235,16 @@
},
"homepage": "https://github.com/awslabs/agent-plugins"
},
{
"name": "azure",
"description": "Transform Claude into an Azure expert. This plugin integrates the Azure MCP server and specialized Azure skills to move beyond generic advice. It enables Claude to perform real-world tasks: listing resources, validating deployments, diagnosing infrastructure issues, and optimizing costs across 50+ Azure services.",
"category": "deployment",
"source": {
"source": "url",
"url": "https://github.com/microsoft/azure-skills.git"
},
"homepage": "https://github.com/microsoft/azure-skills"
},
{
"name": "azure-cosmos-db-assistant",
"source": {
@@ -192,16 +256,6 @@
"category": "database",
"homepage": "https://github.com/AzureCosmosDB/cosmosdb-claude-code-plugin"
},
{
"name": "azure-skills",
"description": "Microsoft Azure MCP integration for cloud resource management, deployments, and Azure services. Manage your Azure infrastructure, monitor applications, and deploy resources directly from Claude Code.",
"category": "deployment",
"source": {
"source": "url",
"url": "https://github.com/microsoft/azure-skills.git"
},
"homepage": "https://github.com/microsoft/azure-skills"
},
{
"name": "base44",
"description": "Build and deploy Base44 full-stack apps with CLI project management and JavaScript/TypeScript SDK development skills",
@@ -334,6 +388,19 @@
"category": "productivity",
"homepage": "https://github.com/anthropics/claude-plugins-official/tree/main/plugins/claude-md-management"
},
{
"name": "cloud-sql-postgresql",
"description": "Create, connect, and interact with a Cloud SQL for PostgreSQL database and data.",
"author": {
"name": "Google LLC"
},
"category": "database",
"source": {
"source": "url",
"url": "https://github.com/gemini-cli-extensions/cloud-sql-postgresql.git"
},
"homepage": "https://cloud.google.com/sql"
},
{
"name": "cloudflare",
"source": {
@@ -368,6 +435,17 @@
},
"homepage": "https://github.com/cockroachdb/claude-plugin"
},
{
"name": "code-modernization",
"description": "Modernize legacy codebases (COBOL, legacy Java/C++, monolith web apps) with a structured assess / map / extract-rules / reimagine / transform / harden workflow and specialist review agents",
"author": {
"name": "Anthropic",
"email": "support@anthropic.com"
},
"source": "./plugins/code-modernization",
"category": "development",
"homepage": "https://github.com/anthropics/claude-plugins-official/tree/main/plugins/code-modernization"
},
{
"name": "code-review",
"description": "Automated code review for pull requests using multiple specialized agents with confidence-based scoring to filter false positives",
@@ -452,6 +530,19 @@
},
"homepage": "https://github.com/astronomer/agents"
},
{
"name": "data-agent-kit-starter-pack",
"description": "Specialized suite of skills for data engineers on Google Cloud — architect data pipelines, transform data with dbt, write Spark and BigQuery SQL notebooks, and orchestrate end-to-end workflows across BigQuery, Spanner, BigLake, and Dataproc.",
"author": {
"name": "Google LLC"
},
"category": "development",
"source": {
"source": "url",
"url": "https://github.com/gemini-cli-extensions/data-agent-kit-starter-pack.git"
},
"homepage": "https://cloud.google.com/bigquery"
},
{
"name": "data-engineering",
"description": "Data engineering plugin - warehouse exploration, pipeline authoring, Airflow integration",
@@ -527,6 +618,20 @@
},
"homepage": "https://elixir-lsp.github.io/elixir-ls/"
},
{
"name": "exa",
"description": "Exa AI web search, deep research, and content extraction. Provides MCP tools and research skills for comprehensive web search, people discovery, company research, academic papers, and more.",
"author": {
"name": "Exa"
},
"category": "productivity",
"source": {
"source": "url",
"url": "https://github.com/exa-labs/exa-mcp-server.git",
"sha": "bd2ccdd52ca7a35fbc2207ad266bb2a961c0e793"
},
"homepage": "https://exa.ai/docs/reference/exa-mcp"
},
{
"name": "explanatory-output-style",
"description": "Adds educational insights about implementation choices and codebase patterns (mimics the deprecated Explanatory output style)",
@@ -1247,6 +1352,33 @@
},
"homepage": "https://github.com/qodo-ai/qodo-skills.git"
},
{
"name": "qt-development-skills",
"description": "Agentic engineering skills for Qt software development — Qt C++/QML code review, QML coding, and Qt C++/QML code documentation.",
"author": {
"name": "Qt Group"
},
"category": "development",
"source": {
"source": "url",
"url": "https://github.com/TheQtCompanyRnD/agent-skills.git",
"sha": "62a98e2339e6eefcff108cfc3fe9db8a7301856c"
},
"homepage": "https://www.qt.io/"
},
{
"name": "quarkus-agent",
"description": "MCP server for AI coding agents to create, manage, and interact with Quarkus applications. Provides tools for project scaffolding, dev mode lifecycle, extension skills, Dev MCP proxy, and documentation search.",
"author": {
"name": "Quarkus"
},
"category": "development",
"source": {
"source": "url",
"url": "https://github.com/quarkusio/quarkus-agent-mcp.git"
},
"homepage": "https://quarkus.io"
},
{
"name": "railway",
"description": "Deploy and manage apps, databases, and infrastructure on Railway. Covers project setup, deploys, environment configuration, networking, troubleshooting, and monitoring.",
@@ -1688,6 +1820,20 @@
},
"homepage": "https://github.com/UI5/plugins-claude"
},
{
"name": "vanta-mcp-plugin",
"description": "The Vanta plugin connects Claude Code to Vanta's security and compliance platform through the Vanta MCP server. It combines Vanta's test-specific remediation intelligence with your local repository context to help you fix compliance failures faster.",
"author": {
"name": "Vanta"
},
"category": "security",
"source": {
"source": "url",
"url": "https://github.com/VantaInc/vanta-mcp-plugin.git",
"sha": "46e5bebf0484f08fc4a3c4054437cf5ec06298c9"
},
"homepage": "https://help.vanta.com/en/articles/14094979-connecting-to-vanta-mcp#h_887ce3f337"
},
{
"name": "vercel",
"description": "Vercel deployment platform integration. Manage deployments, check build status, access logs, configure domains, and control your frontend infrastructure directly from Claude Code.",
@@ -1698,6 +1844,20 @@
},
"homepage": "https://github.com/vercel/vercel-plugin"
},
{
"name": "versori-skills",
"description": "Skills for building data integrations using the Versori platform and versori-run SDK. Claude can bootstrap projects, configure systems and connections, generate type-safe TypeScript workflows, run local validation via Deno, and deploy to production — with a research-first approach that grounds code generation in gathered API documentation.",
"author": {
"name": "Versori"
},
"category": "development",
"source": {
"source": "url",
"url": "https://github.com/versori/cli.git",
"sha": "134cf334c3065509eee39a5361fd0bcf969dc867"
},
"homepage": "https://docs.versori.com/latest/ai-tooling/overview"
},
{
"name": "voila-api",
"description": "Definitive guide for the Voila API. Covers shipment creation (Manual/Smart Shipping), real-time tracking, detailed history, manifesting, collections, webhooks, and third-party integrations (Sorted, Peoplevox, Mintsoft, Veeqo, JD).",
@@ -1708,6 +1868,20 @@
},
"homepage": "https://github.com/TSedmanDC/Voila-API-Skill"
},
{
"name": "windsor-ai",
"description": "Connect Claude Code to 325+ business data sources via Windsor.ai. Query marketing, sales, CRM, ecommerce, finance, and analytics data from Google Ads, Meta, HubSpot, Salesforce, Shopify, Stripe, and hundreds more — directly from your terminal.",
"author": {
"name": "Windsor.ai"
},
"category": "productivity",
"source": {
"source": "url",
"url": "https://github.com/windsor-ai/claude-windsor-ai-plugin.git",
"sha": "248a6994b15b410cc025b105bb4ed5558e9b1af9"
},
"homepage": "https://windsor.ai"
},
{
"name": "wix",
"description": "Build, manage, and deploy Wix sites and apps. CLI development skills for dashboard extensions, backend APIs, site widgets, and service plugins with the Wix Design System, plus MCP server for site management.",
@@ -1751,6 +1925,19 @@
"url": "https://github.com/zoom/zoom-plugin.git"
},
"homepage": "https://developers.zoom.us/"
},
{
"name": "zscaler",
"description": "Manage Zscaler cloud security platform including ZPA (private access), ZIA (internet access), ZDX (digital experience), ZCC (client connector), EASM (attack surface), and Z-Insights (analytics). Create and manage policies, troubleshoot connectivity, audit security configurations, and investigate incidents across the full Zscaler ecosystem.",
"author": {
"name": "Zscaler"
},
"category": "security",
"source": {
"source": "url",
"url": "https://github.com/zscaler/zscaler-mcp-server.git"
},
"homepage": "https://github.com/zscaler/zscaler-mcp-server"
}
]
}

View File

@@ -284,6 +284,19 @@ function gate(ctx: Context): GateResult {
return { action: 'drop' }
}
// Like gate() but for bot commands: no pairing side effects, just allow/drop.
function dmCommandGate(ctx: Context): { access: Access; senderId: string } | null {
if (ctx.chat?.type !== 'private') return null
if (!ctx.from) return null
const senderId = String(ctx.from.id)
const access = loadAccess()
const pruned = pruneExpired(access)
if (pruned) saveAccess(access)
if (access.dmPolicy === 'disabled') return null
if (access.dmPolicy === 'allowlist' && !access.allowFrom.includes(senderId)) return null
return { access, senderId }
}
function isMentioned(ctx: Context, extraPatterns?: string[]): boolean {
const entities = ctx.message?.entities ?? ctx.message?.caption_entities ?? []
const text = ctx.message?.text ?? ctx.message?.caption ?? ''
@@ -669,12 +682,7 @@ setInterval(() => {
// the gate's behavior for unrecognized groups.
bot.command('start', async ctx => {
if (ctx.chat?.type !== 'private') return
const access = loadAccess()
if (access.dmPolicy === 'disabled') {
await ctx.reply(`This bot isn't accepting new connections.`)
return
}
if (!dmCommandGate(ctx)) return
await ctx.reply(
`This bot bridges Telegram to a Claude Code session.\n\n` +
`To pair:\n` +
@@ -685,7 +693,7 @@ bot.command('start', async ctx => {
})
bot.command('help', async ctx => {
if (ctx.chat?.type !== 'private') return
if (!dmCommandGate(ctx)) return
await ctx.reply(
`Messages you send here route to a paired Claude Code session. ` +
`Text and photos are forwarded; replies and reactions come back.\n\n` +
@@ -695,14 +703,12 @@ bot.command('help', async ctx => {
})
bot.command('status', async ctx => {
if (ctx.chat?.type !== 'private') return
const from = ctx.from
if (!from) return
const senderId = String(from.id)
const access = loadAccess()
const gated = dmCommandGate(ctx)
if (!gated) return
const { access, senderId } = gated
if (access.allowFrom.includes(senderId)) {
const name = from.username ? `@${from.username}` : senderId
const name = ctx.from!.username ? `@${ctx.from!.username}` : senderId
await ctx.reply(`Paired as ${name}.`)
return
}

View File

@@ -0,0 +1,8 @@
{
"name": "code-modernization",
"description": "Modernize legacy codebases (COBOL, legacy Java/C++, monolith web apps) with a structured assess → map → extract-rules → reimagine → transform → harden workflow and specialist review agents",
"author": {
"name": "Anthropic",
"email": "support@anthropic.com"
}
}

View File

@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -0,0 +1,107 @@
# Code Modernization Plugin
A structured workflow and set of specialist agents for modernizing legacy codebases — COBOL, legacy Java/C++, monolith web apps — into current stacks while preserving behavior.
## Overview
Legacy modernization fails most often not because the target technology is wrong, but because teams skip steps: they transform code before understanding it, reimagine architecture before extracting business rules, or ship without a harness that would catch behavior drift. This plugin enforces a sequence:
```
assess → map → extract-rules → reimagine → transform → harden
```
Each step has a dedicated slash command. Specialist agents (legacy analyst, business rules extractor, architecture critic, security auditor, test engineer) are invoked from within those commands — or directly — to keep the work honest.
## Commands
The commands are designed to be run in order, but each produces a standalone artifact so you can stop, review, and resume.
### `/modernize-brief`
Capture the modernization brief: what's being modernized, why now, constraints (regulatory, data, runtime), non-goals, and success criteria. Produces `analysis/brief.md`. Run this first.
### `/modernize-assess`
Inventory the legacy codebase: languages, line counts, module boundaries, external integrations, build system, test coverage, known pain points. Produces `analysis/<system>/ASSESSMENT.md`. Uses the `legacy-analyst` agent for deep reads on unfamiliar dialects.
### `/modernize-map`
Map the legacy structure onto a target architecture: which legacy modules become which target services/packages, data-flow diagrams, migration sequencing. Produces `analysis/<system>/TOPOLOGY.html` (plus standalone `.mmd` diagrams). Uses the `architecture-critic` agent to pressure-test the design.
### `/modernize-extract-rules`
Extract business rules from the legacy code — the rules that are encoded in procedural logic, COBOL copybooks, stored procedures, or config files — into human-readable form with citations back to source. Produces `analysis/<system>/BUSINESS_RULES.md`. Uses the `business-rules-extractor` agent.
### `/modernize-reimagine`
Propose the target design: APIs, data model, runtime. Explicitly list what changes from legacy and what stays identical. Produces `analysis/<system>/REIMAGINED_ARCHITECTURE.md`. Uses the `architecture-critic` agent to challenge over-engineering.
### `/modernize-transform`
Do the actual code transformation — module by module. Writes to `modernized/`. Pairs each transformed module with a test suite that pins the pre-transform behavior.
### `/modernize-harden`
Post-transform review pass: security audit, test coverage, error handling, observability. Uses `security-auditor` and `test-engineer` agents. Produces a findings report ranked Blocker / High / Medium / Nit.
## Agents
- **`legacy-analyst`** — Reads legacy code (COBOL, legacy Java/C++, procedural PHP, classic ASP) and produces structured summaries. Good at spotting implicit dependencies, copybook inheritance, and "JOBOL" patterns (procedural code wearing a modern syntax).
- **`business-rules-extractor`** — Extracts business rules from procedural code with source citations. Each rule includes: what, where it's implemented, which conditions fire it, and any corner cases hidden in data.
- **`architecture-critic`** — Adversarial reviewer for target architectures and transformed code. Default stance is skeptical: asks "do we actually need this?" Flags microservices-for-the-resume, ceremonial error handling, abstractions with one implementation.
- **`security-auditor`** — Reviews transformed code for auth, input validation, secret handling, and dependency CVEs. Tuned for the kinds of issues that appear when translating security primitives across stacks (e.g., session handling from servlet to stateless JWT).
- **`test-engineer`** — Audits test suites for behavior-pinning vs. coverage-theater. Flags tests that exercise code paths without asserting outcomes.
## Installation
```
/plugin install code-modernization@claude-plugins-official
```
## Recommended Workspace Setup
This plugin ships commands and agents, but modernization projects benefit from a workspace permission layout that enforces the "never touch legacy, freely edit modernized" rule. A starting-point `.claude/settings.json` for the project directory you're modernizing:
```json
{
"permissions": {
"allow": [
"Bash(git diff:*)",
"Bash(git log:*)",
"Bash(git status:*)",
"Read(**)",
"Write(analysis/**)",
"Write(modernized/**)",
"Edit(analysis/**)",
"Edit(modernized/**)"
],
"deny": [
"Edit(legacy/**)"
]
}
}
```
Adjust `legacy/` and `modernized/` to match your actual layout. The key invariants: `Edit` under `legacy/` is denied, and writes are scoped to `analysis/` (for documents) and `modernized/` (for the new code).
## Typical Workflow
```bash
# 1. Write the brief — what are we modernizing and why?
/modernize-brief
# 2. Inventory the legacy code
/modernize-assess
# 3. Map legacy structure to target
/modernize-map
# 4. Extract business rules before touching the code
/modernize-extract-rules
# 5. Propose the target design and review it
/modernize-reimagine
# 6. Transform module by module
/modernize-transform
# 7. Harden: security, tests, observability
/modernize-harden
```
## License
Apache 2.0. See `LICENSE`.

View File

@@ -0,0 +1,36 @@
---
name: architecture-critic
description: Reviews proposed target architectures and transformed code against modern best practice. Adversarial — looks for over-engineering, missed requirements, and simpler alternatives.
tools: Read, Glob, Grep, Bash
---
You are a principal engineer reviewing a modernization design or a freshly
transformed module. Your default stance is **skeptical**. The team is excited
about the new shiny; your job is to ask "do we actually need this?"
## Review lens
For **architecture proposals**:
- Does every service boundary correspond to a real domain seam, or is this
microservices-for-the-resume?
- What's the simplest design that meets the stated requirements? How does
the proposal compare?
- Which non-functional requirements (latency, throughput, consistency) are
unstated, and does the design accidentally violate them?
- What's the data migration story? "We'll figure it out" is a finding.
- What happens when service X is down? Trace one failure mode end-to-end.
For **transformed code**:
- Is this idiomatic for the target stack, or is legacy structure leaking
through? (Flag "JOBOL" — procedural Java with COBOL variable names.)
- Is error handling meaningful or ceremonial?
- Are there abstractions with exactly one implementation and no second use
case in sight?
- Does the test suite actually pin behavior, or just exercise code paths?
- What would the on-call engineer need at 3am that isn't here?
## Output
Findings ranked **Blocker / High / Medium / Nit**. Each with: what, where,
why it matters, and a concrete suggested change. End with one paragraph:
"If I could only change one thing, it would be ___."

View File

@@ -0,0 +1,46 @@
---
name: business-rules-extractor
description: Mines domain logic, calculations, validations, and policies from legacy code into testable Given/When/Then specifications. Use when you need to separate "what the business requires" from "how the old code happened to implement it."
tools: Read, Glob, Grep, Bash
---
You are a business analyst who reads code. Your job is to find the **rules**
hidden inside legacy systems — the calculations, thresholds, eligibility
checks, and policies that define how the business actually operates — and
express them in a form that survives the rewrite.
## What counts as a business rule
- **Calculations**: interest, fees, taxes, discounts, scores, aggregates
- **Validations**: required fields, format checks, range limits, cross-field
- **Eligibility / authorization**: who can do what, when, under which conditions
- **State transitions**: status lifecycles, what triggers each transition
- **Policies**: retention periods, retry limits, cutoff times, rounding rules
## What does NOT count
Infrastructure, logging, error handling, UI layout, technical retries,
connection pooling. If a rule would be the same regardless of what language
the system was written in, it's a business rule. If it only exists because
of the technology, skip it.
## Extraction discipline
1. Find the rule in code. Record exact `file:line-line`.
2. State it in plain English a non-engineer would recognize.
3. Encode it as Given/When/Then with **concrete values**:
```
Given an account with balance $1,250.00 and APR 18.5%
When the monthly interest batch runs
Then the interest charged is $19.27 (balance × APR ÷ 12, rounded half-up to cents)
```
4. List the parameters (rates, limits, magic numbers) with their current
hardcoded values — these often need to become configuration.
5. Rate your confidence: **High** (logic is explicit), **Medium** (inferred
from structure/names), **Low** (ambiguous; needs SME).
6. If confidence < High, write the exact question an SME must answer.
## Output format
One "Rule Card" per rule (see the format in the modernize:extract-rules
command). Group by category. Lead with a summary table.

View File

@@ -0,0 +1,39 @@
---
name: legacy-analyst
description: Deep-reads legacy codebases (COBOL, Java, .NET, Node, anything) to build structural and behavioral understanding. Use for discovery, dependency mapping, dead-code detection, and "what does this system actually do" questions.
tools: Read, Glob, Grep, Bash
---
You are a senior legacy systems analyst with 20 years of experience reading
code nobody else wants to read — COBOL, JCL, RPG, classic ASP, EJB 2,
Struts 1, raw servlets, Perl CGI.
Your job is **understanding, not judgment**. The code in front of you kept a
business running for decades. Treat it with respect, figure out what it does,
and explain it in terms a modern engineer can act on.
## How you work
- **Read before you grep.** Open the entry points (main programs, JCL jobs,
controllers, routes) and trace the actual flow. Pattern-matching on names
lies; control flow doesn't.
- **Cite everything.** Every claim gets a `path/to/file:line` reference.
If you can't point to a line, you don't know it — say so.
- **Distinguish "is" from "appears to be."** When you're inferring intent
from structure, flag it: "appears to handle X (inferred from variable
names; no comments confirm)."
- **Use the right vocabulary for the stack.** COBOL has paragraphs,
copybooks, and FD entries. CICS has transactions and BMS maps. JCL has
steps and DD statements. Java has packages and beans. Use the native
terms so SMEs trust your output.
- **Find the data first.** In legacy systems, the data structures (copybooks,
DDL, schemas) are usually more stable and truthful than the procedural
code. Map the data, then map who touches it.
- **Note what's missing.** Unhandled error paths, TODO comments, commented-out
blocks, magic numbers — these are signals about history and risk.
## Output format
Default to structured markdown: tables for inventories, Mermaid for graphs,
bullet lists for findings. Always include a "Confidence & Gaps" footer
listing what you couldn't determine and what you'd ask an SME.

View File

@@ -0,0 +1,47 @@
---
name: security-auditor
description: Adversarial security reviewer — OWASP Top 10, CWE, dependency CVEs, secrets, injection. Use for security debt scanning and pre-modernization hardening.
tools: Read, Glob, Grep, Bash
---
You are an application security engineer performing an adversarial review.
Assume the code is hostile until proven otherwise. Your job is to find
vulnerabilities a real attacker would find — and explain them in terms an
engineer can fix.
## Coverage checklist
Work through systematically:
- **Injection** (SQL, NoSQL, OS command, LDAP, XPath, template) — trace every
user-controlled input to every sink
- **Authentication / session** — hardcoded creds, weak session handling,
missing auth checks on sensitive routes
- **Sensitive data exposure** — secrets in source, weak crypto, PII in logs
- **Access control** — IDOR, missing ownership checks, privilege escalation paths
- **XSS / CSRF** — unescaped output, missing tokens
- **Insecure deserialization** — pickle/yaml.load/ObjectInputStream on
untrusted data
- **Vulnerable dependencies** — run `npm audit` / `pip-audit` /
read manifests and flag versions with known CVEs
- **SSRF / path traversal / open redirect**
- **Security misconfiguration** — debug mode, verbose errors, default creds
## Tooling
Use available SAST where it helps (npm audit, pip-audit, grep for known-bad
patterns) but **read the code** — tools miss logic flaws. Show tool output
verbatim, then add your manual findings.
## Reporting standard
For each finding:
| Field | Content |
|---|---|
| **ID** | SEC-NNN |
| **CWE** | CWE-XXX with name |
| **Severity** | Critical / High / Medium / Low (CVSS-ish reasoning) |
| **Location** | `file:line` |
| **Exploit scenario** | One sentence: how an attacker uses this |
| **Fix** | Concrete code-level remediation |
No hand-waving. If you can't write the exploit scenario, downgrade severity.

View File

@@ -0,0 +1,36 @@
---
name: test-engineer
description: Writes characterization, contract, and equivalence tests that pin down legacy behavior so transformation can be proven correct. Use before any rewrite.
tools: Read, Write, Edit, Glob, Grep, Bash
---
You are a test engineer specializing in **characterization testing** —
writing tests that capture what legacy code *actually does* (not what
someone thinks it should do) so that a rewrite can be proven equivalent.
## Principles
- **The legacy code is the oracle.** If the legacy computes 19.27 and the
spec says 19.28, the test asserts 19.27 and you flag the discrepancy
separately. We're proving equivalence first; fixing bugs is a separate
decision.
- **Concrete over abstract.** Every test has literal input values and literal
expected outputs. No "should calculate correctly" — instead "given balance
1250.00 and APR 18.5%, returns 19.27".
- **Cover the edges the legacy covers.** Read the legacy code's branches.
Every IF/EVALUATE/switch arm gets at least one test case. Boundary values
(zero, negative, max, empty) get explicit cases.
- **Tests must run against BOTH.** Structure tests so the same inputs can be
fed to the legacy implementation (or a recorded trace of it) and the modern
one. The test harness compares.
- **Executable, not aspirational.** Tests compile and run from day one.
Behaviors not yet implemented in the target are marked
`@Disabled("pending RULE-NNN")` / `@pytest.mark.skip` / `it.todo()` — never
deleted.
## Output
Idiomatic tests for the requested target stack (JUnit 5 / pytest / Vitest /
xUnit), one test class/file per legacy module, test method names that read
as specifications. Include a `README.md` in the test directory explaining
how to run them and how to add a new case.

View File

@@ -0,0 +1,142 @@
---
description: Full discovery & portfolio analysis of a legacy system — inventory, complexity, debt, effort estimation
argument-hint: <system-dir> | --portfolio <parent-dir>
---
**Mode select.** If `$ARGUMENTS` starts with `--portfolio`, run **Portfolio
mode** against the directory that follows. Otherwise run **Single-system
mode** against `legacy/$1`.
---
# Portfolio mode (`--portfolio <parent-dir>`)
Sweep every immediate subdirectory of the parent dir and produce a
heat-map a steering committee can use to sequence a multi-year program.
## Step P1 — Per-system metrics
For each subdirectory `<sys>`:
```bash
cloc --quiet --csv <parent>/<sys> # LOC by language
lizard -s cyclomatic_complexity <parent>/<sys> 2>/dev/null | tail -1
```
Capture: total SLOC, dominant language, file count, mean & max
cyclomatic complexity (CCN). For dependency freshness, locate the
manifest (`package.json`, `pom.xml`, `*.csproj`, `requirements*.txt`,
copybook dir) and note its age / pinned-version count.
## Step P2 — COCOMO-II effort
Compute person-months per system using COCOMO-II basic:
`PM = 2.94 × (KSLOC)^1.10` (nominal scale factors). Show the formula and
inputs so the figure is defensible, not a guess.
## Step P3 — Documentation coverage
For each system, count source files with vs without a header comment
block, and list architecture docs present (`README`, `docs/`, ADRs).
Report coverage % and the top undocumented subsystems.
## Step P4 — Render the heat-map
Write `analysis/portfolio.html` (dark `#1e1e1e` bg, `#d4d4d4` text,
`#cc785c` accent, system-ui font, all CSS inline). One row per system;
columns: **System · Lang · KSLOC · Files · Mean CCN · Max CCN · Dep
Freshness · Doc Coverage % · COCOMO PM · Risk**. Color-grade the PM and
Risk cells (green→amber→red). Below the table, a 2-3 sentence
sequencing recommendation: which system first and why.
Then stop. Tell the user to open `analysis/portfolio.html`.
---
# Single-system mode
Perform a complete **modernization assessment** of `legacy/$1`.
This is the discovery phase — the goal is a fact-grounded executive brief that
a VP of Engineering could take into a budget meeting. Work in this order:
## Step 1 — Quantitative inventory
Run and show the output of:
```bash
scc legacy/$1
```
Then run `scc --by-file -s complexity legacy/$1 | head -25` to identify the
highest-complexity files. Capture the COCOMO effort/cost estimate scc provides.
## Step 2 — Technology fingerprint
Identify, with file evidence:
- Languages, frameworks, and runtime versions in use
- Build system and dependency manifest locations
- Data stores (schemas, copybooks, DDL, ORM configs)
- Integration points (queues, APIs, batch interfaces, screen maps)
- Test presence and approximate coverage signal
## Step 3 — Parallel deep analysis
Spawn three subagents **concurrently** using the Task tool:
1. **legacy-analyst** — "Build a structural map of legacy/$1: what are the
5-10 major functional domains, which source files belong to each, and how
do they depend on each other? Return a markdown table + a Mermaid
`graph TD` of domain-level dependencies. Cite file paths."
2. **legacy-analyst** — "Identify technical debt in legacy/$1: dead code,
deprecated APIs, copy-paste duplication, god objects/programs, missing
error handling, hardcoded config. Return the top 10 findings ranked by
remediation value, each with file:line evidence."
3. **security-auditor** — "Scan legacy/$1 for security vulnerabilities:
injection, auth weaknesses, hardcoded secrets, vulnerable dependencies,
missing input validation. Return findings in CWE-tagged table form with
file:line evidence and severity."
Wait for all three. Synthesize their findings.
## Step 4 — Production runtime overlay (observability)
If the system has batch jobs (e.g. JCL members under `app/jcl/`), call the
`observability` MCP tool `get_batch_runtimes` for each business-relevant
job name (interest, posting, statement, reporting). Use the returned
p50/p95/p99 and 90-day series to:
- Tag each functional domain from Step 3 with its production wall-clock
cost and **p99 variance** (p99/p50 ratio).
- Flag the highest-variance domain as the highest operational risk —
this is telemetry-grounded, not a static-analysis opinion.
Include a small **Batch Runtime** table (Job · Domain · p50 · p95 · p99 ·
p99/p50) in the assessment.
## Step 5 — Documentation gap analysis
Compare what the code *does* against what README/docs/comments *say*. List
the top 5 undocumented behaviors or subsystems that a new engineer would
need explained.
## Step 6 — Write the assessment
Create `analysis/$1/ASSESSMENT.md` with these sections:
- **Executive Summary** (3-4 sentences: what it is, how big, how risky, headline recommendation)
- **System Inventory** (the scc table + tech fingerprint)
- **Architecture-at-a-Glance** (the domain table; reference the diagram)
- **Production Runtime Profile** (the batch-runtime table from Step 4, with the highest-variance domain called out)
- **Technical Debt** (top 10, ranked)
- **Security Findings** (CWE table)
- **Documentation Gaps** (top 5)
- **Effort Estimation** (COCOMO-derived person-months, ±range, key cost drivers)
- **Recommended Modernization Pattern** (one of: Rehost / Replatform / Refactor / Rearchitect / Rebuild / Replace — with one-paragraph rationale)
Also create `analysis/$1/ARCHITECTURE.mmd` containing the Mermaid domain
dependency diagram from the legacy-analyst.
## Step 7 — Present
Tell the user the assessment is ready and suggest:
`glow -p analysis/$1/ASSESSMENT.md`

View File

@@ -0,0 +1,60 @@
---
description: Generate a phased Modernization Brief — the approved plan that transformation agents will execute against
argument-hint: <system-dir> [target-stack]
---
Synthesize everything in `analysis/$1/` into a **Modernization Brief** — the
single document a steering committee approves and engineering executes.
Target stack: `$2` (if blank, recommend one based on the assessment findings).
Read `analysis/$1/ASSESSMENT.md`, `TOPOLOGY.md`, and `BUSINESS_RULES.md` first.
If any are missing, say so and stop.
## The Brief
Write `analysis/$1/MODERNIZATION_BRIEF.md`:
### 1. Objective
One paragraph: from what, to what, why now.
### 2. Target Architecture
Mermaid C4 Container diagram of the *end state*. Name every service, data
store, and integration. Below it, a table mapping legacy component → target
component(s).
### 3. Phased Sequence
Break the work into 3-6 phases using **strangler-fig ordering** — lowest-risk,
fewest-dependencies first. For each phase:
- Scope (which legacy modules, which target services)
- Entry criteria (what must be true to start)
- Exit criteria (what tests/metrics prove it's done)
- Estimated effort (person-weeks, derived from COCOMO + complexity data)
- Risk level + top 2 risks + mitigation
Render the phases as a Mermaid `gantt` chart.
### 4. Behavior Contract
List the **P0 behaviors** from BUSINESS_RULES.md that MUST be proven
equivalent before any phase ships. These become the regression suite.
### 5. Validation Strategy
State which combination applies: characterization tests, contract tests,
parallel-run / dual-execution diff, property-based tests, manual UAT.
Justify per phase.
### 6. Open Questions
Anything requiring human/SME decision before Phase 1 starts. Each as a
checkbox the approver must tick.
### 7. Approval Block
```
Approved by: ________________ Date: __________
Approval covers: Phase 1 only | Full plan
```
## Present
Enter **plan mode** and present a summary of the brief. Do NOT proceed to any
transformation until the user explicitly approves. This gate is the
human-in-the-loop control point.

View File

@@ -0,0 +1,68 @@
---
description: Mine business logic from legacy code into testable, human-readable rule specifications
argument-hint: <system-dir> [module-pattern]
---
Extract the **business rules** embedded in `legacy/$1` into a structured,
testable specification — the institutional knowledge that's currently locked
in code and in the heads of engineers who are about to retire.
Scope: if a module pattern was given (`$2`), focus there; otherwise cover the
entire system. Either way, prioritize calculation, validation, eligibility,
and state-transition logic over plumbing.
## Method
Spawn **three business-rules-extractor subagents in parallel**, each assigned
a different lens. If `$2` is non-empty, include "focusing on files matching
$2" in each prompt.
1. **Calculations** — "Find every formula, rate, threshold, and computed value
in legacy/$1. For each: what does it compute, what are the inputs, what is
the exact formula/algorithm, where is it implemented (file:line), and what
edge cases does the code handle?"
2. **Validations & eligibility** — "Find every business validation, eligibility
check, and guard condition in legacy/$1. For each: what is being checked,
what happens on pass/fail, where is it (file:line)?"
3. **State & lifecycle** — "Find every status field, state machine, and
lifecycle transition in legacy/$1. For each entity: what states exist,
what triggers transitions, what side-effects fire?"
## Synthesize
Merge the three result sets. Deduplicate. For each distinct rule, write a
**Rule Card** in this exact format:
```
### RULE-NNN: <plain-English name>
**Category:** Calculation | Validation | Lifecycle | Policy
**Source:** `path/to/file.ext:line-line`
**Plain English:** One sentence a business analyst would recognize.
**Specification:**
Given <precondition>
When <trigger>
Then <outcome>
[And <additional outcome>]
**Parameters:** <constants, rates, thresholds with their current values>
**Edge cases handled:** <list>
**Confidence:** High | Medium | Low — <why>
```
Write all rule cards to `analysis/$1/BUSINESS_RULES.md` with:
- A summary table at top (ID, name, category, source, confidence)
- Rule cards grouped by category
- A final **"Rules requiring SME confirmation"** section listing every
Medium/Low confidence rule with the specific question a human needs to answer
## Generate the DTO catalog
As a companion, create `analysis/$1/DATA_OBJECTS.md` cataloging the core
data transfer objects / records / entities: name, fields with types, which
rules consume/produce them, source location.
## Present
Report: total rules found, breakdown by category, count needing SME review.
Suggest: `glow -p analysis/$1/BUSINESS_RULES.md`

View File

@@ -0,0 +1,46 @@
---
description: Security vulnerability scan + remediation — OWASP, CVE, secrets, injection
argument-hint: <system-dir>
---
Run a **security hardening pass** on `legacy/$1`: find vulnerabilities, rank
them, and fix the critical ones.
## Scan
Spawn the **security-auditor** subagent:
"Adversarially audit legacy/$1 for security vulnerabilities. Cover:
OWASP Top 10 (injection, broken auth, XSS, SSRF, etc.), hardcoded secrets,
vulnerable dependency versions (check package manifests against known CVEs),
missing input validation, insecure deserialization, path traversal.
For each finding return: CWE ID, severity (Critical/High/Med/Low), file:line,
one-sentence exploit scenario, and recommended fix. Also run any available
SAST tooling (npm audit, pip-audit, OWASP dependency-check) and include
its raw output."
## Triage
Write `analysis/$1/SECURITY_FINDINGS.md`:
- Summary scorecard (count by severity, top CWE categories)
- Findings table sorted by severity
- Dependency CVE table (package, installed version, CVE, fixed version)
## Remediate
For each **Critical** and **High** finding, fix it directly in the source.
Make minimal, targeted changes. After each fix, add a one-line entry under
"Remediation Log" in SECURITY_FINDINGS.md: finding ID → commit-style summary
of what changed.
Show the cumulative diff:
```bash
git -C legacy/$1 diff
```
## Verify
Re-run the security-auditor against the patched code to confirm the
Critical/High findings are resolved. Update the scorecard with before/after.
Suggest: `glow -p analysis/$1/SECURITY_FINDINGS.md`

View File

@@ -0,0 +1,66 @@
---
description: Dependency & topology mapping — call graphs, data lineage, batch flows, rendered as navigable diagrams
argument-hint: <system-dir>
---
Build a **dependency and topology map** of `legacy/$1` and render it visually.
The assessment gave us domains. Now go one level deeper: how do the *pieces*
connect? This is the map an engineer needs before touching anything.
## What to produce
Write a one-off analysis script (Python or shell — your choice) that parses
the source under `legacy/$1` and extracts:
- **Program/module call graph** — who calls whom (for COBOL: `CALL` statements
and CICS `LINK`/`XCTL`; for Java: class-level imports/invocations; for Node:
`require`/`import`)
- **Data dependency graph** — which programs read/write which data stores
(COBOL: copybooks + VSAM/DB2 in JCL DD statements; Java: JPA entities/tables;
Node: model files)
- **Entry points** — batch jobs, transaction IDs, HTTP routes, CLI commands
- **Dead-end candidates** — modules with no inbound edges (potential dead code)
Save the script as `analysis/$1/extract_topology.py` (or `.sh`) so it can be
re-run and audited. Run it. Show the raw output.
## Render
From the extracted data, generate **three Mermaid diagrams** and write them
to `analysis/$1/TOPOLOGY.html` so the artifact pane renders them live.
The HTML page must use: dark `#1e1e1e` background, `#d4d4d4` text,
`#cc785c` for `<h2>`/accents, `system-ui` font, all CSS **inline** (no
external stylesheets). Each diagram goes in a
`<pre class="mermaid">...</pre>` block — the artifact server loads
mermaid.js and renders client-side. Do **not** wrap diagrams in
markdown ` ``` ` fences inside the HTML.
1. **`graph TD` — Module call graph.** Cluster by domain (use `subgraph`).
Highlight entry points in a distinct style. Cap at ~40 nodes — if larger,
show domain-level with one expanded domain.
2. **`graph LR` — Data lineage.** Programs → data stores.
Mark read vs write edges.
3. **`flowchart TD` — Critical path.** Trace ONE end-to-end business flow
(e.g., "monthly billing run" or "process payment") through every program
and data store it touches, in execution order. If the `observability`
MCP server is connected, annotate each batch step with its p50/p99
wall-clock from `get_batch_runtimes`.
Also export the three diagrams as standalone `.mmd` files for re-use:
`analysis/$1/call-graph.mmd`, `analysis/$1/data-lineage.mmd`,
`analysis/$1/critical-path.mmd`.
## Annotate
Below each `<pre class="mermaid">` block in TOPOLOGY.html, add a `<ul>`
with 3-5 **architect observations**: tight coupling clusters, single
points of failure, candidates for service extraction, data stores
touched by too many writers.
## Present
Tell the user to open `analysis/$1/TOPOLOGY.html` in the artifact pane.

View File

@@ -0,0 +1,82 @@
---
description: Multi-agent greenfield rebuild — extract specs from legacy, design AI-native, scaffold & validate with HITL
argument-hint: <system-dir> <target-vision>
---
**Reimagine** `legacy/$1` as: $2
This is not a port — it's a rebuild from extracted intent. The legacy system
becomes the *specification source*, not the structural template. This command
orchestrates a multi-agent team with explicit human checkpoints.
## Phase A — Specification mining (parallel agents)
Spawn concurrently and show the user that all three are running:
1. **business-rules-extractor** — "Extract every business rule from legacy/$1
into Given/When/Then form. Output to a structured list I can parse."
2. **legacy-analyst** — "Catalog every external interface of legacy/$1:
inbound (screens, APIs, batch triggers, queues) and outbound (reports,
files, downstream calls, DB writes). For each: name, direction, payload
shape, frequency/SLA if discernible."
3. **legacy-analyst** — "Identify the core domain entities in legacy/$1 and
their relationships. Return as an entity list + Mermaid erDiagram."
Collect results. Write `analysis/$1/AI_NATIVE_SPEC.md` containing:
- **Capabilities** (what the system must do — derived from rules + interfaces)
- **Domain Model** (entities + erDiagram)
- **Interface Contracts** (each external interface as an OpenAPI fragment or
AsyncAPI fragment)
- **Non-functional requirements** inferred from legacy (batch windows, volumes)
- **Behavior Contract** (the Given/When/Then rules — these are the acceptance tests)
## Phase B — HITL checkpoint #1
Present the spec summary. Ask the user **one focused question**: "Which of
these capabilities are P0 for the reimagined system, and are there any we
should deliberately drop?" Wait for the answer. Record it in the spec.
## Phase C — Architecture (single agent, then critique)
Design the target architecture for "$2":
- Mermaid C4 Container diagram
- Service boundaries with rationale (which rules/entities live where)
- Technology choices with one-line justification each
- Data migration approach from legacy stores
Then spawn **architecture-critic**: "Review this proposed architecture for
$2 against the spec in analysis/$1/AI_NATIVE_SPEC.md. Identify over-engineering,
missed requirements, scaling risks, and simpler alternatives." Incorporate
the critique. Write the result to `analysis/$1/REIMAGINED_ARCHITECTURE.md`.
## Phase D — HITL checkpoint #2
Enter plan mode. Present the architecture. Wait for approval.
## Phase E — Parallel scaffolding
For each service in the approved architecture (cap at 3 for the demo), spawn
a **general-purpose agent in parallel**:
"Scaffold the <service-name> service per analysis/$1/REIMAGINED_ARCHITECTURE.md
and AI_NATIVE_SPEC.md. Create: project skeleton, domain model, API stubs
matching the interface contracts, and **executable acceptance tests** for every
behavior-contract rule assigned to this service (mark unimplemented ones as
expected-failure/skip with the rule ID). Write to modernized/$1-reimagined/<service-name>/."
Show the agents' progress. When all complete, run the acceptance test suites
and report: total tests, passing (scaffolded behavior), pending (rule IDs
awaiting implementation).
## Phase F — Knowledge graph handoff
Write `modernized/$1-reimagined/CLAUDE.md` — the persistent context file for
the new system, containing: architecture summary, service responsibilities,
where the spec lives, how to run tests, and the legacy→modern traceability
map. This file IS the knowledge graph that future agents and engineers will
load.
Report: services scaffolded, acceptance tests defined, % behaviors with a
home, location of all artifacts.

View File

@@ -0,0 +1,78 @@
---
description: Transform one legacy module to the target stack — idiomatic rewrite with behavior-equivalence tests
argument-hint: <system-dir> <module> <target-stack>
---
Transform `legacy/$1` module **`$2`** into **$3**, with proof of behavioral
equivalence.
This is a surgical, single-module transformation — one vertical slice of the
strangler fig. Output goes to `modernized/$1/$2/`.
## Step 0 — Plan (HITL gate)
Read the source module and any business rules in `analysis/$1/BUSINESS_RULES.md`
that reference it. Then **enter plan mode** and present:
- Which source files are in scope
- The target module structure (packages/classes/files you'll create)
- Which business rules / behaviors this module implements
- How you'll prove equivalence (test strategy)
- Anything ambiguous that needs a human decision NOW
Wait for approval before writing any code.
## Step 1 — Characterization tests FIRST
Before writing target code, spawn the **test-engineer** subagent:
"Write characterization tests for legacy/$1 module $2. Read the source,
identify every observable behavior, and encode each as a test case with
concrete input → expected output pairs derived from the legacy logic.
Target framework: <appropriate for $3>. Write to
`modernized/$1/$2/src/test/`. These tests define 'done' — the new code
must pass all of them."
Show the user the test file. Get a 👍 before proceeding.
## Step 2 — Idiomatic transformation
Write the target implementation in `modernized/$1/$2/src/main/`.
**Critical:** Write code a senior $3 engineer would write from the
*specification*, not from the legacy structure. Do NOT mirror COBOL paragraphs
as methods, do NOT preserve legacy variable names like `WS-TEMP-AMT-X`.
Use the target language's idioms: records/dataclasses, streams, dependency
injection, proper error types, etc.
Include: domain model, service logic, API surface (REST controller or
equivalent), and configuration. Add concise Javadoc/docstrings linking each
class back to the rule IDs it implements.
## Step 3 — Prove it
Run the characterization tests:
```bash
cd modernized/$1/$2 && <appropriate test command for $3>
```
Show the output. If anything fails, fix and re-run until green.
## Step 4 — Side-by-side review
Generate `modernized/$1/$2/TRANSFORMATION_NOTES.md`:
- Mapping table: legacy file:lines → target file:lines, per behavior
- Deliberate deviations from legacy behavior (with rationale)
- What was NOT migrated (dead code, unreachable branches) and why
- Follow-ups for the next module that depends on this one
Then show a visual diff of one representative behavior, legacy vs modern:
```bash
delta --side-by-side <(sed -n '<lines>p' legacy/$1/<file>) modernized/$1/$2/src/main/<file>
```
## Step 5 — Architecture review
Spawn the **architecture-critic** subagent to review the transformed code
against $3 best practices. Apply any HIGH-severity feedback; list the rest
in TRANSFORMATION_NOTES.md.
Report: tests passing, lines of legacy retired, location of artifacts.

View File

@@ -1,6 +1,6 @@
---
name: skill-creator
description: Create new skills, modify and improve existing skills, and measure skill performance. Use when users want to create a skill from scratch, update or optimize an existing skill, run evals to test a skill, benchmark skill performance with variance analysis, or optimize a skill's description for better triggering accuracy.
description: Create new skills, modify and improve existing skills, and measure skill performance. Use when users want to create a skill from scratch, edit, or optimize an existing skill, run evals to test a skill, benchmark skill performance with variance analysis, or optimize a skill's description for better triggering accuracy.
---
# Skill Creator
@@ -391,7 +391,7 @@ Use the model ID from your system prompt (the one powering the current session)
While it runs, periodically tail the output to give the user updates on which iteration it's on and what the scores look like.
This handles the full optimization loop automatically. It splits the eval set into 60% train and 40% held-out test, evaluates the current description (running each query 3 times to get a reliable trigger rate), then calls Claude with extended thinking to propose improvements based on what failed. It re-evaluates each new description on both train and test, iterating up to 5 times. When it's done, it opens an HTML report in the browser showing the results per iteration and returns JSON with `best_description` — selected by test score rather than train score to avoid overfitting.
This handles the full optimization loop automatically. It splits the eval set into 60% train and 40% held-out test, evaluates the current description (running each query 3 times to get a reliable trigger rate), then calls Claude to propose improvements based on what failed. It re-evaluates each new description on both train and test, iterating up to 5 times. When it's done, it opens an HTML report in the browser showing the results per iteration and returns JSON with `best_description` — selected by test score rather than train score to avoid overfitting.
### How skill triggering works
@@ -435,6 +435,11 @@ In Claude.ai, the core workflow is the same (draft → test → review → impro
**Packaging**: The `package_skill.py` script works anywhere with Python and a filesystem. On Claude.ai, you can run it and the user can download the resulting `.skill` file.
**Updating an existing skill**: The user might be asking you to update an existing skill, not create a new one. In this case:
- **Preserve the original name.** Note the skill's directory name and `name` frontmatter field -- use them unchanged. E.g., if the installed skill is `research-helper`, output `research-helper.skill` (not `research-helper-v2`).
- **Copy to a writeable location before editing.** The installed skill path may be read-only. Copy to `/tmp/skill-name/`, edit there, and package from the copy.
- **If packaging manually, stage in `/tmp/` first**, then copy to the output directory -- direct writes may fail due to permissions.
---
## Cowork-Specific Instructions
@@ -447,6 +452,7 @@ If you're in Cowork, the main things to know are:
- Feedback works differently: since there's no running server, the viewer's "Submit All Reviews" button will download `feedback.json` as a file. You can then read it from there (you may have to request access first).
- Packaging works — `package_skill.py` just needs Python and a filesystem.
- Description optimization (`run_loop.py` / `run_eval.py`) should work in Cowork just fine since it uses `claude -p` via subprocess, not a browser, but please save it until you've fully finished making the skill and the user agrees it's in good shape.
- **Updating an existing skill**: The user might be asking you to update an existing skill, not create a new one. Follow the update guidance in the claude.ai section above.
---

View File

@@ -2,22 +2,52 @@
"""Improve a skill description based on eval results.
Takes eval results (from run_eval.py) and generates an improved description
using Claude with extended thinking.
by calling `claude -p` as a subprocess (same auth pattern as run_eval.py —
uses the session's Claude Code auth, no separate ANTHROPIC_API_KEY needed).
"""
import argparse
import json
import os
import re
import subprocess
import sys
from pathlib import Path
import anthropic
from scripts.utils import parse_skill_md
def _call_claude(prompt: str, model: str | None, timeout: int = 300) -> str:
    """Invoke `claude -p` as a subprocess and return its stdout text.

    The prompt is fed via stdin rather than argv: it embeds the full
    SKILL.md body, which can easily blow past comfortable argv length.

    Raises:
        RuntimeError: if the CLI exits with a nonzero status.
    """
    args = ["claude", "-p", "--output-format", "text"]
    args += ["--model", model] if model else []
    # Drop CLAUDECODE so a nested `claude -p` is allowed inside a Claude
    # Code session. That guard exists for interactive terminal conflicts;
    # a programmatic subprocess call is safe. Same pattern as run_eval.py.
    child_env = dict(os.environ)
    child_env.pop("CLAUDECODE", None)
    proc = subprocess.run(
        args,
        input=prompt,
        capture_output=True,
        text=True,
        env=child_env,
        timeout=timeout,
    )
    if proc.returncode != 0:
        raise RuntimeError(
            f"claude -p exited {proc.returncode}\nstderr: {proc.stderr}"
        )
    return proc.stdout
def improve_description(
client: anthropic.Anthropic,
skill_name: str,
skill_content: str,
current_description: str,
@@ -99,7 +129,7 @@ Based on the failures, write a new and improved description that is more likely
1. Avoid overfitting
2. The list might get loooong and it's injected into ALL queries and there might be a lot of skills, so we don't want to blow too much space on any given description.
Concretely, your description should not be more than about 100-200 words, even if that comes at the cost of accuracy.
Concretely, your description should not be more than about 100-200 words, even if that comes at the cost of accuracy. There is a hard limit of 1024 characters — descriptions over that will be truncated, so stay comfortably under it.
Here are some tips that we've found to work well in writing these descriptions:
- The skill should be phrased in the imperative -- "Use this skill for" rather than "this skill does"
@@ -111,70 +141,41 @@ I'd encourage you to be creative and mix up the style in different iterations si
Please respond with only the new description text in <new_description> tags, nothing else."""
response = client.messages.create(
model=model,
max_tokens=16000,
thinking={
"type": "enabled",
"budget_tokens": 10000,
},
messages=[{"role": "user", "content": prompt}],
)
text = _call_claude(prompt, model)
# Extract thinking and text from response
thinking_text = ""
text = ""
for block in response.content:
if block.type == "thinking":
thinking_text = block.thinking
elif block.type == "text":
text = block.text
# Parse out the <new_description> tags
match = re.search(r"<new_description>(.*?)</new_description>", text, re.DOTALL)
description = match.group(1).strip().strip('"') if match else text.strip().strip('"')
# Log the transcript
transcript: dict = {
"iteration": iteration,
"prompt": prompt,
"thinking": thinking_text,
"response": text,
"parsed_description": description,
"char_count": len(description),
"over_limit": len(description) > 1024,
}
# If over 1024 chars, ask the model to shorten it
# Safety net: the prompt already states the 1024-char hard limit, but if
# the model blew past it anyway, make one fresh single-turn call that
# quotes the too-long version and asks for a shorter rewrite. (The old
# SDK path did this as a true multi-turn; `claude -p` is one-shot, so we
# inline the prior output into the new prompt instead.)
if len(description) > 1024:
shorten_prompt = f"Your description is {len(description)} characters, which exceeds the hard 1024 character limit. Please rewrite it to be under 1024 characters while preserving the most important trigger words and intent coverage. Respond with only the new description in <new_description> tags."
shorten_response = client.messages.create(
model=model,
max_tokens=16000,
thinking={
"type": "enabled",
"budget_tokens": 10000,
},
messages=[
{"role": "user", "content": prompt},
{"role": "assistant", "content": text},
{"role": "user", "content": shorten_prompt},
],
shorten_prompt = (
f"{prompt}\n\n"
f"---\n\n"
f"A previous attempt produced this description, which at "
f"{len(description)} characters is over the 1024-character hard limit:\n\n"
f'"{description}"\n\n'
f"Rewrite it to be under 1024 characters while keeping the most "
f"important trigger words and intent coverage. Respond with only "
f"the new description in <new_description> tags."
)
shorten_thinking = ""
shorten_text = ""
for block in shorten_response.content:
if block.type == "thinking":
shorten_thinking = block.thinking
elif block.type == "text":
shorten_text = block.text
shorten_text = _call_claude(shorten_prompt, model)
match = re.search(r"<new_description>(.*?)</new_description>", shorten_text, re.DOTALL)
shortened = match.group(1).strip().strip('"') if match else shorten_text.strip().strip('"')
transcript["rewrite_prompt"] = shorten_prompt
transcript["rewrite_thinking"] = shorten_thinking
transcript["rewrite_response"] = shorten_text
transcript["rewrite_description"] = shortened
transcript["rewrite_char_count"] = len(shortened)
@@ -216,9 +217,7 @@ def main():
print(f"Current: {current_description}", file=sys.stderr)
print(f"Score: {eval_results['summary']['passed']}/{eval_results['summary']['total']}", file=sys.stderr)
client = anthropic.Anthropic()
new_description = improve_description(
client=client,
skill_name=name,
skill_content=content,
current_description=current_description,

View File

@@ -15,8 +15,6 @@ import time
import webbrowser
from pathlib import Path
import anthropic
from scripts.generate_report import generate_html
from scripts.improve_description import improve_description
from scripts.run_eval import find_project_root, run_eval
@@ -75,7 +73,6 @@ def run_loop(
train_set = eval_set
test_set = []
client = anthropic.Anthropic()
history = []
exit_reason = "unknown"
@@ -200,7 +197,6 @@ def run_loop(
for h in history
]
new_description = improve_description(
client=client,
skill_name=name,
skill_content=content,
current_description=current_description,