Release v4.0.3: Strengthen using-superpowers for explicit skill requests

Bump version to 4.0.3
Strengthen using-superpowers for explicit skill requests
2026-04-19 13:32:40 +00:00 · 2025-12-26 22:55:32 -06:00 · 2025-12-26 22:53:58 -06:00 · 2025-12-26 22:41:22 -06:00 · 2025-12-23 23:03:31 -08:00 · 2025-12-23 23:03:19 -08:00
72 changed files with 2663 additions and 748 deletions
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -9,7 +9,7 @@
    {
      "name": "superpowers",
      "description": "Core skills library for Claude Code: TDD, debugging, collaboration patterns, and proven techniques",
-      "version": "3.5.1",
+      "version": "4.0.3",
      "source": "./",
      "author": {
        "name": "Jesse Vincent",
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -1,7 +1,7 @@
 {
  "name": "superpowers",
  "description": "Core skills library for Claude Code: TDD, debugging, collaboration patterns, and proven techniques",
-  "version": "3.5.1",
+  "version": "4.0.3",
  "author": {
    "name": "Jesse Vincent",
    "email": "jesse@fsck.com"
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -1,141 +0,0 @@
-{
-  "permissions": {
-    "allow": [
-      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/getting-started/**)",
-      "Read(//Users/jesse/Downloads/**)",
-      "Bash(~/.claude/plugins/cache/superpowers/skills/getting-started/list-skills)",
-      "Bash(~/.claude/plugins/cache/superpowers/skills/getting-started/skills-search \"prompt\")",
-      "Bash(~/.claude/plugins/cache/superpowers/skills/getting-started/skills-search \"communication\")",
-      "Bash(~/.claude/plugins/cache/superpowers/skills/getting-started/skills-search \"interaction\")",
-      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/meta/testing-skills-with-subagents/**)",
-      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/collaboration/dispatching-parallel-agents/**)",
-      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/collaboration/requesting-code-review/**)",
-      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/collaboration/writing-plans/**)",
-      "mcp__journal__search_journal",
-      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/meta/creating-skills/**)",
-      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/collaboration/brainstorming/**)",
-      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/**)",
-      "Read(//Users/jesse/.claude/plugins/cache/**)",
-      "mcp__journal__read_journal_entry",
-      "Bash(/Users/jesse/git/superpowers/superpowers/skills/getting-started/list-skills)",
-      "Bash(/Users/jesse/git/superpowers/superpowers/skills/getting-started/skills-search refactor)",
-      "Read(//Users/jesse/Documents/GitHub/superpowers/**)",
-      "Bash(${CLAUDE_PLUGIN_ROOT}/skills/getting-started/list-skills:*)",
-      "Bash(/Users/jesse/Documents/GitHub/superpowers/superpowers/skills/getting-started/list-skills)",
-      "Bash(/Users/jesse/Documents/GitHub/superpowers/superpowers/skills/getting-started/skills-search editing)",
-      "Bash(list-skills brainstorm)",
-      "Read(//Users/jesse/.claude/commands/**)",
-      "Bash(git checkout:*)",
-      "Bash(/Users/jesse/.claude/plugins/cache/superpowers/skills/getting-started/list-skills)",
-      "Bash(ln:*)",
-      "Bash(git add:*)",
-      "Bash(git commit:*)",
-      "Bash(git push:*)",
-      "Read(//Users/jesse/.claude/plugins/**)",
-      "Read(//Users/jesse/.claude/**)",
-      "Bash(cat:*)",
-      "Read(//Users/jesse/.superpowers/**)",
-      "Bash(find:*)",
-      "Read(//Users/jesse/.clank/**)",
-      "Bash(./search-conversations:*)",
-      "Bash(./skills/collaboration/remembering-conversations/tool/search-conversations:*)",
-      "Bash(npm install)",
-      "Bash(sqlite3:*)",
-      "Bash(chmod:*)",
-      "Bash(/Users/jesse/Documents/GitHub/superpowers/superpowers/skills/collaboration/remembering-conversations/tool/migrate-to-config.sh:*)",
-      "Read(//Users/jesse/.config/superpowers/**)",
-      "Bash(./index-conversations --help)",
-      "Bash(./index-conversations:*)",
-      "Bash(bc)",
-      "Bash(bc:*)",
-      "Bash(./scripts/find-skills)",
-      "Bash(./scripts/run:*)",
-      "Bash(./scripts/find-skills test)",
-      "Bash(find-skills:*)",
-      "Bash(/Users/jesse/.claude/plugins/cache/superpowers/scripts/find-skills refactor)",
-      "Bash(mkdir:*)",
-      "Bash(git worktree add:*)",
-      "Bash([ -f package.json ])",
-      "Bash(git worktree:*)",
-      "Bash(gh repo create:*)",
-      "Bash(git clone:*)",
-      "Bash(gh repo view:*)",
-      "Bash(test:*)",
-      "Bash(git ls-tree:*)",
-      "Bash(git rm:*)",
-      "Bash(git mv:*)",
-      "Bash(/Users/jesse/Documents/GitHub/superpowers/superpowers-skills/skills/using-skills/find-skills)",
-      "Bash(tree:*)",
-      "Bash(/Users/jesse/Documents/GitHub/superpowers/superpowers-skills/skills/using-skills/skill-run --help)",
-      "Bash(echo:*)",
-      "Bash(git log:*)",
-      "Bash(git show:*)",
-      "Bash(git diff-tree:*)",
-      "Bash(bash:*)",
-      "Bash(xargs ls:*)",
-      "Bash(git rev-parse:*)",
-      "Bash(git reset:*)",
-      "Bash(./skills/using-skills/find-skills)",
-      "Bash(git rebase:*)",
-      "Bash(GIT_SEQUENCE_EDITOR=\"sed -i '' 's/^pick 683707a/edit 683707a/'\" git rebase:*)",
-      "Bash(gh pr create:*)",
-      "Bash(for:*)",
-      "Bash(do [ -f \"$skill\" ])",
-      "Bash(! grep -q \"^when_to_use:\" \"$skill\")",
-      "Bash(done)",
-      "Bash(gh issue view:*)",
-      "Bash(gh pr view:*)",
-      "Bash(gh pr diff:*)",
-      "Bash(/Users/jesse/Documents/GitHub/superpowers/superpowers-skills/skills/using-skills/find-skills test)",
-      "Bash(xargs -I {} bash -c 'dir=$(echo {} | sed \"\"\"\"s|/SKILL.md||\"\"\"\" | xargs basename); name=$(grep \"\"\"\"^name:\"\"\"\" {} | sed \"\"\"\"s/^name: //\"\"\"\"); echo \"\"\"\"$dir -> $name\"\"\"\"')",
-      "mcp__obsidian-mcp-tools__fetch",
-      "Skill(superpowers:using-git-worktrees)",
-      "Skill(superpowers:subagent-driven-development)",
-      "Bash(./test-raw.sh:*)",
-      "Bash(./chrome-ws raw \"ws://localhost:9222/devtools/page/test\" '{\"\"id\"\":1,\"\"method\"\":\"\"Browser.getVersion\"\"}')",
-      "Bash(./test-tabs.sh:*)",
-      "Bash(curl:*)",
-      "Bash(./chrome-ws tabs:*)",
-      "Bash(./chrome-ws close:*)",
-      "Bash(./chrome-ws raw:*)",
-      "Bash(./chrome-ws new:*)",
-      "Bash(./test-navigate.sh:*)",
-      "Bash(./test-interact.sh:*)",
-      "Bash(./test-extract.sh)",
-      "Bash(./test-wait.sh:*)",
-      "Bash(./test-e2e.sh:*)",
-      "Bash(./chrome-ws extract:*)",
-      "Bash(./chrome-ws screenshot:*)",
-      "Bash(./chrome-ws start:*)",
-      "Bash(./chrome-ws navigate:*)",
-      "Bash(git init:*)",
-      "Bash(git tag:*)",
-      "Skill(example-skills:mcp-builder)",
-      "Bash(npm run build)",
-      "Bash(npm run clean)",
-      "Bash(timeout 3s node dist/index.js)",
-      "Bash(git -C /Users/jesse/Documents/GitHub/superpowers/superpowers-chrome ls-files .claude-plugin/marketplace.json)",
-      "mcp__private-journal__read_journal_entry",
-      "Bash(git pull:*)",
-      "Skill(elements-of-style:writing-clearly-and-concisely)",
-      "Bash(gh release list:*)",
-      "Bash(gh release create:*)",
-      "Read(//Users/jesse/git/superpowers/superpowers-marketplace/.claude-plugin/**)",
-      "mcp__plugin_episodic-memory_episodic-memory__search",
-      "Skill(superpowers:writing-skills)",
-      "mcp__private-journal__process_thoughts",
-      "Skill(superpowers:brainstorming)",
-      "Skill(superpowers:using-superpowers)",
-      "Skill(episodic-memory:remembering-conversations)",
-      "Skill(superpowers-developing-for-claude-code:developing-claude-code-plugins)",
-      "Skill(working-with-claude-code)"
-    ],
-    "deny": [],
-    "ask": [],
-    "additionalDirectories": [
-      "/Users/jesse/Documents/GitHub/superpowers/superpowers-skills/",
-      "/Users/jesse/Documents/GitHub/superpowers/superpowers-marketplace",
-      "/Users/jesse/Documents/GitHub/superpowers/using-chrome-directly/"
-    ]
-  }
-}
--- a/.codex/superpowers-codex
+++ b/.codex/superpowers-codex
@@ -229,7 +229,7 @@ function runUseSkill(skillName) {
    if (frontmatter.description) {
        console.log(`# ${frontmatter.description}`);
    }
-    console.log(`# Supporting tools and docs are in ${skillDirectory}`);
+    console.log(`# Skill-specific tools and reference files live in ${skillDirectory}`);
    console.log('# ============================================');
    console.log('');

@@ -264,4 +264,4 @@ switch (command) {
        console.log('  superpowers-codex use-skill superpowers:brainstorming');
        console.log('  superpowers-codex use-skill my-custom-skill');
        break;
-}
+}
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 .worktrees/
 .private-journal/
+.claude/
--- a/.opencode/plugin/superpowers.js
+++ b/.opencode/plugin/superpowers.js
@@ -67,7 +67,7 @@ ${toolMapping}
        path: { id: sessionID },
        body: {
          noReply: true,
-          parts: [{ type: "text", text: bootstrapContent }]
+          parts: [{ type: "text", text: bootstrapContent, synthetic: true }]
        }
      });
      return true;
@@ -132,8 +132,8 @@ ${toolMapping}
              body: {
                noReply: true,
                parts: [
-                  { type: "text", text: `Loading skill: ${name || skill_name}` },
-                  { type: "text", text: `${skillHeader}\n\n${content}` }
+                  { type: "text", text: `Loading skill: ${name || skill_name}`, synthetic: true },
+                  { type: "text", text: `${skillHeader}\n\n${content}`, synthetic: true }
                ]
              }
            });
--- a/README.md
+++ b/README.md
@@ -85,7 +85,7 @@ Fetch and follow instructions from https://raw.githubusercontent.com/obra/superp

 3. **writing-plans** - Activates with approved design. Breaks work into bite-sized tasks (2-5 minutes each). Every task has exact file paths, complete code, verification steps.

-4. **subagent-driven-development** or **executing-plans** - Activates with plan. Dispatches fresh subagent per task (same session, fast iteration) or executes in batches (parallel session, human checkpoints).
+4. **subagent-driven-development** or **executing-plans** - Activates with plan. Dispatches fresh subagent per task with two-stage review (spec compliance, then code quality), or executes in batches with human checkpoints.

 5. **test-driven-development** - Activates during implementation. Enforces RED-GREEN-REFACTOR: write failing test, watch it fail, write minimal code, watch it pass, commit. Deletes code written before tests.

@@ -100,15 +100,11 @@ Fetch and follow instructions from https://raw.githubusercontent.com/obra/superp
 ### Skills Library

 **Testing**
- **test-driven-development** - RED-GREEN-REFACTOR cycle
- **condition-based-waiting** - Async test patterns
- **testing-anti-patterns** - Common pitfalls to avoid
+- **test-driven-development** - RED-GREEN-REFACTOR cycle (includes testing anti-patterns reference)

-**Debugging** 
- **systematic-debugging** - 4-phase root cause process
- **root-cause-tracing** - Find the real problem
+**Debugging**
+- **systematic-debugging** - 4-phase root cause process (includes root-cause-tracing, defense-in-depth, condition-based-waiting techniques)
 - **verification-before-completion** - Ensure it's actually fixed
- **defense-in-depth** - Multiple validation layers

 **Collaboration** 
 - **brainstorming** - Socratic design refinement
@@ -119,11 +115,10 @@ Fetch and follow instructions from https://raw.githubusercontent.com/obra/superp
 - **receiving-code-review** - Responding to feedback
 - **using-git-worktrees** - Parallel development branches
 - **finishing-a-development-branch** - Merge/PR decision workflow
- **subagent-driven-development** - Fast iteration with quality gates
+- **subagent-driven-development** - Fast iteration with two-stage review (spec compliance, then code quality)

-**Meta** 
- **writing-skills** - Create new skills following best practices
- **testing-skills-with-subagents** - Validate skill quality
+**Meta**
+- **writing-skills** - Create new skills following best practices (includes testing methodology)
 - **using-superpowers** - Introduction to the skills system

 ## Philosophy
@@ -141,9 +136,8 @@ Skills live directly in this repository. To contribute:

 1. Fork the repository
 2. Create a branch for your skill
-3. Follow the `writing-skills` skill for creating new skills
-4. Use the `testing-skills-with-subagents` skill to validate quality
-5. Submit a PR
+3. Follow the `writing-skills` skill for creating and testing new skills
+4. Submit a PR

 See `skills/writing-skills/SKILL.md` for the complete guide.

--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -1,5 +1,150 @@
 # Superpowers Release Notes

+## v4.0.3 (2025-12-26)
+
+### Improvements
+
+**Strengthened using-superpowers skill for explicit skill requests**
+
+Addressed a failure mode where Claude would skip invoking a skill even when the user explicitly requested it by name (e.g., "subagent-driven-development, please"). Claude would think "I know what that means" and start working directly instead of loading the skill.
+
+Changes:
+- Updated "The Rule" to say "Invoke relevant or requested skills" instead of "Check for skills" - emphasizing active invocation over passive checking
+- Added "BEFORE any response or action" - the original wording only mentioned "response" but Claude would sometimes take action without responding first
+- Added reassurance that invoking a wrong skill is okay - reduces hesitation
+- Added new red flag: "I know what that means" → Knowing the concept ≠ using the skill
+
+**Added explicit skill request tests**
+
+New test suite in `tests/explicit-skill-requests/` that verifies Claude correctly invokes skills when users request them by name. Includes single-turn and multi-turn test scenarios.
+
+## v4.0.2 (2025-12-23)
+
+### Fixes
+
+**Slash commands now user-only**
+
+Added `disable-model-invocation: true` to all three slash commands (`/brainstorm`, `/execute-plan`, `/write-plan`). Claude can no longer invoke these commands via the Skill tool—they're restricted to manual user invocation only.
+
+The underlying skills (`superpowers:brainstorming`, `superpowers:executing-plans`, `superpowers:writing-plans`) remain available for Claude to invoke autonomously. This change prevents confusion when Claude would invoke a command that just redirects to a skill anyway.
+
+## v4.0.1 (2025-12-23)
+
+### Fixes
+
+**Clarified how to access skills in Claude Code**
+
+Fixed a confusing pattern where Claude would invoke a skill via the Skill tool, then try to Read the skill file separately. The `using-superpowers` skill now explicitly states that the Skill tool loads skill content directly—no need to read files.
+
+- Added "How to Access Skills" section to `using-superpowers`
+- Changed "read the skill" → "invoke the skill" in instructions
+- Updated slash commands to use fully qualified skill names (e.g., `superpowers:brainstorming`)
+
+**Added GitHub thread reply guidance to receiving-code-review** (h/t @ralphbean)
+
+Added a note about replying to inline review comments in the original thread rather than as top-level PR comments.
+
+**Added automation-over-documentation guidance to writing-skills** (h/t @EthanJStark)
+
+Added guidance that mechanical constraints should be automated, not documented—save skills for judgment calls.
+
+## v4.0.0 (2025-12-17)
+
+### New Features
+
+**Two-stage code review in subagent-driven-development**
+
+Subagent workflows now use two separate review stages after each task:
+
+1. **Spec compliance review** - Skeptical reviewer verifies implementation matches spec exactly. Catches missing requirements AND over-building. Won't trust implementer's report—reads actual code.
+
+2. **Code quality review** - Only runs after spec compliance passes. Reviews for clean code, test coverage, maintainability.
+
+This catches the common failure mode where code is well-written but doesn't match what was requested. Reviews are loops, not one-shot: if reviewer finds issues, implementer fixes them, then reviewer checks again.
+
+Other subagent workflow improvements:
+- Controller provides full task text to workers (not file references)
+- Workers can ask clarifying questions before AND during work
+- Self-review checklist before reporting completion
+- Plan read once at start, extracted to TodoWrite
+
+New prompt templates in `skills/subagent-driven-development/`:
+- `implementer-prompt.md` - Includes self-review checklist, encourages questions
+- `spec-reviewer-prompt.md` - Skeptical verification against requirements
+- `code-quality-reviewer-prompt.md` - Standard code review
+
+**Debugging techniques consolidated with tools**
+
+`systematic-debugging` now bundles supporting techniques and tools:
+- `root-cause-tracing.md` - Trace bugs backward through call stack
+- `defense-in-depth.md` - Add validation at multiple layers
+- `condition-based-waiting.md` - Replace arbitrary timeouts with condition polling
+- `find-polluter.sh` - Bisection script to find which test creates pollution
+- `condition-based-waiting-example.ts` - Complete implementation from real debugging session
+
+**Testing anti-patterns reference**
+
+`test-driven-development` now includes `testing-anti-patterns.md` covering:
+- Testing mock behavior instead of real behavior
+- Adding test-only methods to production classes
+- Mocking without understanding dependencies
+- Incomplete mocks that hide structural assumptions
+
+**Skill test infrastructure**
+
+Three new test frameworks for validating skill behavior:
+
+`tests/skill-triggering/` - Validates skills trigger from naive prompts without explicit naming. Tests 6 skills to ensure descriptions alone are sufficient.
+
+`tests/claude-code/` - Integration tests using `claude -p` for headless testing. Verifies skill usage via session transcript (JSONL) analysis. Includes `analyze-token-usage.py` for cost tracking.
+
+`tests/subagent-driven-dev/` - End-to-end workflow validation with two complete test projects:
+- `go-fractals/` - CLI tool with Sierpinski/Mandelbrot (10 tasks)
+- `svelte-todo/` - CRUD app with localStorage and Playwright (12 tasks)
+
+### Major Changes
+
+**DOT flowcharts as executable specifications**
+
+Rewrote key skills using DOT/GraphViz flowcharts as the authoritative process definition. Prose becomes supporting content.
+
+**The Description Trap** (documented in `writing-skills`): Discovered that skill descriptions override flowchart content when descriptions contain workflow summaries. Claude follows the short description instead of reading the detailed flowchart. Fix: descriptions must be trigger-only ("Use when X") with no process details.
+
+**Skill priority in using-superpowers**
+
+When multiple skills apply, process skills (brainstorming, debugging) now explicitly come before implementation skills. "Build X" triggers brainstorming first, then domain skills.
+
+**brainstorming trigger strengthened**
+
+Description changed to imperative: "You MUST use this before any creative work—creating features, building components, adding functionality, or modifying behavior."
+
+### Breaking Changes
+
+**Skill consolidation** - Six standalone skills merged or removed:
+- `root-cause-tracing`, `defense-in-depth`, `condition-based-waiting` → bundled in `systematic-debugging/`
+- `testing-skills-with-subagents` → bundled in `writing-skills/`
+- `testing-anti-patterns` → bundled in `test-driven-development/`
+- `sharing-skills` removed (obsolete)
+
+### Other Improvements
+
+- **render-graphs.js** - Tool to extract DOT diagrams from skills and render to SVG
+- **Rationalizations table** in using-superpowers - Scannable format including new entries: "I need more context first", "Let me explore first", "This feels productive"
+- **docs/testing.md** - Guide to testing skills with Claude Code integration tests
+
+---
+
+## v3.6.2 (2025-12-03)
+
+### Fixed
+
+- **Linux Compatibility**: Fixed polyglot hook wrapper (`run-hook.cmd`) to use POSIX-compliant syntax
+  - Replaced bash-specific `${BASH_SOURCE[0]:-$0}` with standard `$0` on line 16
+  - Resolves "Bad substitution" error on Ubuntu/Debian systems where `/bin/sh` is dash
+  - Fixes #141
+
+---
+
 ## v3.5.1 (2025-11-24)

 ### Changed
@@ -87,9 +232,9 @@
 - Updated terminology: "Superpowers skills" instead of "Core skills"

 ### Files Added
- `codex/INSTALL.md` - Installation guide for Codex users
- `codex/superpowers-bootstrap.md` - Bootstrap instructions with Codex adaptations
- `scripts/superpowers-codex` - Unified Node.js executable with all functionality
+- `.codex/INSTALL.md` - Installation guide for Codex users
+- `.codex/superpowers-bootstrap.md` - Bootstrap instructions with Codex adaptations
+- `.codex/superpowers-codex` - Unified Node.js executable with all functionality

 **Note:** Codex support is experimental. The integration provides core superpowers functionality but may require refinement based on user feedback.

--- a/agents/code-reviewer.md
+++ b/agents/code-reviewer.md
@@ -2,7 +2,7 @@
 name: code-reviewer
 description: |
  Use this agent when a major project step has been completed and needs to be reviewed against the original plan and coding standards. Examples: <example>Context: The user is creating a code-review agent that should be called after a logical chunk of code is written. user: "I've finished implementing the user authentication system as outlined in step 3 of our plan" assistant: "Great work! Now let me use the code-reviewer agent to review the implementation against our plan and coding standards" <commentary>Since a major project step has been completed, use the code-reviewer agent to validate the work against the plan and identify any issues.</commentary></example> <example>Context: User has completed a significant feature implementation. user: "The API endpoints for the task management system are now complete - that covers step 2 from our architecture document" assistant: "Excellent! Let me have the code-reviewer agent examine this implementation to ensure it aligns with our plan and follows best practices" <commentary>A numbered step from the planning document has been completed, so the code-reviewer agent should review the work.</commentary></example>
-model: sonnet
+model: inherit
 ---

 You are a Senior Code Reviewer with expertise in software architecture, design patterns, and best practices. Your role is to review completed project steps against original plans and ensure code quality standards are met.
--- a/commands/brainstorm.md
+++ b/commands/brainstorm.md
@@ -1,5 +1,6 @@
 ---
-description: Interactive design refinement using Socratic method
+description: "You MUST use this before any creative work - creating features, building components, adding functionality, or modifying behavior. Explores requirements and design before implementation."
+disable-model-invocation: true
 ---

-Use and follow the brainstorming skill exactly as written
+Invoke the superpowers:brainstorming skill and follow it exactly as presented to you
--- a/commands/execute-plan.md
+++ b/commands/execute-plan.md
@@ -1,5 +1,6 @@
 ---
 description: Execute plan in batches with review checkpoints
+disable-model-invocation: true
 ---

-Use the executing-plans skill exactly as written
+Invoke the superpowers:executing-plans skill and follow it exactly as presented to you
--- a/commands/write-plan.md
+++ b/commands/write-plan.md
@@ -1,5 +1,6 @@
 ---
 description: Create detailed implementation plan with bite-sized tasks
+disable-model-invocation: true
 ---

-Use the writing-plans skill exactly as written
+Invoke the superpowers:writing-plans skill and follow it exactly as presented to you
--- a/docs/windows/polyglot-hooks.md
+++ b/docs/windows/polyglot-hooks.md
@@ -0,0 +1,212 @@
+# Cross-Platform Polyglot Hooks for Claude Code
+
+Claude Code plugins need hooks that work on Windows, macOS, and Linux. This document explains the polyglot wrapper technique that makes this possible.
+
+## The Problem
+
+Claude Code runs hook commands through the system's default shell:
+- **Windows**: CMD.exe
+- **macOS/Linux**: bash or sh
+
+This creates several challenges:
+
+1. **Script execution**: Windows CMD can't execute `.sh` files directly - it tries to open them in a text editor
+2. **Path format**: Windows uses backslashes (`C:\path`), Unix uses forward slashes (`/path`)
+3. **Environment variables**: `$VAR` syntax doesn't work in CMD
+4. **No `bash` in PATH**: Even with Git Bash installed, `bash` isn't in the PATH when CMD runs
+
+## The Solution: Polyglot `.cmd` Wrapper
+
+A polyglot script is valid syntax in multiple languages simultaneously. Our wrapper is valid in both CMD and bash:
+
+```cmd
+: << 'CMDBLOCK'
+@echo off
+"C:\Program Files\Git\bin\bash.exe" -l -c "\"$(cygpath -u \"$CLAUDE_PLUGIN_ROOT\")/hooks/session-start.sh\""
+exit /b
+CMDBLOCK
+
+# Unix shell runs from here
+"${CLAUDE_PLUGIN_ROOT}/hooks/session-start.sh"
+```
+
+### How It Works
+
+#### On Windows (CMD.exe)
+
+1. `: << 'CMDBLOCK'` - CMD sees `:` as a label (like `:label`) and ignores `<< 'CMDBLOCK'`
+2. `@echo off` - Suppresses command echoing
+3. The bash.exe command runs with:
+   - `-l` (login shell) to get proper PATH with Unix utilities
+   - `cygpath -u` converts Windows path to Unix format (`C:\foo` → `/c/foo`)
+4. `exit /b` - Exits the batch script, stopping CMD here
+5. Everything after `CMDBLOCK` is never reached by CMD
+
+#### On Unix (bash/sh)
+
+1. `: << 'CMDBLOCK'` - `:` is a no-op, `<< 'CMDBLOCK'` starts a heredoc
+2. Everything until `CMDBLOCK` is consumed by the heredoc (ignored)
+3. `# Unix shell runs from here` - Comment
+4. The script runs directly with the Unix path
+
+## File Structure
+
+```
+hooks/
+├── hooks.json           # Points to the .cmd wrapper
+├── session-start.cmd    # Polyglot wrapper (cross-platform entry point)
+└── session-start.sh     # Actual hook logic (bash script)
+```
+
+### hooks.json
+
+```json
+{
+  "hooks": {
+    "SessionStart": [
+      {
+        "matcher": "startup|resume|clear|compact",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"${CLAUDE_PLUGIN_ROOT}/hooks/session-start.cmd\""
+          }
+        ]
+      }
+    ]
+  }
+}
+```
+
+Note: The path must be quoted because `${CLAUDE_PLUGIN_ROOT}` may contain spaces on Windows (e.g., `C:\Program Files\...`).
+
+## Requirements
+
+### Windows
+- **Git for Windows** must be installed (provides `bash.exe` and `cygpath`)
+- Default installation path: `C:\Program Files\Git\bin\bash.exe`
+- If Git is installed elsewhere, the wrapper needs modification
+
+### Unix (macOS/Linux)
+- Standard bash or sh shell
+- The `.cmd` file must have execute permission (`chmod +x`)
+
+## Writing Cross-Platform Hook Scripts
+
+Your actual hook logic goes in the `.sh` file. To ensure it works on Windows (via Git Bash):
+
+### Do:
+- Use pure bash builtins when possible
+- Use `$(command)` instead of backticks
+- Quote all variable expansions: `"$VAR"`
+- Use `printf` or here-docs for output
+
+### Avoid:
+- External commands that may not be in PATH (sed, awk, grep)
+  - If you must use them, they are available in Git Bash, but make sure PATH is set up (run bash as a login shell with `-l`)
+
+### Example: JSON Escaping Without sed/awk
+
+Instead of:
+```bash
+escaped=$(echo "$content" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | awk '{printf "%s\\n", $0}')
+```
+
+Use pure bash:
+```bash
+escape_for_json() {
+    local input="$1"
+    local output=""
+    local i char
+    for (( i=0; i<${#input}; i++ )); do
+        char="${input:$i:1}"
+        case "$char" in
+            $'\\') output+='\\' ;;
+            '"') output+='\"' ;;
+            $'\n') output+='\n' ;;
+            $'\r') output+='\r' ;;
+            $'\t') output+='\t' ;;
+            *) output+="$char" ;;
+        esac
+    done
+    printf '%s' "$output"
+}
+```
+
+## Reusable Wrapper Pattern
+
+For plugins with multiple hooks, you can create a generic wrapper that takes the script name as an argument:
+
+### run-hook.cmd
+```cmd
+: << 'CMDBLOCK'
+@echo off
+set "SCRIPT_DIR=%~dp0"
+set "SCRIPT_NAME=%~1"
+"C:\Program Files\Git\bin\bash.exe" -l -c "cd \"$(cygpath -u \"%SCRIPT_DIR%\")\" && \"./%SCRIPT_NAME%\""
+exit /b
+CMDBLOCK
+
+# Unix shell runs from here
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+SCRIPT_NAME="$1"
+shift
+"${SCRIPT_DIR}/${SCRIPT_NAME}" "$@"
+```
+
+### hooks.json using the reusable wrapper
+```json
+{
+  "hooks": {
+    "SessionStart": [
+      {
+        "matcher": "startup",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"${CLAUDE_PLUGIN_ROOT}/hooks/run-hook.cmd\" session-start.sh"
+          }
+        ]
+      }
+    ],
+    "PreToolUse": [
+      {
+        "matcher": "Bash",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"${CLAUDE_PLUGIN_ROOT}/hooks/run-hook.cmd\" validate-bash.sh"
+          }
+        ]
+      }
+    ]
+  }
+}
+```
+
+## Troubleshooting
+
+### "bash is not recognized"
+CMD can't find bash. The wrapper uses the full path `C:\Program Files\Git\bin\bash.exe`. If Git is installed elsewhere, update the path.
+
+### "cygpath: command not found" or "dirname: command not found"
+Bash isn't running as a login shell. Ensure `-l` flag is used.
+
+### Path has weird `\/` in it
+`${CLAUDE_PLUGIN_ROOT}` expanded to a Windows path ending with backslash, then `/hooks/...` was appended. Use `cygpath` to convert the entire path.
+
+### Script opens in text editor instead of running
+The hooks.json is pointing directly to the `.sh` file. Point to the `.cmd` wrapper instead.
+
+### Works in terminal but not as hook
+Claude Code may run hooks differently. Test by simulating the hook environment:
+```powershell
+$env:CLAUDE_PLUGIN_ROOT = "C:\path\to\plugin"
+cmd /c "C:\path\to\plugin\hooks\session-start.cmd"
+```
+
+## Related Issues
+
+- [anthropics/claude-code#9758](https://github.com/anthropics/claude-code/issues/9758) - .sh scripts open in editor on Windows
+- [anthropics/claude-code#3417](https://github.com/anthropics/claude-code/issues/3417) - Hooks don't work on Windows
+- [anthropics/claude-code#6023](https://github.com/anthropics/claude-code/issues/6023) - CLAUDE_PROJECT_DIR not found
--- a/hooks/hooks.json
+++ b/hooks/hooks.json
@@ -6,7 +6,7 @@
        "hooks": [
          {
            "type": "command",
-            "command": "${CLAUDE_PLUGIN_ROOT}/hooks/session-start.sh"
+            "command": "\"${CLAUDE_PLUGIN_ROOT}/hooks/run-hook.cmd\" session-start.sh"
          }
        ]
      }
--- a/hooks/run-hook.cmd
+++ b/hooks/run-hook.cmd
@@ -0,0 +1,19 @@
+: << 'CMDBLOCK'
+@echo off
+REM Polyglot wrapper: runs .sh scripts cross-platform
+REM Usage: run-hook.cmd <script-name> [args...]
+REM The script should be in the same directory as this wrapper
+
+if "%~1"=="" (
+    echo run-hook.cmd: missing script name >&2
+    exit /b 1
+)
+"C:\Program Files\Git\bin\bash.exe" -l "%~dp0%~1" %2 %3 %4 %5 %6 %7 %8 %9
+exit /b
+CMDBLOCK
+
+# Unix shell runs from here; the heredoc above consumed the CMD-only section
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"  # absolute directory that contains this wrapper
+SCRIPT_NAME="$1"  # first argument names the script to run from that directory
+shift  # drop the script name so only the remaining arguments are forwarded
+"${SCRIPT_DIR}/${SCRIPT_NAME}" "$@"
--- a/hooks/session-start.sh
+++ b/hooks/session-start.sh
@@ -17,9 +17,27 @@ fi
 # Read using-superpowers content
 using_superpowers_content=$(cat "${PLUGIN_ROOT}/skills/using-superpowers/SKILL.md" 2>&1 || echo "Error reading using-superpowers skill")

-# Escape outputs for JSON
-using_superpowers_escaped=$(echo "$using_superpowers_content" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | awk '{printf "%s\\n", $0}')
-warning_escaped=$(echo "$warning_message" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | awk '{printf "%s\\n", $0}')
+# Escape a string for embedding in a JSON string literal, using pure bash.
+#   $1      text to escape (may be empty or span multiple lines)
+#   stdout  $1 with \ " newline, carriage return and tab replaced by their
+#           JSON escapes; no surrounding quotes and no trailing newline
+# Whole-string substitutions are used instead of a per-character loop, which
+# costs one bash iteration per input character and is slow on large files.
+# Any other control characters are passed through unchanged.
+escape_for_json() {
+    local s="$1"
+    # Backslashes first: the later steps insert backslashes of their own,
+    # and those must not be doubled again.
+    s="${s//\\/\\\\}"
+    s="${s//\"/\\\"}"
+    s="${s//$'\n'/\\n}"
+    s="${s//$'\r'/\\r}"
+    s="${s//$'\t'/\\t}"
+    printf '%s' "$s"
+}
+# Escape both strings for the JSON output below
+using_superpowers_escaped=$(escape_for_json "$using_superpowers_content")
+warning_escaped=$(escape_for_json "$warning_message")

 # Output context injection as JSON
 cat <<EOF
--- a/skills/brainstorming/SKILL.md
+++ b/skills/brainstorming/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: brainstorming
-description: Use when creating or developing, before writing code or implementation plans - refines rough ideas into fully-formed designs through collaborative questioning, alternative exploration, and incremental validation. Don't use during clear 'mechanical' processes
+description: "You MUST use this before any creative work - creating features, building components, adding functionality, or modifying behavior. Explores user intent, requirements and design before implementation."
 ---

 # Brainstorming Ideas Into Designs
--- a/skills/dispatching-parallel-agents/SKILL.md
+++ b/skills/dispatching-parallel-agents/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: dispatching-parallel-agents
-description: Use when facing 3+ independent failures that can be investigated without shared state or dependencies - dispatches multiple Claude agents to investigate and fix independent problems concurrently
+description: Use when facing 2+ independent tasks that can be worked on without shared state or sequential dependencies
 ---

 # Dispatching Parallel Agents
--- a/skills/executing-plans/SKILL.md
+++ b/skills/executing-plans/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: executing-plans
-description: Use when partner provides a complete implementation plan to execute in controlled batches with review checkpoints - loads plan, reviews critically, executes tasks in batches, reports for review between batches
+description: Use when you have a written implementation plan to execute in a separate session with review checkpoints
 ---

 # Executing Plans
--- a/skills/receiving-code-review/SKILL.md
+++ b/skills/receiving-code-review/SKILL.md
@@ -200,6 +200,10 @@ You understand 1,2,3,6. Unclear on 4,5.
 ✅ "Understand 1,2,3,6. Need clarification on 4 and 5 before implementing."
 ```

+## GitHub Thread Replies
+
+When replying to inline review comments on GitHub, reply in the comment thread (`gh api repos/{owner}/{repo}/pulls/{pr}/comments/{id}/replies`), not as a top-level PR comment.
+
 ## The Bottom Line

 **External feedback = suggestions to evaluate, not orders to follow.**
--- a/skills/requesting-code-review/SKILL.md
+++ b/skills/requesting-code-review/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: requesting-code-review
-description: Use when completing tasks, implementing major features, or before merging to verify work meets requirements - dispatches superpowers:code-reviewer subagent to review implementation against plan or requirements before proceeding
+description: Use when completing tasks, implementing major features, or before merging to verify work meets requirements
 ---

 # Requesting Code Review
--- a/skills/sharing-skills/SKILL.md
+++ b/skills/sharing-skills/SKILL.md
@@ -1,194 +0,0 @@
---
-name: sharing-skills
-description: Use when you've developed a broadly useful skill and want to contribute it upstream via pull request - guides process of branching, committing, pushing, and creating PR to contribute skills back to upstream repository
---
-
-# Sharing Skills
-
-## Overview
-
-Contribute skills from your local branch back to the upstream repository.
-
-**Workflow:** Branch → Edit/Create skill → Commit → Push → PR
-
-## When to Share
-
-**Share when:**
- Skill applies broadly (not project-specific)
- Pattern/technique others would benefit from
- Well-tested and documented
- Follows writing-skills guidelines
-
-**Keep personal when:**
- Project-specific or organization-specific
- Experimental or unstable
- Contains sensitive information
- Too narrow/niche for general use
-
-## Prerequisites
-
- `gh` CLI installed and authenticated
- Working directory is `~/.config/superpowers/skills/` (your local clone)
- **REQUIRED:** Skill has been tested using writing-skills TDD process
-
-## Sharing Workflow
-
-### 1. Ensure You're on Main and Synced
-
-```bash
-cd ~/.config/superpowers/skills/
-git checkout main
-git pull upstream main
-git push origin main  # Push to your fork
-```
-
-### 2. Create Feature Branch
-
-```bash
-# Branch name: add-skillname-skill
-skill_name="your-skill-name"
-git checkout -b "add-${skill_name}-skill"
-```
-
-### 3. Create or Edit Skill
-
-```bash
-# Work on your skill in skills/
-# Create new skill or edit existing one
-# Skill should be in skills/category/skill-name/SKILL.md
-```
-
-### 4. Commit Changes
-
-```bash
-# Add and commit
-git add skills/your-skill-name/
-git commit -m "Add ${skill_name} skill
-
-$(cat <<'EOF'
-Brief description of what this skill does and why it's useful.
-
-Tested with: [describe testing approach]
-EOF
-)"
-```
-
-### 5. Push to Your Fork
-
-```bash
-git push -u origin "add-${skill_name}-skill"
-```
-
-### 6. Create Pull Request
-
-```bash
-# Create PR to upstream using gh CLI
-gh pr create \
-  --repo upstream-org/upstream-repo \
-  --title "Add ${skill_name} skill" \
-  --body "$(cat <<'EOF'
-## Summary
-Brief description of the skill and what problem it solves.
-
-## Testing
-Describe how you tested this skill (pressure scenarios, baseline tests, etc.).
-
-## Context
-Any additional context about why this skill is needed and how it should be used.
-EOF
-)"
-```
-
-## Complete Example
-
-Here's a complete example of sharing a skill called "async-patterns":
-
-```bash
-# 1. Sync with upstream
-cd ~/.config/superpowers/skills/
-git checkout main
-git pull upstream main
-git push origin main
-
-# 2. Create branch
-git checkout -b "add-async-patterns-skill"
-
-# 3. Create/edit the skill
-# (Work on skills/async-patterns/SKILL.md)
-
-# 4. Commit
-git add skills/async-patterns/
-git commit -m "Add async-patterns skill
-
-Patterns for handling asynchronous operations in tests and application code.
-
-Tested with: Multiple pressure scenarios testing agent compliance."
-
-# 5. Push
-git push -u origin "add-async-patterns-skill"
-
-# 6. Create PR
-gh pr create \
-  --repo upstream-org/upstream-repo \
-  --title "Add async-patterns skill" \
-  --body "## Summary
-Patterns for handling asynchronous operations correctly in tests and application code.
-
-## Testing
-Tested with multiple application scenarios. Agents successfully apply patterns to new code.
-
-## Context
-Addresses common async pitfalls like race conditions, improper error handling, and timing issues."
-```
-
-## After PR is Merged
-
-Once your PR is merged:
-
-1. Sync your local main branch:
-```bash
-cd ~/.config/superpowers/skills/
-git checkout main
-git pull upstream main
-git push origin main
-```
-
-2. Delete the feature branch:
-```bash
-git branch -d "add-${skill_name}-skill"
-git push origin --delete "add-${skill_name}-skill"
-```
-
-## Troubleshooting
-
-**"gh: command not found"**
- Install GitHub CLI: https://cli.github.com/
- Authenticate: `gh auth login`
-
-**"Permission denied (publickey)"**
- Check SSH keys: `gh auth status`
- Set up SSH: https://docs.github.com/en/authentication
-
-**"Skill already exists"**
- You're creating a modified version
- Consider different skill name or coordinate with the skill's maintainer
-
-**PR merge conflicts**
- Rebase on latest upstream: `git fetch upstream && git rebase upstream/main`
- Resolve conflicts
- Force push: `git push -f origin your-branch`
-
-## Multi-Skill Contributions
-
-**Do NOT batch multiple skills in one PR.**
-
-Each skill should:
- Have its own feature branch
- Have its own PR
- Be independently reviewable
-
-**Why?** Individual skills can be reviewed, iterated, and merged independently.
-
-## Related Skills
-
- **writing-skills** - REQUIRED: How to create well-tested skills before sharing
--- a/skills/subagent-driven-development/SKILL.md
+++ b/skills/subagent-driven-development/SKILL.md
@@ -1,256 +1,92 @@
 ---
 name: subagent-driven-development
-description: Use when executing implementation plans with independent tasks in the current session - dispatches fresh subagent for each task with code review between tasks, enabling fast iteration with quality gates
+description: Use when executing implementation plans with independent tasks in the current session
 ---

 # Subagent-Driven Development

-Execute plan by dispatching fresh subagent per task, with code review after each.
+Execute plan by dispatching fresh subagent per task, with two-stage review after each: spec compliance review first, then code quality review.

-**Core principle:** Fresh subagent per task + review between tasks = high quality, fast iteration
+**Core principle:** Fresh subagent per task + two-stage review (spec then quality) = high quality, fast iteration

-## Overview
+## When to Use
+
+```dot
+digraph when_to_use {
+    "Have implementation plan?" [shape=diamond];
+    "Tasks mostly independent?" [shape=diamond];
+    "Stay in this session?" [shape=diamond];
+    "subagent-driven-development" [shape=box];
+    "executing-plans" [shape=box];
+    "Manual execution or brainstorm first" [shape=box];
+
+    "Have implementation plan?" -> "Tasks mostly independent?" [label="yes"];
+    "Have implementation plan?" -> "Manual execution or brainstorm first" [label="no"];
+    "Tasks mostly independent?" -> "Stay in this session?" [label="yes"];
+    "Tasks mostly independent?" -> "Manual execution or brainstorm first" [label="no - tightly coupled"];
+    "Stay in this session?" -> "subagent-driven-development" [label="yes"];
+    "Stay in this session?" -> "executing-plans" [label="no - parallel session"];
+}
+```

 **vs. Executing Plans (parallel session):**
 - Same session (no context switch)
 - Fresh subagent per task (no context pollution)
- Code review after each task (catch issues early)
+- Two-stage review after each task: spec compliance first, then code quality
 - Faster iteration (no human-in-loop between tasks)

-**When to use:**
- Staying in this session
- Tasks are mostly independent
- Want continuous progress with quality gates
-
-**When NOT to use:**
- Need to review plan first (use executing-plans)
- Tasks are tightly coupled (manual execution better)
- Plan needs revision (brainstorm first)
-
 ## The Process

-### 1. Load Plan
+```dot
+digraph process {
+    rankdir=TB;

-1. Read plan file once
-2. Extract all tasks (full text of each)
-3. For each task, note scene-setting context:
-   - Where it fits in overall plan
-   - Dependencies on previous tasks
-   - Architectural context
-   - Relevant patterns or existing code to follow
-4. Create TodoWrite with all tasks
+    subgraph cluster_per_task {
+        label="Per Task";
+        "Dispatch implementer subagent (./implementer-prompt.md)" [shape=box];
+        "Implementer subagent asks questions?" [shape=diamond];
+        "Answer questions, provide context" [shape=box];
+        "Implementer subagent implements, tests, commits, self-reviews" [shape=box];
+        "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)" [shape=box];
+        "Spec reviewer subagent confirms code matches spec?" [shape=diamond];
+        "Implementer subagent fixes spec gaps" [shape=box];
+        "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" [shape=box];
+        "Code quality reviewer subagent approves?" [shape=diamond];
+        "Implementer subagent fixes quality issues" [shape=box];
+        "Mark task complete in TodoWrite" [shape=box];
+    }

-### 2. Execute Task with Subagent
+    "Read plan, extract all tasks with full text, note context, create TodoWrite" [shape=box];
+    "More tasks remain?" [shape=diamond];
+    "Dispatch final code reviewer subagent for entire implementation" [shape=box];
+    "Use superpowers:finishing-a-development-branch" [shape=box style=filled fillcolor=lightgreen];

-For each task:
-
-**1. Prepare task context:**
- Get the full text of Task N (already extracted in Step 1)
- Get the scene-setting context (already noted in Step 1)
-
-**2. Dispatch fresh subagent with full task text:**
-```
-Task tool (general-purpose):
-  description: "Implement Task N: [task name]"
-  prompt: |
-    You are implementing Task N: [task name]
-
-    ## Task Description
-
-    [FULL TEXT of task from plan - paste it here, don't make subagent read file]
-
-    ## Context
-
-    [Scene-setting: where this fits, dependencies, architectural context]
-
-    ## Before You Begin
-
-    If you have questions about:
-    - The requirements or acceptance criteria
-    - The approach or implementation strategy
-    - Dependencies or assumptions
-    - Anything unclear in the task description
-
-    **Ask them now.** Raise any concerns before starting work.
-
-    ## Your Job
-
-    Once you're clear on requirements:
-    1. Implement exactly what the task specifies
-    2. Write tests (following TDD if task says to)
-    3. Verify implementation works
-    4. Commit your work
-    5. Self-review (see below)
-    6. Report back
-
-    Work from: [directory]
-
-    **While you work:** If you encounter something unexpected or unclear, **ask questions**.
-    It's always OK to pause and clarify. Don't guess or make assumptions.
-
-    ## Before Reporting Back: Self-Review
-
-    Review your work with fresh eyes. Ask yourself:
-
-    **Completeness:**
-    - Did I fully implement everything in the spec?
-    - Did I miss any requirements?
-    - Are there edge cases I didn't handle?
-
-    **Quality:**
-    - Is this my best work?
-    - Are names clear and accurate (match what things do, not how they work)?
-    - Is the code clean and maintainable?
-
-    **Discipline:**
-    - Did I avoid overbuilding (YAGNI)?
-    - Did I only build what was requested?
-    - Did I follow existing patterns in the codebase?
-
-    **Testing:**
-    - Do tests actually verify behavior (not just mock behavior)?
-    - Did I follow TDD if required?
-    - Are tests comprehensive?
-
-    If you find issues during self-review, fix them now before reporting.
-
-    ## Report Format
-
-    When done, report:
-    - What you implemented
-    - What you tested and test results
-    - Files changed
-    - Self-review findings (if any)
-    - Any issues or concerns
+    "Read plan, extract all tasks with full text, note context, create TodoWrite" -> "Dispatch implementer subagent (./implementer-prompt.md)";
+    "Dispatch implementer subagent (./implementer-prompt.md)" -> "Implementer subagent asks questions?";
+    "Implementer subagent asks questions?" -> "Answer questions, provide context" [label="yes"];
+    "Answer questions, provide context" -> "Dispatch implementer subagent (./implementer-prompt.md)";
+    "Implementer subagent asks questions?" -> "Implementer subagent implements, tests, commits, self-reviews" [label="no"];
+    "Implementer subagent implements, tests, commits, self-reviews" -> "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)";
+    "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)" -> "Spec reviewer subagent confirms code matches spec?";
+    "Spec reviewer subagent confirms code matches spec?" -> "Implementer subagent fixes spec gaps" [label="no"];
+    "Implementer subagent fixes spec gaps" -> "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)" [label="re-review"];
+    "Spec reviewer subagent confirms code matches spec?" -> "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" [label="yes"];
+    "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" -> "Code quality reviewer subagent approves?";
+    "Code quality reviewer subagent approves?" -> "Implementer subagent fixes quality issues" [label="no"];
+    "Implementer subagent fixes quality issues" -> "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" [label="re-review"];
+    "Code quality reviewer subagent approves?" -> "Mark task complete in TodoWrite" [label="yes"];
+    "Mark task complete in TodoWrite" -> "More tasks remain?";
+    "More tasks remain?" -> "Dispatch implementer subagent (./implementer-prompt.md)" [label="yes"];
+    "More tasks remain?" -> "Dispatch final code reviewer subagent for entire implementation" [label="no"];
+    "Dispatch final code reviewer subagent for entire implementation" -> "Use superpowers:finishing-a-development-branch";
+}
 ```

-**3. Handle subagent response:**
+## Prompt Templates

-If subagent asks questions:
- Answer clearly
- Provide additional context if needed
- Either continue conversation or re-dispatch with answers
-
-If subagent proceeds with implementation:
- Review their report
- Proceed to spec compliance review (Step 3)
-
-### 3. Spec Compliance Review
-
-**Purpose:** Verify implementer built what was requested (nothing more, nothing less)
-
-**Dispatch spec compliance reviewer:**
-```
-Task tool (general-purpose):
-  description: "Review spec compliance for Task N"
-  prompt: |
-    You are reviewing whether an implementation matches its specification.
-
-    ## What Was Requested
-
-    [FULL TEXT of task requirements]
-
-    ## What Implementer Claims They Built
-
-    [From implementer's report]
-
-    ## CRITICAL: Do Not Trust the Report
-
-    The implementer finished suspiciously quickly. Their report may be incomplete,
-    inaccurate, or optimistic. You MUST verify everything independently.
-
-    **DO NOT:**
-    - Take their word for what they implemented
-    - Trust their claims about completeness
-    - Accept their interpretation of requirements
-
-    **DO:**
-    - Read the actual code they wrote
-    - Compare actual implementation to requirements line by line
-    - Check for missing pieces they claimed to implement
-    - Look for extra features they didn't mention
-
-    ## Your Job
-
-    Read the implementation code and verify:
-
-    **Missing requirements:**
-    - Did they implement everything that was requested?
-    - Are there requirements they skipped or missed?
-    - Did they claim something works but didn't actually implement it?
-
-    **Extra/unneeded work:**
-    - Did they build things that weren't requested?
-    - Did they over-engineer or add unnecessary features?
-    - Did they add "nice to haves" that weren't in spec?
-
-    **Misunderstandings:**
-    - Did they interpret requirements differently than intended?
-    - Did they solve the wrong problem?
-    - Did they implement the right feature but wrong way?
-
-    **Verify by reading code, not by trusting report.**
-
-    Report:
-    - ✅ Spec compliant (if everything matches after code inspection)
-    - ❌ Issues found: [list specifically what's missing or extra, with file:line references]
-```
-
-**Review loop (must complete before Step 4):**
-1. Spec reviewer reports findings
-2. If issues found:
-   - Original implementer fixes issues
-   - Spec reviewer reviews again
-3. Repeat until spec compliant
-
-**Do NOT proceed to code quality review until spec compliance is ✅**
-
-### 4. Code Quality Review
-
-**Purpose:** Verify implementation is well-built (clean, tested, maintainable)
-
-**Only run after spec compliance review is complete.**
-
-**Dispatch code-reviewer subagent:**
-```
-Task tool (superpowers:code-reviewer):
-  Use template at requesting-code-review/code-reviewer.md
-
-  WHAT_WAS_IMPLEMENTED: [from implementer's report]
-  PLAN_OR_REQUIREMENTS: Task N from [plan-file]
-  BASE_SHA: [commit before task]
-  HEAD_SHA: [current commit]
-  DESCRIPTION: [task summary]
-```
-
-**Code reviewer returns:** Strengths, Issues (Critical/Important/Minor), Assessment
-
-**Review loop:**
-1. Code reviewer reports findings
-2. If issues found:
-   - Original implementer fixes issues
-   - Code reviewer reviews again
-3. Repeat until code quality approved
-
-### 5. Mark Complete, Next Task
-
- Mark task as completed in TodoWrite
- Move to next task
- Repeat steps 2-5 for each remaining task
-
-### 6. Final Review
-
-After all tasks complete, dispatch final code-reviewer:
- Reviews entire implementation
- Checks all plan requirements met
- Validates overall architecture
-
-### 7. Complete Development
-
-After final review passes:
- Announce: "I'm using the finishing-a-development-branch skill to complete this work."
- **REQUIRED SUB-SKILL:** Use superpowers:finishing-a-development-branch
- Follow that skill to verify tests, present options, execute choice
+- `./implementer-prompt.md` - Dispatch implementer subagent
+- `./spec-reviewer-prompt.md` - Dispatch spec compliance reviewer subagent
+- `./code-quality-reviewer-prompt.md` - Dispatch code quality reviewer subagent

 ## Example Workflow

@@ -393,14 +229,12 @@ Done!
 ## Integration

 **Required workflow skills:**
- **writing-plans** - REQUIRED: Creates the plan that this skill executes
- **requesting-code-review** - REQUIRED: Review after each task (see Step 3)
- **finishing-a-development-branch** - REQUIRED: Complete development after all tasks (see Step 7)
+- **superpowers:writing-plans** - Creates the plan this skill executes
+- **superpowers:requesting-code-review** - Code review template for reviewer subagents
+- **superpowers:finishing-a-development-branch** - Complete development after all tasks

-**Subagents must use:**
- **test-driven-development** - Subagents follow TDD for each task
+**Subagents should use:**
+- **superpowers:test-driven-development** - Subagents follow TDD for each task

 **Alternative workflow:**
- **executing-plans** - Use for parallel session instead of same-session execution
-
-See code-reviewer template: requesting-code-review/code-reviewer.md
+- **superpowers:executing-plans** - Use for parallel session instead of same-session execution
--- a/skills/subagent-driven-development/code-quality-reviewer-prompt.md
+++ b/skills/subagent-driven-development/code-quality-reviewer-prompt.md
@@ -0,0 +1,20 @@
+# Code Quality Reviewer Prompt Template
+
+Use this template when dispatching a code quality reviewer subagent.
+
+**Purpose:** Verify implementation is well-built (clean, tested, maintainable)
+
+**Only dispatch after spec compliance review passes.**
+
+```
+Task tool (superpowers:code-reviewer):
+  Use template at requesting-code-review/code-reviewer.md
+
+  WHAT_WAS_IMPLEMENTED: [from implementer's report]
+  PLAN_OR_REQUIREMENTS: Task N from [plan-file]
+  BASE_SHA: [commit before task]
+  HEAD_SHA: [current commit]
+  DESCRIPTION: [task summary]
+```
+
+**Code reviewer returns:** Strengths, Issues (Critical/Important/Minor), Assessment
--- a/skills/subagent-driven-development/implementer-prompt.md
+++ b/skills/subagent-driven-development/implementer-prompt.md
@@ -0,0 +1,78 @@
+# Implementer Subagent Prompt Template
+
+Use this template when dispatching an implementer subagent.
+
+```
+Task tool (general-purpose):
+  description: "Implement Task N: [task name]"
+  prompt: |
+    You are implementing Task N: [task name]
+
+    ## Task Description
+
+    [FULL TEXT of task from plan - paste it here, don't make subagent read file]
+
+    ## Context
+
+    [Scene-setting: where this fits, dependencies, architectural context]
+
+    ## Before You Begin
+
+    If you have questions about:
+    - The requirements or acceptance criteria
+    - The approach or implementation strategy
+    - Dependencies or assumptions
+    - Anything unclear in the task description
+
+    **Ask them now.** Raise any concerns before starting work.
+
+    ## Your Job
+
+    Once you're clear on requirements:
+    1. Implement exactly what the task specifies
+    2. Write tests (following TDD if task says to)
+    3. Verify implementation works
+    4. Commit your work
+    5. Self-review (see below)
+    6. Report back
+
+    Work from: [directory]
+
+    **While you work:** If you encounter something unexpected or unclear, **ask questions**.
+    It's always OK to pause and clarify. Don't guess or make assumptions.
+
+    ## Before Reporting Back: Self-Review
+
+    Review your work with fresh eyes. Ask yourself:
+
+    **Completeness:**
+    - Did I fully implement everything in the spec?
+    - Did I miss any requirements?
+    - Are there edge cases I didn't handle?
+
+    **Quality:**
+    - Is this my best work?
+    - Are names clear and accurate (match what things do, not how they work)?
+    - Is the code clean and maintainable?
+
+    **Discipline:**
+    - Did I avoid overbuilding (YAGNI)?
+    - Did I only build what was requested?
+    - Did I follow existing patterns in the codebase?
+
+    **Testing:**
+    - Do tests actually verify behavior (not just mock behavior)?
+    - Did I follow TDD if required?
+    - Are tests comprehensive?
+
+    If you find issues during self-review, fix them now before reporting.
+
+    ## Report Format
+
+    When done, report:
+    - What you implemented
+    - What you tested and test results
+    - Files changed
+    - Self-review findings (if any)
+    - Any issues or concerns
+```
--- a/skills/subagent-driven-development/spec-reviewer-prompt.md
+++ b/skills/subagent-driven-development/spec-reviewer-prompt.md
@@ -0,0 +1,61 @@
+# Spec Compliance Reviewer Prompt Template
+
+Use this template when dispatching a spec compliance reviewer subagent.
+
+**Purpose:** Verify implementer built what was requested (nothing more, nothing less)
+
+```
+Task tool (general-purpose):
+  description: "Review spec compliance for Task N"
+  prompt: |
+    You are reviewing whether an implementation matches its specification.
+
+    ## What Was Requested
+
+    [FULL TEXT of task requirements]
+
+    ## What Implementer Claims They Built
+
+    [From implementer's report]
+
+    ## CRITICAL: Do Not Trust the Report
+
+    The implementer finished suspiciously quickly. Their report may be incomplete,
+    inaccurate, or optimistic. You MUST verify everything independently.
+
+    **DO NOT:**
+    - Take their word for what they implemented
+    - Trust their claims about completeness
+    - Accept their interpretation of requirements
+
+    **DO:**
+    - Read the actual code they wrote
+    - Compare actual implementation to requirements line by line
+    - Check for missing pieces they claimed to implement
+    - Look for extra features they didn't mention
+
+    ## Your Job
+
+    Read the implementation code and verify:
+
+    **Missing requirements:**
+    - Did they implement everything that was requested?
+    - Are there requirements they skipped or missed?
+    - Did they claim something works but didn't actually implement it?
+
+    **Extra/unneeded work:**
+    - Did they build things that weren't requested?
+    - Did they over-engineer or add unnecessary features?
+    - Did they add "nice to haves" that weren't in spec?
+
+    **Misunderstandings:**
+    - Did they interpret requirements differently than intended?
+    - Did they solve the wrong problem?
+    - Did they implement the right feature but the wrong way?
+
+    **Verify by reading code, not by trusting report.**
+
+    Report:
+    - ✅ Spec compliant (if everything matches after code inspection)
+    - ❌ Issues found: [list specifically what's missing or extra, with file:line references]
+```
--- a/skills/systematic-debugging/SKILL.md
+++ b/skills/systematic-debugging/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: systematic-debugging
-description: Use when encountering any bug, test failure, or unexpected behavior, before proposing fixes - four-phase framework (root cause investigation, pattern analysis, hypothesis testing, implementation) that ensures understanding before attempting solutions
+description: Use when encountering any bug, test failure, or unexpected behavior, before proposing fixes
 ---

 # Systematic Debugging
@@ -111,7 +111,7 @@ You MUST complete each phase before proceeding to the next.

   **WHEN error is deep in call stack:**

-   **REQUIRED SUB-SKILL:** Use superpowers:root-cause-tracing for backward tracing technique
+   See `root-cause-tracing.md` in this directory for the complete backward tracing technique.

   **Quick version:**
   - Where does bad value originate?
@@ -176,7 +176,7 @@ You MUST complete each phase before proceeding to the next.
   - Automated test if possible
   - One-off test script if no framework
   - MUST have before fixing
-   - **REQUIRED SUB-SKILL:** Use superpowers:test-driven-development for writing proper failing tests
+   - Use the `superpowers:test-driven-development` skill for writing proper failing tests

 2. **Implement Single Fix**
   - Address the root cause identified
@@ -275,16 +275,17 @@ If systematic investigation reveals issue is truly environmental, timing-depende

 **But:** 95% of "no root cause" cases are incomplete investigation.

-## Integration with Other Skills
+## Supporting Techniques

-**This skill requires using:**
- **root-cause-tracing** - REQUIRED when error is deep in call stack (see Phase 1, Step 5)
- **test-driven-development** - REQUIRED for creating failing test case (see Phase 4, Step 1)
+These techniques are part of systematic debugging and are available in this directory:

-**Complementary skills:**
- **defense-in-depth** - Add validation at multiple layers after finding root cause
- **condition-based-waiting** - Replace arbitrary timeouts identified in Phase 2
- **verification-before-completion** - Verify fix worked before claiming success
+- **`root-cause-tracing.md`** - Trace bugs backward through call stack to find original trigger
+- **`defense-in-depth.md`** - Add validation at multiple layers after finding root cause
+- **`condition-based-waiting.md`** - Replace arbitrary timeouts with condition polling
+
+**Related skills:**
+- **superpowers:test-driven-development** - For creating failing test case (Phase 4, Step 1)
+- **superpowers:verification-before-completion** - Verify fix worked before claiming success

 ## Real-World Impact

--- a/skills/systematic-debugging/condition-based-waiting-example.ts
+++ b/skills/systematic-debugging/condition-based-waiting-example.ts
--- a/skills/systematic-debugging/condition-based-waiting.md
+++ b/skills/systematic-debugging/condition-based-waiting.md
@@ -1,8 +1,3 @@
---
-name: condition-based-waiting
-description: Use when tests have race conditions, timing dependencies, or inconsistent pass/fail behavior - replaces arbitrary timeouts with condition polling to wait for actual state changes, eliminating flaky tests from timing guesses
---
-
 # Condition-Based Waiting

 ## Overview
@@ -84,7 +79,7 @@ async function waitFor<T>(
 }
 ```

-See @example.ts for complete implementation with domain-specific helpers (`waitForEvent`, `waitForEventCount`, `waitForEventMatch`) from actual debugging session.
+See `condition-based-waiting-example.ts` in this directory for complete implementation with domain-specific helpers (`waitForEvent`, `waitForEventCount`, `waitForEventMatch`) from actual debugging session.

 ## Common Mistakes

--- a/skills/systematic-debugging/defense-in-depth.md
+++ b/skills/systematic-debugging/defense-in-depth.md
@@ -1,8 +1,3 @@
---
-name: defense-in-depth
-description: Use when invalid data causes failures deep in execution, requiring validation at multiple system layers - validates at every layer data passes through to make bugs structurally impossible
---
-
 # Defense-in-Depth Validation

 ## Overview
--- a/skills/systematic-debugging/find-polluter.sh
+++ b/skills/systematic-debugging/find-polluter.sh
--- a/skills/systematic-debugging/root-cause-tracing.md
+++ b/skills/systematic-debugging/root-cause-tracing.md
@@ -1,8 +1,3 @@
---
-name: root-cause-tracing
-description: Use when errors occur deep in execution and you need to trace back to find the original trigger - systematically traces bugs backward through call stack, adding instrumentation when needed, to identify source of invalid data or incorrect behavior
---
-
 # Root Cause Tracing

 ## Overview
@@ -103,7 +98,7 @@ npm test 2>&1 | grep 'DEBUG git init'

 If something appears during tests but you don't know which test:

-Use the bisection script: @find-polluter.sh
+Use the bisection script `find-polluter.sh` in this directory:

 ```bash
 ./find-polluter.sh '.git' 'src/**/*.test.ts'
--- a/skills/test-driven-development/SKILL.md
+++ b/skills/test-driven-development/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: test-driven-development
-description: Use when implementing any feature or bugfix, before writing implementation code - write the test first, watch it fail, write minimal code to pass; ensures tests actually verify behavior by requiring failure first
+description: Use when implementing any feature or bugfix, before writing implementation code
 ---

 # Test-Driven Development (TDD)
@@ -354,6 +354,13 @@ Bug found? Write failing test reproducing it. Follow TDD cycle. Test proves fix

 Never fix bugs without a test.

+## Testing Anti-Patterns
+
+When adding mocks or test utilities, read @testing-anti-patterns.md to avoid common pitfalls:
+- Testing mock behavior instead of real behavior
+- Adding test-only methods to production classes
+- Mocking without understanding dependencies
+
 ## Final Rule

 ```
--- a/skills/test-driven-development/testing-anti-patterns.md
+++ b/skills/test-driven-development/testing-anti-patterns.md
@@ -1,10 +1,7 @@
---
-name: testing-anti-patterns
-description: Use when writing or changing tests, adding mocks, or tempted to add test-only methods to production code - prevents testing mock behavior, production pollution with test-only methods, and mocking without understanding dependencies
---
-
 # Testing Anti-Patterns

+**Load this reference when:** writing or changing tests, adding mocks, or tempted to add test-only methods to production code.
+
 ## Overview

 Tests must verify real behavior, not mock behavior. Mocks are a means to isolate, not the thing being tested.
--- a/skills/using-git-worktrees/SKILL.md
+++ b/skills/using-git-worktrees/SKILL.md
@@ -52,14 +52,14 @@ Which would you prefer?

 ### For Project-Local Directories (.worktrees or worktrees)

-**MUST verify .gitignore before creating worktree:**
+**MUST verify directory is ignored before creating worktree:**

 ```bash
-# Check if directory pattern in .gitignore
-grep -q "^\.worktrees/$" .gitignore || grep -q "^worktrees/$" .gitignore
+# Check if directory is ignored (respects local, global, and system gitignore)
+git check-ignore -q .worktrees 2>/dev/null || git check-ignore -q worktrees 2>/dev/null
 ```

-**If NOT in .gitignore:**
+**If NOT ignored:**

 Per Jesse's rule "Fix broken things immediately":
 1. Add appropriate line to .gitignore
@@ -145,29 +145,33 @@ Ready to implement <feature-name>

 | Situation | Action |
 |-----------|--------|
-| `.worktrees/` exists | Use it (verify .gitignore) |
-| `worktrees/` exists | Use it (verify .gitignore) |
+| `.worktrees/` exists | Use it (verify ignored) |
+| `worktrees/` exists | Use it (verify ignored) |
 | Both exist | Use `.worktrees/` |
 | Neither exists | Check CLAUDE.md → Ask user |
-| Directory not in .gitignore | Add it immediately + commit |
+| Directory not ignored | Add to .gitignore + commit |
 | Tests fail during baseline | Report failures + ask |
 | No package.json/Cargo.toml | Skip dependency install |

 ## Common Mistakes

-**Skipping .gitignore verification**
- **Problem:** Worktree contents get tracked, pollute git status
- **Fix:** Always grep .gitignore before creating project-local worktree
+### Skipping ignore verification
+
+- **Problem:** Worktree contents get tracked, pollute git status
+- **Fix:** Always use `git check-ignore` before creating project-local worktree
+
+### Assuming directory location

-**Assuming directory location**
 - **Problem:** Creates inconsistency, violates project conventions
 - **Fix:** Follow priority: existing > CLAUDE.md > ask

-**Proceeding with failing tests**
+### Proceeding with failing tests
+
 - **Problem:** Can't distinguish new bugs from pre-existing issues
 - **Fix:** Report failures, get explicit permission to proceed

-**Hardcoding setup commands**
+### Hardcoding setup commands
+
 - **Problem:** Breaks on projects using different tools
 - **Fix:** Auto-detect from project files (package.json, etc.)

@@ -177,7 +181,7 @@ Ready to implement <feature-name>
 You: I'm using the using-git-worktrees skill to set up an isolated workspace.

 [Check .worktrees/ - exists]
-[Verify .gitignore - contains .worktrees/]
+[Verify ignored - git check-ignore confirms .worktrees/ is ignored]
 [Create worktree: git worktree add .worktrees/auth -b feature/auth]
 [Run npm install]
 [Run npm test - 47 passing]
@@ -190,7 +194,7 @@ Ready to implement auth feature
 ## Red Flags

 **Never:**
- Create worktree without .gitignore verification (project-local)
+- Create worktree without verifying it's ignored (project-local)
 - Skip baseline test verification
 - Proceed with failing tests without asking
 - Assume directory location when ambiguous
@@ -198,7 +202,7 @@ Ready to implement auth feature

 **Always:**
 - Follow directory priority: existing > CLAUDE.md > ask
- Verify .gitignore for project-local
+- Verify directory is ignored for project-local
 - Auto-detect and run project setup
 - Verify clean test baseline

--- a/skills/using-superpowers/SKILL.md
+++ b/skills/using-superpowers/SKILL.md
@@ -1,101 +1,87 @@
 ---
 name: using-superpowers
-description: Use when starting any conversation - establishes mandatory workflows for finding and using skills, including using Skill tool before announcing usage, following brainstorming before coding, and creating TodoWrite todos for checklists
+description: Use when starting any conversation - establishes how to find and use skills, requiring Skill tool invocation before ANY response including clarifying questions
 ---

 <EXTREMELY-IMPORTANT>
-If you think there is even a 1% chance a skill might apply to what you are doing, you ABSOLUTELY MUST read the skill.
+If you think there is even a 1% chance a skill might apply to what you are doing, you ABSOLUTELY MUST invoke the skill.

 IF A SKILL APPLIES TO YOUR TASK, YOU DO NOT HAVE A CHOICE. YOU MUST USE IT.

 This is not negotiable. This is not optional. You cannot rationalize your way out of this.
 </EXTREMELY-IMPORTANT>

-# Getting Started with Skills
+## How to Access Skills

-## MANDATORY FIRST RESPONSE PROTOCOL
+**In Claude Code:** Use the `Skill` tool. When you invoke a skill, its content is loaded and presented to you—follow it directly. Never use the Read tool on skill files.

-Before responding to ANY user message, you MUST complete this checklist:
+**In other environments:** Check your platform's documentation for how skills are loaded.

-1. ☐ List available skills in your mind
-2. ☐ Ask yourself: "Does ANY skill match this request?"
-3. ☐ If yes → Use the Skill tool to read and run the skill file
-4. ☐ Announce which skill you're using
-5. ☐ Follow the skill exactly
+# Using Skills

-**Responding WITHOUT completing this checklist = automatic failure.**
+## The Rule

-## Critical Rules
+**Invoke relevant or requested skills BEFORE any response or action.** Even a 1% chance a skill might apply means that you should invoke the skill to check. If an invoked skill turns out to be wrong for the situation, you don't need to use it.

-1. **Follow mandatory workflows.** Brainstorming before coding. Check for relevant skills before ANY task.
+```dot
+digraph skill_flow {
+    "User message received" [shape=doublecircle];
+    "Might any skill apply?" [shape=diamond];
+    "Invoke Skill tool" [shape=box];
+    "Announce: 'Using [skill] to [purpose]'" [shape=box];
+    "Has checklist?" [shape=diamond];
+    "Create TodoWrite todo per item" [shape=box];
+    "Follow skill exactly" [shape=box];
+    "Respond (including clarifications)" [shape=doublecircle];

-2. Execute skills with the Skill tool
+    "User message received" -> "Might any skill apply?";
+    "Might any skill apply?" -> "Invoke Skill tool" [label="yes, even 1%"];
+    "Might any skill apply?" -> "Respond (including clarifications)" [label="definitely not"];
+    "Invoke Skill tool" -> "Announce: 'Using [skill] to [purpose]'";
+    "Announce: 'Using [skill] to [purpose]'" -> "Has checklist?";
+    "Has checklist?" -> "Create TodoWrite todo per item" [label="yes"];
+    "Has checklist?" -> "Follow skill exactly" [label="no"];
+    "Create TodoWrite todo per item" -> "Follow skill exactly";
+}
+```

-## Common Rationalizations That Mean You're About To Fail
+## Red Flags

-If you catch yourself thinking ANY of these thoughts, STOP. You are rationalizing. Check for and use the skill.
+These thoughts mean STOP—you're rationalizing:

- "This is just a simple question" → WRONG. Questions are tasks. Check for skills.
- "I can check git/files quickly" → WRONG. Files don't have conversation context. Check for skills.
- "Let me gather information first" → WRONG. Skills tell you HOW to gather information. Check for skills.
- "This doesn't need a formal skill" → WRONG. If a skill exists for it, use it.
- "I remember this skill" → WRONG. Skills evolve. Run the current version.
- "This doesn't count as a task" → WRONG. If you're taking action, it's a task. Check for skills.
- "The skill is overkill for this" → WRONG. Skills exist because simple things become complex. Use it.
- "I'll just do this one thing first" → WRONG. Check for skills BEFORE doing anything.
+| Thought | Reality |
+|---------|---------|
+| "This is just a simple question" | Questions are tasks. Check for skills. |
+| "I need more context first" | Skill check comes BEFORE clarifying questions. |
+| "Let me explore the codebase first" | Skills tell you HOW to explore. Check first. |
+| "I can check git/files quickly" | Files lack conversation context. Check for skills. |
+| "Let me gather information first" | Skills tell you HOW to gather information. |
+| "This doesn't need a formal skill" | If a skill exists, use it. |
+| "I remember this skill" | Skills evolve. Invoke the current version. |
+| "This doesn't count as a task" | Action = task. Check for skills. |
+| "The skill is overkill" | Simple things become complex. Use it. |
+| "I'll just do this one thing first" | Check BEFORE doing anything. |
+| "This feels productive" | Undisciplined action wastes time. Skills prevent this. |
+| "I know what that means" | Knowing the concept ≠ using the skill. Invoke it. |

-**Why:** Skills document proven techniques that save time and prevent mistakes. Not using available skills means repeating solved problems and making known errors.
+## Skill Priority

-If a skill for your task exists, you must use it or you will fail at your task.
+When multiple skills could apply, use this order:

-## Skills with Checklists
+1. **Process skills first** (brainstorming, debugging) - these determine HOW to approach the task
+2. **Implementation skills second** (frontend-design, mcp-builder) - these guide execution

-If a skill has a checklist, YOU MUST create TodoWrite todos for EACH item.
+"Let's build X" → brainstorming first, then implementation skills.
+"Fix this bug" → debugging first, then domain-specific skills.

-**Don't:**
- Work through checklist mentally
- Skip creating todos "to save time"
- Batch multiple items into one todo
- Mark complete without doing them
+## Skill Types

-**Why:** Checklists without TodoWrite tracking = steps get skipped. Every time. The overhead of TodoWrite is tiny compared to the cost of missing steps.
+**Rigid** (TDD, debugging): Follow exactly. Don't adapt away discipline.

-## Announcing Skill Usage
+**Flexible** (patterns): Adapt principles to context.

-Before using a skill, announce that you are using it.
-"I'm using [Skill Name] to [what you're doing]."
+The skill itself tells you which.

-**Examples:**
- "I'm using the brainstorming skill to refine your idea into a design."
- "I'm using the test-driven-development skill to implement this feature."
+## User Instructions

-**Why:** Transparency helps your human partner understand your process and catch errors early. It also confirms you actually read the skill.
-
-# About these skills
-
-**Many skills contain rigid rules (TDD, debugging, verification).** Follow them exactly. Don't adapt away the discipline.
-
-**Some skills are flexible patterns (architecture, naming).** Adapt core principles to your context.
-
-The skill itself tells you which type it is.
-
-## Instructions ≠ Permission to Skip Workflows
-
-Your human partner's specific instructions describe WHAT to do, not HOW.
-
-"Add X", "Fix Y" = the goal, NOT permission to skip brainstorming, TDD, or RED-GREEN-REFACTOR.
-
-**Red flags:** "Instruction was specific" • "Seems simple" • "Workflow is overkill"
-
-**Why:** Specific instructions mean clear requirements, which is when workflows matter MOST. Skipping process on "simple" tasks is how simple tasks become complex problems.
-
-## Summary
-
-**Starting any task:**
-1. If relevant skill exists → Use the skill
-3. Announce you're using it
-4. Follow what it says
-
-**Skill has checklist?** TodoWrite for every item.
-
-**Finding a relevant skill = mandatory to read and use it. Not optional.**
+Instructions say WHAT, not HOW. "Add X" or "Fix Y" doesn't mean you may skip workflows.
--- a/skills/writing-plans/SKILL.md
+++ b/skills/writing-plans/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: writing-plans
-description: Use when design is complete and you need detailed implementation tasks for engineers with zero codebase context - creates comprehensive implementation plans with exact file paths, complete code examples, and verification steps assuming engineer has minimal domain knowledge
+description: Use when you have a spec or requirements for a multi-step task, before touching code
 ---

 # Writing Plans
--- a/skills/writing-skills/SKILL.md
+++ b/skills/writing-skills/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: writing-skills
-description: Use when creating new skills, editing existing skills, or verifying skills work before deployment - applies TDD to process documentation by testing with subagents before writing, iterating until bulletproof against rationalization
+description: Use when creating new skills, editing existing skills, or verifying skills work before deployment
 ---

 # Writing Skills
@@ -56,6 +56,7 @@ The entire skill creation process follows RED-GREEN-REFACTOR.
 - One-off solutions
 - Standard practices well-documented elsewhere
 - Project-specific conventions (put in CLAUDE.md)
+- Mechanical constraints (if it's enforceable with regex/validation, automate it—save documentation for judgment calls)

 ## Skill Types

@@ -95,15 +96,16 @@ skills/
 - Only two fields supported: `name` and `description`
 - Max 1024 characters total
 - `name`: Use letters, numbers, and hyphens only (no parentheses, special chars)
- `description`: Third-person, includes BOTH what it does AND when to use it
+- `description`: Third-person, describes ONLY when to use (NOT what it does)
  - Start with "Use when..." to focus on triggering conditions
  - Include specific symptoms, situations, and contexts
+  - **NEVER summarize the skill's process or workflow** (see CSO section for why)
  - Keep under 500 characters if possible

 ```markdown
 ---
 name: Skill-Name-With-Hyphens
-description: Use when [specific triggering conditions and symptoms] - [what the skill does and how it helps, written in third person]
+description: Use when [specific triggering conditions and symptoms]
 ---

 # Skill Name
@@ -143,7 +145,31 @@ Concrete results

 **Purpose:** Claude reads description to decide which skills to load for a given task. Make it answer: "Should I read this skill right now?"

-**Format:** Start with "Use when..." to focus on triggering conditions, then explain what it does
+**Format:** Start with "Use when..." to focus on triggering conditions
+
+**CRITICAL: Description = When to Use, NOT What the Skill Does**
+
+The description should ONLY describe triggering conditions. Do NOT summarize the skill's process or workflow in the description.
+
+**Why this matters:** Testing revealed that when a description summarizes the skill's workflow, Claude may follow the description instead of reading the full skill content. A description saying "code review between tasks" caused Claude to do ONE review, even though the skill's flowchart clearly showed TWO reviews (spec compliance then code quality).
+
+When the description was changed to just "Use when executing implementation plans with independent tasks" (no workflow summary), Claude correctly read the flowchart and followed the two-stage review process.
+
+**The trap:** Descriptions that summarize workflow create a shortcut Claude will take. The skill body becomes documentation Claude skips.
+
+```yaml
+# ❌ BAD: Summarizes workflow - Claude may follow this instead of reading skill
+description: Use when executing plans - dispatches subagent per task with code review between tasks
+
+# ❌ BAD: Too much process detail
+description: Use for TDD - write test first, watch it fail, write minimal code, refactor
+
+# ✅ GOOD: Just triggering conditions, no workflow summary
+description: Use when executing implementation plans with independent tasks in the current session
+
+# ✅ GOOD: Triggering conditions only
+description: Use when implementing any feature or bugfix, before writing implementation code
+```

 **Content:**
 - Use concrete triggers, symptoms, and situations that signal this skill applies
@@ -151,6 +177,7 @@ Concrete results
 - Keep triggers technology-agnostic unless the skill itself is technology-specific
 - If skill is technology-specific, make that explicit in the trigger
 - Write in third person (injected into system prompt)
+- **NEVER summarize the skill's process or workflow**

 ```yaml
 # ❌ BAD: Too abstract, vague, doesn't include when to use
@@ -162,11 +189,11 @@ description: I can help you with async tests when they're flaky
 # ❌ BAD: Mentions technology but skill isn't specific to it
 description: Use when tests use setTimeout/sleep and are flaky

-# ✅ GOOD: Starts with "Use when", describes problem, then what it does
-description: Use when tests have race conditions, timing dependencies, or pass/fail inconsistently - replaces arbitrary timeouts with condition polling for reliable async tests
+# ✅ GOOD: Starts with "Use when", describes problem, no workflow
+description: Use when tests have race conditions, timing dependencies, or pass/fail inconsistently

 # ✅ GOOD: Technology-specific skill with explicit trigger
-description: Use when using React Router and handling authentication redirects - provides patterns for protected routes and auth state management
+description: Use when using React Router and handling authentication redirects
 ```

 ### 2. Keyword Coverage
@@ -181,7 +208,7 @@ Use words Claude would search for:

 **Use active voice, verb-first:**
 - ✅ `creating-skills` not `skill-creation`
- ✅ `testing-skills-with-subagents` not `subagent-skill-testing`
+- ✅ `condition-based-waiting` not `async-test-helpers`

 ### 4. Token Efficiency (Critical)

@@ -288,6 +315,12 @@ digraph when_flowchart {

 See @graphviz-conventions.dot for graphviz style rules.

+**Visualizing for your human partner:** Use `render-graphs.js` in this directory to render a skill's flowcharts to SVG:
+```bash
+./render-graphs.js ../some-skill           # Each diagram separately
+./render-graphs.js ../some-skill --combine # All diagrams in one SVG
+```
+
 ## Code Examples

 **One excellent example beats many mediocre ones**
@@ -520,7 +553,7 @@ Run same scenarios WITH skill. Agent should now comply.

 Agent found new rationalization? Add explicit counter. Re-test until bulletproof.

-**REQUIRED SUB-SKILL:** Use superpowers:testing-skills-with-subagents for the complete testing methodology:
+**Testing methodology:** See @testing-skills-with-subagents.md for the complete process:
 - How to write pressure scenarios
 - Pressure types (time, sunk cost, authority, exhaustion)
 - Plugging holes systematically
--- a/skills/writing-skills/anthropic-best-practices.md
+++ b/skills/writing-skills/anthropic-best-practices.md
@@ -10,7 +10,7 @@ For conceptual background on how Skills work, see the [Skills overview](/en/docs

 ### Concise is key

-The [context window](/en/docs/build-with-claude/context-windows) is a public good. Your Skill shares the context window with everything else Claude needs to know, including:
+The [context window](https://platform.claude.com/docs/en/build-with-claude/context-windows) is a public good. Your Skill shares the context window with everything else Claude needs to know, including:

 * The system prompt
 * Conversation history
--- a/skills/testing-skills-with-subagents/examples/CLAUDE_MD_TESTING.md
+++ b/skills/testing-skills-with-subagents/examples/CLAUDE_MD_TESTING.md
--- a/skills/writing-skills/render-graphs.js
+++ b/skills/writing-skills/render-graphs.js
@@ -0,0 +1,168 @@
+#!/usr/bin/env node
+
+/**
+ * Render graphviz diagrams from a skill's SKILL.md to SVG files.
+ *
+ * Usage:
+ *   ./render-graphs.js <skill-directory>           # Render each diagram separately
+ *   ./render-graphs.js <skill-directory> --combine # Combine all into one diagram
+ *
+ * Extracts all ```dot blocks from SKILL.md and renders to SVG.
+ * Useful for helping your human partner visualize the process flows.
+ *
+ * Requires: graphviz (dot) installed on system
+ */
+
+const fs = require('fs');
+const path = require('path');
+const { execSync } = require('child_process');
+
+function extractDotBlocks(markdown) { // Collects every dot-fenced code block in the markdown text as { name, content }
+  const blocks = [];
+  const regex = /```dot\n([\s\S]*?)```/g; // lazy capture: ends at the first closing fence after an opening dot fence + LF
+  let match;
+
+  while ((match = regex.exec(markdown)) !== null) { // with the g flag, exec() resumes after the previous match
+    const content = match[1].trim();
+
+    // Name the block after its digraph identifier; fall back to graph_<n> (1-based) when no \w+ name follows "digraph"
+    const nameMatch = content.match(/digraph\s+(\w+)/);
+    const name = nameMatch ? nameMatch[1] : `graph_${blocks.length + 1}`;
+
+    blocks.push({ name, content });
+  }
+
+  return blocks; // in document order; empty array when nothing matched
+}
+
+function extractGraphBody(dotContent) { // Returns the text between a digraph's braces with rankdir lines removed
+  // Extract just the body (nodes and edges) from a digraph
+  const match = dotContent.match(/digraph\s+\w+\s*\{([\s\S]*)\}/); // greedy capture: runs to the last closing brace in the text
+  if (!match) return ''; // no named digraph found: empty body
+
+  let body = match[1];
+
+  // Remove rankdir (we'll set it once at the top level); only lines consisting solely of a rankdir assignment are blanked
+  body = body.replace(/^\s*rankdir\s*=\s*\w+\s*;?\s*$/gm, '');
+
+  return body.trim();
+}
+
+function combineGraphs(blocks, skillName) { // Builds one DOT digraph named <skillName>_combined from all blocks; returns the DOT source string
+  const bodies = blocks.map((block, i) => {
+    const body = extractGraphBody(block.content); // '' when the block has no named digraph
+    // Wrap each body in subgraph cluster_<i>, labelled with the block name, for visual grouping
+    return `  subgraph cluster_${i} {
+    label="${block.name}";
+    ${body.split('\n').map(line => '  ' + line).join('\n')}
+  }`;
+  }); // one cluster string per block, in input order
+
+  return `digraph ${skillName}_combined {
+  rankdir=TB;
+  compound=true;
+  newrank=true;
+
+${bodies.join('\n\n')}
+}`;
+}
+
+function renderToSvg(dotContent) { // Runs "dot -Tsvg" on the given DOT source; returns the SVG text, or null on failure
+  try {
+    return execSync('dot -Tsvg', {
+      input: dotContent, // DOT source is passed to the child process as its stdin
+      encoding: 'utf-8', // makes execSync return a string
+      maxBuffer: 10 * 1024 * 1024 // 10 MiB limit on captured output
+    });
+  } catch (err) {
+    console.error('Error running dot:', err.message);
+    if (err.stderr) console.error(err.stderr.toString());
+    return null; // callers check for a falsy result to detect a failed render
+  }
+}
+
+function main() { // CLI entry point: renders the dot blocks of <skill-directory>/SKILL.md into <skill-directory>/diagrams/
+  const args = process.argv.slice(2);
+  const combine = args.includes('--combine');
+  const skillDirArg = args.find(a => !a.startsWith('--')); // first argument that is not a -- flag
+
+  if (!skillDirArg) { // no directory given: print usage to stderr and exit 1
+    console.error('Usage: render-graphs.js <skill-directory> [--combine]');
+    console.error('');
+    console.error('Options:');
+    console.error('  --combine    Combine all diagrams into one SVG');
+    console.error('');
+    console.error('Example:');
+    console.error('  ./render-graphs.js ../subagent-driven-development');
+    console.error('  ./render-graphs.js ../subagent-driven-development --combine');
+    process.exit(1);
+  }
+
+  const skillDir = path.resolve(skillDirArg);
+  const skillFile = path.join(skillDir, 'SKILL.md');
+  const skillName = path.basename(skillDir).replace(/-/g, '_'); // hyphens become underscores; reused as the combined digraph name and file stem
+
+  if (!fs.existsSync(skillFile)) {
+    console.error(`Error: ${skillFile} not found`);
+    process.exit(1);
+  }
+
+  // Check if dot is available (execSync throws when the which command exits non-zero)
+  try {
+    execSync('which dot', { encoding: 'utf-8' });
+  } catch {
+    console.error('Error: graphviz (dot) not found. Install with:');
+    console.error('  brew install graphviz    # macOS');
+    console.error('  apt install graphviz     # Linux');
+    process.exit(1);
+  }
+
+  const markdown = fs.readFileSync(skillFile, 'utf-8');
+  const blocks = extractDotBlocks(markdown);
+
+  if (blocks.length === 0) {
+    console.log('No ```dot blocks found in', skillFile);
+    process.exit(0); // nothing to render is reported but not treated as an error
+  }
+
+  console.log(`Found ${blocks.length} diagram(s) in ${path.basename(skillDir)}/SKILL.md`);
+
+  const outputDir = path.join(skillDir, 'diagrams');
+  if (!fs.existsSync(outputDir)) {
+    fs.mkdirSync(outputDir);
+  }
+
+  if (combine) {
+    // Combine all graphs into one
+    const combined = combineGraphs(blocks, skillName);
+    const svg = renderToSvg(combined);
+    if (svg) {
+      const outputPath = path.join(outputDir, `${skillName}_combined.svg`);
+      fs.writeFileSync(outputPath, svg);
+      console.log(`  Rendered: ${skillName}_combined.svg`);
+
+      // Also write the dot source for debugging
+      const dotPath = path.join(outputDir, `${skillName}_combined.dot`);
+      fs.writeFileSync(dotPath, combined);
+      console.log(`  Source: ${skillName}_combined.dot`);
+    } else {
+      console.error('  Failed to render combined diagram'); // logged only: the exit status is not changed
+    }
+  } else {
+    // Render each separately; blocks that share a name write to the same <name>.svg path
+    for (const block of blocks) {
+      const svg = renderToSvg(block.content);
+      if (svg) {
+        const outputPath = path.join(outputDir, `${block.name}.svg`);
+        fs.writeFileSync(outputPath, svg);
+        console.log(`  Rendered: ${block.name}.svg`);
+      } else {
+        console.error(`  Failed: ${block.name}`); // logged only: the loop continues and the exit status is not changed
+      }
+    }
+  }
+
+  console.log(`\nOutput: ${outputDir}/`);
+}
+
+main();
--- a/skills/writing-skills/testing-skills-with-subagents.md
+++ b/skills/writing-skills/testing-skills-with-subagents.md
@@ -1,10 +1,7 @@
---
-name: testing-skills-with-subagents
-description: Use when creating or editing skills, before deployment, to verify they work under pressure and resist rationalization - applies RED-GREEN-REFACTOR cycle to process documentation by running baseline without skill, writing to address failures, iterating to close loopholes
---
-
 # Testing Skills With Subagents

+**Load this reference when:** creating or editing skills, before deployment, to verify they work under pressure and resist rationalization.
+
 ## Overview

 **Testing skills is just TDD applied to process documentation.**
--- a/tests/explicit-skill-requests/prompts/action-oriented.txt
+++ b/tests/explicit-skill-requests/prompts/action-oriented.txt
@@ -0,0 +1,3 @@
+The plan is done. docs/plans/auth-system.md has everything.
+
+Do subagent-driven development on this - start with Task 1, dispatch a subagent, then we'll review.
--- a/tests/explicit-skill-requests/prompts/after-planning-flow.txt
+++ b/tests/explicit-skill-requests/prompts/after-planning-flow.txt
@@ -0,0 +1,17 @@
+Great, the plan is complete. I've saved it to docs/plans/auth-system.md.
+
+Here's a summary of what we designed:
+- Task 1: Add User Model with email/password fields
+- Task 2: Create auth routes for login/register
+- Task 3: Add JWT middleware for protected routes
+- Task 4: Write tests for all auth functionality
+
+Two execution options:
+1. Subagent-Driven (this session) - dispatch a fresh subagent per task
+2. Parallel Session (separate) - open new Claude Code session
+
+Which approach do you want?
+
+---
+
+subagent-driven-development, please
--- a/tests/explicit-skill-requests/prompts/claude-suggested-it.txt
+++ b/tests/explicit-skill-requests/prompts/claude-suggested-it.txt
@@ -0,0 +1,11 @@
+[Previous assistant message]:
+Plan complete and saved to docs/plans/auth-system.md.
+
+Two execution options:
+1. Subagent-Driven (this session) - I dispatch a fresh subagent per task, review between tasks, fast iteration within this conversation
+2. Parallel Session (separate) - Open a new Claude Code session with the execute-plan skill, batch execution with review checkpoints
+
+Which approach do you want to use for implementation?
+
+[Your response]:
+subagent-driven-development, please
--- a/tests/explicit-skill-requests/prompts/i-know-what-sdd-means.txt
+++ b/tests/explicit-skill-requests/prompts/i-know-what-sdd-means.txt
@@ -0,0 +1,8 @@
+I have my implementation plan ready at docs/plans/auth-system.md.
+
+I want to use subagent-driven-development to execute it. That means:
+- Dispatch a fresh subagent for each task in the plan
+- Review the output between tasks
+- Keep iteration fast within this conversation
+
+Let's start - please read the plan and begin dispatching subagents for each task.
--- a/tests/explicit-skill-requests/prompts/mid-conversation-execute-plan.txt
+++ b/tests/explicit-skill-requests/prompts/mid-conversation-execute-plan.txt
@@ -0,0 +1,3 @@
+I have a plan at docs/plans/auth-system.md that's ready to implement.
+
+subagent-driven-development, please
--- a/tests/explicit-skill-requests/prompts/please-use-brainstorming.txt
+++ b/tests/explicit-skill-requests/prompts/please-use-brainstorming.txt
@@ -0,0 +1 @@
+please use the brainstorming skill to help me think through this feature
--- a/tests/explicit-skill-requests/prompts/skip-formalities.txt
+++ b/tests/explicit-skill-requests/prompts/skip-formalities.txt
@@ -0,0 +1,3 @@
+Plan is at docs/plans/auth-system.md.
+
+subagent-driven-development, please. Don't waste time - just read the plan and start dispatching subagents immediately.
--- a/tests/explicit-skill-requests/prompts/subagent-driven-development-please.txt
+++ b/tests/explicit-skill-requests/prompts/subagent-driven-development-please.txt
@@ -0,0 +1 @@
+subagent-driven-development, please
--- a/tests/explicit-skill-requests/prompts/use-systematic-debugging.txt
+++ b/tests/explicit-skill-requests/prompts/use-systematic-debugging.txt
@@ -0,0 +1 @@
+use systematic-debugging to figure out what's wrong
--- a/tests/explicit-skill-requests/run-all.sh
+++ b/tests/explicit-skill-requests/run-all.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Run all explicit skill request tests
+# Usage: ./run-all.sh
+#
+# Exits 1 if any test fails, 0 otherwise.
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROMPTS_DIR="$SCRIPT_DIR/prompts"
+
+echo "=== Running All Explicit Skill Request Tests ==="
+echo ""
+
+PASSED=0
+FAILED=0
+RESULTS=()
+TEST_NUM=0
+
+# run_case <skill-name> <prompt-name>
+# Runs run-test.sh on prompts/<prompt-name>.txt and records PASS/FAIL.
+# run-test.sh is called as an `if` condition so a failing test does not
+# abort this script under `set -e`.
+run_case() {
+    local skill="$1"
+    local name="$2"
+
+    TEST_NUM=$((TEST_NUM + 1))
+    echo ">>> Test $TEST_NUM: $name"
+    if "$SCRIPT_DIR/run-test.sh" "$skill" "$PROMPTS_DIR/$name.txt"; then
+        PASSED=$((PASSED + 1))
+        RESULTS+=("PASS: $name")
+    else
+        FAILED=$((FAILED + 1))
+        RESULTS+=("FAIL: $name")
+    fi
+    echo ""
+}
+
+run_case "subagent-driven-development" "subagent-driven-development-please"
+run_case "systematic-debugging" "use-systematic-debugging"
+run_case "brainstorming" "please-use-brainstorming"
+run_case "subagent-driven-development" "mid-conversation-execute-plan"
+
+echo "=== Summary ==="
+echo ""
+printf '%s\n' "${RESULTS[@]}"
+echo ""
+echo "Passed: $PASSED"
+echo "Failed: $FAILED"
+echo "Total: $((PASSED + FAILED))"
+
+if [ "$FAILED" -gt 0 ]; then
+    exit 1
+fi
--- a/tests/explicit-skill-requests/run-claude-describes-sdd.sh
+++ b/tests/explicit-skill-requests/run-claude-describes-sdd.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# Test where Claude explicitly describes subagent-driven-development before user requests it
+# This mimics the original failure scenario
+#
+# Exits 0 when the final turn's log shows the skill being invoked, 1 otherwise.
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+TIMESTAMP=$(date +%s)
+OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/claude-describes"
+mkdir -p "$OUTPUT_DIR"
+
+PROJECT_DIR="$OUTPUT_DIR/project"
+mkdir -p "$PROJECT_DIR/docs/plans"
+
+# Print stdin, or the placeholder in $1 when stdin is empty.
+# A trailing `|| echo ...` cannot do this: a pipeline's status is that of its
+# last command (sort/head), which succeeds even when it received no input.
+show_or() {
+    local text
+    text=$(cat)
+    printf '%s\n' "${text:-$1}"
+}
+
+echo "=== Test: Claude Describes SDD First ==="
+echo "Output dir: $OUTPUT_DIR"
+echo ""
+
+cd "$PROJECT_DIR"
+
+# Create a plan
+cat > "$PROJECT_DIR/docs/plans/auth-system.md" << 'EOF'
+# Auth System Implementation Plan
+
+## Task 1: Add User Model
+Create user model with email and password fields.
+
+## Task 2: Add Auth Routes
+Create login and register endpoints.
+
+## Task 3: Add JWT Middleware
+Protect routes with JWT validation.
+EOF
+
+# Turn 1: Have Claude describe execution options including SDD
+echo ">>> Turn 1: Ask Claude to describe execution options..."
+claude -p "I have a plan at docs/plans/auth-system.md. Tell me about my options for executing it, including what subagent-driven-development means and how it works." \
+    --model haiku \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 3 \
+    --output-format stream-json \
+    > "$OUTPUT_DIR/turn1.json" 2>&1 || true
+echo "Done."
+
+# Turn 2: THE CRITICAL TEST - now that Claude has explained it
+echo ">>> Turn 2: Request subagent-driven-development..."
+FINAL_LOG="$OUTPUT_DIR/turn2.json"
+claude -p "subagent-driven-development, please" \
+    --continue \
+    --model haiku \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 2 \
+    --output-format stream-json \
+    > "$FINAL_LOG" 2>&1 || true
+echo "Done."
+echo ""
+
+echo "=== Results ==="
+
+# Check Turn 1 to see if Claude described SDD
+echo "Turn 1 - Claude's description of options (excerpt):"
+grep '"type":"assistant"' "$OUTPUT_DIR/turn1.json" | head -1 | jq -r '.message.content[0].text // .message.content' 2>/dev/null | head -c 800 | show_or "  (could not extract)"
+echo ""
+echo "---"
+echo ""
+
+# Check final turn
+SKILL_PATTERN='"skill":"([^"]*:)?subagent-driven-development"'
+if grep -q '"name":"Skill"' "$FINAL_LOG" && grep -qE "$SKILL_PATTERN" "$FINAL_LOG"; then
+    echo "PASS: Skill was triggered after Claude described it"
+    TRIGGERED=true
+else
+    echo "FAIL: Skill was NOT triggered (Claude may have thought it already knew)"
+    TRIGGERED=false
+
+    echo ""
+    echo "Tools invoked in final turn:"
+    grep '"type":"tool_use"' "$FINAL_LOG" | grep -o '"name":"[^"]*"' | sort -u | head -10 | show_or "  (none)"
+
+    echo ""
+    echo "Final turn response:"
+    grep '"type":"assistant"' "$FINAL_LOG" | head -1 | jq -r '.message.content[0].text // .message.content' 2>/dev/null | head -c 800 | show_or "  (could not extract)"
+fi
+
+echo ""
+echo "Skills triggered in final turn:"
+grep -o '"skill":"[^"]*"' "$FINAL_LOG" 2>/dev/null | sort -u | show_or "  (none)"
+
+echo ""
+echo "Logs in: $OUTPUT_DIR"
+
+if [ "$TRIGGERED" = "true" ]; then
+    exit 0
+else
+    exit 1
+fi
--- a/tests/explicit-skill-requests/run-extended-multiturn-test.sh
+++ b/tests/explicit-skill-requests/run-extended-multiturn-test.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# Extended multi-turn test with more conversation history
+# This tries to reproduce the failure by building more context
+#
+# Exits 0 when the final turn's log shows the skill being invoked, 1 otherwise.
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+TIMESTAMP=$(date +%s)
+OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/extended-multiturn"
+mkdir -p "$OUTPUT_DIR"
+
+PROJECT_DIR="$OUTPUT_DIR/project"
+mkdir -p "$PROJECT_DIR/docs/plans"
+
+# Print stdin, or the placeholder in $1 when stdin is empty.
+# A trailing `|| echo ...` cannot do this: a pipeline's status is that of its
+# last command (sort/head), which succeeds even when it received no input.
+show_or() {
+    local text
+    text=$(cat)
+    printf '%s\n' "${text:-$1}"
+}
+
+echo "=== Extended Multi-Turn Test ==="
+echo "Output dir: $OUTPUT_DIR"
+echo "Plugin dir: $PLUGIN_DIR"
+echo ""
+
+cd "$PROJECT_DIR"
+
+# Turn 1: Start brainstorming
+echo ">>> Turn 1: Brainstorming request..."
+claude -p "I want to add user authentication to my app. Help me think through this." \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 3 \
+    --output-format stream-json \
+    > "$OUTPUT_DIR/turn1.json" 2>&1 || true
+echo "Done."
+
+# Turn 2: Answer a brainstorming question
+echo ">>> Turn 2: Answering questions..."
+claude -p "Let's use JWT tokens with 24-hour expiry. Email/password registration." \
+    --continue \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 3 \
+    --output-format stream-json \
+    > "$OUTPUT_DIR/turn2.json" 2>&1 || true
+echo "Done."
+
+# Turn 3: Ask to write a plan
+echo ">>> Turn 3: Requesting plan..."
+claude -p "Great, write this up as an implementation plan." \
+    --continue \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 3 \
+    --output-format stream-json \
+    > "$OUTPUT_DIR/turn3.json" 2>&1 || true
+echo "Done."
+
+# Turn 4: Confirm plan looks good
+echo ">>> Turn 4: Confirming plan..."
+claude -p "The plan looks good. What are my options for executing it?" \
+    --continue \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 2 \
+    --output-format stream-json \
+    > "$OUTPUT_DIR/turn4.json" 2>&1 || true
+echo "Done."
+
+# Turn 5: THE CRITICAL TEST
+echo ">>> Turn 5: Requesting subagent-driven-development..."
+FINAL_LOG="$OUTPUT_DIR/turn5.json"
+claude -p "subagent-driven-development, please" \
+    --continue \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 2 \
+    --output-format stream-json \
+    > "$FINAL_LOG" 2>&1 || true
+echo "Done."
+echo ""
+
+echo "=== Results ==="
+
+# Check final turn
+SKILL_PATTERN='"skill":"([^"]*:)?subagent-driven-development"'
+if grep -q '"name":"Skill"' "$FINAL_LOG" && grep -qE "$SKILL_PATTERN" "$FINAL_LOG"; then
+    echo "PASS: Skill was triggered"
+    TRIGGERED=true
+else
+    echo "FAIL: Skill was NOT triggered"
+    TRIGGERED=false
+
+    # Show what was invoked instead
+    echo ""
+    echo "Tools invoked in final turn:"
+    # Content blocks are read from .message.content, as in the response extraction below
+    TOOLS=$(grep '"type":"tool_use"' "$FINAL_LOG" | jq -r '.message.content[] | select(.type=="tool_use") | .name' 2>/dev/null | head -10)
+    if [ -z "$TOOLS" ]; then
+        # jq unavailable or log not parsable: fall back to raw "name" fields
+        TOOLS=$(grep -o '"name":"[^"]*"' "$FINAL_LOG" | head -10)
+    fi
+    printf '%s\n' "${TOOLS:-  (none found)}"
+fi
+
+echo ""
+echo "Skills triggered:"
+grep -o '"skill":"[^"]*"' "$FINAL_LOG" 2>/dev/null | sort -u | show_or "  (none)"
+
+echo ""
+echo "Final turn response (first 500 chars):"
+grep '"type":"assistant"' "$FINAL_LOG" | head -1 | jq -r '.message.content[0].text // .message.content' 2>/dev/null | head -c 500 | show_or "  (could not extract)"
+
+echo ""
+echo "Logs in: $OUTPUT_DIR"
+
+if [ "$TRIGGERED" = "true" ]; then
+    exit 0
+else
+    exit 1
+fi
--- a/tests/explicit-skill-requests/run-haiku-test.sh
+++ b/tests/explicit-skill-requests/run-haiku-test.sh
@@ -0,0 +1,155 @@
+#!/bin/bash
+# Test with haiku model and user's CLAUDE.md
+# This tests whether a cheaper/faster model fails more easily
+#
+# Exits 0 when the final turn's log shows the skill being invoked, 1 otherwise.
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+TIMESTAMP=$(date +%s)
+OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/haiku"
+mkdir -p "$OUTPUT_DIR"
+
+PROJECT_DIR="$OUTPUT_DIR/project"
+mkdir -p "$PROJECT_DIR/docs/plans"
+mkdir -p "$PROJECT_DIR/.claude"
+
+# Print stdin, or the placeholder in $1 when stdin is empty.
+# A trailing `|| echo ...` cannot do this: a pipeline's status is that of its
+# last command (sort/head), which succeeds even when it received no input.
+show_or() {
+    local text
+    text=$(cat)
+    printf '%s\n' "${text:-$1}"
+}
+
+echo "=== Haiku Model Test with User CLAUDE.md ==="
+echo "Output dir: $OUTPUT_DIR"
+echo "Plugin dir: $PLUGIN_DIR"
+echo ""
+
+cd "$PROJECT_DIR"
+
+# Copy user's CLAUDE.md to simulate real environment
+if [ -f "$HOME/.claude/CLAUDE.md" ]; then
+    cp "$HOME/.claude/CLAUDE.md" "$PROJECT_DIR/.claude/CLAUDE.md"
+    echo "Copied user CLAUDE.md"
+else
+    echo "No user CLAUDE.md found, proceeding without"
+fi
+
+# Create a dummy plan file
+cat > "$PROJECT_DIR/docs/plans/auth-system.md" << 'EOF'
+# Auth System Implementation Plan
+
+## Task 1: Add User Model
+Create user model with email and password fields.
+
+## Task 2: Add Auth Routes
+Create login and register endpoints.
+
+## Task 3: Add JWT Middleware
+Protect routes with JWT validation.
+
+## Task 4: Write Tests
+Add comprehensive test coverage.
+EOF
+
+echo ""
+
+# Turn 1: Start brainstorming
+echo ">>> Turn 1: Brainstorming request..."
+claude -p "I want to add user authentication to my app. Help me think through this." \
+    --model haiku \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 3 \
+    --output-format stream-json \
+    > "$OUTPUT_DIR/turn1.json" 2>&1 || true
+echo "Done."
+
+# Turn 2: Answer questions
+echo ">>> Turn 2: Answering questions..."
+claude -p "Let's use JWT tokens with 24-hour expiry. Email/password registration." \
+    --continue \
+    --model haiku \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 3 \
+    --output-format stream-json \
+    > "$OUTPUT_DIR/turn2.json" 2>&1 || true
+echo "Done."
+
+# Turn 3: Ask to write a plan
+echo ">>> Turn 3: Requesting plan..."
+claude -p "Great, write this up as an implementation plan." \
+    --continue \
+    --model haiku \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 3 \
+    --output-format stream-json \
+    > "$OUTPUT_DIR/turn3.json" 2>&1 || true
+echo "Done."
+
+# Turn 4: Confirm plan looks good
+echo ">>> Turn 4: Confirming plan..."
+claude -p "The plan looks good. What are my options for executing it?" \
+    --continue \
+    --model haiku \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 2 \
+    --output-format stream-json \
+    > "$OUTPUT_DIR/turn4.json" 2>&1 || true
+echo "Done."
+
+# Turn 5: THE CRITICAL TEST
+echo ">>> Turn 5: Requesting subagent-driven-development..."
+FINAL_LOG="$OUTPUT_DIR/turn5.json"
+claude -p "subagent-driven-development, please" \
+    --continue \
+    --model haiku \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 2 \
+    --output-format stream-json \
+    > "$FINAL_LOG" 2>&1 || true
+echo "Done."
+echo ""
+
+echo "=== Results (Haiku) ==="
+
+# Check final turn
+SKILL_PATTERN='"skill":"([^"]*:)?subagent-driven-development"'
+if grep -q '"name":"Skill"' "$FINAL_LOG" && grep -qE "$SKILL_PATTERN" "$FINAL_LOG"; then
+    echo "PASS: Skill was triggered"
+    TRIGGERED=true
+else
+    echo "FAIL: Skill was NOT triggered"
+    TRIGGERED=false
+
+    echo ""
+    echo "Tools invoked in final turn:"
+    grep '"type":"tool_use"' "$FINAL_LOG" | grep -o '"name":"[^"]*"' | head -10 | show_or "  (none)"
+fi
+
+echo ""
+echo "Skills triggered:"
+grep -o '"skill":"[^"]*"' "$FINAL_LOG" 2>/dev/null | sort -u | show_or "  (none)"
+
+echo ""
+echo "Final turn response (first 500 chars):"
+grep '"type":"assistant"' "$FINAL_LOG" | head -1 | jq -r '.message.content[0].text // .message.content' 2>/dev/null | head -c 500 | show_or "  (could not extract)"
+
+echo ""
+echo "Logs in: $OUTPUT_DIR"
+
+if [ "$TRIGGERED" = "true" ]; then
+    exit 0
+else
+    exit 1
+fi
--- a/tests/explicit-skill-requests/run-multiturn-test.sh
+++ b/tests/explicit-skill-requests/run-multiturn-test.sh
@@ -0,0 +1,154 @@
+#!/bin/bash
+# Test explicit skill requests in multi-turn conversations
+# Usage: ./run-multiturn-test.sh
+#
+# This test builds actual conversation history to reproduce the failure mode
+# where Claude skips skill invocation after extended conversation
+#
+# Exits 0 when the Turn 3 log shows the skill being invoked, 1 otherwise.
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+TIMESTAMP=$(date +%s)
+OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/multiturn"
+mkdir -p "$OUTPUT_DIR"
+
+# Create project directory (conversation is cwd-based)
+PROJECT_DIR="$OUTPUT_DIR/project"
+mkdir -p "$PROJECT_DIR/docs/plans"
+
+# Print stdin, or the placeholder in $1 when stdin is empty.
+# A trailing `|| echo ...` cannot do this: a pipeline's status is that of its
+# last command (sort/head), which succeeds even when it received no input.
+show_or() {
+    local text
+    text=$(cat)
+    printf '%s\n' "${text:-$1}"
+}
+
+echo "=== Multi-Turn Explicit Skill Request Test ==="
+echo "Output dir: $OUTPUT_DIR"
+echo "Project dir: $PROJECT_DIR"
+echo "Plugin dir: $PLUGIN_DIR"
+echo ""
+
+cd "$PROJECT_DIR"
+
+# Create a dummy plan file
+cat > "$PROJECT_DIR/docs/plans/auth-system.md" << 'EOF'
+# Auth System Implementation Plan
+
+## Task 1: Add User Model
+Create user model with email and password fields.
+
+## Task 2: Add Auth Routes
+Create login and register endpoints.
+
+## Task 3: Add JWT Middleware
+Protect routes with JWT validation.
+
+## Task 4: Write Tests
+Add comprehensive test coverage.
+EOF
+
+# Turn 1: Start a planning conversation
+echo ">>> Turn 1: Starting planning conversation..."
+TURN1_LOG="$OUTPUT_DIR/turn1.json"
+claude -p "I need to implement an authentication system. Let's plan this out. The requirements are: user registration with email/password, JWT tokens, and protected routes." \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 2 \
+    --output-format stream-json \
+    > "$TURN1_LOG" 2>&1 || true
+
+echo "Turn 1 complete."
+echo ""
+
+# Turn 2: Continue with more planning detail
+echo ">>> Turn 2: Continuing planning..."
+TURN2_LOG="$OUTPUT_DIR/turn2.json"
+claude -p "Good analysis. I've already written the plan to docs/plans/auth-system.md. Now I'm ready to implement. What are my options for execution?" \
+    --continue \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 2 \
+    --output-format stream-json \
+    > "$TURN2_LOG" 2>&1 || true
+
+echo "Turn 2 complete."
+echo ""
+
+# Turn 3: The critical test - ask for subagent-driven-development
+echo ">>> Turn 3: Requesting subagent-driven-development..."
+TURN3_LOG="$OUTPUT_DIR/turn3.json"
+claude -p "subagent-driven-development, please" \
+    --continue \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns 2 \
+    --output-format stream-json \
+    > "$TURN3_LOG" 2>&1 || true
+
+echo "Turn 3 complete."
+echo ""
+
+echo "=== Results ==="
+
+# Check if skill was triggered in Turn 3
+SKILL_PATTERN='"skill":"([^"]*:)?subagent-driven-development"'
+if grep -q '"name":"Skill"' "$TURN3_LOG" && grep -qE "$SKILL_PATTERN" "$TURN3_LOG"; then
+    echo "PASS: Skill 'subagent-driven-development' was triggered in Turn 3"
+    TRIGGERED=true
+else
+    echo "FAIL: Skill 'subagent-driven-development' was NOT triggered in Turn 3"
+    TRIGGERED=false
+fi
+
+# Show what skills were triggered
+echo ""
+echo "Skills triggered in Turn 3:"
+grep -o '"skill":"[^"]*"' "$TURN3_LOG" 2>/dev/null | sort -u | show_or "  (none)"
+
+# Check for premature action in Turn 3
+echo ""
+echo "Checking for premature action in Turn 3..."
+FIRST_SKILL_LINE=$(grep -n '"name":"Skill"' "$TURN3_LOG" | head -1 | cut -d: -f1)
+if [ -n "$FIRST_SKILL_LINE" ]; then
+    PREMATURE_TOOLS=$(head -n "$FIRST_SKILL_LINE" "$TURN3_LOG" | \
+        grep '"type":"tool_use"' | \
+        grep -v '"name":"Skill"' | \
+        grep -v '"name":"TodoWrite"' || true)
+    if [ -n "$PREMATURE_TOOLS" ]; then
+        echo "WARNING: Tools invoked BEFORE Skill tool in Turn 3:"
+        echo "$PREMATURE_TOOLS" | head -5
+    else
+        echo "OK: No premature tool invocations detected"
+    fi
+else
+    echo "WARNING: No Skill invocation found in Turn 3"
+    # Show what WAS invoked
+    echo ""
+    echo "Tools invoked in Turn 3:"
+    grep '"type":"tool_use"' "$TURN3_LOG" | grep -o '"name":"[^"]*"' | head -10 | show_or "  (none)"
+fi
+
+# Show Turn 3 assistant response
+echo ""
+echo "Turn 3 first assistant response (truncated):"
+grep '"type":"assistant"' "$TURN3_LOG" | head -1 | jq -r '.message.content[0].text // .message.content' 2>/dev/null | head -c 500 | show_or "  (could not extract)"
+
+echo ""
+echo "Logs:"
+echo "  Turn 1: $TURN1_LOG"
+echo "  Turn 2: $TURN2_LOG"
+echo "  Turn 3: $TURN3_LOG"
+echo "Timestamp: $TIMESTAMP"
+
+if [ "$TRIGGERED" = "true" ]; then
+    exit 0
+else
+    exit 1
+fi
--- a/tests/explicit-skill-requests/run-test.sh
+++ b/tests/explicit-skill-requests/run-test.sh
@@ -0,0 +1,152 @@
+#!/bin/bash
+# Test explicit skill requests (user names a skill directly)
+# Usage: ./run-test.sh <skill-name> <prompt-file> [max-turns]
+#
+# Tests whether Claude invokes a skill when the user explicitly requests it by name
+# (without using the plugin namespace prefix)
+#
+# Runs from a throwaway project directory under /tmp. HOME is not modified.
+#
+# Exits 0 when the log shows the requested skill being invoked, 1 otherwise.
+
+set -e
+
+SKILL_NAME="$1"
+PROMPT_FILE="$2"
+MAX_TURNS="${3:-3}"
+
+if [ -z "$SKILL_NAME" ] || [ -z "$PROMPT_FILE" ]; then
+    echo "Usage: $0 <skill-name> <prompt-file> [max-turns]"
+    echo "Example: $0 subagent-driven-development ./prompts/subagent-driven-development-please.txt"
+    exit 1
+fi
+
+if [ ! -f "$PROMPT_FILE" ]; then
+    echo "Error: prompt file not found: $PROMPT_FILE"
+    exit 1
+fi
+
+# Get the directory where this script lives
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# Get the superpowers plugin root (two levels up)
+PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+TIMESTAMP=$(date +%s)
+OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/${SKILL_NAME}"
+mkdir -p "$OUTPUT_DIR"
+
+# Print stdin, or the placeholder in $1 when stdin is empty.
+# A trailing `|| echo ...` cannot do this: a pipeline's status is that of its
+# last command (sort/head), which succeeds even when it received no input.
+show_or() {
+    local text
+    text=$(cat)
+    printf '%s\n' "${text:-$1}"
+}
+
+# Read prompt from file
+PROMPT=$(cat "$PROMPT_FILE")
+
+echo "=== Explicit Skill Request Test ==="
+echo "Skill: $SKILL_NAME"
+echo "Prompt file: $PROMPT_FILE"
+echo "Max turns: $MAX_TURNS"
+echo "Output dir: $OUTPUT_DIR"
+echo ""
+
+# Copy prompt for reference
+cp "$PROMPT_FILE" "$OUTPUT_DIR/prompt.txt"
+
+# Create a minimal project directory for the test
+PROJECT_DIR="$OUTPUT_DIR/project"
+mkdir -p "$PROJECT_DIR/docs/plans"
+
+# Create a dummy plan file for mid-conversation tests
+cat > "$PROJECT_DIR/docs/plans/auth-system.md" << 'EOF'
+# Auth System Implementation Plan
+
+## Task 1: Add User Model
+Create user model with email and password fields.
+
+## Task 2: Add Auth Routes
+Create login and register endpoints.
+
+## Task 3: Add JWT Middleware
+Protect routes with JWT validation.
+EOF
+
+# Run Claude from the scratch project directory
+LOG_FILE="$OUTPUT_DIR/claude-output.json"
+cd "$PROJECT_DIR"
+
+echo "Plugin dir: $PLUGIN_DIR"
+echo "Running claude -p with explicit skill request..."
+echo "Prompt: $PROMPT"
+echo ""
+
+timeout 300 claude -p "$PROMPT" \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns "$MAX_TURNS" \
+    --output-format stream-json \
+    > "$LOG_FILE" 2>&1 || true
+
+echo ""
+echo "=== Results ==="
+
+# Check if skill was triggered (look for Skill tool invocation)
+# Match either "skill":"skillname" or "skill":"namespace:skillname"
+SKILL_PATTERN='"skill":"([^"]*:)?'"${SKILL_NAME}"'"'
+if grep -q '"name":"Skill"' "$LOG_FILE" && grep -qE "$SKILL_PATTERN" "$LOG_FILE"; then
+    echo "PASS: Skill '$SKILL_NAME' was triggered"
+    TRIGGERED=true
+else
+    echo "FAIL: Skill '$SKILL_NAME' was NOT triggered"
+    TRIGGERED=false
+fi
+
+# Show what skills WERE triggered
+echo ""
+echo "Skills triggered in this run:"
+grep -o '"skill":"[^"]*"' "$LOG_FILE" 2>/dev/null | sort -u | show_or "  (none)"
+
+# Check if Claude took action BEFORE invoking the skill (the failure mode)
+echo ""
+echo "Checking for premature action..."
+
+# Look for tool invocations before the Skill invocation
+# This detects the failure mode where Claude starts doing work without loading the skill
+FIRST_SKILL_LINE=$(grep -n '"name":"Skill"' "$LOG_FILE" | head -1 | cut -d: -f1)
+if [ -n "$FIRST_SKILL_LINE" ]; then
+    # Check if any non-Skill, non-system tools were invoked before the first Skill invocation
+    # Filter out system messages, TodoWrite (planning is ok), and other non-action tools
+    PREMATURE_TOOLS=$(head -n "$FIRST_SKILL_LINE" "$LOG_FILE" | \
+        grep '"type":"tool_use"' | \
+        grep -v '"name":"Skill"' | \
+        grep -v '"name":"TodoWrite"' || true)
+    if [ -n "$PREMATURE_TOOLS" ]; then
+        echo "WARNING: Tools invoked BEFORE Skill tool:"
+        echo "$PREMATURE_TOOLS" | head -5
+        echo ""
+        echo "This indicates Claude started working before loading the requested skill."
+    else
+        echo "OK: No premature tool invocations detected"
+    fi
+else
+    echo "WARNING: No Skill invocation found at all"
+fi
+
+# Show first assistant message
+echo ""
+echo "First assistant response (truncated):"
+grep '"type":"assistant"' "$LOG_FILE" | head -1 | jq -r '.message.content[0].text // .message.content' 2>/dev/null | head -c 500 | show_or "  (could not extract)"
+
+echo ""
+echo "Full log: $LOG_FILE"
+echo "Timestamp: $TIMESTAMP"
+
+if [ "$TRIGGERED" = "true" ]; then
+    exit 0
+else
+    exit 1
+fi
--- a/tests/skill-triggering/prompts/dispatching-parallel-agents.txt
+++ b/tests/skill-triggering/prompts/dispatching-parallel-agents.txt
@@ -0,0 +1,8 @@
+I have 4 independent test failures happening in different modules:
+
+1. tests/auth/login.test.ts - "should redirect after login" is failing
+2. tests/api/users.test.ts - "should return user list" returns 500
+3. tests/components/Button.test.tsx - snapshot mismatch
+4. tests/utils/date.test.ts - timezone handling broken
+
+These are unrelated issues in different parts of the codebase. Can you investigate all of them?
--- a/tests/skill-triggering/prompts/executing-plans.txt
+++ b/tests/skill-triggering/prompts/executing-plans.txt
@@ -0,0 +1 @@
+I have a plan document at docs/plans/2024-01-15-auth-system.md that needs to be executed. Please implement it.
--- a/tests/skill-triggering/prompts/requesting-code-review.txt
+++ b/tests/skill-triggering/prompts/requesting-code-review.txt
@@ -0,0 +1,3 @@
+I just finished implementing the user authentication feature. All the code is committed. Can you review the changes before I merge to main?
+
+The commits are between abc123 and def456.
--- a/tests/skill-triggering/prompts/systematic-debugging.txt
+++ b/tests/skill-triggering/prompts/systematic-debugging.txt
@@ -0,0 +1,11 @@
+The tests are failing with this error:
+
+```
+FAIL src/utils/parser.test.ts
+  ● Parser › should handle nested objects
+    TypeError: Cannot read property 'value' of undefined
+      at parse (src/utils/parser.ts:42:18)
+      at Object.<anonymous> (src/utils/parser.test.ts:28:20)
+```
+
+Can you figure out what's going wrong and fix it?
--- a/tests/skill-triggering/prompts/test-driven-development.txt
+++ b/tests/skill-triggering/prompts/test-driven-development.txt
@@ -0,0 +1,7 @@
+I need to add a new feature to validate email addresses. It should:
+- Check that there's an @ symbol
+- Check that there's at least one character before the @
+- Check that there's a dot in the domain part
+- Return true/false
+
+Can you implement this?
--- a/tests/skill-triggering/prompts/writing-plans.txt
+++ b/tests/skill-triggering/prompts/writing-plans.txt
@@ -0,0 +1,10 @@
+Here's the spec for our new authentication system:
+
+Requirements:
+- Users can register with email/password
+- Users can log in and receive a JWT token
+- Protected routes require valid JWT
+- Tokens expire after 24 hours
+- Support password reset via email
+
+We need to implement this. There are multiple steps involved - user model, auth routes, middleware, email service integration.
--- a/tests/skill-triggering/run-all.sh
+++ b/tests/skill-triggering/run-all.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# Run all skill triggering tests
+# Usage: ./run-all.sh
+
+# pipefail is required: each test is piped through tee below, and without it
+# the pipeline's status is tee's (0), so every test would be recorded as passed.
+set -eo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROMPTS_DIR="$SCRIPT_DIR/prompts"
+
+SKILLS=(
+    "systematic-debugging"
+    "test-driven-development"
+    "writing-plans"
+    "dispatching-parallel-agents"
+    "executing-plans"
+    "requesting-code-review"
+)
+
+echo "=== Running Skill Triggering Tests ==="
+echo ""
+
+PASSED=0
+FAILED=0
+RESULTS=()
+
+for skill in "${SKILLS[@]}"; do
+    prompt_file="$PROMPTS_DIR/${skill}.txt"
+
+    if [ ! -f "$prompt_file" ]; then
+        echo "⚠️  SKIP: No prompt file for $skill"
+        continue
+    fi
+
+    echo "Testing: $skill"
+
+    if "$SCRIPT_DIR/run-test.sh" "$skill" "$prompt_file" 3 2>&1 | tee "/tmp/skill-test-$skill.log"; then
+        PASSED=$((PASSED + 1))
+        RESULTS+=("✅ $skill")
+    else
+        FAILED=$((FAILED + 1))
+        RESULTS+=("❌ $skill")
+    fi
+
+    echo ""
+    echo "---"
+    echo ""
+done
+
+echo ""
+echo "=== Summary ==="
+for result in "${RESULTS[@]}"; do
+    echo "  $result"
+done
+echo ""
+echo "Passed: $PASSED"
+echo "Failed: $FAILED"
+
+if [ $FAILED -gt 0 ]; then
+    exit 1
+fi
--- a/tests/skill-triggering/run-test.sh
+++ b/tests/skill-triggering/run-test.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# Test skill triggering with naive prompts
+# Usage: ./run-test.sh <skill-name> <prompt-file> [max-turns]
+#
+# Tests whether Claude triggers a skill based on a natural prompt
+# (without explicitly mentioning the skill)
+#
+# Exits 0 when the log shows the expected skill being invoked, 1 otherwise.
+
+set -e
+
+SKILL_NAME="$1"
+PROMPT_FILE="$2"
+MAX_TURNS="${3:-3}"
+
+if [ -z "$SKILL_NAME" ] || [ -z "$PROMPT_FILE" ]; then
+    echo "Usage: $0 <skill-name> <prompt-file> [max-turns]"
+    echo "Example: $0 systematic-debugging ./prompts/systematic-debugging.txt"
+    exit 1
+fi
+
+if [ ! -f "$PROMPT_FILE" ]; then
+    echo "Error: prompt file not found: $PROMPT_FILE"
+    exit 1
+fi
+
+# Get the directory where this script lives (should be tests/skill-triggering)
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# Get the superpowers plugin root (two levels up from tests/skill-triggering)
+PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+TIMESTAMP=$(date +%s)
+OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/skill-triggering/${SKILL_NAME}"
+mkdir -p "$OUTPUT_DIR"
+
+# Print stdin, or the placeholder in $1 when stdin is empty.
+# A trailing `|| echo ...` cannot do this: a pipeline's status is that of its
+# last command (sort/head), which succeeds even when it received no input.
+show_or() {
+    local text
+    text=$(cat)
+    printf '%s\n' "${text:-$1}"
+}
+
+# Read prompt from file
+PROMPT=$(cat "$PROMPT_FILE")
+
+echo "=== Skill Triggering Test ==="
+echo "Skill: $SKILL_NAME"
+echo "Prompt file: $PROMPT_FILE"
+echo "Max turns: $MAX_TURNS"
+echo "Output dir: $OUTPUT_DIR"
+echo ""
+
+# Copy prompt for reference
+cp "$PROMPT_FILE" "$OUTPUT_DIR/prompt.txt"
+
+# Run Claude
+LOG_FILE="$OUTPUT_DIR/claude-output.json"
+cd "$OUTPUT_DIR"
+
+echo "Plugin dir: $PLUGIN_DIR"
+echo "Running claude -p with naive prompt..."
+timeout 300 claude -p "$PROMPT" \
+    --plugin-dir "$PLUGIN_DIR" \
+    --dangerously-skip-permissions \
+    --max-turns "$MAX_TURNS" \
+    --output-format stream-json \
+    > "$LOG_FILE" 2>&1 || true
+
+echo ""
+echo "=== Results ==="
+
+# Check if skill was triggered (look for Skill tool invocation)
+# In stream-json, tool invocations have "name":"Skill" (not "tool":"Skill")
+# Match either "skill":"skillname" or "skill":"namespace:skillname"
+SKILL_PATTERN='"skill":"([^"]*:)?'"${SKILL_NAME}"'"'
+if grep -q '"name":"Skill"' "$LOG_FILE" && grep -qE "$SKILL_PATTERN" "$LOG_FILE"; then
+    echo "✅ PASS: Skill '$SKILL_NAME' was triggered"
+    TRIGGERED=true
+else
+    echo "❌ FAIL: Skill '$SKILL_NAME' was NOT triggered"
+    TRIGGERED=false
+fi
+
+# Show what skills WERE triggered
+echo ""
+echo "Skills triggered in this run:"
+grep -o '"skill":"[^"]*"' "$LOG_FILE" 2>/dev/null | sort -u | show_or "  (none)"
+
+# Show first assistant message
+echo ""
+echo "First assistant response (truncated):"
+grep '"type":"assistant"' "$LOG_FILE" | head -1 | jq -r '.message.content[0].text // .message.content' 2>/dev/null | head -c 500 | show_or "  (could not extract)"
+
+echo ""
+echo "Full log: $LOG_FILE"
+echo "Timestamp: $TIMESTAMP"
+
+if [ "$TRIGGERED" = "true" ]; then
+    exit 0
+else
+    exit 1
+fi
--- a/tests/subagent-driven-dev/go-fractals/design.md
+++ b/tests/subagent-driven-dev/go-fractals/design.md
@@ -0,0 +1,81 @@
+# Go Fractals CLI - Design
+
+## Overview
+
+A command-line tool that generates ASCII art fractals. Supports two fractal types with configurable output.
+
+## Usage
+
+```bash
+# Sierpinski triangle
+fractals sierpinski --size 32 --depth 5
+
+# Mandelbrot set
+fractals mandelbrot --width 80 --height 24 --iterations 100
+
+# Custom character
+fractals sierpinski --size 16 --char '#'
+
+# Help
+fractals --help
+fractals sierpinski --help
+```
+
+## Commands
+
+### `sierpinski`
+
+Generates a Sierpinski triangle using recursive subdivision.
+
+Flags:
+- `--size` (default: 32) - Width of the triangle base in characters
+- `--depth` (default: 5) - Recursion depth
+- `--char` (default: '*') - Character to use for filled points
+
+Output: Triangle printed to stdout, one line per row.
+
+### `mandelbrot`
+
+Renders the Mandelbrot set as ASCII art. Maps iteration count to characters.
+
+Flags:
+- `--width` (default: 80) - Output width in characters
+- `--height` (default: 24) - Output height in characters
+- `--iterations` (default: 100) - Maximum iterations for escape calculation
+- `--char` (default: gradient) - Single character, or omit for gradient " .:-=+*#%@"
+
+Output: Rectangle printed to stdout.
+
+## Architecture
+
+```
+cmd/
+  fractals/
+    main.go           # Entry point, CLI setup
+internal/
+  sierpinski/
+    sierpinski.go     # Algorithm
+    sierpinski_test.go
+  mandelbrot/
+    mandelbrot.go     # Algorithm
+    mandelbrot_test.go
+  cli/
+    root.go           # Root command, help
+    sierpinski.go     # Sierpinski subcommand
+    mandelbrot.go     # Mandelbrot subcommand
+```
+
+## Dependencies
+
+- Go 1.21+
+- `github.com/spf13/cobra` for CLI
+
+## Acceptance Criteria
+
+1. `fractals --help` shows usage
+2. `fractals sierpinski` outputs a recognizable triangle
+3. `fractals mandelbrot` outputs a recognizable Mandelbrot set
+4. `--size`, `--width`, `--height`, `--depth`, `--iterations` flags work
+5. `--char` customizes output character
+6. Invalid inputs produce clear error messages
+7. All tests pass
--- a/tests/subagent-driven-dev/go-fractals/plan.md
+++ b/tests/subagent-driven-dev/go-fractals/plan.md
@@ -0,0 +1,172 @@
+# Go Fractals CLI - Implementation Plan
+
+Execute this plan using the `superpowers:subagent-driven-development` skill.
+
+## Context
+
+Building a CLI tool that generates ASCII fractals. See `design.md` for full specification.
+
+## Tasks
+
+### Task 1: Project Setup
+
+Create the Go module and directory structure.
+
+**Do:**
+- Initialize `go.mod` with module name `github.com/superpowers-test/fractals`
+- Create directory structure: `cmd/fractals/`, `internal/sierpinski/`, `internal/mandelbrot/`, `internal/cli/`
+- Create minimal `cmd/fractals/main.go` that prints "fractals cli"
+- Add `github.com/spf13/cobra` dependency
+
+**Verify:**
+- `go build ./cmd/fractals` succeeds
+- `./fractals` prints "fractals cli"
+
+---
+
+### Task 2: CLI Framework with Help
+
+Set up Cobra root command with help output.
+
+**Do:**
+- Create `internal/cli/root.go` with root command
+- Configure help text showing available subcommands
+- Wire root command into `main.go`
+
+**Verify:**
+- `./fractals --help` shows usage with "sierpinski" and "mandelbrot" listed as available commands
+- `./fractals` (no args) shows help
+
+---
+
+### Task 3: Sierpinski Algorithm
+
+Implement the Sierpinski triangle generation algorithm.
+
+**Do:**
+- Create `internal/sierpinski/sierpinski.go`
+- Implement `Generate(size, depth int, char rune) []string` that returns lines of the triangle
+- Use recursive midpoint subdivision algorithm
+- Create `internal/sierpinski/sierpinski_test.go` with tests:
+  - Small triangle (size=4, depth=2) matches expected output
+  - Size=1 returns single character
+  - Depth=0 returns filled triangle
+
+**Verify:**
+- `go test ./internal/sierpinski/...` passes
+
+---
+
+### Task 4: Sierpinski CLI Integration
+
+Wire the Sierpinski algorithm to a CLI subcommand.
+
+**Do:**
+- Create `internal/cli/sierpinski.go` with `sierpinski` subcommand
+- Add flags: `--size` (default 32), `--depth` (default 5), `--char` (default '*')
+- Call `sierpinski.Generate()` and print result to stdout
+
+**Verify:**
+- `./fractals sierpinski` outputs a triangle
+- `./fractals sierpinski --size 16 --depth 3` outputs smaller triangle
+- `./fractals sierpinski --help` shows flag documentation
+
+---
+
+### Task 5: Mandelbrot Algorithm
+
+Implement the Mandelbrot set ASCII renderer.
+
+**Do:**
+- Create `internal/mandelbrot/mandelbrot.go`
+- Implement `Render(width, height, maxIter int, char string) []string`
+- Map complex plane region (-2.5 to 1.0 real, -1.0 to 1.0 imaginary) to output dimensions
+- Map iteration count to character gradient " .:-=+*#%@" (or single char if provided)
+- Create `internal/mandelbrot/mandelbrot_test.go` with tests:
+  - Output dimensions match requested width/height
+  - Known point inside set (0,0) maps to max-iteration character
+  - Known point outside set (2,0) maps to low-iteration character
+
+**Verify:**
+- `go test ./internal/mandelbrot/...` passes
+
+---
+
+### Task 6: Mandelbrot CLI Integration
+
+Wire the Mandelbrot algorithm to a CLI subcommand.
+
+**Do:**
+- Create `internal/cli/mandelbrot.go` with `mandelbrot` subcommand
+- Add flags: `--width` (default 80), `--height` (default 24), `--iterations` (default 100), `--char` (default "")
+- Call `mandelbrot.Render()` and print result to stdout
+
+**Verify:**
+- `./fractals mandelbrot` outputs recognizable Mandelbrot set
+- `./fractals mandelbrot --width 40 --height 12` outputs smaller version
+- `./fractals mandelbrot --help` shows flag documentation
+
+---
+
+### Task 7: Character Set Configuration
+
+Ensure `--char` flag works consistently across both commands.
+
+**Do:**
+- Verify Sierpinski `--char` flag passes character to algorithm
+- For Mandelbrot, `--char` should use single character instead of gradient
+- Add tests for custom character output
+
+**Verify:**
+- `./fractals sierpinski --char '#'` uses '#' character
+- `./fractals mandelbrot --char '.'` uses '.' for all filled points
+- Tests pass
+
+---
+
+### Task 8: Input Validation and Error Handling
+
+Add validation for invalid inputs.
+
+**Do:**
+- Sierpinski: size must be > 0, depth must be >= 0
+- Mandelbrot: width/height must be > 0, iterations must be > 0
+- Return clear error messages for invalid inputs
+- Add tests for error cases
+
+**Verify:**
+- `./fractals sierpinski --size 0` prints error, exits non-zero
+- `./fractals mandelbrot --width -1` prints error, exits non-zero
+- Error messages are clear and helpful
+
+---
+
+### Task 9: Integration Tests
+
+Add integration tests that invoke the CLI.
+
+**Do:**
+- Create `cmd/fractals/main_test.go` or `test/integration_test.go`
+- Test full CLI invocation for both commands
+- Verify output format and exit codes
+- Test error cases return non-zero exit
+
+**Verify:**
+- `go test ./...` passes all tests including integration tests
+
+---
+
+### Task 10: README
+
+Document usage and examples.
+
+**Do:**
+- Create `README.md` with:
+  - Project description
+  - Installation: `go install ./cmd/fractals`
+  - Usage examples for both commands
+  - Example output (small samples)
+
+**Verify:**
+- README accurately describes the tool
+- Examples in README actually work
--- a/tests/subagent-driven-dev/go-fractals/scaffold.sh
+++ b/tests/subagent-driven-dev/go-fractals/scaffold.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# Scaffold the Go Fractals test project
+# Usage: ./scaffold.sh /path/to/target/directory
+
+set -e
+
+TARGET_DIR="${1:?Usage: $0 <target-directory>}"
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+
+# Create target directory
+mkdir -p "$TARGET_DIR"
+cd "$TARGET_DIR"
+
+# Initialize git repo
+git init
+
+# Copy design and plan
+cp "$SCRIPT_DIR/design.md" .
+cp "$SCRIPT_DIR/plan.md" .
+
+# Create .claude settings to allow reads/writes in this directory
+mkdir -p .claude
+cat > .claude/settings.local.json << 'SETTINGS'
+{
+  "permissions": {
+    "allow": [
+      "Read(**)",
+      "Edit(**)",
+      "Write(**)",
+      "Bash(go:*)",
+      "Bash(mkdir:*)",
+      "Bash(git:*)"
+    ]
+  }
+}
+SETTINGS
+
+# Create initial commit
+git add .
+git commit -m "Initial project setup with design and plan"
+
+echo "Scaffolded Go Fractals project at: $TARGET_DIR"
+echo ""
+echo "To run the test:"
+echo "  claude -p \"Execute this plan using superpowers:subagent-driven-development. Plan: $TARGET_DIR/plan.md\" --plugin-dir /path/to/superpowers"
--- a/tests/subagent-driven-dev/run-test.sh
+++ b/tests/subagent-driven-dev/run-test.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# Run a subagent-driven-development test
+#
+# Scaffolds the named test project under /tmp/superpowers-tests/<timestamp>/,
+# runs `claude -p` against the scaffolded plan with the plugin loaded, and
+# prints where the project and logs ended up. A failing claude run is reported
+# but does not abort the script, so the logs can still be inspected.
+#
+# Usage: ./run-test.sh <test-name> [--plugin-dir <path>]
+#
+# Example:
+#   ./run-test.sh go-fractals
+#   ./run-test.sh svelte-todo --plugin-dir /path/to/superpowers
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+TEST_NAME="${1:?Usage: $0 <test-name> [--plugin-dir <path>]}"
+shift
+
+# Parse optional arguments
+PLUGIN_DIR=""
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --plugin-dir)
+      # A trailing `--plugin-dir` with no value would make `shift 2` fail and
+      # `set -e` would then exit without any message.
+      if [[ $# -lt 2 ]]; then
+        echo "Error: --plugin-dir requires a path"
+        exit 1
+      fi
+      PLUGIN_DIR="$2"
+      shift 2
+      ;;
+    *)
+      echo "Unknown option: $1"
+      exit 1
+      ;;
+  esac
+done
+
+# Default plugin dir to parent of tests directory
+if [[ -z "$PLUGIN_DIR" ]]; then
+  PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
+fi
+
+# Verify test exists
+TEST_DIR="$SCRIPT_DIR/$TEST_NAME"
+if [[ ! -d "$TEST_DIR" ]]; then
+  echo "Error: Test '$TEST_NAME' not found at $TEST_DIR"
+  echo "Available tests:"
+  # A test is a subdirectory of this script's directory, so list directories
+  # only instead of filtering `ls` output by extension.
+  for candidate in "$SCRIPT_DIR"/*/; do
+    if [[ -d "$candidate" ]]; then
+      basename "$candidate"
+    fi
+  done
+  exit 1
+fi
+
+# Create timestamped output directory
+TIMESTAMP=$(date +%s)
+OUTPUT_BASE="/tmp/superpowers-tests/$TIMESTAMP/subagent-driven-development"
+OUTPUT_DIR="$OUTPUT_BASE/$TEST_NAME"
+mkdir -p "$OUTPUT_DIR"
+
+echo "=== Subagent-Driven Development Test ==="
+echo "Test: $TEST_NAME"
+echo "Output: $OUTPUT_DIR"
+echo "Plugin: $PLUGIN_DIR"
+echo ""
+
+# Scaffold the project
+echo ">>> Scaffolding project..."
+"$TEST_DIR/scaffold.sh" "$OUTPUT_DIR/project"
+echo ""
+
+# Prepare the prompt
+PLAN_PATH="$OUTPUT_DIR/project/plan.md"
+PROMPT="Execute this plan using superpowers:subagent-driven-development. The plan is at: $PLAN_PATH"
+
+# Run Claude with JSON output for token tracking
+LOG_FILE="$OUTPUT_DIR/claude-output.json"
+STDERR_FILE="$OUTPUT_DIR/claude-stderr.log"
+echo ">>> Running Claude..."
+echo "Prompt: $PROMPT"
+echo "Log file: $LOG_FILE"
+echo ""
+
+# Run claude and capture output
+# Using stream-json to get token usage stats
+# --dangerously-skip-permissions for automated testing (subagents don't inherit parent settings)
+# stderr goes to its own file: mixing it into the stream-json log would put
+# non-JSON lines in the file that jq parses below.
+cd "$OUTPUT_DIR/project"
+CLAUDE_STATUS=0
+claude -p "$PROMPT" \
+  --plugin-dir "$PLUGIN_DIR" \
+  --dangerously-skip-permissions \
+  --output-format stream-json \
+  > "$LOG_FILE" 2> "$STDERR_FILE" || CLAUDE_STATUS=$?
+
+# Extract final stats
+echo ""
+echo ">>> Test complete"
+if [[ $CLAUDE_STATUS -ne 0 ]]; then
+  # Keep going so the logs are still summarized, but do not hide the failure.
+  echo "Warning: claude exited with status $CLAUDE_STATUS (see $STDERR_FILE)"
+fi
+echo "Project directory: $OUTPUT_DIR/project"
+echo "Claude log: $LOG_FILE"
+echo "Claude stderr: $STDERR_FILE"
+echo ""
+
+# Show token usage if available
+if command -v jq &> /dev/null; then
+  echo ">>> Token usage:"
+  # Extract usage from the last message with usage info
+  jq -s '[.[] | select(.type == "result")] | last | .usage' "$LOG_FILE" 2>/dev/null || echo "(could not parse usage)"
+  echo ""
+fi
+
+echo ">>> Next steps:"
+echo "1. Review the project: cd $OUTPUT_DIR/project"
+echo "2. Review Claude's log: less $LOG_FILE"
+echo "3. Check if tests pass:"
+if [[ "$TEST_NAME" == "go-fractals" ]]; then
+  echo "   cd $OUTPUT_DIR/project && go test ./..."
+elif [[ "$TEST_NAME" == "svelte-todo" ]]; then
+  echo "   cd $OUTPUT_DIR/project && npm test && npx playwright test"
+fi
--- a/tests/subagent-driven-dev/svelte-todo/design.md
+++ b/tests/subagent-driven-dev/svelte-todo/design.md
@@ -0,0 +1,70 @@
+# Svelte Todo List - Design
+
+## Overview
+
+A simple todo list application built with Svelte. Supports creating, completing, and deleting todos with localStorage persistence.
+
+## Features
+
+- Add new todos
+- Mark todos as complete/incomplete
+- Delete todos
+- Filter by: All / Active / Completed
+- Clear all completed todos
+- Persist to localStorage
+- Show count of remaining items
+
+## User Interface
+
+```
+┌─────────────────────────────────────────┐
+│  Svelte Todos                           │
+├─────────────────────────────────────────┤
+│  [________________________] [Add]       │
+├─────────────────────────────────────────┤
+│  [ ] Buy groceries                  [x] │
+│  [✓] Walk the dog                   [x] │
+│  [ ] Write code                     [x] │
+├─────────────────────────────────────────┤
+│  2 items left                           │
+│  [All] [Active] [Completed]  [Clear ✓]  │
+└─────────────────────────────────────────┘
+```
+
+## Components
+
+```
+src/
+  App.svelte           # Main app, state management
+  lib/
+    TodoInput.svelte   # Text input + Add button
+    TodoList.svelte    # List container
+    TodoItem.svelte    # Single todo with checkbox, text, delete
+    FilterBar.svelte   # Filter buttons + clear completed
+    store.ts           # Svelte store for todos
+    storage.ts         # localStorage persistence
+```
+
+## Data Model
+
+```typescript
+interface Todo {
+  id: string;        // UUID
+  text: string;      // Todo text
+  completed: boolean;
+}
+
+type Filter = 'all' | 'active' | 'completed';
+```
+
+## Acceptance Criteria
+
+1. Can add a todo by typing and pressing Enter or clicking Add
+2. Can toggle a todo's completion by clicking its checkbox
+3. Can delete a todo by clicking its X button
+4. Filter buttons show correct subset of todos
+5. "X items left" shows count of incomplete todos
+6. "Clear completed" removes all completed todos
+7. Todos persist across page refresh (localStorage)
+8. Empty state shows helpful message
+9. All tests pass
--- a/tests/subagent-driven-dev/svelte-todo/plan.md
+++ b/tests/subagent-driven-dev/svelte-todo/plan.md
@@ -0,0 +1,222 @@
+# Svelte Todo List - Implementation Plan
+
+Execute this plan using the `superpowers:subagent-driven-development` skill.
+
+## Context
+
+Building a todo list app with Svelte. See `design.md` for full specification.
+
+## Tasks
+
+### Task 1: Project Setup
+
+Create the Svelte project with Vite.
+
+**Do:**
+- Run `npm create vite@latest . -- --template svelte-ts`
+- Install dependencies with `npm install`
+- Verify dev server works
+- Clean up default Vite template content from App.svelte
+
+**Verify:**
+- `npm run dev` starts server
+- App shows minimal "Svelte Todos" heading
+- `npm run build` succeeds
+
+---
+
+### Task 2: Todo Store
+
+Create the Svelte store for todo state management.
+
+**Do:**
+- Create `src/lib/store.ts`
+- Define `Todo` interface with id, text, completed
+- Create writable store with initial empty array
+- Export functions: `addTodo(text)`, `toggleTodo(id)`, `deleteTodo(id)`, `clearCompleted()`
+- Create `src/lib/store.test.ts` with tests for each function
+
+**Verify:**
+- Tests pass: `npm run test` (if the project has no test runner yet, install vitest and add a `test` script to `package.json`)
+
+---
+
+### Task 3: localStorage Persistence
+
+Add persistence layer for todos.
+
+**Do:**
+- Create `src/lib/storage.ts`
+- Implement `loadTodos(): Todo[]` and `saveTodos(todos: Todo[])`
+- Handle JSON parse errors gracefully (return empty array)
+- Integrate with store: load on init, save on change
+- Add tests for load/save/error handling
+
+**Verify:**
+- Tests pass
+- Manual test: add todo, refresh page, todo persists
+
+---
+
+### Task 4: TodoInput Component
+
+Create the input component for adding todos.
+
+**Do:**
+- Create `src/lib/TodoInput.svelte`
+- Text input bound to local state
+- Add button calls `addTodo()` and clears input
+- Enter key also submits
+- Disable Add button when input is empty
+- Add component tests
+
+**Verify:**
+- Tests pass
+- Component renders input and button
+
+---
+
+### Task 5: TodoItem Component
+
+Create the single todo item component.
+
+**Do:**
+- Create `src/lib/TodoItem.svelte`
+- Props: `todo: Todo`
+- Checkbox toggles completion (calls `toggleTodo`)
+- Text with strikethrough when completed
+- Delete button (X) calls `deleteTodo`
+- Add component tests
+
+**Verify:**
+- Tests pass
+- Component renders checkbox, text, delete button
+
+---
+
+### Task 6: TodoList Component
+
+Create the list container component.
+
+**Do:**
+- Create `src/lib/TodoList.svelte`
+- Props: `todos: Todo[]`
+- Renders TodoItem for each todo
+- Shows "No todos yet" when empty
+- Add component tests
+
+**Verify:**
+- Tests pass
+- Component renders list of TodoItems
+
+---
+
+### Task 7: FilterBar Component
+
+Create the filter and status bar component.
+
+**Do:**
+- Create `src/lib/FilterBar.svelte`
+- Props: `todos: Todo[]`, `filter: Filter`, `onFilterChange: (f: Filter) => void`
+- Show count: "X items left" (incomplete count)
+- Three filter buttons: All, Active, Completed
+- Active filter is visually highlighted
+- "Clear completed" button (hidden when no completed todos)
+- Add component tests
+
+**Verify:**
+- Tests pass
+- Component renders count, filters, clear button
+
+---
+
+### Task 8: App Integration
+
+Wire all components together in App.svelte.
+
+**Do:**
+- Import all components and store
+- Add filter state (default: 'all')
+- Compute filtered todos based on filter state
+- Render: heading, TodoInput, TodoList, FilterBar
+- Pass appropriate props to each component
+
+**Verify:**
+- App renders all components
+- Adding todos works
+- Toggling works
+- Deleting works
+
+---
+
+### Task 9: Filter Functionality
+
+Ensure filtering works end-to-end.
+
+**Do:**
+- Verify filter buttons change displayed todos
+- 'all' shows all todos
+- 'active' shows only incomplete todos
+- 'completed' shows only completed todos
+- Clear completed removes completed todos and resets filter if needed
+- Add integration tests
+
+**Verify:**
+- Filter tests pass
+- Manual verification of all filter states
+
+---
+
+### Task 10: Styling and Polish
+
+Add CSS styling for usability.
+
+**Do:**
+- Style the app to match the design mockup
+- Completed todos have strikethrough and muted color
+- Active filter button is highlighted
+- Input has focus styles
+- Delete button appears on hover (or always on mobile)
+- Responsive layout
+
+**Verify:**
+- App is visually usable
+- Styles don't break functionality
+
+---
+
+### Task 11: End-to-End Tests
+
+Add Playwright tests for full user flows.
+
+**Do:**
+- Install Playwright: `npm init playwright@latest`
+- Create `tests/todo.spec.ts`
+- Test flows:
+  - Add a todo
+  - Complete a todo
+  - Delete a todo
+  - Filter todos
+  - Clear completed
+  - Persistence (add, reload, verify)
+
+**Verify:**
+- `npx playwright test` passes
+
+---
+
+### Task 12: README
+
+Document the project.
+
+**Do:**
+- Create `README.md` with:
+  - Project description
+  - Setup: `npm install`
+  - Development: `npm run dev`
+  - Testing: `npm test` and `npx playwright test`
+  - Build: `npm run build`
+
+**Verify:**
+- README accurately describes the project
+- Instructions work
--- a/tests/subagent-driven-dev/svelte-todo/scaffold.sh
+++ b/tests/subagent-driven-dev/svelte-todo/scaffold.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Scaffold the Svelte Todo test project
+# Usage: ./scaffold.sh /path/to/target/directory
+
+set -e
+
+TARGET_DIR="${1:?Usage: $0 <target-directory>}"
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+
+# Create target directory
+mkdir -p "$TARGET_DIR"
+cd "$TARGET_DIR"
+
+# Initialize git repo
+git init
+
+# Copy design and plan
+cp "$SCRIPT_DIR/design.md" .
+cp "$SCRIPT_DIR/plan.md" .
+
+# Create .claude settings to allow reads/writes in this directory
+mkdir -p .claude
+cat > .claude/settings.local.json << 'SETTINGS'
+{
+  "permissions": {
+    "allow": [
+      "Read(**)",
+      "Edit(**)",
+      "Write(**)",
+      "Bash(npm:*)",
+      "Bash(npx:*)",
+      "Bash(mkdir:*)",
+      "Bash(git:*)"
+    ]
+  }
+}
+SETTINGS
+
+# Create initial commit
+git add .
+git commit -m "Initial project setup with design and plan"
+
+echo "Scaffolded Svelte Todo project at: $TARGET_DIR"
+echo ""
+echo "To run the test:"
+echo "  claude -p \"Execute this plan using superpowers:subagent-driven-development. Plan: $TARGET_DIR/plan.md\" --plugin-dir /path/to/superpowers"
				`@@ -0,0 +1 @@`
				`please use the brainstorming skill to help me think through this feature`
				`@@ -0,0 +1 @@`
				`use systematic-debugging to figure out what's wrong`
				`@@ -0,0 +1 @@`
				`I have a plan document at docs/plans/2024-01-15-auth-system.md that needs to be executed. Please implement it.`