Add comprehensive testing documentation

Documents:
- How to run integration tests
- subagent-driven-development test details
- Token analysis tool usage
- Troubleshooting common issues
- Writing new integration tests
- Session transcript format
Add token usage analysis to subagent-driven-development test
2026-04-20 14:12:41 +00:00 · 2025-11-29 21:03:33 -08:00 · 2025-11-29 20:51:18 -08:00 · 2025-11-29 19:35:43 -08:00 · 2025-11-29 17:15:21 -08:00 · 2025-11-29 11:47:47 -08:00
53 changed files with 724 additions and 1820 deletions
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -9,7 +9,7 @@
    {
      "name": "superpowers",
      "description": "Core skills library for Claude Code: TDD, debugging, collaboration patterns, and proven techniques",
-      "version": "4.0.0",
+      "version": "3.5.1",
      "source": "./",
      "author": {
        "name": "Jesse Vincent",
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -1,7 +1,7 @@
 {
  "name": "superpowers",
  "description": "Core skills library for Claude Code: TDD, debugging, collaboration patterns, and proven techniques",
-  "version": "4.0.0",
+  "version": "3.5.1",
  "author": {
    "name": "Jesse Vincent",
    "email": "jesse@fsck.com"
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -0,0 +1,141 @@
+{
+  "permissions": {
+    "allow": [
+      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/getting-started/**)",
+      "Read(//Users/jesse/Downloads/**)",
+      "Bash(~/.claude/plugins/cache/superpowers/skills/getting-started/list-skills)",
+      "Bash(~/.claude/plugins/cache/superpowers/skills/getting-started/skills-search \"prompt\")",
+      "Bash(~/.claude/plugins/cache/superpowers/skills/getting-started/skills-search \"communication\")",
+      "Bash(~/.claude/plugins/cache/superpowers/skills/getting-started/skills-search \"interaction\")",
+      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/meta/testing-skills-with-subagents/**)",
+      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/collaboration/dispatching-parallel-agents/**)",
+      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/collaboration/requesting-code-review/**)",
+      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/collaboration/writing-plans/**)",
+      "mcp__journal__search_journal",
+      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/meta/creating-skills/**)",
+      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/collaboration/brainstorming/**)",
+      "Read(//Users/jesse/.claude/plugins/cache/superpowers/skills/**)",
+      "Read(//Users/jesse/.claude/plugins/cache/**)",
+      "mcp__journal__read_journal_entry",
+      "Bash(/Users/jesse/git/superpowers/superpowers/skills/getting-started/list-skills)",
+      "Bash(/Users/jesse/git/superpowers/superpowers/skills/getting-started/skills-search refactor)",
+      "Read(//Users/jesse/Documents/GitHub/superpowers/**)",
+      "Bash(${CLAUDE_PLUGIN_ROOT}/skills/getting-started/list-skills:*)",
+      "Bash(/Users/jesse/Documents/GitHub/superpowers/superpowers/skills/getting-started/list-skills)",
+      "Bash(/Users/jesse/Documents/GitHub/superpowers/superpowers/skills/getting-started/skills-search editing)",
+      "Bash(list-skills brainstorm)",
+      "Read(//Users/jesse/.claude/commands/**)",
+      "Bash(git checkout:*)",
+      "Bash(/Users/jesse/.claude/plugins/cache/superpowers/skills/getting-started/list-skills)",
+      "Bash(ln:*)",
+      "Bash(git add:*)",
+      "Bash(git commit:*)",
+      "Bash(git push:*)",
+      "Read(//Users/jesse/.claude/plugins/**)",
+      "Read(//Users/jesse/.claude/**)",
+      "Bash(cat:*)",
+      "Read(//Users/jesse/.superpowers/**)",
+      "Bash(find:*)",
+      "Read(//Users/jesse/.clank/**)",
+      "Bash(./search-conversations:*)",
+      "Bash(./skills/collaboration/remembering-conversations/tool/search-conversations:*)",
+      "Bash(npm install)",
+      "Bash(sqlite3:*)",
+      "Bash(chmod:*)",
+      "Bash(/Users/jesse/Documents/GitHub/superpowers/superpowers/skills/collaboration/remembering-conversations/tool/migrate-to-config.sh:*)",
+      "Read(//Users/jesse/.config/superpowers/**)",
+      "Bash(./index-conversations --help)",
+      "Bash(./index-conversations:*)",
+      "Bash(bc)",
+      "Bash(bc:*)",
+      "Bash(./scripts/find-skills)",
+      "Bash(./scripts/run:*)",
+      "Bash(./scripts/find-skills test)",
+      "Bash(find-skills:*)",
+      "Bash(/Users/jesse/.claude/plugins/cache/superpowers/scripts/find-skills refactor)",
+      "Bash(mkdir:*)",
+      "Bash(git worktree add:*)",
+      "Bash([ -f package.json ])",
+      "Bash(git worktree:*)",
+      "Bash(gh repo create:*)",
+      "Bash(git clone:*)",
+      "Bash(gh repo view:*)",
+      "Bash(test:*)",
+      "Bash(git ls-tree:*)",
+      "Bash(git rm:*)",
+      "Bash(git mv:*)",
+      "Bash(/Users/jesse/Documents/GitHub/superpowers/superpowers-skills/skills/using-skills/find-skills)",
+      "Bash(tree:*)",
+      "Bash(/Users/jesse/Documents/GitHub/superpowers/superpowers-skills/skills/using-skills/skill-run --help)",
+      "Bash(echo:*)",
+      "Bash(git log:*)",
+      "Bash(git show:*)",
+      "Bash(git diff-tree:*)",
+      "Bash(bash:*)",
+      "Bash(xargs ls:*)",
+      "Bash(git rev-parse:*)",
+      "Bash(git reset:*)",
+      "Bash(./skills/using-skills/find-skills)",
+      "Bash(git rebase:*)",
+      "Bash(GIT_SEQUENCE_EDITOR=\"sed -i '' 's/^pick 683707a/edit 683707a/'\" git rebase:*)",
+      "Bash(gh pr create:*)",
+      "Bash(for:*)",
+      "Bash(do [ -f \"$skill\" ])",
+      "Bash(! grep -q \"^when_to_use:\" \"$skill\")",
+      "Bash(done)",
+      "Bash(gh issue view:*)",
+      "Bash(gh pr view:*)",
+      "Bash(gh pr diff:*)",
+      "Bash(/Users/jesse/Documents/GitHub/superpowers/superpowers-skills/skills/using-skills/find-skills test)",
+      "Bash(xargs -I {} bash -c 'dir=$(echo {} | sed \"\"\"\"s|/SKILL.md||\"\"\"\" | xargs basename); name=$(grep \"\"\"\"^name:\"\"\"\" {} | sed \"\"\"\"s/^name: //\"\"\"\"); echo \"\"\"\"$dir -> $name\"\"\"\"')",
+      "mcp__obsidian-mcp-tools__fetch",
+      "Skill(superpowers:using-git-worktrees)",
+      "Skill(superpowers:subagent-driven-development)",
+      "Bash(./test-raw.sh:*)",
+      "Bash(./chrome-ws raw \"ws://localhost:9222/devtools/page/test\" '{\"\"id\"\":1,\"\"method\"\":\"\"Browser.getVersion\"\"}')",
+      "Bash(./test-tabs.sh:*)",
+      "Bash(curl:*)",
+      "Bash(./chrome-ws tabs:*)",
+      "Bash(./chrome-ws close:*)",
+      "Bash(./chrome-ws raw:*)",
+      "Bash(./chrome-ws new:*)",
+      "Bash(./test-navigate.sh:*)",
+      "Bash(./test-interact.sh:*)",
+      "Bash(./test-extract.sh)",
+      "Bash(./test-wait.sh:*)",
+      "Bash(./test-e2e.sh:*)",
+      "Bash(./chrome-ws extract:*)",
+      "Bash(./chrome-ws screenshot:*)",
+      "Bash(./chrome-ws start:*)",
+      "Bash(./chrome-ws navigate:*)",
+      "Bash(git init:*)",
+      "Bash(git tag:*)",
+      "Skill(example-skills:mcp-builder)",
+      "Bash(npm run build)",
+      "Bash(npm run clean)",
+      "Bash(timeout 3s node dist/index.js)",
+      "Bash(git -C /Users/jesse/Documents/GitHub/superpowers/superpowers-chrome ls-files .claude-plugin/marketplace.json)",
+      "mcp__private-journal__read_journal_entry",
+      "Bash(git pull:*)",
+      "Skill(elements-of-style:writing-clearly-and-concisely)",
+      "Bash(gh release list:*)",
+      "Bash(gh release create:*)",
+      "Read(//Users/jesse/git/superpowers/superpowers-marketplace/.claude-plugin/**)",
+      "mcp__plugin_episodic-memory_episodic-memory__search",
+      "Skill(superpowers:writing-skills)",
+      "mcp__private-journal__process_thoughts",
+      "Skill(superpowers:brainstorming)",
+      "Skill(superpowers:using-superpowers)",
+      "Skill(episodic-memory:remembering-conversations)",
+      "Skill(superpowers-developing-for-claude-code:developing-claude-code-plugins)",
+      "Skill(working-with-claude-code)"
+    ],
+    "deny": [],
+    "ask": [],
+    "additionalDirectories": [
+      "/Users/jesse/Documents/GitHub/superpowers/superpowers-skills/",
+      "/Users/jesse/Documents/GitHub/superpowers/superpowers-marketplace",
+      "/Users/jesse/Documents/GitHub/superpowers/using-chrome-directly/"
+    ]
+  }
+}
--- a/.codex/superpowers-codex
+++ b/.codex/superpowers-codex
@@ -229,7 +229,7 @@ function runUseSkill(skillName) {
    if (frontmatter.description) {
        console.log(`# ${frontmatter.description}`);
    }
-    console.log(`# Skill-specific tools and reference files live in ${skillDirectory}`);
+    console.log(`# Supporting tools and docs are in ${skillDirectory}`);
    console.log('# ============================================');
    console.log('');

@@ -264,4 +264,4 @@ switch (command) {
        console.log('  superpowers-codex use-skill superpowers:brainstorming');
        console.log('  superpowers-codex use-skill my-custom-skill');
        break;
-}
+}
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,2 @@
 .worktrees/
 .private-journal/
-.claude/
--- a/.opencode/plugin/superpowers.js
+++ b/.opencode/plugin/superpowers.js
@@ -67,7 +67,7 @@ ${toolMapping}
        path: { id: sessionID },
        body: {
          noReply: true,
-          parts: [{ type: "text", text: bootstrapContent, synthetic: true }]
+          parts: [{ type: "text", text: bootstrapContent }]
        }
      });
      return true;
@@ -132,8 +132,8 @@ ${toolMapping}
              body: {
                noReply: true,
                parts: [
-                  { type: "text", text: `Loading skill: ${name || skill_name}`, synthetic: true },
-                  { type: "text", text: `${skillHeader}\n\n${content}`, synthetic: true }
+                  { type: "text", text: `Loading skill: ${name || skill_name}` },
+                  { type: "text", text: `${skillHeader}\n\n${content}` }
                ]
              }
            });
--- a/README.md
+++ b/README.md
@@ -100,8 +100,9 @@ Fetch and follow instructions from https://raw.githubusercontent.com/obra/superp
 ### Skills Library

 **Testing**
- **test-driven-development** - RED-GREEN-REFACTOR cycle (includes anti-patterns reference)
+- **test-driven-development** - RED-GREEN-REFACTOR cycle
 - **condition-based-waiting** - Async test patterns
+- **testing-anti-patterns** - Common pitfalls to avoid

 **Debugging** 
 - **systematic-debugging** - 4-phase root cause process
@@ -120,8 +121,9 @@ Fetch and follow instructions from https://raw.githubusercontent.com/obra/superp
 - **finishing-a-development-branch** - Merge/PR decision workflow
 - **subagent-driven-development** - Fast iteration with quality gates

-**Meta**
- **writing-skills** - Create new skills following best practices (includes testing methodology)
+**Meta** 
+- **writing-skills** - Create new skills following best practices
+- **testing-skills-with-subagents** - Validate skill quality
 - **using-superpowers** - Introduction to the skills system

 ## Philosophy
@@ -139,8 +141,9 @@ Skills live directly in this repository. To contribute:

 1. Fork the repository
 2. Create a branch for your skill
-3. Follow the `writing-skills` skill for creating and testing new skills
-4. Submit a PR
+3. Follow the `writing-skills` skill for creating new skills
+4. Use the `testing-skills-with-subagents` skill to validate quality
+5. Submit a PR

 See `skills/writing-skills/SKILL.md` for the complete guide.

--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -1,102 +1,5 @@
 # Superpowers Release Notes

-## v4.0.0 (2025-12-17)
-
-### New Features
-
-**Two-stage code review in subagent-driven-development**
-
-Subagent workflows now use two separate review stages after each task:
-
-1. **Spec compliance review** - Skeptical reviewer verifies implementation matches spec exactly. Catches missing requirements AND over-building. Won't trust implementer's report—reads actual code.
-
-2. **Code quality review** - Only runs after spec compliance passes. Reviews for clean code, test coverage, maintainability.
-
-This catches the common failure mode where code is well-written but doesn't match what was requested. Reviews are loops, not one-shot: if reviewer finds issues, implementer fixes them, then reviewer checks again.
-
-Other subagent workflow improvements:
- Controller provides full task text to workers (not file references)
- Workers can ask clarifying questions before AND during work
- Self-review checklist before reporting completion
- Plan read once at start, extracted to TodoWrite
-
-New prompt templates in `skills/subagent-driven-development/`:
- `implementer-prompt.md` - Includes self-review checklist, encourages questions
- `spec-reviewer-prompt.md` - Skeptical verification against requirements
- `code-quality-reviewer-prompt.md` - Standard code review
-
-**Debugging techniques consolidated with tools**
-
-`systematic-debugging` now bundles supporting techniques and tools:
- `root-cause-tracing.md` - Trace bugs backward through call stack
- `defense-in-depth.md` - Add validation at multiple layers
- `condition-based-waiting.md` - Replace arbitrary timeouts with condition polling
- `find-polluter.sh` - Bisection script to find which test creates pollution
- `condition-based-waiting-example.ts` - Complete implementation from real debugging session
-
-**Testing anti-patterns reference**
-
-`test-driven-development` now includes `testing-anti-patterns.md` covering:
- Testing mock behavior instead of real behavior
- Adding test-only methods to production classes
- Mocking without understanding dependencies
- Incomplete mocks that hide structural assumptions
-
-**Skill test infrastructure**
-
-Three new test frameworks for validating skill behavior:
-
-`tests/skill-triggering/` - Validates skills trigger from naive prompts without explicit naming. Tests 6 skills to ensure descriptions alone are sufficient.
-
-`tests/claude-code/` - Integration tests using `claude -p` for headless testing. Verifies skill usage via session transcript (JSONL) analysis. Includes `analyze-token-usage.py` for cost tracking.
-
-`tests/subagent-driven-dev/` - End-to-end workflow validation with two complete test projects:
- `go-fractals/` - CLI tool with Sierpinski/Mandelbrot (10 tasks)
- `svelte-todo/` - CRUD app with localStorage and Playwright (12 tasks)
-
-### Major Changes
-
-**DOT flowcharts as executable specifications**
-
-Rewrote key skills using DOT/GraphViz flowcharts as the authoritative process definition. Prose becomes supporting content.
-
-**The Description Trap** (documented in `writing-skills`): Discovered that skill descriptions override flowchart content when descriptions contain workflow summaries. Claude follows the short description instead of reading the detailed flowchart. Fix: descriptions must be trigger-only ("Use when X") with no process details.
-
-**Skill priority in using-superpowers**
-
-When multiple skills apply, process skills (brainstorming, debugging) now explicitly come before implementation skills. "Build X" triggers brainstorming first, then domain skills.
-
-**brainstorming trigger strengthened**
-
-Description changed to imperative: "You MUST use this before any creative work—creating features, building components, adding functionality, or modifying behavior."
-
-### Breaking Changes
-
-**Skill consolidation** - Six standalone skills merged:
- `root-cause-tracing`, `defense-in-depth`, `condition-based-waiting` → bundled in `systematic-debugging/`
- `testing-skills-with-subagents` → bundled in `writing-skills/`
- `testing-anti-patterns` → bundled in `test-driven-development/`
- `sharing-skills` removed (obsolete)
-
-### Other Improvements
-
- **render-graphs.js** - Tool to extract DOT diagrams from skills and render to SVG
- **Rationalizations table** in using-superpowers - Scannable format including new entries: "I need more context first", "Let me explore first", "This feels productive"
- **docs/testing.md** - Guide to testing skills with Claude Code integration tests
-
---
-
-## v3.6.2 (2025-12-03)
-
-### Fixed
-
- **Linux Compatibility**: Fixed polyglot hook wrapper (`run-hook.cmd`) to use POSIX-compliant syntax
-  - Replaced bash-specific `${BASH_SOURCE[0]:-$0}` with standard `$0` on line 16
-  - Resolves "Bad substitution" error on Ubuntu/Debian systems where `/bin/sh` is dash
-  - Fixes #141
-
---
-
 ## v3.5.1 (2025-11-24)

 ### Changed
@@ -184,9 +87,9 @@ Description changed to imperative: "You MUST use this before any creative work
 - Updated terminology: "Superpowers skills" instead of "Core skills"

 ### Files Added
- `.codex/INSTALL.md` - Installation guide for Codex users
- `.codex/superpowers-bootstrap.md` - Bootstrap instructions with Codex adaptations
- `.codex/superpowers-codex` - Unified Node.js executable with all functionality
+- `codex/INSTALL.md` - Installation guide for Codex users
+- `codex/superpowers-bootstrap.md` - Bootstrap instructions with Codex adaptations
+- `scripts/superpowers-codex` - Unified Node.js executable with all functionality

 **Note:** Codex support is experimental. The integration provides core superpowers functionality but may require refinement based on user feedback.

--- a/agents/code-reviewer.md
+++ b/agents/code-reviewer.md
@@ -2,6 +2,7 @@
 name: code-reviewer
 description: |
  Use this agent when a major project step has been completed and needs to be reviewed against the original plan and coding standards. Examples: <example>Context: The user is creating a code-review agent that should be called after a logical chunk of code is written. user: "I've finished implementing the user authentication system as outlined in step 3 of our plan" assistant: "Great work! Now let me use the code-reviewer agent to review the implementation against our plan and coding standards" <commentary>Since a major project step has been completed, use the code-reviewer agent to validate the work against the plan and identify any issues.</commentary></example> <example>Context: User has completed a significant feature implementation. user: "The API endpoints for the task management system are now complete - that covers step 2 from our architecture document" assistant: "Excellent! Let me have the code-reviewer agent examine this implementation to ensure it aligns with our plan and follows best practices" <commentary>A numbered step from the planning document has been completed, so the code-reviewer agent should review the work.</commentary></example>
+model: sonnet
 ---

 You are a Senior Code Reviewer with expertise in software architecture, design patterns, and best practices. Your role is to review completed project steps against original plans and ensure code quality standards are met.
--- a/commands/brainstorm.md
+++ b/commands/brainstorm.md
@@ -1,5 +1,5 @@
 ---
-description: "You MUST use this before any creative work - creating features, building components, adding functionality, or modifying behavior. Explores requirements and design before implementation."
+description: Interactive design refinement using Socratic method
 ---

 Use and follow the brainstorming skill exactly as written
--- a/docs/windows/polyglot-hooks.md
+++ b/docs/windows/polyglot-hooks.md
@@ -1,212 +0,0 @@
-# Cross-Platform Polyglot Hooks for Claude Code
-
-Claude Code plugins need hooks that work on Windows, macOS, and Linux. This document explains the polyglot wrapper technique that makes this possible.
-
-## The Problem
-
-Claude Code runs hook commands through the system's default shell:
- **Windows**: CMD.exe
- **macOS/Linux**: bash or sh
-
-This creates several challenges:
-
-1. **Script execution**: Windows CMD can't execute `.sh` files directly - it tries to open them in a text editor
-2. **Path format**: Windows uses backslashes (`C:\path`), Unix uses forward slashes (`/path`)
-3. **Environment variables**: `$VAR` syntax doesn't work in CMD
-4. **No `bash` in PATH**: Even with Git Bash installed, `bash` isn't in the PATH when CMD runs
-
-## The Solution: Polyglot `.cmd` Wrapper
-
-A polyglot script is valid syntax in multiple languages simultaneously. Our wrapper is valid in both CMD and bash:
-
-```cmd
-: << 'CMDBLOCK'
-@echo off
-"C:\Program Files\Git\bin\bash.exe" -l -c "\"$(cygpath -u \"$CLAUDE_PLUGIN_ROOT\")/hooks/session-start.sh\""
-exit /b
-CMDBLOCK
-
-# Unix shell runs from here
-"${CLAUDE_PLUGIN_ROOT}/hooks/session-start.sh"
-```
-
-### How It Works
-
-#### On Windows (CMD.exe)
-
-1. `: << 'CMDBLOCK'` - CMD sees `:` as a label (like `:label`) and ignores `<< 'CMDBLOCK'`
-2. `@echo off` - Suppresses command echoing
-3. The bash.exe command runs with:
-   - `-l` (login shell) to get proper PATH with Unix utilities
-   - `cygpath -u` converts Windows path to Unix format (`C:\foo` → `/c/foo`)
-4. `exit /b` - Exits the batch script, stopping CMD here
-5. Everything after `CMDBLOCK` is never reached by CMD
-
-#### On Unix (bash/sh)
-
-1. `: << 'CMDBLOCK'` - `:` is a no-op, `<< 'CMDBLOCK'` starts a heredoc
-2. Everything until `CMDBLOCK` is consumed by the heredoc (ignored)
-3. `# Unix shell runs from here` - Comment
-4. The script runs directly with the Unix path
-
-## File Structure
-
-```
-hooks/
-├── hooks.json           # Points to the .cmd wrapper
-├── session-start.cmd    # Polyglot wrapper (cross-platform entry point)
-└── session-start.sh     # Actual hook logic (bash script)
-```
-
-### hooks.json
-
-```json
-{
-  "hooks": {
-    "SessionStart": [
-      {
-        "matcher": "startup|resume|clear|compact",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "\"${CLAUDE_PLUGIN_ROOT}/hooks/session-start.cmd\""
-          }
-        ]
-      }
-    ]
-  }
-}
-```
-
-Note: The path must be quoted because `${CLAUDE_PLUGIN_ROOT}` may contain spaces on Windows (e.g., `C:\Program Files\...`).
-
-## Requirements
-
-### Windows
- **Git for Windows** must be installed (provides `bash.exe` and `cygpath`)
- Default installation path: `C:\Program Files\Git\bin\bash.exe`
- If Git is installed elsewhere, the wrapper needs modification
-
-### Unix (macOS/Linux)
- Standard bash or sh shell
- The `.cmd` file must have execute permission (`chmod +x`)
-
-## Writing Cross-Platform Hook Scripts
-
-Your actual hook logic goes in the `.sh` file. To ensure it works on Windows (via Git Bash):
-
-### Do:
- Use pure bash builtins when possible
- Use `$(command)` instead of backticks
- Quote all variable expansions: `"$VAR"`
- Use `printf` or here-docs for output
-
-### Avoid:
- External commands that may not be in PATH (sed, awk, grep)
- If you must use them, they're available in Git Bash but ensure PATH is set up (use `bash -l`)
-
-### Example: JSON Escaping Without sed/awk
-
-Instead of:
-```bash
-escaped=$(echo "$content" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | awk '{printf "%s\\n", $0}')
-```
-
-Use pure bash:
-```bash
-escape_for_json() {
-    local input="$1"
-    local output=""
-    local i char
-    for (( i=0; i<${#input}; i++ )); do
-        char="${input:$i:1}"
-        case "$char" in
-            $'\\') output+='\\' ;;
-            '"') output+='\"' ;;
-            $'\n') output+='\n' ;;
-            $'\r') output+='\r' ;;
-            $'\t') output+='\t' ;;
-            *) output+="$char" ;;
-        esac
-    done
-    printf '%s' "$output"
-}
-```
-
-## Reusable Wrapper Pattern
-
-For plugins with multiple hooks, you can create a generic wrapper that takes the script name as an argument:
-
-### run-hook.cmd
-```cmd
-: << 'CMDBLOCK'
-@echo off
-set "SCRIPT_DIR=%~dp0"
-set "SCRIPT_NAME=%~1"
-"C:\Program Files\Git\bin\bash.exe" -l -c "cd \"$(cygpath -u \"%SCRIPT_DIR%\")\" && \"./%SCRIPT_NAME%\""
-exit /b
-CMDBLOCK
-
-# Unix shell runs from here
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
-SCRIPT_NAME="$1"
-shift
-"${SCRIPT_DIR}/${SCRIPT_NAME}" "$@"
-```
-
-### hooks.json using the reusable wrapper
-```json
-{
-  "hooks": {
-    "SessionStart": [
-      {
-        "matcher": "startup",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "\"${CLAUDE_PLUGIN_ROOT}/hooks/run-hook.cmd\" session-start.sh"
-          }
-        ]
-      }
-    ],
-    "PreToolUse": [
-      {
-        "matcher": "Bash",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "\"${CLAUDE_PLUGIN_ROOT}/hooks/run-hook.cmd\" validate-bash.sh"
-          }
-        ]
-      }
-    ]
-  }
-}
-```
-
-## Troubleshooting
-
-### "bash is not recognized"
-CMD can't find bash. The wrapper uses the full path `C:\Program Files\Git\bin\bash.exe`. If Git is installed elsewhere, update the path.
-
-### "cygpath: command not found" or "dirname: command not found"
-Bash isn't running as a login shell. Ensure `-l` flag is used.
-
-### Path has weird `\/` in it
-`${CLAUDE_PLUGIN_ROOT}` expanded to a Windows path ending with backslash, then `/hooks/...` was appended. Use `cygpath` to convert the entire path.
-
-### Script opens in text editor instead of running
-The hooks.json is pointing directly to the `.sh` file. Point to the `.cmd` wrapper instead.
-
-### Works in terminal but not as hook
-Claude Code may run hooks differently. Test by simulating the hook environment:
-```powershell
-$env:CLAUDE_PLUGIN_ROOT = "C:\path\to\plugin"
-cmd /c "C:\path\to\plugin\hooks\session-start.cmd"
-```
-
-## Related Issues
-
- [anthropics/claude-code#9758](https://github.com/anthropics/claude-code/issues/9758) - .sh scripts open in editor on Windows
- [anthropics/claude-code#3417](https://github.com/anthropics/claude-code/issues/3417) - Hooks don't work on Windows
- [anthropics/claude-code#6023](https://github.com/anthropics/claude-code/issues/6023) - CLAUDE_PROJECT_DIR not found
--- a/hooks/hooks.json
+++ b/hooks/hooks.json
@@ -6,7 +6,7 @@
        "hooks": [
          {
            "type": "command",
-            "command": "\"${CLAUDE_PLUGIN_ROOT}/hooks/run-hook.cmd\" session-start.sh"
+            "command": "${CLAUDE_PLUGIN_ROOT}/hooks/session-start.sh"
          }
        ]
      }
--- a/hooks/run-hook.cmd
+++ b/hooks/run-hook.cmd
@@ -1,19 +0,0 @@
-: << 'CMDBLOCK'
-@echo off
-REM Polyglot wrapper: runs .sh scripts cross-platform
-REM Usage: run-hook.cmd <script-name> [args...]
-REM The script should be in the same directory as this wrapper
-
-if "%~1"=="" (
-    echo run-hook.cmd: missing script name >&2
-    exit /b 1
-)
-"C:\Program Files\Git\bin\bash.exe" -l "%~dp0%~1" %2 %3 %4 %5 %6 %7 %8 %9
-exit /b
-CMDBLOCK
-
-# Unix shell runs from here
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-SCRIPT_NAME="$1"
-shift
-"${SCRIPT_DIR}/${SCRIPT_NAME}" "$@"
--- a/hooks/session-start.sh
+++ b/hooks/session-start.sh
@@ -17,27 +17,9 @@ fi
 # Read using-superpowers content
 using_superpowers_content=$(cat "${PLUGIN_ROOT}/skills/using-superpowers/SKILL.md" 2>&1 || echo "Error reading using-superpowers skill")

-# Escape outputs for JSON using pure bash
-escape_for_json() {
-    local input="$1"
-    local output=""
-    local i char
-    for (( i=0; i<${#input}; i++ )); do
-        char="${input:$i:1}"
-        case "$char" in
-            $'\\') output+='\\' ;;
-            '"') output+='\"' ;;
-            $'\n') output+='\n' ;;
-            $'\r') output+='\r' ;;
-            $'\t') output+='\t' ;;
-            *) output+="$char" ;;
-        esac
-    done
-    printf '%s' "$output"
-}
-
-using_superpowers_escaped=$(escape_for_json "$using_superpowers_content")
-warning_escaped=$(escape_for_json "$warning_message")
+# Escape outputs for JSON
+using_superpowers_escaped=$(echo "$using_superpowers_content" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | awk '{printf "%s\\n", $0}')
+warning_escaped=$(echo "$warning_message" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | awk '{printf "%s\\n", $0}')

 # Output context injection as JSON
 cat <<EOF
--- a/skills/brainstorming/SKILL.md
+++ b/skills/brainstorming/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: brainstorming
-description: "You MUST use this before any creative work - creating features, building components, adding functionality, or modifying behavior. Explores user intent, requirements and design before implementation."
+description: Use when creating or developing, before writing code or implementation plans - refines rough ideas into fully-formed designs through collaborative questioning, alternative exploration, and incremental validation. Don't use during clear 'mechanical' processes
 ---

 # Brainstorming Ideas Into Designs
--- a/skills/systematic-debugging/condition-based-waiting.md
+++ b/skills/systematic-debugging/condition-based-waiting.md
@@ -1,3 +1,8 @@
+---
+name: condition-based-waiting
+description: Use when tests have race conditions, timing dependencies, or inconsistent pass/fail behavior - replaces arbitrary timeouts with condition polling to wait for actual state changes, eliminating flaky tests from timing guesses
+---
+
 # Condition-Based Waiting

 ## Overview
@@ -79,7 +84,7 @@ async function waitFor<T>(
 }
 ```

-See `condition-based-waiting-example.ts` in this directory for complete implementation with domain-specific helpers (`waitForEvent`, `waitForEventCount`, `waitForEventMatch`) from actual debugging session.
+See @example.ts for a complete implementation with domain-specific helpers (`waitForEvent`, `waitForEventCount`, `waitForEventMatch`) from an actual debugging session.

 ## Common Mistakes

--- a/skills/systematic-debugging/condition-based-waiting-example.ts
+++ b/skills/systematic-debugging/condition-based-waiting-example.ts
--- a/skills/systematic-debugging/defense-in-depth.md
+++ b/skills/systematic-debugging/defense-in-depth.md
@@ -1,3 +1,8 @@
+---
+name: defense-in-depth
+description: Use when invalid data causes failures deep in execution, requiring validation at multiple system layers - validates at every layer data passes through to make bugs structurally impossible
+---
+
 # Defense-in-Depth Validation

 ## Overview
--- a/skills/dispatching-parallel-agents/SKILL.md
+++ b/skills/dispatching-parallel-agents/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: dispatching-parallel-agents
-description: Use when facing 2+ independent tasks that can be worked on without shared state or sequential dependencies
+description: Use when facing 3+ independent failures that can be investigated without shared state or dependencies - dispatches multiple Claude agents to investigate and fix independent problems concurrently
 ---

 # Dispatching Parallel Agents
--- a/skills/executing-plans/SKILL.md
+++ b/skills/executing-plans/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: executing-plans
-description: Use when you have a written implementation plan to execute in a separate session with review checkpoints
+description: Use when partner provides a complete implementation plan to execute in controlled batches with review checkpoints - loads plan, reviews critically, executes tasks in batches, reports for review between batches
 ---

 # Executing Plans
--- a/skills/requesting-code-review/SKILL.md
+++ b/skills/requesting-code-review/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: requesting-code-review
-description: Use when completing tasks, implementing major features, or before merging to verify work meets requirements
+description: Use when completing tasks, implementing major features, or before merging to verify work meets requirements - dispatches superpowers:code-reviewer subagent to review implementation against plan or requirements before proceeding
 ---

 # Requesting Code Review
--- a/skills/systematic-debugging/root-cause-tracing.md
+++ b/skills/systematic-debugging/root-cause-tracing.md
@@ -1,3 +1,8 @@
+---
+name: root-cause-tracing
+description: Use when errors occur deep in execution and you need to trace back to find the original trigger - systematically traces bugs backward through call stack, adding instrumentation when needed, to identify source of invalid data or incorrect behavior
+---
+
 # Root Cause Tracing

 ## Overview
@@ -98,7 +103,7 @@ npm test 2>&1 | grep 'DEBUG git init'

 If something appears during tests but you don't know which test:

-Use the bisection script `find-polluter.sh` in this directory:
+Use the bisection script: @find-polluter.sh

 ```bash
 ./find-polluter.sh '.git' 'src/**/*.test.ts'
--- a/skills/systematic-debugging/find-polluter.sh
+++ b/skills/systematic-debugging/find-polluter.sh
--- a/skills/sharing-skills/SKILL.md
+++ b/skills/sharing-skills/SKILL.md
@@ -0,0 +1,194 @@
+---
+name: sharing-skills
+description: Use when you've developed a broadly useful skill and want to contribute it upstream via pull request - guides process of branching, committing, pushing, and creating PR to contribute skills back to upstream repository
+---
+
+# Sharing Skills
+
+## Overview
+
+Contribute skills from your local branch back to the upstream repository.
+
+**Workflow:** Branch → Edit/Create skill → Commit → Push → PR
+
+## When to Share
+
+**Share when:**
+- Skill applies broadly (not project-specific)
+- Pattern/technique others would benefit from
+- Well-tested and documented
+- Follows writing-skills guidelines
+
+**Keep personal when:**
+- Project-specific or organization-specific
+- Experimental or unstable
+- Contains sensitive information
+- Too narrow/niche for general use
+
+## Prerequisites
+
+- `gh` CLI installed and authenticated
+- Working directory is `~/.config/superpowers/skills/` (your local clone)
+- **REQUIRED:** Skill has been tested using writing-skills TDD process
+
+## Sharing Workflow
+
+### 1. Ensure You're on Main and Synced
+
+```bash
+cd ~/.config/superpowers/skills/
+git checkout main
+git pull upstream main
+git push origin main  # Push to your fork
+```
+
+### 2. Create Feature Branch
+
+```bash
+# Branch name: add-skillname-skill
+skill_name="your-skill-name"
+git checkout -b "add-${skill_name}-skill"
+```
+
+### 3. Create or Edit Skill
+
+```bash
+# Work on your skill in skills/
+# Create new skill or edit existing one
+# Skill should be in skills/your-skill-name/SKILL.md
+```
+
+### 4. Commit Changes
+
+```bash
+# Add and commit
+git add "skills/${skill_name}/"
+git commit -m "Add ${skill_name} skill
+
+$(cat <<'EOF'
+Brief description of what this skill does and why it's useful.
+
+Tested with: [describe testing approach]
+EOF
+)"
+```
+
+### 5. Push to Your Fork
+
+```bash
+git push -u origin "add-${skill_name}-skill"
+```
+
+### 6. Create Pull Request
+
+```bash
+# Create PR to upstream using gh CLI
+gh pr create \
+  --repo upstream-org/upstream-repo \
+  --title "Add ${skill_name} skill" \
+  --body "$(cat <<'EOF'
+## Summary
+Brief description of the skill and what problem it solves.
+
+## Testing
+Describe how you tested this skill (pressure scenarios, baseline tests, etc.).
+
+## Context
+Any additional context about why this skill is needed and how it should be used.
+EOF
+)"
+```
+
+## Complete Example
+
+Here's a complete example of sharing a skill called "async-patterns":
+
+```bash
+# 1. Sync with upstream
+cd ~/.config/superpowers/skills/
+git checkout main
+git pull upstream main
+git push origin main
+
+# 2. Create branch
+git checkout -b "add-async-patterns-skill"
+
+# 3. Create/edit the skill
+# (Work on skills/async-patterns/SKILL.md)
+
+# 4. Commit
+git add skills/async-patterns/
+git commit -m "Add async-patterns skill
+
+Patterns for handling asynchronous operations in tests and application code.
+
+Tested with: Multiple pressure scenarios testing agent compliance."
+
+# 5. Push
+git push -u origin "add-async-patterns-skill"
+
+# 6. Create PR
+gh pr create \
+  --repo upstream-org/upstream-repo \
+  --title "Add async-patterns skill" \
+  --body "## Summary
+Patterns for handling asynchronous operations correctly in tests and application code.
+
+## Testing
+Tested with multiple application scenarios. Agents successfully apply patterns to new code.
+
+## Context
+Addresses common async pitfalls like race conditions, improper error handling, and timing issues."
+```
+
+## After PR is Merged
+
+Once your PR is merged:
+
+1. Sync your local main branch:
+```bash
+cd ~/.config/superpowers/skills/
+git checkout main
+git pull upstream main
+git push origin main
+```
+
+2. Delete the feature branch:
+```bash
+git branch -d "add-${skill_name}-skill"
+git push origin --delete "add-${skill_name}-skill"
+```
+
+## Troubleshooting
+
+**"gh: command not found"**
+- Install GitHub CLI: https://cli.github.com/
+- Authenticate: `gh auth login`
+
+**"Permission denied (publickey)"**
+- Check GitHub authentication: `gh auth status`; test your SSH key: `ssh -T git@github.com`
+- Set up SSH: https://docs.github.com/en/authentication
+
+**"Skill already exists"**
+- A skill with that name already exists upstream, so your PR would modify it rather than add a new one
+- Consider different skill name or coordinate with the skill's maintainer
+
+**PR merge conflicts**
+- Rebase on latest upstream: `git fetch upstream && git rebase upstream/main`
+- Resolve conflicts
+- Force push: `git push -f origin your-branch`
+
+## Multi-Skill Contributions
+
+**Do NOT batch multiple skills in one PR.**
+
+Each skill should:
+- Have its own feature branch
+- Have its own PR
+- Be independently reviewable
+
+**Why?** Individual skills can be reviewed, iterated, and merged independently.
+
+## Related Skills
+
+- **writing-skills** - REQUIRED: How to create well-tested skills before sharing
--- a/skills/subagent-driven-development/SKILL.md
+++ b/skills/subagent-driven-development/SKILL.md
@@ -1,92 +1,256 @@
 ---
 name: subagent-driven-development
-description: Use when executing implementation plans with independent tasks in the current session
+description: Use when executing implementation plans with independent tasks in the current session - dispatches fresh subagent for each task with code review between tasks, enabling fast iteration with quality gates
 ---

 # Subagent-Driven Development

-Execute plan by dispatching fresh subagent per task, with two-stage review after each: spec compliance review first, then code quality review.
+Execute plan by dispatching fresh subagent per task, with code review after each.

-**Core principle:** Fresh subagent per task + two-stage review (spec then quality) = high quality, fast iteration
+**Core principle:** Fresh subagent per task + review between tasks = high quality, fast iteration

-## When to Use
-
-```dot
-digraph when_to_use {
-    "Have implementation plan?" [shape=diamond];
-    "Tasks mostly independent?" [shape=diamond];
-    "Stay in this session?" [shape=diamond];
-    "subagent-driven-development" [shape=box];
-    "executing-plans" [shape=box];
-    "Manual execution or brainstorm first" [shape=box];
-
-    "Have implementation plan?" -> "Tasks mostly independent?" [label="yes"];
-    "Have implementation plan?" -> "Manual execution or brainstorm first" [label="no"];
-    "Tasks mostly independent?" -> "Stay in this session?" [label="yes"];
-    "Tasks mostly independent?" -> "Manual execution or brainstorm first" [label="no - tightly coupled"];
-    "Stay in this session?" -> "subagent-driven-development" [label="yes"];
-    "Stay in this session?" -> "executing-plans" [label="no - parallel session"];
-}
-```
+## Overview

 **vs. Executing Plans (parallel session):**
 - Same session (no context switch)
 - Fresh subagent per task (no context pollution)
- Two-stage review after each task: spec compliance first, then code quality
+- Code review after each task (catch issues early)
 - Faster iteration (no human-in-loop between tasks)

+**When to use:**
+- Staying in this session
+- Tasks are mostly independent
+- Want continuous progress with quality gates
+
+**When NOT to use:**
+- Need to review plan first (use executing-plans)
+- Tasks are tightly coupled (manual execution better)
+- Plan needs revision (brainstorm first)
+
 ## The Process

-```dot
-digraph process {
-    rankdir=TB;
+### 1. Load Plan

-    subgraph cluster_per_task {
-        label="Per Task";
-        "Dispatch implementer subagent (./implementer-prompt.md)" [shape=box];
-        "Implementer subagent asks questions?" [shape=diamond];
-        "Answer questions, provide context" [shape=box];
-        "Implementer subagent implements, tests, commits, self-reviews" [shape=box];
-        "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)" [shape=box];
-        "Spec reviewer subagent confirms code matches spec?" [shape=diamond];
-        "Implementer subagent fixes spec gaps" [shape=box];
-        "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" [shape=box];
-        "Code quality reviewer subagent approves?" [shape=diamond];
-        "Implementer subagent fixes quality issues" [shape=box];
-        "Mark task complete in TodoWrite" [shape=box];
-    }
+1. Read plan file once
+2. Extract all tasks (full text of each)
+3. For each task, note scene-setting context:
+   - Where it fits in overall plan
+   - Dependencies on previous tasks
+   - Architectural context
+   - Relevant patterns or existing code to follow
+4. Create TodoWrite with all tasks

-    "Read plan, extract all tasks with full text, note context, create TodoWrite" [shape=box];
-    "More tasks remain?" [shape=diamond];
-    "Dispatch final code reviewer subagent for entire implementation" [shape=box];
-    "Use superpowers:finishing-a-development-branch" [shape=box style=filled fillcolor=lightgreen];
+### 2. Execute Task with Subagent

-    "Read plan, extract all tasks with full text, note context, create TodoWrite" -> "Dispatch implementer subagent (./implementer-prompt.md)";
-    "Dispatch implementer subagent (./implementer-prompt.md)" -> "Implementer subagent asks questions?";
-    "Implementer subagent asks questions?" -> "Answer questions, provide context" [label="yes"];
-    "Answer questions, provide context" -> "Dispatch implementer subagent (./implementer-prompt.md)";
-    "Implementer subagent asks questions?" -> "Implementer subagent implements, tests, commits, self-reviews" [label="no"];
-    "Implementer subagent implements, tests, commits, self-reviews" -> "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)";
-    "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)" -> "Spec reviewer subagent confirms code matches spec?";
-    "Spec reviewer subagent confirms code matches spec?" -> "Implementer subagent fixes spec gaps" [label="no"];
-    "Implementer subagent fixes spec gaps" -> "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)" [label="re-review"];
-    "Spec reviewer subagent confirms code matches spec?" -> "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" [label="yes"];
-    "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" -> "Code quality reviewer subagent approves?";
-    "Code quality reviewer subagent approves?" -> "Implementer subagent fixes quality issues" [label="no"];
-    "Implementer subagent fixes quality issues" -> "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" [label="re-review"];
-    "Code quality reviewer subagent approves?" -> "Mark task complete in TodoWrite" [label="yes"];
-    "Mark task complete in TodoWrite" -> "More tasks remain?";
-    "More tasks remain?" -> "Dispatch implementer subagent (./implementer-prompt.md)" [label="yes"];
-    "More tasks remain?" -> "Dispatch final code reviewer subagent for entire implementation" [label="no"];
-    "Dispatch final code reviewer subagent for entire implementation" -> "Use superpowers:finishing-a-development-branch";
-}
+For each task:
+
+**1. Prepare task context:**
+- Get the full text of Task N (already extracted in Step 1)
+- Get the scene-setting context (already noted in Step 1)
+
+**2. Dispatch fresh subagent with full task text:**
+```
+Task tool (general-purpose):
+  description: "Implement Task N: [task name]"
+  prompt: |
+    You are implementing Task N: [task name]
+
+    ## Task Description
+
+    [FULL TEXT of task from plan - paste it here, don't make subagent read file]
+
+    ## Context
+
+    [Scene-setting: where this fits, dependencies, architectural context]
+
+    ## Before You Begin
+
+    If you have questions about:
+    - The requirements or acceptance criteria
+    - The approach or implementation strategy
+    - Dependencies or assumptions
+    - Anything unclear in the task description
+
+    **Ask them now.** Raise any concerns before starting work.
+
+    ## Your Job
+
+    Once you're clear on requirements:
+    1. Implement exactly what the task specifies
+    2. Write tests (following TDD if task says to)
+    3. Verify implementation works
+    4. Commit your work
+    5. Self-review (see below)
+    6. Report back
+
+    Work from: [directory]
+
+    **While you work:** If you encounter something unexpected or unclear, **ask questions**.
+    It's always OK to pause and clarify. Don't guess or make assumptions.
+
+    ## Before Reporting Back: Self-Review
+
+    Review your work with fresh eyes. Ask yourself:
+
+    **Completeness:**
+    - Did I fully implement everything in the spec?
+    - Did I miss any requirements?
+    - Are there edge cases I didn't handle?
+
+    **Quality:**
+    - Is this my best work?
+    - Are names clear and accurate (match what things do, not how they work)?
+    - Is the code clean and maintainable?
+
+    **Discipline:**
+    - Did I avoid overbuilding (YAGNI)?
+    - Did I only build what was requested?
+    - Did I follow existing patterns in the codebase?
+
+    **Testing:**
+    - Do tests actually verify behavior (not just mock behavior)?
+    - Did I follow TDD if required?
+    - Are tests comprehensive?
+
+    If you find issues during self-review, fix them now before reporting.
+
+    ## Report Format
+
+    When done, report:
+    - What you implemented
+    - What you tested and test results
+    - Files changed
+    - Self-review findings (if any)
+    - Any issues or concerns
 ```

-## Prompt Templates
+**3. Handle subagent response:**

- `./implementer-prompt.md` - Dispatch implementer subagent
- `./spec-reviewer-prompt.md` - Dispatch spec compliance reviewer subagent
- `./code-quality-reviewer-prompt.md` - Dispatch code quality reviewer subagent
+If subagent asks questions:
+- Answer clearly
+- Provide additional context if needed
+- Either continue conversation or re-dispatch with answers
+
+If subagent proceeds with implementation:
+- Review their report
+- Proceed to spec compliance review (Step 3)
+
+### 3. Spec Compliance Review
+
+**Purpose:** Verify implementer built what was requested (nothing more, nothing less)
+
+**Dispatch spec compliance reviewer:**
+```
+Task tool (general-purpose):
+  description: "Review spec compliance for Task N"
+  prompt: |
+    You are reviewing whether an implementation matches its specification.
+
+    ## What Was Requested
+
+    [FULL TEXT of task requirements]
+
+    ## What Implementer Claims They Built
+
+    [From implementer's report]
+
+    ## CRITICAL: Do Not Trust the Report
+
+    The implementer finished suspiciously quickly. Their report may be incomplete,
+    inaccurate, or optimistic. You MUST verify everything independently.
+
+    **DO NOT:**
+    - Take their word for what they implemented
+    - Trust their claims about completeness
+    - Accept their interpretation of requirements
+
+    **DO:**
+    - Read the actual code they wrote
+    - Compare actual implementation to requirements line by line
+    - Check for missing pieces they claimed to implement
+    - Look for extra features they didn't mention
+
+    ## Your Job
+
+    Read the implementation code and verify:
+
+    **Missing requirements:**
+    - Did they implement everything that was requested?
+    - Are there requirements they skipped or missed?
+    - Did they claim something works but didn't actually implement it?
+
+    **Extra/unneeded work:**
+    - Did they build things that weren't requested?
+    - Did they over-engineer or add unnecessary features?
+    - Did they add "nice to haves" that weren't in spec?
+
+    **Misunderstandings:**
+    - Did they interpret requirements differently than intended?
+    - Did they solve the wrong problem?
+    - Did they implement the right feature but in the wrong way?
+
+    **Verify by reading code, not by trusting report.**
+
+    Report:
+    - ✅ Spec compliant (if everything matches after code inspection)
+    - ❌ Issues found: [list specifically what's missing or extra, with file:line references]
+```
+
+**Review loop (must complete before Step 4):**
+1. Spec reviewer reports findings
+2. If issues found:
+   - Original implementer fixes issues
+   - Spec reviewer reviews again
+3. Repeat until spec compliant
+
+**Do NOT proceed to code quality review until spec compliance is ✅**
+
+### 4. Code Quality Review
+
+**Purpose:** Verify implementation is well-built (clean, tested, maintainable)
+
+**Only run after spec compliance review is complete.**
+
+**Dispatch code-reviewer subagent:**
+```
+Task tool (superpowers:code-reviewer):
+  Use template at requesting-code-review/code-reviewer.md
+
+  WHAT_WAS_IMPLEMENTED: [from implementer's report]
+  PLAN_OR_REQUIREMENTS: Task N from [plan-file]
+  BASE_SHA: [commit before task]
+  HEAD_SHA: [current commit]
+  DESCRIPTION: [task summary]
+```
+
+**Code reviewer returns:** Strengths, Issues (Critical/Important/Minor), Assessment
+
+**Review loop:**
+1. Code reviewer reports findings
+2. If issues found:
+   - Original implementer fixes issues
+   - Code reviewer reviews again
+3. Repeat until code quality approved
+
+### 5. Mark Complete, Next Task
+
+- Mark task as completed in TodoWrite
+- Move to next task
+- Repeat steps 2-5 for each remaining task
+
+### 6. Final Review
+
+After all tasks complete, dispatch final code-reviewer:
+- Reviews entire implementation
+- Checks all plan requirements met
+- Validates overall architecture
+
+### 7. Complete Development
+
+After final review passes:
+- Announce: "I'm using the finishing-a-development-branch skill to complete this work."
+- **REQUIRED SUB-SKILL:** Use superpowers:finishing-a-development-branch
+- Follow that skill to verify tests, present options, execute choice

 ## Example Workflow

@@ -229,12 +393,14 @@ Done!
 ## Integration

 **Required workflow skills:**
- **superpowers:writing-plans** - Creates the plan this skill executes
- **superpowers:requesting-code-review** - Code review template for reviewer subagents
- **superpowers:finishing-a-development-branch** - Complete development after all tasks
+- **writing-plans** - REQUIRED: Creates the plan that this skill executes
+- **requesting-code-review** - REQUIRED: Review after each task (see Step 4)
+- **finishing-a-development-branch** - REQUIRED: Complete development after all tasks (see Step 7)

-**Subagents should use:**
- **superpowers:test-driven-development** - Subagents follow TDD for each task
+**Subagents must use:**
+- **test-driven-development** - Subagents follow TDD for each task

 **Alternative workflow:**
- **superpowers:executing-plans** - Use for parallel session instead of same-session execution
+- **executing-plans** - Use for parallel session instead of same-session execution
+
+See code-reviewer template: requesting-code-review/code-reviewer.md
--- a/skills/subagent-driven-development/code-quality-reviewer-prompt.md
+++ b/skills/subagent-driven-development/code-quality-reviewer-prompt.md
@@ -1,20 +0,0 @@
-# Code Quality Reviewer Prompt Template
-
-Use this template when dispatching a code quality reviewer subagent.
-
-**Purpose:** Verify implementation is well-built (clean, tested, maintainable)
-
-**Only dispatch after spec compliance review passes.**
-
-```
-Task tool (superpowers:code-reviewer):
-  Use template at requesting-code-review/code-reviewer.md
-
-  WHAT_WAS_IMPLEMENTED: [from implementer's report]
-  PLAN_OR_REQUIREMENTS: Task N from [plan-file]
-  BASE_SHA: [commit before task]
-  HEAD_SHA: [current commit]
-  DESCRIPTION: [task summary]
-```
-
-**Code reviewer returns:** Strengths, Issues (Critical/Important/Minor), Assessment
--- a/skills/subagent-driven-development/implementer-prompt.md
+++ b/skills/subagent-driven-development/implementer-prompt.md
@@ -1,78 +0,0 @@
-# Implementer Subagent Prompt Template
-
-Use this template when dispatching an implementer subagent.
-
-```
-Task tool (general-purpose):
-  description: "Implement Task N: [task name]"
-  prompt: |
-    You are implementing Task N: [task name]
-
-    ## Task Description
-
-    [FULL TEXT of task from plan - paste it here, don't make subagent read file]
-
-    ## Context
-
-    [Scene-setting: where this fits, dependencies, architectural context]
-
-    ## Before You Begin
-
-    If you have questions about:
-    - The requirements or acceptance criteria
-    - The approach or implementation strategy
-    - Dependencies or assumptions
-    - Anything unclear in the task description
-
-    **Ask them now.** Raise any concerns before starting work.
-
-    ## Your Job
-
-    Once you're clear on requirements:
-    1. Implement exactly what the task specifies
-    2. Write tests (following TDD if task says to)
-    3. Verify implementation works
-    4. Commit your work
-    5. Self-review (see below)
-    6. Report back
-
-    Work from: [directory]
-
-    **While you work:** If you encounter something unexpected or unclear, **ask questions**.
-    It's always OK to pause and clarify. Don't guess or make assumptions.
-
-    ## Before Reporting Back: Self-Review
-
-    Review your work with fresh eyes. Ask yourself:
-
-    **Completeness:**
-    - Did I fully implement everything in the spec?
-    - Did I miss any requirements?
-    - Are there edge cases I didn't handle?
-
-    **Quality:**
-    - Is this my best work?
-    - Are names clear and accurate (match what things do, not how they work)?
-    - Is the code clean and maintainable?
-
-    **Discipline:**
-    - Did I avoid overbuilding (YAGNI)?
-    - Did I only build what was requested?
-    - Did I follow existing patterns in the codebase?
-
-    **Testing:**
-    - Do tests actually verify behavior (not just mock behavior)?
-    - Did I follow TDD if required?
-    - Are tests comprehensive?
-
-    If you find issues during self-review, fix them now before reporting.
-
-    ## Report Format
-
-    When done, report:
-    - What you implemented
-    - What you tested and test results
-    - Files changed
-    - Self-review findings (if any)
-    - Any issues or concerns
-```
--- a/skills/subagent-driven-development/spec-reviewer-prompt.md
+++ b/skills/subagent-driven-development/spec-reviewer-prompt.md
@@ -1,61 +0,0 @@
-# Spec Compliance Reviewer Prompt Template
-
-Use this template when dispatching a spec compliance reviewer subagent.
-
-**Purpose:** Verify implementer built what was requested (nothing more, nothing less)
-
-```
-Task tool (general-purpose):
-  description: "Review spec compliance for Task N"
-  prompt: |
-    You are reviewing whether an implementation matches its specification.
-
-    ## What Was Requested
-
-    [FULL TEXT of task requirements]
-
-    ## What Implementer Claims They Built
-
-    [From implementer's report]
-
-    ## CRITICAL: Do Not Trust the Report
-
-    The implementer finished suspiciously quickly. Their report may be incomplete,
-    inaccurate, or optimistic. You MUST verify everything independently.
-
-    **DO NOT:**
-    - Take their word for what they implemented
-    - Trust their claims about completeness
-    - Accept their interpretation of requirements
-
-    **DO:**
-    - Read the actual code they wrote
-    - Compare actual implementation to requirements line by line
-    - Check for missing pieces they claimed to implement
-    - Look for extra features they didn't mention
-
-    ## Your Job
-
-    Read the implementation code and verify:
-
-    **Missing requirements:**
-    - Did they implement everything that was requested?
-    - Are there requirements they skipped or missed?
-    - Did they claim something works but didn't actually implement it?
-
-    **Extra/unneeded work:**
-    - Did they build things that weren't requested?
-    - Did they over-engineer or add unnecessary features?
-    - Did they add "nice to haves" that weren't in spec?
-
-    **Misunderstandings:**
-    - Did they interpret requirements differently than intended?
-    - Did they solve the wrong problem?
-    - Did they implement the right feature but wrong way?
-
-    **Verify by reading code, not by trusting report.**
-
-    Report:
-    - ✅ Spec compliant (if everything matches after code inspection)
-    - ❌ Issues found: [list specifically what's missing or extra, with file:line references]
-```
--- a/skills/systematic-debugging/SKILL.md
+++ b/skills/systematic-debugging/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: systematic-debugging
-description: Use when encountering any bug, test failure, or unexpected behavior, before proposing fixes
+description: Use when encountering any bug, test failure, or unexpected behavior, before proposing fixes - four-phase framework (root cause investigation, pattern analysis, hypothesis testing, implementation) that ensures understanding before attempting solutions
 ---

 # Systematic Debugging
@@ -111,7 +111,7 @@ You MUST complete each phase before proceeding to the next.

   **WHEN error is deep in call stack:**

-   See `root-cause-tracing.md` in this directory for the complete backward tracing technique.
+   **REQUIRED SUB-SKILL:** Use superpowers:root-cause-tracing for backward tracing technique

   **Quick version:**
   - Where does bad value originate?
@@ -176,7 +176,7 @@ You MUST complete each phase before proceeding to the next.
   - Automated test if possible
   - One-off test script if no framework
   - MUST have before fixing
-   - Use the `superpowers:test-driven-development` skill for writing proper failing tests
+   - **REQUIRED SUB-SKILL:** Use superpowers:test-driven-development for writing proper failing tests

 2. **Implement Single Fix**
   - Address the root cause identified
@@ -275,17 +275,16 @@ If systematic investigation reveals issue is truly environmental, timing-depende

 **But:** 95% of "no root cause" cases are incomplete investigation.

-## Supporting Techniques
+## Integration with Other Skills

-These techniques are part of systematic debugging and available in this directory:
+**This skill requires using:**
+- **root-cause-tracing** - REQUIRED when error is deep in call stack (see Phase 1, Step 5)
+- **test-driven-development** - REQUIRED for creating failing test case (see Phase 4, Step 1)

- **`root-cause-tracing.md`** - Trace bugs backward through call stack to find original trigger
- **`defense-in-depth.md`** - Add validation at multiple layers after finding root cause
- **`condition-based-waiting.md`** - Replace arbitrary timeouts with condition polling
-
-**Related skills:**
- **superpowers:test-driven-development** - For creating failing test case (Phase 4, Step 1)
- **superpowers:verification-before-completion** - Verify fix worked before claiming success
+**Complementary skills:**
+- **defense-in-depth** - Add validation at multiple layers after finding root cause
+- **condition-based-waiting** - Replace arbitrary timeouts identified in Phase 2
+- **verification-before-completion** - Verify fix worked before claiming success

 ## Real-World Impact

--- a/skills/test-driven-development/SKILL.md
+++ b/skills/test-driven-development/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: test-driven-development
-description: Use when implementing any feature or bugfix, before writing implementation code
+description: Use when implementing any feature or bugfix, before writing implementation code - write the test first, watch it fail, write minimal code to pass; ensures tests actually verify behavior by requiring failure first
 ---

 # Test-Driven Development (TDD)
@@ -354,13 +354,6 @@ Bug found? Write failing test reproducing it. Follow TDD cycle. Test proves fix

 Never fix bugs without a test.

-## Testing Anti-Patterns
-
-When adding mocks or test utilities, read @testing-anti-patterns.md to avoid common pitfalls:
- Testing mock behavior instead of real behavior
- Adding test-only methods to production classes
- Mocking without understanding dependencies
-
 ## Final Rule

 ```
--- a/skills/test-driven-development/testing-anti-patterns.md
+++ b/skills/test-driven-development/testing-anti-patterns.md
@@ -1,6 +1,9 @@
-# Testing Anti-Patterns
+---
+name: testing-anti-patterns
+description: Use when writing or changing tests, adding mocks, or tempted to add test-only methods to production code - prevents testing mock behavior, production pollution with test-only methods, and mocking without understanding dependencies
+---

-**Load this reference when:** writing or changing tests, adding mocks, or tempted to add test-only methods to production code.
+# Testing Anti-Patterns

 ## Overview

--- a/skills/writing-skills/testing-skills-with-subagents.md
+++ b/skills/writing-skills/testing-skills-with-subagents.md
@@ -1,6 +1,9 @@
-# Testing Skills With Subagents
+---
+name: testing-skills-with-subagents
+description: Use when creating or editing skills, before deployment, to verify they work under pressure and resist rationalization - applies RED-GREEN-REFACTOR cycle to process documentation by running baseline without skill, writing to address failures, iterating to close loopholes
+---

-**Load this reference when:** creating or editing skills, before deployment, to verify they work under pressure and resist rationalization.
+# Testing Skills With Subagents

 ## Overview

--- a/skills/testing-skills-with-subagents/examples/CLAUDE_MD_TESTING.md
+++ b/skills/testing-skills-with-subagents/examples/CLAUDE_MD_TESTING.md
--- a/skills/using-superpowers/SKILL.md
+++ b/skills/using-superpowers/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: using-superpowers
-description: Use when starting any conversation - establishes how to find and use skills, requiring Skill tool invocation before ANY response including clarifying questions
+description: Use when starting any conversation - establishes mandatory workflows for finding and using skills, including using Skill tool before announcing usage, following brainstorming before coding, and creating TodoWrite todos for checklists
 ---

 <EXTREMELY-IMPORTANT>
@@ -11,70 +11,91 @@ IF A SKILL APPLIES TO YOUR TASK, YOU DO NOT HAVE A CHOICE. YOU MUST USE IT.
 This is not negotiable. This is not optional. You cannot rationalize your way out of this.
 </EXTREMELY-IMPORTANT>

-# Using Skills
+# Getting Started with Skills

-## The Rule
+## MANDATORY FIRST RESPONSE PROTOCOL

-**Check for skills BEFORE ANY RESPONSE.** This includes clarifying questions. Even 1% chance means invoke the Skill tool first.
+Before responding to ANY user message, you MUST complete this checklist:

-```dot
-digraph skill_flow {
-    "User message received" [shape=doublecircle];
-    "Might any skill apply?" [shape=diamond];
-    "Invoke Skill tool" [shape=box];
-    "Announce: 'Using [skill] to [purpose]'" [shape=box];
-    "Has checklist?" [shape=diamond];
-    "Create TodoWrite todo per item" [shape=box];
-    "Follow skill exactly" [shape=box];
-    "Respond (including clarifications)" [shape=doublecircle];
+1. ☐ List available skills in your mind
+2. ☐ Ask yourself: "Does ANY skill match this request?"
+3. ☐ If yes → Use the Skill tool to read and run the skill file
+4. ☐ Announce which skill you're using
+5. ☐ Follow the skill exactly

-    "User message received" -> "Might any skill apply?";
-    "Might any skill apply?" -> "Invoke Skill tool" [label="yes, even 1%"];
-    "Might any skill apply?" -> "Respond (including clarifications)" [label="definitely not"];
-    "Invoke Skill tool" -> "Announce: 'Using [skill] to [purpose]'";
-    "Announce: 'Using [skill] to [purpose]'" -> "Has checklist?";
-    "Has checklist?" -> "Create TodoWrite todo per item" [label="yes"];
-    "Has checklist?" -> "Follow skill exactly" [label="no"];
-    "Create TodoWrite todo per item" -> "Follow skill exactly";
-}
-```
+**Responding WITHOUT completing this checklist = automatic failure.**

-## Red Flags
+## Critical Rules

-These thoughts mean STOP—you're rationalizing:
+1. **Follow mandatory workflows.** Brainstorming before coding. Check for relevant skills before ANY task.

-| Thought | Reality |
-|---------|---------|
-| "This is just a simple question" | Questions are tasks. Check for skills. |
-| "I need more context first" | Skill check comes BEFORE clarifying questions. |
-| "Let me explore the codebase first" | Skills tell you HOW to explore. Check first. |
-| "I can check git/files quickly" | Files lack conversation context. Check for skills. |
-| "Let me gather information first" | Skills tell you HOW to gather information. |
-| "This doesn't need a formal skill" | If a skill exists, use it. |
-| "I remember this skill" | Skills evolve. Read current version. |
-| "This doesn't count as a task" | Action = task. Check for skills. |
-| "The skill is overkill" | Simple things become complex. Use it. |
-| "I'll just do this one thing first" | Check BEFORE doing anything. |
-| "This feels productive" | Undisciplined action wastes time. Skills prevent this. |
+2. **Execute skills with the Skill tool.**

-## Skill Priority
+## Common Rationalizations That Mean You're About To Fail

-When multiple skills could apply, use this order:
+If you catch yourself thinking ANY of these thoughts, STOP. You are rationalizing. Check for and use the skill.

-1. **Process skills first** (brainstorming, debugging) - these determine HOW to approach the task
-2. **Implementation skills second** (frontend-design, mcp-builder) - these guide execution
+- "This is just a simple question" → WRONG. Questions are tasks. Check for skills.
+- "I can check git/files quickly" → WRONG. Files don't have conversation context. Check for skills.
+- "Let me gather information first" → WRONG. Skills tell you HOW to gather information. Check for skills.
+- "This doesn't need a formal skill" → WRONG. If a skill exists for it, use it.
+- "I remember this skill" → WRONG. Skills evolve. Run the current version.
+- "This doesn't count as a task" → WRONG. If you're taking action, it's a task. Check for skills.
+- "The skill is overkill for this" → WRONG. Skills exist because simple things become complex. Use it.
+- "I'll just do this one thing first" → WRONG. Check for skills BEFORE doing anything.

-"Let's build X" → brainstorming first, then implementation skills.
-"Fix this bug" → debugging first, then domain-specific skills.
+**Why:** Skills document proven techniques that save time and prevent mistakes. Not using available skills means repeating solved problems and making known errors.

-## Skill Types
+If a skill for your task exists, you must use it or you will fail at your task.

-**Rigid** (TDD, debugging): Follow exactly. Don't adapt away discipline.
+## Skills with Checklists

-**Flexible** (patterns): Adapt principles to context.
+If a skill has a checklist, YOU MUST create TodoWrite todos for EACH item.

-The skill itself tells you which.
+**Don't:**
+- Work through checklist mentally
+- Skip creating todos "to save time"
+- Batch multiple items into one todo
+- Mark complete without doing them

-## User Instructions
+**Why:** Checklists without TodoWrite tracking = steps get skipped. Every time. The overhead of TodoWrite is tiny compared to the cost of missing steps.

-Instructions say WHAT, not HOW. "Add X" or "Fix Y" doesn't mean skip workflows.
+## Announcing Skill Usage
+
+Before using a skill, announce that you are using it.
+"I'm using [Skill Name] to [what you're doing]."
+
+**Examples:**
+- "I'm using the brainstorming skill to refine your idea into a design."
+- "I'm using the test-driven-development skill to implement this feature."
+
+**Why:** Transparency helps your human partner understand your process and catch errors early. It also confirms you actually read the skill.
+
+# About these skills
+
+**Many skills contain rigid rules (TDD, debugging, verification).** Follow them exactly. Don't adapt away the discipline.
+
+**Some skills are flexible patterns (architecture, naming).** Adapt core principles to your context.
+
+The skill itself tells you which type it is.
+
+## Instructions ≠ Permission to Skip Workflows
+
+Your human partner's specific instructions describe WHAT to do, not HOW.
+
+"Add X", "Fix Y" = the goal, NOT permission to skip brainstorming, TDD, or RED-GREEN-REFACTOR.
+
+**Red flags:** "Instruction was specific" • "Seems simple" • "Workflow is overkill"
+
+**Why:** Specific instructions mean clear requirements, which is when workflows matter MOST. Skipping process on "simple" tasks is how simple tasks become complex problems.
+
+## Summary
+
+**Starting any task:**
+1. If relevant skill exists → Use the skill
+2. Announce you're using it
+3. Follow what it says
+
+**Skill has checklist?** TodoWrite for every item.
+
+**If you find a relevant skill, you must read and use it. Not optional.**
--- a/skills/writing-plans/SKILL.md
+++ b/skills/writing-plans/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: writing-plans
-description: Use when you have a spec or requirements for a multi-step task, before touching code
+description: Use when design is complete and you need detailed implementation tasks for engineers with zero codebase context - creates comprehensive implementation plans with exact file paths, complete code examples, and verification steps assuming engineer has minimal domain knowledge
 ---

 # Writing Plans
--- a/skills/writing-skills/SKILL.md
+++ b/skills/writing-skills/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: writing-skills
-description: Use when creating new skills, editing existing skills, or verifying skills work before deployment
+description: Use when creating new skills, editing existing skills, or verifying skills work before deployment - applies TDD to process documentation by testing with subagents before writing, iterating until bulletproof against rationalization
 ---

 # Writing Skills
@@ -95,16 +95,15 @@ skills/
 - Only two fields supported: `name` and `description`
 - Max 1024 characters total
 - `name`: Use letters, numbers, and hyphens only (no parentheses, special chars)
-- `description`: Third-person, describes ONLY when to use (NOT what it does)
+- `description`: Third-person, includes BOTH what it does AND when to use it
  - Start with "Use when..." to focus on triggering conditions
  - Include specific symptoms, situations, and contexts
-  - **NEVER summarize the skill's process or workflow** (see CSO section for why)
  - Keep under 500 characters if possible

 ```markdown
 ---
 name: Skill-Name-With-Hyphens
-description: Use when [specific triggering conditions and symptoms]
+description: Use when [specific triggering conditions and symptoms] - [what the skill does and how it helps, written in third person]
 ---

 # Skill Name
@@ -144,31 +143,7 @@ Concrete results

 **Purpose:** Claude reads description to decide which skills to load for a given task. Make it answer: "Should I read this skill right now?"

-**Format:** Start with "Use when..." to focus on triggering conditions
-
-**CRITICAL: Description = When to Use, NOT What the Skill Does**
-
-The description should ONLY describe triggering conditions. Do NOT summarize the skill's process or workflow in the description.
-
-**Why this matters:** Testing revealed that when a description summarizes the skill's workflow, Claude may follow the description instead of reading the full skill content. A description saying "code review between tasks" caused Claude to do ONE review, even though the skill's flowchart clearly showed TWO reviews (spec compliance then code quality).
-
-When the description was changed to just "Use when executing implementation plans with independent tasks" (no workflow summary), Claude correctly read the flowchart and followed the two-stage review process.
-
-**The trap:** Descriptions that summarize workflow create a shortcut Claude will take. The skill body becomes documentation Claude skips.
-
-```yaml
-# ❌ BAD: Summarizes workflow - Claude may follow this instead of reading skill
-description: Use when executing plans - dispatches subagent per task with code review between tasks
-
-# ❌ BAD: Too much process detail
-description: Use for TDD - write test first, watch it fail, write minimal code, refactor
-
-# ✅ GOOD: Just triggering conditions, no workflow summary
-description: Use when executing implementation plans with independent tasks in the current session
-
-# ✅ GOOD: Triggering conditions only
-description: Use when implementing any feature or bugfix, before writing implementation code
-```
+**Format:** Start with "Use when..." to focus on triggering conditions, then explain what it does

 **Content:**
 - Use concrete triggers, symptoms, and situations that signal this skill applies
@@ -176,7 +151,6 @@ description: Use when implementing any feature or bugfix, before writing impleme
 - Keep triggers technology-agnostic unless the skill itself is technology-specific
 - If skill is technology-specific, make that explicit in the trigger
 - Write in third person (injected into system prompt)
-- **NEVER summarize the skill's process or workflow**

 ```yaml
 # ❌ BAD: Too abstract, vague, doesn't include when to use
@@ -188,11 +162,11 @@ description: I can help you with async tests when they're flaky
 # ❌ BAD: Mentions technology but skill isn't specific to it
 description: Use when tests use setTimeout/sleep and are flaky

-# ✅ GOOD: Starts with "Use when", describes problem, no workflow
-description: Use when tests have race conditions, timing dependencies, or pass/fail inconsistently
+# ✅ GOOD: Starts with "Use when", describes problem, then what it does
+description: Use when tests have race conditions, timing dependencies, or pass/fail inconsistently - replaces arbitrary timeouts with condition polling for reliable async tests

 # ✅ GOOD: Technology-specific skill with explicit trigger
-description: Use when using React Router and handling authentication redirects
+description: Use when using React Router and handling authentication redirects - provides patterns for protected routes and auth state management
 ```

 ### 2. Keyword Coverage
@@ -207,7 +181,7 @@ Use words Claude would search for:

 **Use active voice, verb-first:**
 - ✅ `creating-skills` not `skill-creation`
-- ✅ `condition-based-waiting` not `async-test-helpers`
+- ✅ `testing-skills-with-subagents` not `subagent-skill-testing`

 ### 4. Token Efficiency (Critical)

@@ -314,12 +288,6 @@ digraph when_flowchart {

 See @graphviz-conventions.dot for graphviz style rules.

-**Visualizing for your human partner:** Use `render-graphs.js` in this directory to render a skill's flowcharts to SVG:
-```bash
-./render-graphs.js ../some-skill           # Each diagram separately
-./render-graphs.js ../some-skill --combine # All diagrams in one SVG
-```
-
 ## Code Examples

 **One excellent example beats many mediocre ones**
@@ -552,7 +520,7 @@ Run same scenarios WITH skill. Agent should now comply.

 Agent found new rationalization? Add explicit counter. Re-test until bulletproof.

-**Testing methodology:** See @testing-skills-with-subagents.md for the complete testing methodology:
+**REQUIRED SUB-SKILL:** Use superpowers:testing-skills-with-subagents for the complete testing methodology:
 - How to write pressure scenarios
 - Pressure types (time, sunk cost, authority, exhaustion)
 - Plugging holes systematically
--- a/skills/writing-skills/anthropic-best-practices.md
+++ b/skills/writing-skills/anthropic-best-practices.md
@@ -10,7 +10,7 @@ For conceptual background on how Skills work, see the [Skills overview](/en/docs

 ### Concise is key

-The [context window](https://platform.claude.com/docs/en/build-with-claude/context-windows) is a public good. Your Skill shares the context window with everything else Claude needs to know, including:
+The [context window](/en/docs/build-with-claude/context-windows) is a public good. Your Skill shares the context window with everything else Claude needs to know, including:

 * The system prompt
 * Conversation history
--- a/skills/writing-skills/render-graphs.js
+++ b/skills/writing-skills/render-graphs.js
@@ -1,168 +0,0 @@
-#!/usr/bin/env node
-
-/**
- * Render graphviz diagrams from a skill's SKILL.md to SVG files.
- *
- * Usage:
- *   ./render-graphs.js <skill-directory>           # Render each diagram separately
- *   ./render-graphs.js <skill-directory> --combine # Combine all into one diagram
- *
- * Extracts all ```dot blocks from SKILL.md and renders to SVG.
- * Useful for helping your human partner visualize the process flows.
- *
- * Requires: graphviz (dot) installed on system
- */
-
-const fs = require('fs');
-const path = require('path');
-const { execSync } = require('child_process');
-
-function extractDotBlocks(markdown) {
-  const blocks = [];
-  const regex = /```dot\n([\s\S]*?)```/g;
-  let match;
-
-  while ((match = regex.exec(markdown)) !== null) {
-    const content = match[1].trim();
-
-    // Extract digraph name
-    const nameMatch = content.match(/digraph\s+(\w+)/);
-    const name = nameMatch ? nameMatch[1] : `graph_${blocks.length + 1}`;
-
-    blocks.push({ name, content });
-  }
-
-  return blocks;
-}
-
-function extractGraphBody(dotContent) {
-  // Extract just the body (nodes and edges) from a digraph
-  const match = dotContent.match(/digraph\s+\w+\s*\{([\s\S]*)\}/);
-  if (!match) return '';
-
-  let body = match[1];
-
-  // Remove rankdir (we'll set it once at the top level)
-  body = body.replace(/^\s*rankdir\s*=\s*\w+\s*;?\s*$/gm, '');
-
-  return body.trim();
-}
-
-function combineGraphs(blocks, skillName) {
-  const bodies = blocks.map((block, i) => {
-    const body = extractGraphBody(block.content);
-    // Wrap each subgraph in a cluster for visual grouping
-    return `  subgraph cluster_${i} {
-    label="${block.name}";
-    ${body.split('\n').map(line => '  ' + line).join('\n')}
-  }`;
-  });
-
-  return `digraph ${skillName}_combined {
-  rankdir=TB;
-  compound=true;
-  newrank=true;
-
-${bodies.join('\n\n')}
-}`;
-}
-
-function renderToSvg(dotContent) {
-  try {
-    return execSync('dot -Tsvg', {
-      input: dotContent,
-      encoding: 'utf-8',
-      maxBuffer: 10 * 1024 * 1024
-    });
-  } catch (err) {
-    console.error('Error running dot:', err.message);
-    if (err.stderr) console.error(err.stderr.toString());
-    return null;
-  }
-}
-
-function main() {
-  const args = process.argv.slice(2);
-  const combine = args.includes('--combine');
-  const skillDirArg = args.find(a => !a.startsWith('--'));
-
-  if (!skillDirArg) {
-    console.error('Usage: render-graphs.js <skill-directory> [--combine]');
-    console.error('');
-    console.error('Options:');
-    console.error('  --combine    Combine all diagrams into one SVG');
-    console.error('');
-    console.error('Example:');
-    console.error('  ./render-graphs.js ../subagent-driven-development');
-    console.error('  ./render-graphs.js ../subagent-driven-development --combine');
-    process.exit(1);
-  }
-
-  const skillDir = path.resolve(skillDirArg);
-  const skillFile = path.join(skillDir, 'SKILL.md');
-  const skillName = path.basename(skillDir).replace(/-/g, '_');
-
-  if (!fs.existsSync(skillFile)) {
-    console.error(`Error: ${skillFile} not found`);
-    process.exit(1);
-  }
-
-  // Check if dot is available
-  try {
-    execSync('which dot', { encoding: 'utf-8' });
-  } catch {
-    console.error('Error: graphviz (dot) not found. Install with:');
-    console.error('  brew install graphviz    # macOS');
-    console.error('  apt install graphviz     # Linux');
-    process.exit(1);
-  }
-
-  const markdown = fs.readFileSync(skillFile, 'utf-8');
-  const blocks = extractDotBlocks(markdown);
-
-  if (blocks.length === 0) {
-    console.log('No ```dot blocks found in', skillFile);
-    process.exit(0);
-  }
-
-  console.log(`Found ${blocks.length} diagram(s) in ${path.basename(skillDir)}/SKILL.md`);
-
-  const outputDir = path.join(skillDir, 'diagrams');
-  if (!fs.existsSync(outputDir)) {
-    fs.mkdirSync(outputDir);
-  }
-
-  if (combine) {
-    // Combine all graphs into one
-    const combined = combineGraphs(blocks, skillName);
-    const svg = renderToSvg(combined);
-    if (svg) {
-      const outputPath = path.join(outputDir, `${skillName}_combined.svg`);
-      fs.writeFileSync(outputPath, svg);
-      console.log(`  Rendered: ${skillName}_combined.svg`);
-
-      // Also write the dot source for debugging
-      const dotPath = path.join(outputDir, `${skillName}_combined.dot`);
-      fs.writeFileSync(dotPath, combined);
-      console.log(`  Source: ${skillName}_combined.dot`);
-    } else {
-      console.error('  Failed to render combined diagram');
-    }
-  } else {
-    // Render each separately
-    for (const block of blocks) {
-      const svg = renderToSvg(block.content);
-      if (svg) {
-        const outputPath = path.join(outputDir, `${block.name}.svg`);
-        fs.writeFileSync(outputPath, svg);
-        console.log(`  Rendered: ${block.name}.svg`);
-      } else {
-        console.error(`  Failed: ${block.name}`);
-      }
-    }
-  }
-
-  console.log(`\nOutput: ${outputDir}/`);
-}
-
-main();
--- a/tests/skill-triggering/prompts/dispatching-parallel-agents.txt
+++ b/tests/skill-triggering/prompts/dispatching-parallel-agents.txt
@@ -1,8 +0,0 @@
-I have 4 independent test failures happening in different modules:
-
-1. tests/auth/login.test.ts - "should redirect after login" is failing
-2. tests/api/users.test.ts - "should return user list" returns 500
-3. tests/components/Button.test.tsx - snapshot mismatch
-4. tests/utils/date.test.ts - timezone handling broken
-
-These are unrelated issues in different parts of the codebase. Can you investigate all of them?
--- a/tests/skill-triggering/prompts/executing-plans.txt
+++ b/tests/skill-triggering/prompts/executing-plans.txt
@@ -1 +0,0 @@
-I have a plan document at docs/plans/2024-01-15-auth-system.md that needs to be executed. Please implement it.
--- a/tests/skill-triggering/prompts/requesting-code-review.txt
+++ b/tests/skill-triggering/prompts/requesting-code-review.txt
@@ -1,3 +0,0 @@
-I just finished implementing the user authentication feature. All the code is committed. Can you review the changes before I merge to main?
-
-The commits are between abc123 and def456.
--- a/tests/skill-triggering/prompts/systematic-debugging.txt
+++ b/tests/skill-triggering/prompts/systematic-debugging.txt
@@ -1,11 +0,0 @@
-The tests are failing with this error:
-
-```
-FAIL src/utils/parser.test.ts
-  ● Parser › should handle nested objects
-    TypeError: Cannot read property 'value' of undefined
-      at parse (src/utils/parser.ts:42:18)
-      at Object.<anonymous> (src/utils/parser.test.ts:28:20)
-```
-
-Can you figure out what's going wrong and fix it?
--- a/tests/skill-triggering/prompts/test-driven-development.txt
+++ b/tests/skill-triggering/prompts/test-driven-development.txt
@@ -1,7 +0,0 @@
-I need to add a new feature to validate email addresses. It should:
- Check that there's an @ symbol
- Check that there's at least one character before the @
- Check that there's a dot in the domain part
- Return true/false
-
-Can you implement this?
--- a/tests/skill-triggering/prompts/writing-plans.txt
+++ b/tests/skill-triggering/prompts/writing-plans.txt
@@ -1,10 +0,0 @@
-Here's the spec for our new authentication system:
-
-Requirements:
- Users can register with email/password
- Users can log in and receive a JWT token
- Protected routes require valid JWT
- Tokens expire after 24 hours
- Support password reset via email
-
-We need to implement this. There are multiple steps involved - user model, auth routes, middleware, email service integration.
--- a/tests/skill-triggering/run-all.sh
+++ b/tests/skill-triggering/run-all.sh
@@ -1,60 +0,0 @@
-#!/bin/bash
-# Run all skill triggering tests
-# Usage: ./run-all.sh
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-PROMPTS_DIR="$SCRIPT_DIR/prompts"
-
-SKILLS=(
-    "systematic-debugging"
-    "test-driven-development"
-    "writing-plans"
-    "dispatching-parallel-agents"
-    "executing-plans"
-    "requesting-code-review"
-)
-
-echo "=== Running Skill Triggering Tests ==="
-echo ""
-
-PASSED=0
-FAILED=0
-RESULTS=()
-
-for skill in "${SKILLS[@]}"; do
-    prompt_file="$PROMPTS_DIR/${skill}.txt"
-
-    if [ ! -f "$prompt_file" ]; then
-        echo "⚠️  SKIP: No prompt file for $skill"
-        continue
-    fi
-
-    echo "Testing: $skill"
-
-    if "$SCRIPT_DIR/run-test.sh" "$skill" "$prompt_file" 3 2>&1 | tee /tmp/skill-test-$skill.log; then
-        PASSED=$((PASSED + 1))
-        RESULTS+=("✅ $skill")
-    else
-        FAILED=$((FAILED + 1))
-        RESULTS+=("❌ $skill")
-    fi
-
-    echo ""
-    echo "---"
-    echo ""
-done
-
-echo ""
-echo "=== Summary ==="
-for result in "${RESULTS[@]}"; do
-    echo "  $result"
-done
-echo ""
-echo "Passed: $PASSED"
-echo "Failed: $FAILED"
-
-if [ $FAILED -gt 0 ]; then
-    exit 1
-fi
--- a/tests/skill-triggering/run-test.sh
+++ b/tests/skill-triggering/run-test.sh
@@ -1,88 +0,0 @@
-#!/bin/bash
-# Test skill triggering with naive prompts
-# Usage: ./run-test.sh <skill-name> <prompt-file>
-#
-# Tests whether Claude triggers a skill based on a natural prompt
-# (without explicitly mentioning the skill)
-
-set -e
-
-SKILL_NAME="$1"
-PROMPT_FILE="$2"
-MAX_TURNS="${3:-3}"
-
-if [ -z "$SKILL_NAME" ] || [ -z "$PROMPT_FILE" ]; then
-    echo "Usage: $0 <skill-name> <prompt-file> [max-turns]"
-    echo "Example: $0 systematic-debugging ./test-prompts/debugging.txt"
-    exit 1
-fi
-
-# Get the directory where this script lives (should be tests/skill-triggering)
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-# Get the superpowers plugin root (two levels up from tests/skill-triggering)
-PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
-
-TIMESTAMP=$(date +%s)
-OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/skill-triggering/${SKILL_NAME}"
-mkdir -p "$OUTPUT_DIR"
-
-# Read prompt from file
-PROMPT=$(cat "$PROMPT_FILE")
-
-echo "=== Skill Triggering Test ==="
-echo "Skill: $SKILL_NAME"
-echo "Prompt file: $PROMPT_FILE"
-echo "Max turns: $MAX_TURNS"
-echo "Output dir: $OUTPUT_DIR"
-echo ""
-
-# Copy prompt for reference
-cp "$PROMPT_FILE" "$OUTPUT_DIR/prompt.txt"
-
-# Run Claude
-LOG_FILE="$OUTPUT_DIR/claude-output.json"
-cd "$OUTPUT_DIR"
-
-echo "Plugin dir: $PLUGIN_DIR"
-echo "Running claude -p with naive prompt..."
-timeout 300 claude -p "$PROMPT" \
-    --plugin-dir "$PLUGIN_DIR" \
-    --dangerously-skip-permissions \
-    --max-turns "$MAX_TURNS" \
-    --output-format stream-json \
-    > "$LOG_FILE" 2>&1 || true
-
-echo ""
-echo "=== Results ==="
-
-# Check if skill was triggered (look for Skill tool invocation)
-# In stream-json, tool invocations have "name":"Skill" (not "tool":"Skill")
-# Match either "skill":"skillname" or "skill":"namespace:skillname"
-SKILL_PATTERN='"skill":"([^"]*:)?'"${SKILL_NAME}"'"'
-if grep -q '"name":"Skill"' "$LOG_FILE" && grep -qE "$SKILL_PATTERN" "$LOG_FILE"; then
-    echo "✅ PASS: Skill '$SKILL_NAME' was triggered"
-    TRIGGERED=true
-else
-    echo "❌ FAIL: Skill '$SKILL_NAME' was NOT triggered"
-    TRIGGERED=false
-fi
-
-# Show what skills WERE triggered
-echo ""
-echo "Skills triggered in this run:"
-grep -o '"skill":"[^"]*"' "$LOG_FILE" 2>/dev/null | sort -u || echo "  (none)"
-
-# Show first assistant message
-echo ""
-echo "First assistant response (truncated):"
-grep '"type":"assistant"' "$LOG_FILE" | head -1 | jq -r '.message.content[0].text // .message.content' 2>/dev/null | head -c 500 || echo "  (could not extract)"
-
-echo ""
-echo "Full log: $LOG_FILE"
-echo "Timestamp: $TIMESTAMP"
-
-if [ "$TRIGGERED" = "true" ]; then
-    exit 0
-else
-    exit 1
-fi
--- a/tests/subagent-driven-dev/go-fractals/design.md
+++ b/tests/subagent-driven-dev/go-fractals/design.md
@@ -1,81 +0,0 @@
-# Go Fractals CLI - Design
-
-## Overview
-
-A command-line tool that generates ASCII art fractals. Supports two fractal types with configurable output.
-
-## Usage
-
-```bash
-# Sierpinski triangle
-fractals sierpinski --size 32 --depth 5
-
-# Mandelbrot set
-fractals mandelbrot --width 80 --height 24 --iterations 100
-
-# Custom character
-fractals sierpinski --size 16 --char '#'
-
-# Help
-fractals --help
-fractals sierpinski --help
-```
-
-## Commands
-
-### `sierpinski`
-
-Generates a Sierpinski triangle using recursive subdivision.
-
-Flags:
- `--size` (default: 32) - Width of the triangle base in characters
- `--depth` (default: 5) - Recursion depth
- `--char` (default: '*') - Character to use for filled points
-
-Output: Triangle printed to stdout, one line per row.
-
-### `mandelbrot`
-
-Renders the Mandelbrot set as ASCII art. Maps iteration count to characters.
-
-Flags:
- `--width` (default: 80) - Output width in characters
- `--height` (default: 24) - Output height in characters
- `--iterations` (default: 100) - Maximum iterations for escape calculation
- `--char` (default: gradient) - Single character, or omit for gradient " .:-=+*#%@"
-
-Output: Rectangle printed to stdout.
-
-## Architecture
-
-```
-cmd/
-  fractals/
-    main.go           # Entry point, CLI setup
-internal/
-  sierpinski/
-    sierpinski.go     # Algorithm
-    sierpinski_test.go
-  mandelbrot/
-    mandelbrot.go     # Algorithm
-    mandelbrot_test.go
-  cli/
-    root.go           # Root command, help
-    sierpinski.go     # Sierpinski subcommand
-    mandelbrot.go     # Mandelbrot subcommand
-```
-
-## Dependencies
-
- Go 1.21+
- `github.com/spf13/cobra` for CLI
-
-## Acceptance Criteria
-
-1. `fractals --help` shows usage
-2. `fractals sierpinski` outputs a recognizable triangle
-3. `fractals mandelbrot` outputs a recognizable Mandelbrot set
-4. `--size`, `--width`, `--height`, `--depth`, `--iterations` flags work
-5. `--char` customizes output character
-6. Invalid inputs produce clear error messages
-7. All tests pass
--- a/tests/subagent-driven-dev/go-fractals/plan.md
+++ b/tests/subagent-driven-dev/go-fractals/plan.md
@@ -1,172 +0,0 @@
-# Go Fractals CLI - Implementation Plan
-
-Execute this plan using the `superpowers:subagent-driven-development` skill.
-
-## Context
-
-Building a CLI tool that generates ASCII fractals. See `design.md` for full specification.
-
-## Tasks
-
-### Task 1: Project Setup
-
-Create the Go module and directory structure.
-
-**Do:**
- Initialize `go.mod` with module name `github.com/superpowers-test/fractals`
- Create directory structure: `cmd/fractals/`, `internal/sierpinski/`, `internal/mandelbrot/`, `internal/cli/`
- Create minimal `cmd/fractals/main.go` that prints "fractals cli"
- Add `github.com/spf13/cobra` dependency
-
-**Verify:**
- `go build ./cmd/fractals` succeeds
- `./fractals` prints "fractals cli"
-
---
-
-### Task 2: CLI Framework with Help
-
-Set up Cobra root command with help output.
-
-**Do:**
- Create `internal/cli/root.go` with root command
- Configure help text showing available subcommands
- Wire root command into `main.go`
-
-**Verify:**
- `./fractals --help` shows usage with "sierpinski" and "mandelbrot" listed as available commands
- `./fractals` (no args) shows help
-
---
-
-### Task 3: Sierpinski Algorithm
-
-Implement the Sierpinski triangle generation algorithm.
-
-**Do:**
- Create `internal/sierpinski/sierpinski.go`
- Implement `Generate(size, depth int, char rune) []string` that returns lines of the triangle
- Use recursive midpoint subdivision algorithm
- Create `internal/sierpinski/sierpinski_test.go` with tests:
-  - Small triangle (size=4, depth=2) matches expected output
-  - Size=1 returns single character
-  - Depth=0 returns filled triangle
-
-**Verify:**
- `go test ./internal/sierpinski/...` passes
-
---
-
-### Task 4: Sierpinski CLI Integration
-
-Wire the Sierpinski algorithm to a CLI subcommand.
-
-**Do:**
- Create `internal/cli/sierpinski.go` with `sierpinski` subcommand
- Add flags: `--size` (default 32), `--depth` (default 5), `--char` (default '*')
- Call `sierpinski.Generate()` and print result to stdout
-
-**Verify:**
- `./fractals sierpinski` outputs a triangle
- `./fractals sierpinski --size 16 --depth 3` outputs smaller triangle
- `./fractals sierpinski --help` shows flag documentation
-
---
-
-### Task 5: Mandelbrot Algorithm
-
-Implement the Mandelbrot set ASCII renderer.
-
-**Do:**
- Create `internal/mandelbrot/mandelbrot.go`
- Implement `Render(width, height, maxIter int, char string) []string`
- Map complex plane region (-2.5 to 1.0 real, -1.0 to 1.0 imaginary) to output dimensions
- Map iteration count to character gradient " .:-=+*#%@" (or single char if provided)
- Create `internal/mandelbrot/mandelbrot_test.go` with tests:
-  - Output dimensions match requested width/height
-  - Known point inside set (0,0) maps to max-iteration character
-  - Known point outside set (2,0) maps to low-iteration character
-
-**Verify:**
- `go test ./internal/mandelbrot/...` passes
-
---
-
-### Task 6: Mandelbrot CLI Integration
-
-Wire the Mandelbrot algorithm to a CLI subcommand.
-
-**Do:**
- Create `internal/cli/mandelbrot.go` with `mandelbrot` subcommand
- Add flags: `--width` (default 80), `--height` (default 24), `--iterations` (default 100), `--char` (default "")
- Call `mandelbrot.Render()` and print result to stdout
-
-**Verify:**
- `./fractals mandelbrot` outputs recognizable Mandelbrot set
- `./fractals mandelbrot --width 40 --height 12` outputs smaller version
- `./fractals mandelbrot --help` shows flag documentation
-
---
-
-### Task 7: Character Set Configuration
-
-Ensure `--char` flag works consistently across both commands.
-
-**Do:**
- Verify Sierpinski `--char` flag passes character to algorithm
- For Mandelbrot, `--char` should use single character instead of gradient
- Add tests for custom character output
-
-**Verify:**
- `./fractals sierpinski --char '#'` uses '#' character
- `./fractals mandelbrot --char '.'` uses '.' for all filled points
- Tests pass
-
---
-
-### Task 8: Input Validation and Error Handling
-
-Add validation for invalid inputs.
-
-**Do:**
- Sierpinski: size must be > 0, depth must be >= 0
- Mandelbrot: width/height must be > 0, iterations must be > 0
- Return clear error messages for invalid inputs
- Add tests for error cases
-
-**Verify:**
- `./fractals sierpinski --size 0` prints error, exits non-zero
- `./fractals mandelbrot --width -1` prints error, exits non-zero
- Error messages are clear and helpful
-
---
-
-### Task 9: Integration Tests
-
-Add integration tests that invoke the CLI.
-
-**Do:**
- Create `cmd/fractals/main_test.go` or `test/integration_test.go`
- Test full CLI invocation for both commands
- Verify output format and exit codes
- Test error cases return non-zero exit
-
-**Verify:**
- `go test ./...` passes all tests including integration tests
-
---
-
-### Task 10: README
-
-Document usage and examples.
-
-**Do:**
- Create `README.md` with:
-  - Project description
-  - Installation: `go install ./cmd/fractals`
-  - Usage examples for both commands
-  - Example output (small samples)
-
-**Verify:**
- README accurately describes the tool
- Examples in README actually work
--- a/tests/subagent-driven-dev/go-fractals/scaffold.sh
+++ b/tests/subagent-driven-dev/go-fractals/scaffold.sh
@@ -1,45 +0,0 @@
-#!/bin/bash
-# Scaffold the Go Fractals test project
-# Usage: ./scaffold.sh /path/to/target/directory
-
-set -e
-
-TARGET_DIR="${1:?Usage: $0 <target-directory>}"
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-
-# Create target directory
-mkdir -p "$TARGET_DIR"
-cd "$TARGET_DIR"
-
-# Initialize git repo
-git init
-
-# Copy design and plan
-cp "$SCRIPT_DIR/design.md" .
-cp "$SCRIPT_DIR/plan.md" .
-
-# Create .claude settings to allow reads/writes in this directory
-mkdir -p .claude
-cat > .claude/settings.local.json << 'SETTINGS'
-{
-  "permissions": {
-    "allow": [
-      "Read(**)",
-      "Edit(**)",
-      "Write(**)",
-      "Bash(go:*)",
-      "Bash(mkdir:*)",
-      "Bash(git:*)"
-    ]
-  }
-}
-SETTINGS
-
-# Create initial commit
-git add .
-git commit -m "Initial project setup with design and plan"
-
-echo "Scaffolded Go Fractals project at: $TARGET_DIR"
-echo ""
-echo "To run the test:"
-echo "  claude -p \"Execute this plan using superpowers:subagent-driven-development. Plan: $TARGET_DIR/plan.md\" --plugin-dir /path/to/superpowers"
--- a/tests/subagent-driven-dev/run-test.sh
+++ b/tests/subagent-driven-dev/run-test.sh
@@ -1,105 +0,0 @@
-#!/bin/bash
-# Run a subagent-driven-development test
-# Usage: ./run-test.sh <test-name> [--plugin-dir <path>]
-#
-# Example:
-#   ./run-test.sh go-fractals
-#   ./run-test.sh svelte-todo --plugin-dir /path/to/superpowers
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-TEST_NAME="${1:?Usage: $0 <test-name> [--plugin-dir <path>]}"
-shift
-
-# Parse optional arguments
-PLUGIN_DIR=""
-while [[ $# -gt 0 ]]; do
-  case $1 in
-    --plugin-dir)
-      PLUGIN_DIR="$2"
-      shift 2
-      ;;
-    *)
-      echo "Unknown option: $1"
-      exit 1
-      ;;
-  esac
-done
-
-# Default plugin dir to parent of tests directory
-if [[ -z "$PLUGIN_DIR" ]]; then
-  PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
-fi
-
-# Verify test exists
-TEST_DIR="$SCRIPT_DIR/$TEST_NAME"
-if [[ ! -d "$TEST_DIR" ]]; then
-  echo "Error: Test '$TEST_NAME' not found at $TEST_DIR"
-  echo "Available tests:"
-  ls -1 "$SCRIPT_DIR" | grep -v '\.sh$' | grep -v '\.md$'
-  exit 1
-fi
-
-# Create timestamped output directory
-TIMESTAMP=$(date +%s)
-OUTPUT_BASE="/tmp/superpowers-tests/$TIMESTAMP/subagent-driven-development"
-OUTPUT_DIR="$OUTPUT_BASE/$TEST_NAME"
-mkdir -p "$OUTPUT_DIR"
-
-echo "=== Subagent-Driven Development Test ==="
-echo "Test: $TEST_NAME"
-echo "Output: $OUTPUT_DIR"
-echo "Plugin: $PLUGIN_DIR"
-echo ""
-
-# Scaffold the project
-echo ">>> Scaffolding project..."
-"$TEST_DIR/scaffold.sh" "$OUTPUT_DIR/project"
-echo ""
-
-# Prepare the prompt
-PLAN_PATH="$OUTPUT_DIR/project/plan.md"
-PROMPT="Execute this plan using superpowers:subagent-driven-development. The plan is at: $PLAN_PATH"
-
-# Run Claude with JSON output for token tracking
-LOG_FILE="$OUTPUT_DIR/claude-output.json"
-echo ">>> Running Claude..."
-echo "Prompt: $PROMPT"
-echo "Log file: $LOG_FILE"
-echo ""
-
-# Run claude and capture output
-# Using stream-json to get token usage stats
-# --dangerously-skip-permissions for automated testing (subagents don't inherit parent settings)
-cd "$OUTPUT_DIR/project"
-claude -p "$PROMPT" \
-  --plugin-dir "$PLUGIN_DIR" \
-  --dangerously-skip-permissions \
-  --output-format stream-json \
-  > "$LOG_FILE" 2>&1 || true
-
-# Extract final stats
-echo ""
-echo ">>> Test complete"
-echo "Project directory: $OUTPUT_DIR/project"
-echo "Claude log: $LOG_FILE"
-echo ""
-
-# Show token usage if available
-if command -v jq &> /dev/null; then
-  echo ">>> Token usage:"
-  # Extract usage from the last "result"-type event in the stream-json log
-  jq -s '[.[] | select(.type == "result")] | last | .usage' "$LOG_FILE" 2>/dev/null || echo "(could not parse usage)"
-  echo ""
-fi
-
-echo ">>> Next steps:"
-echo "1. Review the project: cd $OUTPUT_DIR/project"
-echo "2. Review Claude's log: less $LOG_FILE"
-echo "3. Check if tests pass:"
-if [[ "$TEST_NAME" == "go-fractals" ]]; then
-  echo "   cd $OUTPUT_DIR/project && go test ./..."
-elif [[ "$TEST_NAME" == "svelte-todo" ]]; then
-  echo "   cd $OUTPUT_DIR/project && npm test && npx playwright test"
-fi
--- a/tests/subagent-driven-dev/svelte-todo/design.md
+++ b/tests/subagent-driven-dev/svelte-todo/design.md
@@ -1,70 +0,0 @@
-# Svelte Todo List - Design
-
-## Overview
-
-A simple todo list application built with Svelte. Supports creating, completing, and deleting todos with localStorage persistence.
-
-## Features
-
- Add new todos
- Mark todos as complete/incomplete
- Delete todos
- Filter by: All / Active / Completed
- Clear all completed todos
- Persist to localStorage
- Show count of remaining items
-
-## User Interface
-
-```
-┌─────────────────────────────────────────┐
-│  Svelte Todos                           │
-├─────────────────────────────────────────┤
-│  [________________________] [Add]       │
-├─────────────────────────────────────────┤
-│  [ ] Buy groceries                  [x] │
-│  [✓] Walk the dog                   [x] │
-│  [ ] Write code                     [x] │
-├─────────────────────────────────────────┤
-│  2 items left                           │
-│  [All] [Active] [Completed]  [Clear ✓]  │
-└─────────────────────────────────────────┘
-```
-
-## Components
-
-```
-src/
-  App.svelte           # Main app, state management
-  lib/
-    TodoInput.svelte   # Text input + Add button
-    TodoList.svelte    # List container
-    TodoItem.svelte    # Single todo with checkbox, text, delete
-    FilterBar.svelte   # Filter buttons + clear completed
-    store.ts           # Svelte store for todos
-    storage.ts         # localStorage persistence
-```
-
-## Data Model
-
-```typescript
-interface Todo {
-  id: string;        // UUID
-  text: string;      // Todo text
-  completed: boolean;
-}
-
-type Filter = 'all' | 'active' | 'completed';
-```
-
-## Acceptance Criteria
-
-1. Can add a todo by typing and pressing Enter or clicking Add
-2. Can toggle todo completion by clicking checkbox
-3. Can delete a todo by clicking X button
-4. Filter buttons show correct subset of todos
-5. "X items left" shows count of incomplete todos
-6. "Clear completed" removes all completed todos
-7. Todos persist across page refresh (localStorage)
-8. Empty state shows a helpful message (e.g. "No todos yet")
-9. All tests pass
--- a/tests/subagent-driven-dev/svelte-todo/plan.md
+++ b/tests/subagent-driven-dev/svelte-todo/plan.md
@@ -1,222 +0,0 @@
-# Svelte Todo List - Implementation Plan
-
-Execute this plan using the `superpowers:subagent-driven-development` skill.
-
-## Context
-
-Building a todo list app with Svelte. See `design.md` for full specification.
-
-## Tasks
-
-### Task 1: Project Setup
-
-Create the Svelte project with Vite.
-
-**Do:**
- Run `npm create vite@latest . -- --template svelte-ts`
- Install dependencies with `npm install`
- Verify dev server works
- Clean up default Vite template content from App.svelte
-
-**Verify:**
- `npm run dev` starts server
- App shows minimal "Svelte Todos" heading
- `npm run build` succeeds
-
---
-
-### Task 2: Todo Store
-
-Create the Svelte store for todo state management.
-
-**Do:**
- Create `src/lib/store.ts`
- Define `Todo` interface with id, text, completed
- Create writable store with initial empty array
- Export functions: `addTodo(text)`, `toggleTodo(id)`, `deleteTodo(id)`, `clearCompleted()`
- Create `src/lib/store.test.ts` with tests for each function
-
-**Verify:**
- Tests pass: `npm run test` (install vitest if needed)
-
---
-
-### Task 3: localStorage Persistence
-
-Add persistence layer for todos.
-
-**Do:**
- Create `src/lib/storage.ts`
- Implement `loadTodos(): Todo[]` and `saveTodos(todos: Todo[])`
- Handle JSON parse errors gracefully (return empty array)
- Integrate with store: load on init, save on change
- Add tests for load/save/error handling
-
-**Verify:**
- Tests pass
- Manual test: add todo, refresh page, todo persists
-
---
-
-### Task 4: TodoInput Component
-
-Create the input component for adding todos.
-
-**Do:**
- Create `src/lib/TodoInput.svelte`
- Text input bound to local state
- Add button calls `addTodo()` and clears input
- Enter key also submits
- Disable Add button when input is empty
- Add component tests
-
-**Verify:**
- Tests pass
- Component renders input and button
-
---
-
-### Task 5: TodoItem Component
-
-Create the single todo item component.
-
-**Do:**
- Create `src/lib/TodoItem.svelte`
- Props: `todo: Todo`
- Checkbox toggles completion (calls `toggleTodo`)
- Text with strikethrough when completed
- Delete button (X) calls `deleteTodo`
- Add component tests
-
-**Verify:**
- Tests pass
- Component renders checkbox, text, delete button
-
---
-
-### Task 6: TodoList Component
-
-Create the list container component.
-
-**Do:**
- Create `src/lib/TodoList.svelte`
- Props: `todos: Todo[]`
- Renders TodoItem for each todo
- Shows "No todos yet" when empty
- Add component tests
-
-**Verify:**
- Tests pass
- Component renders list of TodoItems
-
---
-
-### Task 7: FilterBar Component
-
-Create the filter and status bar component.
-
-**Do:**
- Create `src/lib/FilterBar.svelte`
- Props: `todos: Todo[]`, `filter: Filter`, `onFilterChange: (f: Filter) => void`
- Show count: "X items left" (incomplete count)
- Three filter buttons: All, Active, Completed
- Active filter is visually highlighted
- "Clear completed" button (hidden when no completed todos)
- Add component tests
-
-**Verify:**
- Tests pass
- Component renders count, filters, clear button
-
---
-
-### Task 8: App Integration
-
-Wire all components together in App.svelte.
-
-**Do:**
- Import all components and store
- Add filter state (default: 'all')
- Compute filtered todos based on filter state
- Render: heading, TodoInput, TodoList, FilterBar
- Pass appropriate props to each component
-
-**Verify:**
- App renders all components
- Adding todos works
- Toggling works
- Deleting works
-
---
-
-### Task 9: Filter Functionality
-
-Ensure filtering works end-to-end.
-
-**Do:**
- Verify filter buttons change displayed todos
- 'all' shows all todos
- 'active' shows only incomplete todos
- 'completed' shows only completed todos
- Clear completed removes completed todos and resets filter if needed
- Add integration tests
-
-**Verify:**
- Filter tests pass
- Manual verification of all filter states
-
---
-
-### Task 10: Styling and Polish
-
-Add CSS styling for usability.
-
-**Do:**
- Style the app to match the design mockup
- Completed todos have strikethrough and muted color
- Active filter button is highlighted
- Input has focus styles
- Delete button appears on hover (or always on mobile)
- Responsive layout
-
-**Verify:**
- App is visually usable
- Styles don't break functionality
-
---
-
-### Task 11: End-to-End Tests
-
-Add Playwright tests for full user flows.
-
-**Do:**
- Install Playwright: `npm init playwright@latest`
- Create `tests/todo.spec.ts`
- Test flows:
-  - Add a todo
-  - Complete a todo
-  - Delete a todo
-  - Filter todos
-  - Clear completed
-  - Persistence (add, reload, verify)
-
-**Verify:**
- `npx playwright test` passes
-
---
-
-### Task 12: README
-
-Document the project.
-
-**Do:**
- Create `README.md` with:
-  - Project description
-  - Setup: `npm install`
-  - Development: `npm run dev`
-  - Testing: `npm test` and `npx playwright test`
-  - Build: `npm run build`
-
-**Verify:**
- README accurately describes the project
- Instructions work
--- a/tests/subagent-driven-dev/svelte-todo/scaffold.sh
+++ b/tests/subagent-driven-dev/svelte-todo/scaffold.sh
@@ -1,46 +0,0 @@
-#!/bin/bash
-# Scaffold the Svelte Todo test project
-# Usage: ./scaffold.sh /path/to/target/directory
-
-set -e
-
-TARGET_DIR="${1:?Usage: $0 <target-directory>}"
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-
-# Create target directory
-mkdir -p "$TARGET_DIR"
-cd "$TARGET_DIR"
-
-# Initialize git repo
-git init
-
-# Copy design and plan
-cp "$SCRIPT_DIR/design.md" .
-cp "$SCRIPT_DIR/plan.md" .
-
-# Create .claude settings to allow reads/writes in this directory
-mkdir -p .claude
-cat > .claude/settings.local.json << 'SETTINGS'
-{
-  "permissions": {
-    "allow": [
-      "Read(**)",
-      "Edit(**)",
-      "Write(**)",
-      "Bash(npm:*)",
-      "Bash(npx:*)",
-      "Bash(mkdir:*)",
-      "Bash(git:*)"
-    ]
-  }
-}
-SETTINGS
-
-# Create initial commit
-git add .
-git commit -m "Initial project setup with design and plan"
-
-echo "Scaffolded Svelte Todo project at: $TARGET_DIR"
-echo ""
-echo "To run the test:"
-echo "  claude -p \"Execute this plan using superpowers:subagent-driven-development. Plan: $TARGET_DIR/plan.md\" --plugin-dir /path/to/superpowers"
Author	SHA1	Message	Date
Jesse Vincent	2ea4f36b0f	Add comprehensive testing documentation Documents: - How to run integration tests - subagent-driven-development test details - Token analysis tool usage - Troubleshooting common issues - Writing new integration tests - Session transcript format	2025-11-29 21:03:33 -08:00
Jesse Vincent	4923c072e6	Add token usage analysis to subagent-driven-development test - Rewrote analyze-token-usage.py to parse main session file correctly - Extracts usage from toolUseResult fields for each subagent - Shows breakdown by agent with descriptions - Integrated into test-subagent-driven-development-integration.sh - Displays token usage automatically after each test run	2025-11-29 20:51:18 -08:00
Jesse Vincent	e90722ebf6	fix: verify skill usage via session transcript not text output The skill instructions are internal and don't appear in user-facing output. Updated verification to parse the session JSONL transcript and check for actual tool usage: - Skill tool invocation - Task tool (subagents) - TodoWrite (tracking) - Implementation results	2025-11-29 19:35:43 -08:00
Jesse Vincent	9c72ae7866	test: use bypassPermissions mode for unrestricted testing dontAsk mode was auto-denying Write tool. Use bypassPermissions instead to allow full tool access in this controlled test environment.	2025-11-29 17:15:21 -08:00
Jesse Vincent	d8004f9b27	test: auto-approve permissions with --permission-mode dontAsk Headless tests need automatic permission approval to write files. Using dontAsk mode to auto-approve permissions for test directory.	2025-11-29 11:47:47 -08:00
Jesse Vincent	79b1ac39e0	test: add --add-dir flag for temp directory access Claude needs explicit permission to access the temp test directory. Added --add-dir flag to grant access to the test project.	2025-11-29 11:39:04 -08:00
Jesse Vincent	c538d16550	test: show Claude output in real-time during integration test Use tee instead of redirection so test output is visible during execution while still being saved to file for analysis.	2025-11-29 11:29:12 -08:00
Jesse Vincent	8923b2cf16	fix: run integration test from superpowers dir to access local dev skills The superpowers-dev marketplace makes skills available only when running from the plugin directory. Updated test to run claude from superpowers directory while working on the test project.	2025-11-29 10:35:38 -08:00
Jesse Vincent	575b14161e	Fix tests to use --allowed-tools flag Claude Code headless mode requires --allowed-tools flag to actually execute tool calls. Without it, Claude only responds as if it's doing things but doesn't actually use tools. Changes: - Updated run_claude helper to accept allowed_tools parameter - Updated integration test to use --allowed-tools=all - This enables actual tool execution (Write, Task, Bash, etc.) Now the integration test should actually execute the workflow instead of just talking about it.	2025-11-28 22:21:24 -08:00
Jesse Vincent	5b5fb3940d	Fix syntax error in integration test Simplified command substitution to avoid shell parsing issues. Instead of nested heredoc in command substitution, write prompt to file first then read it.	2025-11-28 15:22:01 -08:00
Jesse Vincent	c2c72d9bfa	Add integration test for subagent-driven-development Created full end-to-end integration test that executes a real plan and verifies the new workflow improvements actually work. New test: test-subagent-driven-development-integration.sh - Creates real Node.js test project - Generates implementation plan (2 tasks) - Executes using subagent-driven-development skill - Verifies 8 key behaviors: 1. Plan read once at beginning (not per task) 2. Full task text provided to subagents (not file reading) 3. Subagents perform self-review 4. Spec compliance review before code quality 5. Spec reviewer reads code independently 6. Working implementation produced 7. Tests pass 8. No extra features added (spec compliance) Integration tests are opt-in (--integration flag) due to 10-30 min runtime. Updated run-skill-tests.sh: - Added --integration flag - Separates fast tests from integration tests - Shows note when integration tests skipped Updated README with integration test documentation. Run with: ./run-skill-tests.sh # Fast tests only ./run-skill-tests.sh --integration # Include integration tests	2025-11-28 15:06:10 -08:00
Jesse Vincent	2d011053d5	Add Claude Code skills test framework Created automated test suite for testing superpowers skills using Claude Code CLI in headless mode. New files: - tests/claude-code/run-skill-tests.sh - Main test runner - tests/claude-code/test-helpers.sh - Helper functions for testing - tests/claude-code/test-subagent-driven-development.sh - First test - tests/claude-code/README.md - Documentation Test framework features: - Run Claude Code with prompts and capture output - Assertion helpers (contains, not_contains, count, order) - Test project creation helpers - Timeout support (default 5 minutes) - Verbose mode for debugging - Specific test selection First test verifies subagent-driven-development skill: - Skill loading - Workflow ordering (spec compliance before code quality) - Self-review requirements - Plan reading efficiency (read once) - Spec compliance reviewer skepticism - Review loops - Task context provision Run with: cd tests/claude-code && ./run-skill-tests.sh	2025-11-28 14:51:08 -08:00
Jesse Vincent	47bfdf36f1	Emphasize spec compliance review must complete before code quality Made sequencing explicit: - Spec compliance review loop must fully complete (✅) before code quality - Added "Do NOT proceed to code quality review until spec compliance is ✅" - Code Quality Review section starts with "Only run after spec compliance review is complete" - Red Flags: Added "Start code quality review before spec compliance is ✅ (wrong order)" This ensures we don't waste time reviewing code quality of the wrong implementation. Verify they built the right thing first, then verify they built it well.	2025-11-28 14:32:12 -08:00
Jesse Vincent	fedd3e2096	Make spec compliance reviewer skeptical and verification-focused The spec compliance reviewer now: - Does NOT trust implementer's report - Is warned implementer finished suspiciously quickly - MUST verify everything by reading actual code - Compares implementation to requirements line by line - Reports issues with file:line references Key additions: - "Do Not Trust the Report" section - Explicit DO NOT / DO lists - "Verify by reading code, not by trusting report" - Changed "What Was Implemented" to "What Implementer Claims They Built" This prevents rubber-stamping and ensures independent verification of spec compliance against actual codebase.	2025-11-28 14:29:53 -08:00
Jesse Vincent	78496a6a1c	Improve subagent-driven-development workflow Key improvements based on feedback: 1. Read plan once, not per task - Extract all tasks in Step 1 - Reference extracted tasks in Step 2 - Eliminates redundant file reading 2. Enable questions during work - Not just before, but also while working - "It's always OK to ask questions" - Don't guess or make assumptions 3. Add self-review before reporting - Completeness: implemented everything? - Quality: best work, clear names? - Discipline: avoided overbuilding? - Testing: comprehensive, real behavior? - Catches issues before handoff 4. Add spec compliance review - Separate reviewer checks: built the right thing? - Flags missing requirements - Flags extra/unneeded work - Flags misunderstandings - Runs BEFORE code quality review 5. Make reviews loops, not one-shot - Reviewer finds issues - Implementer fixes - Reviewer reviews again - Repeat until approved - Applies to both spec and code quality Two-stage review process: - Stage 1: Spec compliance (right thing?) - Stage 2: Code quality (built well?) This enables subagents to do their best work with clear requirements, opportunities to clarify, self-critique, and thorough review loops.	2025-11-28 14:24:13 -08:00
Jesse Vincent	830c226a9c	Update subagent-driven-development: controller provides full task text Changed workflow so controller provides complete task context directly rather than making subagent read plan file. Key changes: - Controller reads plan and extracts full task text - Controller provides scene-setting context (dependencies, architecture) - Subagent receives complete information in prompt (no file reading) - Subagent can ask clarifying questions before beginning work - Controller handles questions/concerns before subagent proceeds Benefits: - No file reading overhead for subagent - Controller curates exactly what context is needed - Questions surfaced before work begins (not after) - Subagent has complete information to do best work This enables subagents to start with clarity rather than ambiguity.	2025-11-28 13:50:37 -08:00
Jesse Vincent	637ad174be	Add skills improvement plan from user feedback Analyzed feedback from two Claude instances using superpowers in real development scenarios. Identified 8 core problems and proposed improvements organized by impact and risk. Key problems: - Configuration change verification gap (verify success not intent) - Background process accumulation across subagents - Context bloat in subagent prompts - Missing self-reflection before handoff - Mock-interface drift - Code reviewer file access issues - Skills not being read/enforced - Fix workflow latency Proposed improvements organized in 3 phases: - Phase 1: High-impact, low-risk (do first) - Phase 2: Moderate changes (test carefully) - Phase 3: Optimization (validate first) See plan for detailed analysis and open questions.	2025-11-28 13:43:23 -08:00
				`@@ -1 +0,0 @@`
				`I have a plan document at docs/plans/2024-01-15-auth-system.md that needs to be executed. Please implement it.`