#!/usr/bin/env bash
# Test runner for Claude Code skills
# Tests skills by invoking Claude Code CLI and verifying behavior

# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# Absolute directory of this script. The script changes into it so that
# relative paths resolve the same way regardless of the caller's cwd.
# `--` keeps a path that starts with `-` from being parsed as an option.
SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"
cd -- "$SCRIPT_DIR"

# Print the run banner: suite title, repository root (two levels above the
# current directory), current time, and the installed Claude CLI version.
echo "========================================"
echo " Claude Code Skills Test Suite"
echo "========================================"
echo ""
echo "Repository: $(cd ../.. && pwd)"
echo "Test time: $(date)"
# Falls back to 'not found' so the banner never aborts the script under set -e.
echo "Claude version: $(claude --version 2>/dev/null || echo 'not found')"
echo ""

# Check if Claude Code is available
# Nothing below can run without the CLI, so stop early with a diagnostic on
# stderr and a non-zero exit status.
if ! command -v claude >/dev/null 2>&1; then
  echo "ERROR: Claude Code CLI not found" >&2
  echo "Install Claude Code first: https://code.claude.com" >&2
  exit 1
fi

# Parse command line arguments
VERBOSE=false
SPECIFIC_TEST=""
TIMEOUT=300 # Default 5 minute timeout per test
RUN_INTEGRATION=false

# Print the help text to stdout.
usage() {
  echo "Usage: $0 [options]"
  echo ""
  echo "Options:"
  echo " --verbose, -v Show verbose output"
  echo " --test, -t NAME Run only the specified test"
  echo " --timeout SECONDS Set timeout per test (default: 300)"
  echo " --integration, -i Run integration tests (slow, 10-30 min)"
  echo " --help, -h Show this help"
  echo ""
  echo "Tests:"
  echo " test-subagent-driven-development.sh Test skill loading and requirements"
  echo ""
  echo "Integration Tests (use --integration):"
  echo " test-subagent-driven-development-integration.sh Full workflow execution"
  echo " test-requesting-code-review.sh Reviewer flags planted bugs"
}

# Report an option that was given without its required value, then exit 1.
# Arguments: $1 - the option as typed by the user
missing_value() {
  echo "Option $1 requires a value" >&2
  echo "Use --help for usage information" >&2
  exit 1
}

# Consume the given arguments and set VERBOSE, SPECIFIC_TEST, TIMEOUT and
# RUN_INTEGRATION accordingly.
# Exits 0 after printing help; exits 1 on an unknown option or on an option
# that is missing its value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --verbose|-v)
        VERBOSE=true
        shift
        ;;
      --test|-t)
        # Without this guard, "$2" is an unbound-variable error under set -u.
        [[ $# -ge 2 ]] || missing_value "$1"
        SPECIFIC_TEST="$2"
        shift 2
        ;;
      --timeout)
        [[ $# -ge 2 ]] || missing_value "$1"
        TIMEOUT="$2"
        shift 2
        ;;
      --integration|-i)
        RUN_INTEGRATION=true
        shift
        ;;
      --help|-h)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# List of skill tests to run (fast unit tests)
# Entries are file names of test scripts.
tests=(
  "test-subagent-driven-development.sh"
)

# Integration tests (slow, full execution)
integration_tests=(
  # Full subagent-driven-development workflow execution.
  "test-subagent-driven-development-integration.sh"
  # Plants real bugs (SQL injection, plaintext password handling, credential
  # logging) in a tiny project, runs the requesting-code-review skill on it,
  # and asserts the dispatched reviewer flags each one and refuses to approve.
  "test-requesting-code-review.sh"
)

# Add integration tests if requested
# They are appended, so the fast tests still run first.
if [[ "$RUN_INTEGRATION" == true ]]; then
  tests+=("${integration_tests[@]}")
fi

# Filter to specific test if requested
# A named test replaces the whole list, so it runs on its own whether or not
# it appears in either predefined list.
if [[ -n "$SPECIFIC_TEST" ]]; then
  tests=("$SPECIFIC_TEST")
fi

# Track results
# Running totals for the end-of-run summary.
passed=0
failed=0
skipped=0

#######################################
# Run one test script under a timeout and record the outcome.
# Globals:
#   SCRIPT_DIR, TIMEOUT, VERBOSE (read)
#   passed, failed, skipped (incremented)
# Arguments:
#   $1 - test file name, relative to SCRIPT_DIR
# Outputs:
#   Progress and result lines on stdout. In verbose mode the test's own
#   output is streamed as it runs; otherwise it is captured and shown only
#   when the test fails.
#######################################
run_test() {
  local test="$1"
  local test_path="$SCRIPT_DIR/$test"
  local start_time end_time duration
  local exit_code=0
  local output=""
  local label=""

  echo "----------------------------------------"
  echo "Running: $test"
  echo "----------------------------------------"

  if [[ ! -f "$test_path" ]]; then
    echo " [SKIP] Test file not found: $test"
    skipped=$((skipped + 1))
    return 0
  fi

  if [[ ! -x "$test_path" ]]; then
    echo " Making $test executable..."
    chmod +x "$test_path"
  fi

  start_time=$(date +%s)
  # `|| exit_code=$?` records the status without tripping set -e.
  if [[ "$VERBOSE" == true ]]; then
    timeout "$TIMEOUT" bash "$test_path" || exit_code=$?
  else
    # Capture output for non-verbose mode
    output=$(timeout "$TIMEOUT" bash "$test_path" 2>&1) || exit_code=$?
  fi
  end_time=$(date +%s)
  duration=$((end_time - start_time))

  if [[ "$VERBOSE" == true ]]; then
    # Separate the streamed test output from the result line, and name the
    # test in that line.
    echo ""
    label=" $test"
  fi

  if [[ $exit_code -eq 0 ]]; then
    echo " [PASS]${label} (${duration}s)"
    passed=$((passed + 1))
  else
    # `timeout` exits with 124 when it had to kill the command.
    if [[ $exit_code -eq 124 ]]; then
      echo " [FAIL]${label} (timeout after ${TIMEOUT}s)"
    else
      echo " [FAIL]${label} (${duration}s)"
    fi
    if [[ "$VERBOSE" != true ]]; then
      echo ""
      echo " Output:"
      # printf is safe even if the captured output starts with "-n" or "-e".
      printf '%s\n' "$output" | sed 's/^/ /'
    fi
    failed=$((failed + 1))
  fi

  echo ""
}

# Run each test
for test in "${tests[@]}"; do
  run_test "$test"
done

# Print summary
# Reports the totals held in the passed / failed / skipped counters.
echo "========================================"
echo " Test Results Summary"
echo "========================================"
echo ""
echo " Passed: $passed"
echo " Failed: $failed"
echo " Skipped: $skipped"
echo ""

# When integration tests exist but were not requested, tell the user how to
# run them.
if [[ "$RUN_INTEGRATION" == false && ${#integration_tests[@]} -gt 0 ]]; then
  echo "Note: Integration tests were not run (they take 10-30 minutes)."
  echo "Use --integration flag to run full workflow execution tests."
  echo ""
fi

# Final verdict: the exit status is 1 if any test failed and 0 otherwise.
# Skipped tests do not count as failures.
if [[ "$failed" -gt 0 ]]; then
  echo "STATUS: FAILED"
  exit 1
else
  echo "STATUS: PASSED"
  exit 0
fi