feat(builtin-commands): adapt commands and templates for team-mode

This commit is contained in:
YeonGyu-Kim
2026-04-28 10:48:02 +09:00
parent 4af6eb6ebd
commit 5e42efc900
4 changed files with 392 additions and 5 deletions

View File

@@ -3,7 +3,8 @@
import { afterEach, beforeEach, describe, test, expect } from "bun:test"
import { loadBuiltinCommands } from "./commands"
import { HANDOFF_TEMPLATE } from "./templates/handoff"
import { REMOVE_AI_SLOPS_TEMPLATE } from "./templates/remove-ai-slops"
import { REFACTOR_TEMPLATE, REFACTOR_TEAM_MODE_ADDENDUM } from "./templates/refactor"
import { REMOVE_AI_SLOPS_TEMPLATE, REMOVE_AI_SLOPS_TEAM_MODE_ADDENDUM } from "./templates/remove-ai-slops"
import type { BuiltinCommandName } from "./types"
import { _resetForTesting, registerAgentName } from "../claude-code-session-state"
@@ -181,6 +182,138 @@ describe("REMOVE_AI_SLOPS_TEMPLATE", () => {
expect(REMOVE_AI_SLOPS_TEMPLATE).toContain('git merge-base "$BASE_BRANCH" HEAD')
expect(REMOVE_AI_SLOPS_TEMPLATE).not.toContain("git merge-base main HEAD")
})
test("should not contain team mode content in the base template", () => {
  //#given - markers that only the team mode addendum is allowed to introduce
  const teamOnlyMarkers = ["slop-squad", "team_create", "Team Mode Protocol"]

  //#when / #then - the base template (used when team mode is disabled) carries none of them
  for (const marker of teamOnlyMarkers) {
    expect(REMOVE_AI_SLOPS_TEMPLATE).not.toContain(marker)
  }
})
})
describe("REMOVE_AI_SLOPS_TEAM_MODE_ADDENDUM", () => {
  test("should define the slop-squad team spec and lifecycle", () => {
    //#given - the team name plus the lifecycle tool calls the addendum has to mention
    const lifecycleMarkers = ["slop-squad", "team_create", "team_task_create", "team_delete"]

    //#when / #then
    for (const marker of lifecycleMarkers) {
      expect(REMOVE_AI_SLOPS_TEAM_MODE_ADDENDUM).toContain(marker)
    }
  })

  test("should route review to external deep task instead of a team member", () => {
    //#given - reviewer must run outside the team because category routing downcasts to sisyphus-junior
    const externalReviewerCategory = 'category="deep"'

    //#when / #then
    expect(REMOVE_AI_SLOPS_TEAM_MODE_ADDENDUM).toContain(externalReviewerCategory)
  })

  test("should teach valid lead messaging examples", () => {
    //#given - recipient forms the addendum must show, and one it must never show
    const validExamples = ['teamRunId=<id>, to="*"', 'to="lead"']
    const invalidExample = "to=sisyphus"

    //#when / #then
    for (const example of validExamples) {
      expect(REMOVE_AI_SLOPS_TEAM_MODE_ADDENDUM).toContain(example)
    }
    expect(REMOVE_AI_SLOPS_TEAM_MODE_ADDENDUM).not.toContain(invalidExample)
  })
})
describe("loadBuiltinCommands - team mode gating for remove-ai-slops", () => {
  test("should exclude team mode addendum when teamModeEnabled is false", () => {
    //#given - commands built with team mode explicitly switched off
    const { template } = loadBuiltinCommands(undefined, { teamModeEnabled: false })["remove-ai-slops"]

    //#when / #then
    for (const marker of ["slop-squad", "Team Mode Protocol"]) {
      expect(template).not.toContain(marker)
    }
  })

  test("should include team mode addendum when teamModeEnabled is true", () => {
    //#given - commands built with team mode switched on
    const { template } = loadBuiltinCommands(undefined, { teamModeEnabled: true })["remove-ai-slops"]

    //#when / #then
    for (const marker of ["slop-squad", "Team Mode Protocol"]) {
      expect(template).toContain(marker)
    }
  })

  test("should default to team mode disabled when option is omitted", () => {
    //#given - commands built without passing any options
    const { template } = loadBuiltinCommands()["remove-ai-slops"]

    //#when / #then
    expect(template).not.toContain("slop-squad")
  })
})
describe("REFACTOR_TEMPLATE", () => {
  test("should not contain team mode content in the base template", () => {
    //#given - markers that only the team mode addendum is allowed to introduce
    const teamOnlyMarkers = ["refactor-squad", "team_create", "Team Mode Protocol"]

    //#when / #then - the base template (used when team mode is disabled) carries none of them
    for (const marker of teamOnlyMarkers) {
      expect(REFACTOR_TEMPLATE).not.toContain(marker)
    }
  })
})
describe("REFACTOR_TEAM_MODE_ADDENDUM", () => {
  test("should define the refactor-squad team spec and lifecycle", () => {
    //#given - the team name plus the lifecycle tool calls the addendum has to mention
    const lifecycleMarkers = ["refactor-squad", "team_create", "team_task_create", "team_delete"]

    //#when / #then
    for (const marker of lifecycleMarkers) {
      expect(REFACTOR_TEAM_MODE_ADDENDUM).toContain(marker)
    }
  })

  test("should require team staffing recommendation as part of the plan", () => {
    //#given - plan agent must output a staffing roster so Phase 5 can dispatch
    const staffingMarkers = ["Team Staffing Recommendation", "dispatch_path_recommendation"]

    //#when / #then
    for (const marker of staffingMarkers) {
      expect(REFACTOR_TEAM_MODE_ADDENDUM).toContain(marker)
    }
  })

  test("should route verification to external deep task instead of a team member", () => {
    //#given - verifier runs outside the team because category routing downcasts to sisyphus-junior
    const externalVerifierCategory = 'category="deep"'

    //#when / #then
    expect(REFACTOR_TEAM_MODE_ADDENDUM).toContain(externalVerifierCategory)
  })

  test("should teach valid lead messaging examples", () => {
    //#given - message forms the addendum must show, and one it must never show
    const validExamples = ['to="lead"', "teamRunId=<id>"]
    const invalidExample = "to=sisyphus"

    //#when / #then
    for (const example of validExamples) {
      expect(REFACTOR_TEAM_MODE_ADDENDUM).toContain(example)
    }
    expect(REFACTOR_TEAM_MODE_ADDENDUM).not.toContain(invalidExample)
  })
})
describe("loadBuiltinCommands - team mode gating for refactor", () => {
  test("should exclude team mode addendum when teamModeEnabled is false", () => {
    //#given - team mode disabled
    const commands = loadBuiltinCommands(undefined, { teamModeEnabled: false })

    //#when / #then
    expect(commands.refactor.template).not.toContain("refactor-squad")
    expect(commands.refactor.template).not.toContain("Team Mode Protocol")
  })

  test("should include team mode addendum when teamModeEnabled is true", () => {
    //#given - team mode enabled
    const commands = loadBuiltinCommands(undefined, { teamModeEnabled: true })

    //#when / #then
    expect(commands.refactor.template).toContain("refactor-squad")
    expect(commands.refactor.template).toContain("Team Mode Protocol")
  })

  test("should default to team mode disabled when option is omitted", () => {
    //#given - no options passed at all (mirrors the remove-ai-slops gating suite)
    const commands = loadBuiltinCommands()

    //#when / #then
    expect(commands.refactor.template).not.toContain("refactor-squad")
    expect(commands.refactor.template).not.toContain("Team Mode Protocol")
  })
})
describe("HANDOFF_TEMPLATE", () => {

View File

@@ -4,13 +4,14 @@ import type { BuiltinCommandName, BuiltinCommands } from "./types"
import { INIT_DEEP_TEMPLATE } from "./templates/init-deep"
import { RALPH_LOOP_TEMPLATE, ULW_LOOP_TEMPLATE, CANCEL_RALPH_TEMPLATE } from "./templates/ralph-loop"
import { STOP_CONTINUATION_TEMPLATE } from "./templates/stop-continuation"
import { REFACTOR_TEMPLATE } from "./templates/refactor"
import { REFACTOR_TEMPLATE, REFACTOR_TEAM_MODE_ADDENDUM } from "./templates/refactor"
import { START_WORK_TEMPLATE } from "./templates/start-work"
import { HANDOFF_TEMPLATE } from "./templates/handoff"
import { REMOVE_AI_SLOPS_TEMPLATE } from "./templates/remove-ai-slops"
import { REMOVE_AI_SLOPS_TEMPLATE, REMOVE_AI_SLOPS_TEAM_MODE_ADDENDUM } from "./templates/remove-ai-slops"
/** Optional switches accepted when the builtin command definitions are built. */
interface LoadBuiltinCommandsOptions {
// NOTE(review): presumably read by resolveStartWorkAgent to choose the start-work agent — its body is not visible here, confirm.
useRegisteredAgents?: boolean
// When true, the team-mode addenda are appended to the refactor and remove-ai-slops templates; an omitted value is treated as false.
teamModeEnabled?: boolean
}
function resolveStartWorkAgent(options?: LoadBuiltinCommandsOptions): "atlas" | "sisyphus" {
@@ -21,9 +22,21 @@ function resolveStartWorkAgent(options?: LoadBuiltinCommandsOptions): "atlas" |
return "atlas"
}
/**
 * Returns the template text for a command, optionally extended with its team-mode addendum.
 *
 * @param baseTemplate - template used as-is when team mode is off
 * @param addendum - extra instructions placed after the base template, separated by one newline
 * @param teamModeEnabled - whether the addendum should be appended
 * @returns the base template alone, or base + "\n" + addendum
 */
function withTeamModeAddendum(baseTemplate: string, addendum: string, teamModeEnabled: boolean): string {
  if (!teamModeEnabled) {
    return baseTemplate
  }

  return [baseTemplate, addendum].join("\n")
}
function createBuiltinCommandDefinitions(
options?: LoadBuiltinCommandsOptions,
): Record<BuiltinCommandName, Omit<CommandDefinition, "name">> {
const teamModeEnabled = options?.teamModeEnabled ?? false
const refactorContent = withTeamModeAddendum(REFACTOR_TEMPLATE, REFACTOR_TEAM_MODE_ADDENDUM, teamModeEnabled)
const removeAiSlopsContent = withTeamModeAddendum(
REMOVE_AI_SLOPS_TEMPLATE,
REMOVE_AI_SLOPS_TEAM_MODE_ADDENDUM,
teamModeEnabled,
)
return {
"init-deep": {
description: "(builtin) Initialize hierarchical AGENTS.md knowledge base",
@@ -68,7 +81,7 @@ ${CANCEL_RALPH_TEMPLATE}
description:
"(builtin) Intelligent refactoring command with LSP, AST-grep, architecture analysis, codemap, and TDD verification.",
template: `<command-instruction>
${REFACTOR_TEMPLATE}
${refactorContent}
</command-instruction>`,
argumentHint: "<refactoring-target> [--scope=<file|module|project>] [--strategy=<safe|aggressive>]",
},
@@ -98,7 +111,7 @@ ${STOP_CONTINUATION_TEMPLATE}
"remove-ai-slops": {
description: "(builtin) Remove AI-generated code smells from branch changes and critically review the results",
template: `<command-instruction>
${REMOVE_AI_SLOPS_TEMPLATE}
${removeAiSlopsContent}
</command-instruction>
<user-request>

View File

@@ -617,3 +617,142 @@ When you encounter deprecated methods/APIs during refactoring:
$ARGUMENTS
</user-request>
`
/**
 * Team-mode instructions for the refactor command. The text tells the agent to
 * replace Phase 4-6 of the base refactor flow with a `refactor-squad` team run.
 *
 * The embedded `json` team spec is meant to be written verbatim to
 * ~/.omo/teams/refactor-squad/config.json, so it has to be valid JSON once this
 * template literal is evaluated. Quotes inside the member prompts are therefore
 * written as `\\"` (which evaluates to `\"`); a plain `\"` would evaluate to a
 * bare `"` and end the JSON string early.
 */
export const REFACTOR_TEAM_MODE_ADDENDUM = `
---
# Team Mode Protocol (active when team_* tools are present)
Team mode is enabled for this session. The rules below **override Phase 4-6** above. Follow this protocol instead of the in-session step-by-step execution.
## Phase 4 override: Plan agent staffing requirement
When invoking the Plan agent in Phase 4.1, append this additional requirement to the prompt:
\`\`\`
7. (REQUIRED when team mode is active) Output a Team Staffing Recommendation section with these fields — missing fields fail Phase 5.0:
- total_atomic_steps: integer
- file_independent_steps: integer (parallelizable, no cross-file blocker)
- cross_file_dependent_steps: integer (has blockers)
- per_step_assignment: [{step_id, assigned_to: 'quick' | 'unspecified-low', blockedBy: [step_ids], rationale}]
- dispatch_path_recommendation: 'team' | 'legacy' with reason
- rationale for the composition
\`\`\`
**Classification rules** the plan agent must apply to each step:
- \`quick\`: mechanical edits — LSP rename, extract variable, inline, simple move, signature change without call-site logic.
- \`unspecified-low\`: logic-preserving refactors that need reasoning — extract function, restructure conditional, pattern transformation, cross-file API change.
- Recommend \`team\` path when \`file_independent_steps >= 3\`; recommend \`legacy\` otherwise.
## Phase 5 override: Dispatch path selection
Read the Team Staffing Recommendation from Phase 4. If any required field is missing, fail here and re-request the plan with the exact missing field names. Do not proceed with a partial plan.
Then choose the path:
- **Team path (5.1-T)**: when the plan recommends \`team\` AND \`file_independent_steps >= 3\`. Members execute in parallel, Lead orchestrates, a \`deep\` verifier lives outside the team.
- **Legacy path (5.1-L)**: otherwise. Use the original 5.1 / 5.2 / 5.3 flow from above.
Record the chosen path in the TodoWrite list.
## Phase 5.1-T: \`refactor-squad\` team execution
**Precondition checks** (fail hard if any step fails):
1. Load the \`team-mode\` skill via the \`skill\` tool for lifecycle, message protocol, and limits.
2. Call \`team_list\` and verify no active \`refactor-squad\` run exists; if one does, shutdown + delete the orphan before proceeding.
3. If \`~/.omo/teams/refactor-squad/config.json\` is missing, write it using the spec below.
**Team spec** (\`~/.omo/teams/refactor-squad/config.json\`):
\`\`\`json
{
"name": "refactor-squad",
"lead": { "kind": "subagent_type", "subagent_type": "sisyphus" },
"members": [
{
"kind": "category",
"category": "quick",
"prompt": "You handle mechanical refactoring steps (LSP rename, extract variable, inline, simple move, signature change). Use LSP tools for correctness. Apply the task description's per-step instructions verbatim — no scope expansion. After edits, run lsp_diagnostics on touched files. Report via team_send_message(teamRunId=<id>, to=\\"lead\\", summary=<files touched>, body=<lsp status + diff summary>) + team_task_update(status=completed). Never run tests — the external verifier handles that. Never git add, never --continue."
},
{ "kind": "category", "category": "quick", "prompt": "Same contract as peer quick worker." },
{
"kind": "category",
"category": "unspecified-low",
"prompt": "You handle logic-preserving refactors that need reasoning (extract function, restructure conditional, pattern transformation, cross-file API change). Read the task description's plan step carefully. Use ast_grep_replace with dryRun=true first, review the preview, then execute. If the step is ambiguous or would require out-of-scope changes, STOP and send team_send_message(teamRunId=<id>, to=\\"lead\\", summary=\\"UNCLEAR\\", body=<reason>) + team_task_update(status=pending). Same reporting contract as peer quick workers. Never run tests."
},
{ "kind": "category", "category": "unspecified-low", "prompt": "Same contract as peer unspecified-low worker." }
]
}
\`\`\`
Rationale for this composition:
- **4 workers = team mode's parallel cap.** 5+ just queues.
- **No verifier team member.** Verification needs \`deep\` reasoning (or \`unspecified-high\` fallback). In-team category routing downcasts to sisyphus-junior, which is weaker than required — the verifier runs OUTSIDE the team as a \`task(category="deep")\`.
- **quick × 2** for mechanical edits, **unspecified-low × 2** for reasoning edits — mirrors the plan's split.
**Team lifecycle** (one team, reused until Phase 6 cleanup):
1. \`team_create(teamName="refactor-squad")\`. Record \`teamRunId\`.
2. Broadcast the refactor Intent Card ONCE (keep task descriptions slim):
\`\`\`
team_send_message(
teamRunId=<id>, to="*", kind="announcement",
summary="refactor-intent",
body=<codemap summary + constraints + established patterns from Phase 2>
)
\`\`\`
3. Broadcast the verification spec ONCE:
\`\`\`
team_send_message(
teamRunId=<id>, to="*", kind="announcement",
summary="verify-spec",
body=<exact test/typecheck/lint commands + expected pass counts + regression indicators from Phase 3.4>
)
\`\`\`
4. For each plan step, \`team_task_create(teamRunId=<id>, subject="refactor step <N>: <short>", description=<per-step instructions from plan, including target files and line ranges, rollback strategy>, blockedBy=<from plan's per_step_assignment>)\`.
**Lead monitoring loop**:
While any team task is \`pending | claimed | in_progress\`:
- Wait for \`<system-reminder>\` or member messages. Avoid tight polling; a single \`team_status\` check is acceptable if no notification arrives within roughly 10 seconds of expected completion.
- On a worker completion report (members report via \`team_send_message(teamRunId=<id>, to="lead", ...)\`), immediately dispatch an **external verifier** — verification runs OUTSIDE the team because team-member category routing downcasts to sisyphus-junior:
\`\`\`
task(
category="deep",
load_skills=[],
run_in_background=true,
description="verify step <N>",
prompt=<files touched + verify-spec commands + instruction to return "PASS" or "FAIL:<failing test + specific error + suggested revert hunks>">
)
\`\`\`
If \`deep\` is unavailable, fall back to \`category="unspecified-high"\`. Do not create a commit checkpoint until the verifier returns PASS.
- On a verifier PASS: make the commit checkpoint for that step (see original 5.3). Proceed.
- On a verifier FAIL: Lead decides:
- **Retry with fix hint**: \`team_task_update(status=pending)\` on the original step + \`team_send_message(teamRunId=<id>, to=<original member>, summary="retry", body=<specific failure from verifier>)\`. Runtime reassigns.
- **Escalate**: after three FAIL cycles on the same step, STOP and consult the user with full evidence.
- On a member UNCLEAR message: re-harvest context via a targeted \`task()\` outside the team, broadcast an updated Intent Card fragment, then reassign.
Proceed to Phase 6 only when every team task is \`completed\` AND every paired verifier task returned PASS.
## Phase 6 override: Team cleanup before summary
If Phase 5 used the team path, dismantle \`refactor-squad\` BEFORE producing the 6.6 summary. Every exit path — success, escalation, abort — must cleanup; orphan teams poison the next session's precondition check.
1. \`team_shutdown_request\` for each member, then \`team_approve_shutdown\` if members do not self-approve within a reasonable window.
2. \`team_delete(teamRunId=<id>)\`.
3. \`team_list\` to confirm no residual \`refactor-squad\` run.
The \`~/.omo/teams/refactor-squad/config.json\` declaration stays on disk; next session reuses it.
Append to the 6.6 summary a "Dispatch path" line and, when team path was used, team metrics (teamRunId, tasks created, verifier runs, team lifetime).
## MUST NOT (team mode)
- Lead never edits files directly — orchestrate only.
- Do not inline the Intent Card or verify-spec into task descriptions — rely on the broadcasts.
- Do not recreate the team mid-session.
- Do not run tests from Lead — the external verifier owns that lane.
- Do not put \`oracle\` / \`librarian\` / \`deep\` into the team spec — oracle/librarian are team-ineligible, and \`deep\` under category routing downcasts to sisyphus-junior. Use them via \`task()\` outside the team when needed.
`

View File

@@ -94,3 +94,105 @@ If any issues are found during critical review:
- ALWAYS verify changes compile/parse correctly
- ALWAYS preserve test coverage
- If uncertain about a change, err on the side of keeping the original code`
/**
 * Team-mode instructions for the remove-ai-slops command. The text tells the
 * agent to replace Phase 2-4 of the base flow with a `slop-squad` team run.
 *
 * The embedded `json` team spec is meant to be written verbatim to
 * ~/.omo/teams/slop-squad/config.json, so it has to be valid JSON once this
 * template literal is evaluated. Quotes inside the member prompts are therefore
 * written as `\\"` (which evaluates to `\"`); a plain `\"` would evaluate to a
 * bare `"` and end the JSON string early.
 */
export const REMOVE_AI_SLOPS_TEAM_MODE_ADDENDUM = `
---
# Team Mode Protocol (active when team_* tools are present)
Team mode is enabled for this session. The rules below **override Phase 2-4** of the legacy flow above. Follow this protocol instead of the per-file fire-and-forget \`task()\` dispatch.
## Phase 2 (team): \`slop-squad\` setup
**Precondition checks** (fail hard if any step fails):
1. Load the \`team-mode\` skill via the \`skill\` tool for lifecycle, message protocol, broadcast rules, 32KB message cap, and 4 parallel worker cap.
2. Call \`team_list\` and verify no active run named \`slop-squad\` exists. If one does, it is an orphan from a crashed prior session — \`team_shutdown_request\` + \`team_approve_shutdown\` + \`team_delete\` it before proceeding. Do not rename the team or run concurrent sessions under the same name.
3. If \`~/.omo/teams/slop-squad/config.json\` is missing, write it using the spec below.
**Team spec** (\`~/.omo/teams/slop-squad/config.json\`):
\`\`\`json
{
"name": "slop-squad",
"lead": { "kind": "subagent_type", "subagent_type": "sisyphus" },
"members": [
{
"kind": "category",
"category": "quick",
"prompt": "You run ai-slop-remover on ONE file per task. Load ai-slop-remover via the skill tool. Read the task description for the file path. Apply the skill's detection criteria verbatim. After edits: run lsp_diagnostics on the file. Report via team_send_message(teamRunId=<id>, to=\\"lead\\", summary=<change count>, body=<full ai-slop-remover report>) + team_task_update(status=completed). On ambiguity: send team_send_message(teamRunId=<id>, to=\\"lead\\", summary=\\"UNCLEAR\\", body=<reason>) + team_task_update(status=pending). Never git add, never run tests, never touch other files."
},
{ "kind": "category", "category": "quick", "prompt": "Same contract as peer quick worker." },
{ "kind": "category", "category": "quick", "prompt": "Same contract as peer quick worker." },
{
"kind": "category",
"category": "unspecified-low",
"prompt": "You are the FIX worker. You claim rework tasks that the lead creates after the external reviewer flags issues. Read the reviewer's per-hunk rollback instructions in the task description, apply the reverse patch, then run ai-slop-remover ONLY on the non-rolled-back remainder. Same reporting contract as quick peers. Handle UNCLEAR escalations the same way."
}
]
}
\`\`\`
Rationale for this composition:
- **4 workers = team mode's parallel cap.** A fifth member just queues.
- **Reviewer is NOT a team member** — review demands stronger reasoning than category routing provides (team category members are downcast to sisyphus-junior). The reviewer runs OUTSIDE the team as a \`deep\` task; see Phase 3.
- **quick × 3** absorbs the mass of per-file slop removal. **unspecified-low × 1** is the rework lane for fixes triggered by reviewer findings.
**Team lifecycle** (create once, reuse until Phase 5 cleanup):
1. \`team_create(teamName="slop-squad")\`. Record \`teamRunId\` — every subsequent team call needs it.
2. Broadcast the detection criteria ONCE so each task description stays minimal:
\`\`\`
team_send_message(
teamRunId=<id>, to="*", kind="announcement",
summary="slop-criteria",
body=<the 9 slop categories + KEEP rules; reference the ai-slop-remover skill content>
)
\`\`\`
3. Before spawning tasks, save a per-file rollback artifact that captures only the delta the slop-removal pass will introduce. Do NOT use \`git checkout -- <file>\` — that would discard pre-existing branch changes.
4. For each changed file, \`team_task_create(teamRunId=<id>, subject="slop: <file>", description=<file path + rollback artifact path + reporting format>, blockedBy=[])\`.
## Phase 3 (team): Incremental reviewer dispatch
While any team task is \`pending | claimed | in_progress\`:
- Wait for \`<system-reminder>\` or member messages. Do NOT tight-poll \`team_status\`; the runtime notifies on state changes. A single \`team_status\` check is acceptable if no notification arrives within roughly 10 seconds of expected completion.
- On each worker completion report (members report via \`team_send_message(teamRunId=<id>, to="lead", ...)\`):
- Log the report to the pending final summary (no blocking).
- Immediately dispatch an **external reviewer** — review runs OUTSIDE the team because team-member category routing downcasts to sisyphus-junior:
\`\`\`
task(
category="deep",
load_skills=[],
run_in_background=true,
description="slop review: <file>",
prompt=<file path + full worker report + Safety/Behavior/Quality checklist + instruction to output "PASS" or "FAIL:<per-hunk rollback instructions>">
)
\`\`\`
If \`deep\` is unavailable in this session, fall back to \`category="unspecified-high"\`.
- On a reviewer task returning FAIL:
- Create a rework team task: \`team_task_create(teamRunId=<id>, subject="rework: <file>", description=<reverse-patch hunks from reviewer + "then run ai-slop-remover on remaining non-rolled-back issues only">)\`. The \`unspecified-low\` fix member claims it.
- Create a new reviewer task paired to the rework completion (same incremental pattern).
- Loop until every file has a PASS from the reviewer AND no team task is outstanding.
## Phase 4 (team): Fix issues
Fixes happen incrementally during Phase 3's loop via rework tasks — this phase is already handled when the loop exits. Lead does not edit files directly: any remaining fix that neither worker nor fix member could resolve goes into a new rework team task with sharper instructions, or is escalated to the user with the reviewer's evidence.
## Phase 5 (team): Team cleanup
Before producing the summary report, dismantle the team on EVERY exit path — success, escalation, abort — otherwise the next session's Phase 2 precondition check catches the orphan.
1. \`team_shutdown_request\` for each member, then \`team_approve_shutdown\` if members do not self-approve within a reasonable window.
2. \`team_delete(teamRunId=<id>)\`.
3. \`team_list\` to confirm no residual \`slop-squad\` run.
The \`~/.omo/teams/slop-squad/config.json\` declaration file stays on disk; it is reused next session.
## MUST NOT (team mode)
- Lead never edits files directly — orchestrate only. If editing is needed, it goes into a team task.
- Do not inline the full slop-criteria into every task description; rely on the Phase 2 broadcast.
- Do not call \`team_create\` again mid-session. One team per resolution.
- Do not put \`oracle\` / \`librarian\` into the team spec — they are team-ineligible; call them via \`task()\` outside the team when needed.
`