fix: align persistent stop hook and tighten agent output contracts (#2653)

Co-authored-by: Codex Review <codex-review@example.com>
2026-04-20 21:00:50 +08:00 · 2026-04-16 10:23:07 +09:00
parent 4851e81450
commit 15799479ec
10 changed files with 61 additions and 1 deletion
--- a/hooks/hooks.json
+++ b/hooks/hooks.json
@@ -180,7 +180,7 @@
          },
          {
            "type": "command",
-            "command": "node \"$CLAUDE_PLUGIN_ROOT\"/scripts/run.cjs \"$CLAUDE_PLUGIN_ROOT\"/scripts/persistent-mode.cjs",
+            "command": "node \"$CLAUDE_PLUGIN_ROOT\"/scripts/run.cjs \"$CLAUDE_PLUGIN_ROOT\"/scripts/persistent-mode.mjs",
            "timeout": 10
          },
          {
--- a/src/tests/issue-2652-runtime-wiring-and-output-contract.test.ts
+++ b/src/tests/issue-2652-runtime-wiring-and-output-contract.test.ts
@@ -0,0 +1,27 @@
+import { describe, expect, it } from 'vitest';
+import { readFileSync } from 'fs';
+import { join } from 'path';
+import { ULTRAWORK_MESSAGE } from '../installer/hooks.js';
+
+describe('issue #2652 runtime wiring and output contract', () => {
+  it('ships the Stop hook through persistent-mode.mjs', () => {
+    const hooksJsonPath = join(process.cwd(), 'hooks', 'hooks.json');
+    const hooks = JSON.parse(readFileSync(hooksJsonPath, 'utf-8')) as {
+      hooks?: Record<string, Array<{ hooks?: Array<{ command?: string }> }>>;
+    };
+
+    const stopCommands = (hooks.hooks?.Stop ?? [])
+      .flatMap((entry) => entry.hooks ?? [])
+      .map((hook) => hook.command ?? '');
+
+    expect(stopCommands.some((command) => command.includes('/scripts/persistent-mode.mjs'))).toBe(true);
+    expect(stopCommands.some((command) => command.includes('/scripts/persistent-mode.cjs'))).toBe(false);
+  });
+
+  it('ultrawork mode instructs spawned agents to keep outputs concise', () => {
+    expect(ULTRAWORK_MESSAGE).toContain('CONCISE OUTPUTS');
+    expect(ULTRAWORK_MESSAGE).toContain('under 100 words');
+    expect(ULTRAWORK_MESSAGE).toContain('files touched');
+    expect(ULTRAWORK_MESSAGE).toContain('verification status');
+  });
+});
--- a/src/tests/ralph-prd-mandatory.test.ts
+++ b/src/tests/ralph-prd-mandatory.test.ts
@@ -320,6 +320,7 @@ describe('Ralph PRD-Mandatory', () => {
      expect(prompt).toContain('Are ALL requirements from the original task met?');
      expect(prompt).toContain('Is the implementation complete, not partial?');
      expect(prompt).not.toContain('Verify EACH acceptance criterion');
+      expect(prompt).toContain('concise review summary under 100 words');
    });

    it('should fall back to generic prompt when story is undefined', () => {
--- a/src/hooks/autopilot/tests/pipeline.test.ts
+++ b/src/hooks/autopilot/tests/pipeline.test.ts
@@ -133,6 +133,7 @@ describe('Stage Adapters', () => {
      expect(prompt).toContain('Team Mode');
      expect(prompt).toContain('TeamCreate');
      expect(prompt).toContain(EXECUTION_COMPLETION_SIGNAL);
+      expect(prompt).toContain('short execution summary under 100 words');
    });

    it('should generate solo prompt for solo mode', () => {
@@ -143,6 +144,7 @@ describe('Stage Adapters', () => {
      });
      expect(prompt).toContain('Solo Mode');
      expect(prompt).toContain(EXECUTION_COMPLETION_SIGNAL);
+      expect(prompt).toContain('short execution summary under 100 words');
    });
  });

@@ -166,6 +168,7 @@ describe('Stage Adapters', () => {
      });
      expect(prompt).toContain('50');
      expect(prompt).toContain(RALPH_COMPLETION_SIGNAL);
+      expect(prompt).toContain('concise review summary under 100 words');
    });
  });

--- a/src/hooks/autopilot/tests/prompts.test.ts
+++ b/src/hooks/autopilot/tests/prompts.test.ts
@@ -73,6 +73,13 @@ describe("Prompt Generation", () => {
      expect(prompt).toContain("Ralph");
      expect(prompt).toContain("Ultrawork");
    });
+
+    it("should require concise executor summaries", () => {
+      const prompt = getExecutionPrompt("plan.md");
+      expect(prompt).toContain("concise execution summary under 100 words");
+      expect(prompt).toContain("files touched");
+      expect(prompt).toContain("verification status");
+    });
  });

  describe("getQAPrompt", () => {
@@ -96,6 +103,13 @@ describe("Prompt Generation", () => {
      expect(prompt).toContain("Security");
      expect(prompt).toContain("Quality");
    });
+
+    it("should require concise reviewer summaries", () => {
+      const prompt = getValidationPrompt("spec.md");
+      expect(prompt).toContain("concise review summary under 100 words");
+      expect(prompt).toContain("evidence highlights");
+      expect(prompt).toContain("files checked");
+    });
  });

  describe("getPhasePrompt", () => {
--- a/src/hooks/autopilot/adapters/execution-adapter.ts
+++ b/src/hooks/autopilot/adapters/execution-adapter.ts
@@ -49,6 +49,10 @@ Use the Team orchestrator to execute tasks in parallel:
 4. **Monitor progress** as teammates complete tasks
 5. **Coordinate** dependencies between tasks

+### Output Contract
+
+Every teammate response must stay concise: return ONLY a short execution summary under 100 words covering what changed, files touched, verification status, and blockers. Store bulky logs/details in files or artifacts and reference them briefly.
+
 ### Agent Selection

 Match agent types to task complexity:
@@ -92,6 +96,10 @@ Execute tasks sequentially (or with limited parallelism via background agents):
 3. Use executor agents for independent tasks that can run in parallel
 4. Track progress in the TODO list

+### Output Contract
+
+Every spawned executor response must return ONLY a short execution summary under 100 words covering what changed, files touched, verification status, and blockers. Store bulky logs/details in files or artifacts and reference them briefly.
+
 ### Agent Spawning

 \`\`\`
--- a/src/hooks/autopilot/adapters/ralph-adapter.ts
+++ b/src/hooks/autopilot/adapters/ralph-adapter.ts
@@ -39,6 +39,8 @@ Verify the implementation against the specification using the Ralph verification

 Spawn parallel verification reviewers:

+Each reviewer must return ONLY a concise review summary under 100 words covering verdict, evidence highlights, files checked, and blockers. Avoid dumping long logs or transcripts into the main session.
+
 \`\`\`
 // Functional Completeness Review
 Task(
--- a/src/hooks/autopilot/prompts.ts
+++ b/src/hooks/autopilot/prompts.ts
@@ -214,6 +214,7 @@ Ralph and Ultrawork are now active. Execute tasks in parallel where possible.
 - Spawn multiple executor agents for parallel work
 - Track progress in the TODO list
 - Use appropriate agent tiers based on task complexity
+- Every spawned agent must return ONLY a concise execution summary under 100 words covering: what changed, files touched, verification status, and blockers. Do not paste long logs inline; write bulky output to files/artifacts and reference them briefly.

 ### Agent Spawning Pattern

@@ -316,6 +317,8 @@ Spawn parallel validation architects for comprehensive review.

 Spawn all three architects in parallel:

+Each reviewer must return ONLY a concise review summary under 100 words with verdict, evidence highlights, files checked, and blockers. Do not paste long transcripts or logs into the main session.
+
 \`\`\`
 // Functional Completeness Review
 Task(
--- a/src/hooks/ralph/verifier.ts
+++ b/src/hooks/ralph/verifier.ts
@@ -272,6 +272,7 @@ ${getVerificationAgentStep(state.critic_mode)}
   - Are there any obvious bugs or issues?
   - Does the code compile/run without errors?
   - Are tests passing (if applicable)?
+   - Return ONLY a concise review summary under 100 words with verdict, evidence highlights, files checked, and blockers. Do not paste long logs inline.

 3. **Based on ${criticLabel}'s response:**
   - If APPROVED: Output the exact correlated approval tag \`${approvalTag}\`, then run \`/oh-my-claudecode:cancel\` to cleanly exit
--- a/src/installer/hooks.ts
+++ b/src/installer/hooks.ts
@@ -135,6 +135,7 @@ TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST.
 - **TODO**: Track EVERY step. Mark complete IMMEDIATELY after each.
 - **PARALLEL**: Fire independent agent calls simultaneously via Task(run_in_background=true) - NEVER wait sequentially.
 - **BACKGROUND FIRST**: Use Task tool for exploration/document-specialist agents (10+ concurrent if needed).
+- **CONCISE OUTPUTS**: Every Task/Agent result must return ONLY a short execution summary (target: under 100 words) covering what changed, files touched, verification status, and blockers. Do not paste long logs into the main session; put bulky details in files/artifacts and reference them briefly.
 - **VERIFY**: Re-read request after completion. Check ALL requirements met before reporting done.
 - **DELEGATE**: Don't do everything yourself - orchestrate specialized agents for their strengths.