fix(agents): deny apply_patch for GPT models to prevent verification hangs (#2935)

GPT models (5.3-codex, 5.4, etc.) frequently hang when using apply_patch due to verification loops. This adds: 1. Tool restriction: apply_patch is denied for GPT variants of Hephaestus, Sisyphus-Junior, and Sisyphus agents 2. Prompt guidance: GPT-specific prompts now explicitly instruct using edit/write tools instead of apply_patch 3. Removed the 'Always use apply_patch' instruction from sisyphus-junior/gpt-5-4.ts that contradicted the fix The deny is model-conditional — Claude variants retain apply_patch access since it works reliably there.
2026-05-01 03:59:23 +08:00 · 2026-04-07 11:28:48 +09:00
parent 12106ffccc
commit 1140080927
14 changed files with 100 additions and 6 deletions
--- a/src/agents/delegation-trust-prompt.test.ts
+++ b/src/agents/delegation-trust-prompt.test.ts
@@ -114,6 +114,8 @@ describe("delegation trust prompt rules", () => {
    expect(prompt).toContain("do only non-overlapping work simultaneously")
    expect(prompt).toContain("Continue only with non-overlapping work")
    expect(prompt).toContain("DO NOT perform the same search yourself")
+    expect(prompt).toContain("Do not use `apply_patch`")
+    expect(prompt).toContain("`edit` and `write`")
  })

  test("Sisyphus-Junior GPT-5.4 prompt forbids duplicate delegated exploration", () => {
--- a/src/agents/hephaestus/agent.test.ts
+++ b/src/agents/hephaestus/agent.test.ts
@@ -192,6 +192,8 @@ describe("createHephaestusAgent", () => {
    expect(config.prompt).toContain("You build context by examining");
    expect(config.prompt).toContain("Never chain together bash commands");
    expect(config.prompt).toContain("<tool_usage_rules>");
+    expect(config.prompt).toContain("Do not use `apply_patch`");
+    expect(config.prompt).toContain("`edit` and `write`");
  });

  test("GPT 5.3-codex model includes GPT-5.3 specific prompt content", () => {
@@ -205,6 +207,8 @@ describe("createHephaestusAgent", () => {
    expect(config.prompt).toContain("Senior Staff Engineer");
    expect(config.prompt).toContain("Hard Constraints");
    expect(config.prompt).toContain("<tool_usage_rules>");
+    expect(config.prompt).toContain("Do not use `apply_patch`");
+    expect(config.prompt).toContain("`edit` and `write`");
  });

  test("includes Hephaestus identity in prompt", () => {
@@ -219,6 +223,35 @@ describe("createHephaestusAgent", () => {
    expect(config.prompt).toContain("autonomous deep worker");
  });

+  test("generic GPT model includes apply_patch workaround guidance", () => {
+    // given
+    const model = "openai/gpt-4o";
+
+    // when
+    const config = createHephaestusAgent(model);
+
+    // then
+    expect(config.prompt).toContain("Do not use `apply_patch`");
+    expect(config.prompt).toContain("`edit` and `write`");
+  });
+
+  test("GPT models deny apply_patch while non-GPT models do not", () => {
+    // given
+    const gpt54Model = "openai/gpt-5.4";
+    const gptGenericModel = "openai/gpt-4o";
+    const claudeModel = "anthropic/claude-opus-4-6";
+
+    // when
+    const gpt54Config = createHephaestusAgent(gpt54Model);
+    const gptGenericConfig = createHephaestusAgent(gptGenericModel);
+    const claudeConfig = createHephaestusAgent(claudeModel);
+
+    // then
+    expect(gpt54Config.permission ?? {}).toHaveProperty("apply_patch", "deny");
+    expect(gptGenericConfig.permission ?? {}).toHaveProperty("apply_patch", "deny");
+    expect(claudeConfig.permission ?? {}).not.toHaveProperty("apply_patch");
+  });
+
  test("useTaskSystem=true produces Task Discipline prompt", () => {
    // given
    const model = "openai/gpt-5.4";
--- a/src/agents/hephaestus/agent.ts
+++ b/src/agents/hephaestus/agent.ts
@@ -1,6 +1,6 @@
 import type { AgentConfig } from "@opencode-ai/sdk";
 import type { AgentMode, AgentPromptMetadata } from "../types";
-import { isGpt5_4Model, isGpt5_3CodexModel } from "../types";
+import { isGptModel, isGpt5_4Model, isGpt5_3CodexModel } from "../types";
 import type {
  AvailableAgent,
  AvailableTool,
@@ -120,6 +120,7 @@ export function createHephaestusAgent(
    permission: {
      question: "allow",
      call_omo_agent: "deny",
+      ...(isGptModel(model) ? { apply_patch: "deny" as const } : {}),
    } as AgentConfig["permission"],
    reasoningEffort: "medium",
  };
--- a/src/agents/hephaestus/gpt-5-3-codex.ts
+++ b/src/agents/hephaestus/gpt-5-3-codex.ts
@@ -448,6 +448,7 @@ ${oracleSection}
 1. SEARCH existing codebase for similar patterns/styles
 2. Match naming, indentation, import styles, error handling conventions
 3. Default to ASCII. Add comments only for non-obvious blocks
+4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification.

 ### After Implementation (MANDATORY - DO NOT SKIP)

--- a/src/agents/hephaestus/gpt-5-4.ts
+++ b/src/agents/hephaestus/gpt-5-4.ts
@@ -252,7 +252,7 @@ ${antiPatterns}
 1. **Explore**: Fire 2-5 explore/librarian agents in parallel + direct tool reads. Goal: complete understanding, not just enough context.
 2. **Plan**: List files to modify, specific changes, dependencies, complexity estimate.
 3. **Decide**: Trivial (<10 lines, single file) -> self. Complex (multi-file, >100 lines) -> delegate.
-4. **Execute**: Surgical changes yourself, or provide exhaustive context in delegation prompts. Match existing patterns. Minimal diff. Search the codebase for similar patterns before writing code. Default to ASCII. Add comments only for non-obvious blocks.
+4. **Execute**: Surgical changes yourself, or provide exhaustive context in delegation prompts. Match existing patterns. Minimal diff. Search the codebase for similar patterns before writing code. Default to ASCII. Add comments only for non-obvious blocks. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification.
 5. **Verify**: \`lsp_diagnostics\` on all modified files (zero errors) -> run related tests (\`foo.ts\` -> \`foo.test.ts\`) -> typecheck -> build if applicable (exit 0). Fix only issues your changes caused.

 If verification fails, return to step 1 with a materially different approach. After three attempts: stop, revert to last working state, document what you tried, consult Oracle. If Oracle cannot resolve, ask the user.
--- a/src/agents/hephaestus/gpt.ts
+++ b/src/agents/hephaestus/gpt.ts
@@ -311,6 +311,7 @@ ${oracleSection}
 1. SEARCH existing codebase for similar patterns/styles
 2. Match naming, indentation, import styles, error handling conventions
 3. Default to ASCII. Add comments only for non-obvious blocks
+4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification.

 ### After Implementation (MANDATORY - DO NOT SKIP)

--- a/src/agents/sisyphus-junior/agent.ts
+++ b/src/agents/sisyphus-junior/agent.ts
@@ -30,6 +30,7 @@ const MODE: AgentMode = "subagent"
 // Core tools that Sisyphus-Junior must NEVER have access to
 // Note: call_omo_agent is ALLOWED so subagents can spawn explore/librarian
 const BLOCKED_TOOLS = ["task"]
+const GPT_BLOCKED_TOOLS = ["task", "apply_patch"]

 export const SISYPHUS_JUNIOR_DEFAULTS = {
  model: "anthropic/claude-sonnet-4-6",
@@ -91,13 +92,14 @@ export function createSisyphusJuniorAgentWithOverrides(

  const promptAppend = override?.prompt_append
  const prompt = buildSisyphusJuniorPrompt(model, useTaskSystem, promptAppend)
+  const blockedTools = isGptModel(model) ? GPT_BLOCKED_TOOLS : BLOCKED_TOOLS

-  const baseRestrictions = createAgentToolRestrictions(BLOCKED_TOOLS)
+  const baseRestrictions = createAgentToolRestrictions(blockedTools)

  const userPermission = (override?.permission ?? {}) as Record<string, PermissionValue>
  const basePermission = baseRestrictions.permission
  const merged: Record<string, PermissionValue> = { ...userPermission }
-  for (const tool of BLOCKED_TOOLS) {
+  for (const tool of blockedTools) {
    merged[tool] = "deny"
  }
  merged.call_omo_agent = "allow"
--- a/src/agents/sisyphus-junior/gpt-5-3-codex.ts
+++ b/src/agents/sisyphus-junior/gpt-5-3-codex.ts
@@ -92,6 +92,7 @@ Style:
 1. SEARCH existing codebase for similar patterns/styles
 2. Match naming, indentation, import styles, error handling conventions
 3. Default to ASCII. Add comments only for non-obvious blocks
+4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification.

 ### After Implementation (MANDATORY - DO NOT SKIP)

--- a/src/agents/sisyphus-junior/gpt-5-4.ts
+++ b/src/agents/sisyphus-junior/gpt-5-4.ts
@@ -96,7 +96,7 @@ Style:
 1. SEARCH existing codebase for similar patterns/styles
 2. Match naming, indentation, import styles, error handling conventions
 3. Default to ASCII. Add comments only for non-obvious blocks
-4. Always use apply_patch for manual code edits. Do not use cat or echo for file creation/editing. Formatting commands or bulk edits don't need apply_patch
+4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification.
 5. Do not chain bash commands with separators - each command should be a separate tool call

 ### After Implementation (MANDATORY - DO NOT SKIP)
--- a/src/agents/sisyphus-junior/gpt.ts
+++ b/src/agents/sisyphus-junior/gpt.ts
@@ -93,6 +93,7 @@ Style:
 1. SEARCH existing codebase for similar patterns/styles
 2. Match naming, indentation, import styles, error handling conventions
 3. Default to ASCII. Add comments only for non-obvious blocks
+4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification.

 ### After Implementation (MANDATORY - DO NOT SKIP)

--- a/src/agents/sisyphus-junior/index.test.ts
+++ b/src/agents/sisyphus-junior/index.test.ts
@@ -350,6 +350,8 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
      expect(result.prompt).toContain("Scope Discipline")
      expect(result.prompt).toContain("<tool_usage_rules>")
      expect(result.prompt).toContain("Progress Updates")
+      expect(result.prompt).toContain("Do not use `apply_patch`")
+      expect(result.prompt).toContain("`edit` and `write`")
    })

    test("GPT 5.4 model uses GPT-5.4 specific prompt", () => {
@@ -362,6 +364,9 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
      // then
      expect(result.prompt).toContain("expert coding agent")
      expect(result.prompt).toContain("<tool_usage_rules>")
+      expect(result.prompt).toContain("Do not use `apply_patch`")
+      expect(result.prompt).toContain("`edit` and `write`")
+      expect(result.prompt).not.toContain("Always use apply_patch")
    })

    test("GPT 5.3 Codex model uses GPT-5.3-codex specific prompt", () => {
@@ -374,6 +379,28 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
      // then
      expect(result.prompt).toContain("Senior Engineer")
      expect(result.prompt).toContain("<tool_usage_rules>")
+      expect(result.prompt).toContain("Do not use `apply_patch`")
+      expect(result.prompt).toContain("`edit` and `write`")
+    })
+
+    test("GPT variants deny apply_patch while Claude variants do not", () => {
+      // given
+      const gpt54Override = { model: "openai/gpt-5.4" }
+      const gpt53Override = { model: "openai/gpt-5.3-codex" }
+      const gptGenericOverride = { model: "openai/gpt-4o" }
+      const claudeOverride = { model: "anthropic/claude-sonnet-4-6" }
+
+      // when
+      const gpt54Result = createSisyphusJuniorAgentWithOverrides(gpt54Override)
+      const gpt53Result = createSisyphusJuniorAgentWithOverrides(gpt53Override)
+      const gptGenericResult = createSisyphusJuniorAgentWithOverrides(gptGenericOverride)
+      const claudeResult = createSisyphusJuniorAgentWithOverrides(claudeOverride)
+
+      // then
+      expect(gpt54Result.permission ?? {}).toHaveProperty("apply_patch", "deny")
+      expect(gpt53Result.permission ?? {}).toHaveProperty("apply_patch", "deny")
+      expect(gptGenericResult.permission ?? {}).toHaveProperty("apply_patch", "deny")
+      expect(claudeResult.permission ?? {}).not.toHaveProperty("apply_patch")
    })

    test("prompt_append is added after base prompt", () => {
@@ -494,6 +521,7 @@ describe("buildSisyphusJuniorPrompt", () => {
    expect(prompt).toContain("expert coding agent")
    expect(prompt).toContain("Scope Discipline")
    expect(prompt).toContain("<tool_usage_rules>")
+    expect(prompt).toContain("Do not use `apply_patch`")
  })

  test("GPT 5.3 Codex model uses GPT-5.3-codex prompt", () => {
@@ -507,6 +535,7 @@ describe("buildSisyphusJuniorPrompt", () => {
    expect(prompt).toContain("Senior Engineer")
    expect(prompt).toContain("Scope Discipline")
    expect(prompt).toContain("<tool_usage_rules>")
+    expect(prompt).toContain("Do not use `apply_patch`")
  })

  test("generic GPT model uses generic GPT prompt", () => {
@@ -521,6 +550,7 @@ describe("buildSisyphusJuniorPrompt", () => {
    expect(prompt).toContain("Scope Discipline")
    expect(prompt).toContain("<tool_usage_rules>")
    expect(prompt).toContain("Progress Updates")
+    expect(prompt).toContain("Do not use `apply_patch`")
  })

  test("Claude model prompt contains Claude-specific sections", () => {
--- a/src/agents/sisyphus.ts
+++ b/src/agents/sisyphus.ts
@@ -492,6 +492,7 @@ export function createSisyphusAgent(
      permission: {
        question: "allow",
        call_omo_agent: "deny",
+        apply_patch: "deny",
      } as AgentConfig["permission"],
      reasoningEffort: "medium",
    };
@@ -531,6 +532,7 @@ export function createSisyphusAgent(
  const permission = {
    question: "allow",
    call_omo_agent: "deny",
+    ...(isGptModel(model) ? { apply_patch: "deny" as const } : {}),
  } as AgentConfig["permission"];
  const base = {
    description:
--- a/src/agents/sisyphus/gpt-5-4.ts
+++ b/src/agents/sisyphus/gpt-5-4.ts
@@ -304,7 +304,7 @@ Every implementation task follows this cycle. No exceptions.
   Skills: if ANY available skill's domain overlaps with the task, load it NOW via \`skill\` tool and include it in \`load_skills\`. When the connection is even remotely plausible, load the skill - the cost of loading an irrelevant skill is near zero, the cost of missing a relevant one is high.

 4. EXECUTE_OR_SUPERVISE -
-   If self: surgical changes, match existing patterns, minimal diff. Never suppress type errors. Never commit unless asked. Bugfix rule: fix minimally, never refactor while fixing.
+   If self: surgical changes, match existing patterns, minimal diff. Never suppress type errors. Never commit unless asked. Bugfix rule: fix minimally, never refactor while fixing. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification.
   If delegated: exhaustive 6-section prompt per \`<delegation>\` protocol. Session continuity for follow-ups.

 5. VERIFY -
--- a/src/agents/tool-restrictions.test.ts
+++ b/src/agents/tool-restrictions.test.ts
@@ -5,6 +5,7 @@ import { createExploreAgent } from "./explore"
 import { createMomusAgent } from "./momus"
 import { createMetisAgent } from "./metis"
 import { createAtlasAgent } from "./atlas"
+import { createSisyphusAgent } from "./sisyphus"

 const TEST_MODEL = "anthropic/claude-sonnet-4-5"

@@ -111,4 +112,23 @@ describe("read-only agent tool restrictions", () => {
      expect(permission["call_omo_agent"]).toBeUndefined()
    })
  })
+
+  describe("Sisyphus GPT variants", () => {
+    test("deny apply_patch for GPT models but not Claude models", () => {
+      // given
+      const gpt54Agent = createSisyphusAgent("openai/gpt-5.4")
+      const gptGenericAgent = createSisyphusAgent("openai/gpt-5.2")
+      const claudeAgent = createSisyphusAgent(TEST_MODEL)
+
+      // when
+      const gpt54Permission = (gpt54Agent.permission ?? {}) as Record<string, string>
+      const gptGenericPermission = (gptGenericAgent.permission ?? {}) as Record<string, string>
+      const claudePermission = (claudeAgent.permission ?? {}) as Record<string, string>
+
+      // then
+      expect(gpt54Permission["apply_patch"]).toBe("deny")
+      expect(gptGenericPermission["apply_patch"]).toBe("deny")
+      expect(claudePermission["apply_patch"]).toBeUndefined()
+    })
+  })
 })