From 1140080927d764048d2adb4e1da860df3edd6b2d Mon Sep 17 00:00:00 2001 From: YeonGyu-Kim Date: Tue, 7 Apr 2026 11:28:48 +0900 Subject: [PATCH] fix(agents): deny apply_patch for GPT models to prevent verification hangs (#2935) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GPT models (5.3-codex, 5.4, etc.) frequently hang when using apply_patch due to verification loops. This adds: 1. Tool restriction: apply_patch is denied for GPT variants of Hephaestus, Sisyphus-Junior, and Sisyphus agents 2. Prompt guidance: GPT-specific prompts now explicitly instruct using edit/write tools instead of apply_patch 3. Removed the 'Always use apply_patch' instruction from sisyphus-junior/gpt-5-4.ts that contradicted the fix The deny is model-conditional — Claude variants retain apply_patch access since it works reliably there. --- src/agents/delegation-trust-prompt.test.ts | 2 ++ src/agents/hephaestus/agent.test.ts | 33 +++++++++++++++++++++ src/agents/hephaestus/agent.ts | 3 +- src/agents/hephaestus/gpt-5-3-codex.ts | 1 + src/agents/hephaestus/gpt-5-4.ts | 2 +- src/agents/hephaestus/gpt.ts | 1 + src/agents/sisyphus-junior/agent.ts | 6 ++-- src/agents/sisyphus-junior/gpt-5-3-codex.ts | 1 + src/agents/sisyphus-junior/gpt-5-4.ts | 2 +- src/agents/sisyphus-junior/gpt.ts | 1 + src/agents/sisyphus-junior/index.test.ts | 30 +++++++++++++++++++ src/agents/sisyphus.ts | 2 ++ src/agents/sisyphus/gpt-5-4.ts | 2 +- src/agents/tool-restrictions.test.ts | 20 +++++++++++++ 14 files changed, 100 insertions(+), 6 deletions(-) diff --git a/src/agents/delegation-trust-prompt.test.ts b/src/agents/delegation-trust-prompt.test.ts index 03c4ea49a..2de84132a 100644 --- a/src/agents/delegation-trust-prompt.test.ts +++ b/src/agents/delegation-trust-prompt.test.ts @@ -114,6 +114,8 @@ describe("delegation trust prompt rules", () => { expect(prompt).toContain("do only non-overlapping work simultaneously") expect(prompt).toContain("Continue only with non-overlapping work") expect(prompt).toContain("DO NOT perform the same search yourself") + expect(prompt).toContain("Do not use `apply_patch`") + expect(prompt).toContain("`edit` and `write`") }) test("Sisyphus-Junior GPT-5.4 prompt forbids duplicate delegated exploration", () => { diff --git a/src/agents/hephaestus/agent.test.ts b/src/agents/hephaestus/agent.test.ts index 82accf8ee..0885e7839 100644 --- a/src/agents/hephaestus/agent.test.ts +++ b/src/agents/hephaestus/agent.test.ts @@ -192,6 +192,8 @@ describe("createHephaestusAgent", () => { expect(config.prompt).toContain("You build context by examining"); expect(config.prompt).toContain("Never chain together bash commands"); expect(config.prompt).toContain(""); + expect(config.prompt).toContain("Do not use `apply_patch`"); + expect(config.prompt).toContain("`edit` and `write`"); }); test("GPT 5.3-codex model includes GPT-5.3 specific prompt content", () => { @@ -205,6 +207,8 @@ describe("createHephaestusAgent", () => { expect(config.prompt).toContain("Senior Staff Engineer"); expect(config.prompt).toContain("Hard Constraints"); expect(config.prompt).toContain(""); + expect(config.prompt).toContain("Do not use `apply_patch`"); + expect(config.prompt).toContain("`edit` and `write`"); }); test("includes Hephaestus identity in prompt", () => { @@ -219,6 +223,35 @@ describe("createHephaestusAgent", () => { expect(config.prompt).toContain("autonomous deep worker"); }); + test("generic GPT model includes apply_patch workaround guidance", () => { + // given + const model = "openai/gpt-4o"; + + // when + const config = createHephaestusAgent(model); + + // then + expect(config.prompt).toContain("Do not use `apply_patch`"); + expect(config.prompt).toContain("`edit` and `write`"); + }); + + test("GPT models deny apply_patch while non-GPT models do not", () => { + // given + const gpt54Model = "openai/gpt-5.4"; + const gptGenericModel = "openai/gpt-4o"; + const claudeModel = "anthropic/claude-opus-4-6"; + + // when + const gpt54Config = createHephaestusAgent(gpt54Model); + const gptGenericConfig = createHephaestusAgent(gptGenericModel); + const claudeConfig = createHephaestusAgent(claudeModel); + + // then + expect(gpt54Config.permission ?? {}).toHaveProperty("apply_patch", "deny"); + expect(gptGenericConfig.permission ?? {}).toHaveProperty("apply_patch", "deny"); + expect(claudeConfig.permission ?? {}).not.toHaveProperty("apply_patch"); + }); + test("useTaskSystem=true produces Task Discipline prompt", () => { // given const model = "openai/gpt-5.4"; diff --git a/src/agents/hephaestus/agent.ts b/src/agents/hephaestus/agent.ts index 5d27e6220..fc21027e5 100644 --- a/src/agents/hephaestus/agent.ts +++ b/src/agents/hephaestus/agent.ts @@ -1,6 +1,6 @@ import type { AgentConfig } from "@opencode-ai/sdk"; import type { AgentMode, AgentPromptMetadata } from "../types"; -import { isGpt5_4Model, isGpt5_3CodexModel } from "../types"; +import { isGptModel, isGpt5_4Model, isGpt5_3CodexModel } from "../types"; import type { AvailableAgent, AvailableTool, @@ -120,6 +120,7 @@ export function createHephaestusAgent( permission: { question: "allow", call_omo_agent: "deny", + ...(isGptModel(model) ? { apply_patch: "deny" as const } : {}), } as AgentConfig["permission"], reasoningEffort: "medium", }; diff --git a/src/agents/hephaestus/gpt-5-3-codex.ts b/src/agents/hephaestus/gpt-5-3-codex.ts index 732a83afe..93a7ef32a 100644 --- a/src/agents/hephaestus/gpt-5-3-codex.ts +++ b/src/agents/hephaestus/gpt-5-3-codex.ts @@ -448,6 +448,7 @@ ${oracleSection} 1. SEARCH existing codebase for similar patterns/styles 2. Match naming, indentation, import styles, error handling conventions 3. Default to ASCII. Add comments only for non-obvious blocks +4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification. ### After Implementation (MANDATORY - DO NOT SKIP) diff --git a/src/agents/hephaestus/gpt-5-4.ts b/src/agents/hephaestus/gpt-5-4.ts index 9447d183f..2c0f8410b 100644 --- a/src/agents/hephaestus/gpt-5-4.ts +++ b/src/agents/hephaestus/gpt-5-4.ts @@ -252,7 +252,7 @@ ${antiPatterns} 1. **Explore**: Fire 2-5 explore/librarian agents in parallel + direct tool reads. Goal: complete understanding, not just enough context. 2. **Plan**: List files to modify, specific changes, dependencies, complexity estimate. 3. **Decide**: Trivial (<10 lines, single file) -> self. Complex (multi-file, >100 lines) -> delegate. -4. **Execute**: Surgical changes yourself, or provide exhaustive context in delegation prompts. Match existing patterns. Minimal diff. Search the codebase for similar patterns before writing code. Default to ASCII. Add comments only for non-obvious blocks. +4. **Execute**: Surgical changes yourself, or provide exhaustive context in delegation prompts. Match existing patterns. Minimal diff. Search the codebase for similar patterns before writing code. Default to ASCII. Add comments only for non-obvious blocks. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification. 5. **Verify**: \`lsp_diagnostics\` on all modified files (zero errors) -> run related tests (\`foo.ts\` -> \`foo.test.ts\`) -> typecheck -> build if applicable (exit 0). Fix only issues your changes caused. If verification fails, return to step 1 with a materially different approach. After three attempts: stop, revert to last working state, document what you tried, consult Oracle. If Oracle cannot resolve, ask the user. diff --git a/src/agents/hephaestus/gpt.ts b/src/agents/hephaestus/gpt.ts index bfa7ae4b4..b305d1128 100644 --- a/src/agents/hephaestus/gpt.ts +++ b/src/agents/hephaestus/gpt.ts @@ -311,6 +311,7 @@ ${oracleSection} 1. SEARCH existing codebase for similar patterns/styles 2. Match naming, indentation, import styles, error handling conventions 3. Default to ASCII. Add comments only for non-obvious blocks +4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification. ### After Implementation (MANDATORY - DO NOT SKIP) diff --git a/src/agents/sisyphus-junior/agent.ts b/src/agents/sisyphus-junior/agent.ts index c8178f7fb..febb2512b 100644 --- a/src/agents/sisyphus-junior/agent.ts +++ b/src/agents/sisyphus-junior/agent.ts @@ -30,6 +30,7 @@ const MODE: AgentMode = "subagent" // Core tools that Sisyphus-Junior must NEVER have access to // Note: call_omo_agent is ALLOWED so subagents can spawn explore/librarian const BLOCKED_TOOLS = ["task"] +const GPT_BLOCKED_TOOLS = ["task", "apply_patch"] export const SISYPHUS_JUNIOR_DEFAULTS = { model: "anthropic/claude-sonnet-4-6", @@ -91,13 +92,14 @@ export function createSisyphusJuniorAgentWithOverrides( const promptAppend = override?.prompt_append const prompt = buildSisyphusJuniorPrompt(model, useTaskSystem, promptAppend) + const blockedTools = isGptModel(model) ? GPT_BLOCKED_TOOLS : BLOCKED_TOOLS - const baseRestrictions = createAgentToolRestrictions(BLOCKED_TOOLS) + const baseRestrictions = createAgentToolRestrictions(blockedTools) const userPermission = (override?.permission ?? {}) as Record const basePermission = baseRestrictions.permission const merged: Record = { ...userPermission } - for (const tool of BLOCKED_TOOLS) { + for (const tool of blockedTools) { merged[tool] = "deny" } merged.call_omo_agent = "allow" diff --git a/src/agents/sisyphus-junior/gpt-5-3-codex.ts b/src/agents/sisyphus-junior/gpt-5-3-codex.ts index 8394afc7c..02e8d07fa 100644 --- a/src/agents/sisyphus-junior/gpt-5-3-codex.ts +++ b/src/agents/sisyphus-junior/gpt-5-3-codex.ts @@ -92,6 +92,7 @@ Style: 1. SEARCH existing codebase for similar patterns/styles 2. Match naming, indentation, import styles, error handling conventions 3. Default to ASCII. Add comments only for non-obvious blocks +4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification. ### After Implementation (MANDATORY - DO NOT SKIP) diff --git a/src/agents/sisyphus-junior/gpt-5-4.ts b/src/agents/sisyphus-junior/gpt-5-4.ts index fabd679e8..81e706530 100644 --- a/src/agents/sisyphus-junior/gpt-5-4.ts +++ b/src/agents/sisyphus-junior/gpt-5-4.ts @@ -96,7 +96,7 @@ Style: 1. SEARCH existing codebase for similar patterns/styles 2. Match naming, indentation, import styles, error handling conventions 3. Default to ASCII. Add comments only for non-obvious blocks -4. Always use apply_patch for manual code edits. Do not use cat or echo for file creation/editing. Formatting commands or bulk edits don't need apply_patch +4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification. 5. Do not chain bash commands with separators - each command should be a separate tool call ### After Implementation (MANDATORY - DO NOT SKIP) diff --git a/src/agents/sisyphus-junior/gpt.ts b/src/agents/sisyphus-junior/gpt.ts index 83339fc11..c69ab7a2a 100644 --- a/src/agents/sisyphus-junior/gpt.ts +++ b/src/agents/sisyphus-junior/gpt.ts @@ -93,6 +93,7 @@ Style: 1. SEARCH existing codebase for similar patterns/styles 2. Match naming, indentation, import styles, error handling conventions 3. Default to ASCII. Add comments only for non-obvious blocks +4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification. ### After Implementation (MANDATORY - DO NOT SKIP) diff --git a/src/agents/sisyphus-junior/index.test.ts b/src/agents/sisyphus-junior/index.test.ts index dace8bf38..00a4c0377 100644 --- a/src/agents/sisyphus-junior/index.test.ts +++ b/src/agents/sisyphus-junior/index.test.ts @@ -350,6 +350,8 @@ describe("createSisyphusJuniorAgentWithOverrides", () => { expect(result.prompt).toContain("Scope Discipline") expect(result.prompt).toContain("") expect(result.prompt).toContain("Progress Updates") + expect(result.prompt).toContain("Do not use `apply_patch`") + expect(result.prompt).toContain("`edit` and `write`") }) test("GPT 5.4 model uses GPT-5.4 specific prompt", () => { @@ -362,6 +364,9 @@ describe("createSisyphusJuniorAgentWithOverrides", () => { // then expect(result.prompt).toContain("expert coding agent") expect(result.prompt).toContain("") + expect(result.prompt).toContain("Do not use `apply_patch`") + expect(result.prompt).toContain("`edit` and `write`") + expect(result.prompt).not.toContain("Always use apply_patch") }) test("GPT 5.3 Codex model uses GPT-5.3-codex specific prompt", () => { @@ -374,6 +379,28 @@ describe("createSisyphusJuniorAgentWithOverrides", () => { // then expect(result.prompt).toContain("Senior Engineer") expect(result.prompt).toContain("") + expect(result.prompt).toContain("Do not use `apply_patch`") + expect(result.prompt).toContain("`edit` and `write`") + }) + + test("GPT variants deny apply_patch while Claude variants do not", () => { + // given + const gpt54Override = { model: "openai/gpt-5.4" } + const gpt53Override = { model: "openai/gpt-5.3-codex" } + const gptGenericOverride = { model: "openai/gpt-4o" } + const claudeOverride = { model: "anthropic/claude-sonnet-4-6" } + + // when + const gpt54Result = createSisyphusJuniorAgentWithOverrides(gpt54Override) + const gpt53Result = createSisyphusJuniorAgentWithOverrides(gpt53Override) + const gptGenericResult = createSisyphusJuniorAgentWithOverrides(gptGenericOverride) + const claudeResult = createSisyphusJuniorAgentWithOverrides(claudeOverride) + + // then + expect(gpt54Result.permission ?? {}).toHaveProperty("apply_patch", "deny") + expect(gpt53Result.permission ?? {}).toHaveProperty("apply_patch", "deny") + expect(gptGenericResult.permission ?? {}).toHaveProperty("apply_patch", "deny") + expect(claudeResult.permission ?? {}).not.toHaveProperty("apply_patch") }) test("prompt_append is added after base prompt", () => { @@ -494,6 +521,7 @@ describe("buildSisyphusJuniorPrompt", () => { expect(prompt).toContain("expert coding agent") expect(prompt).toContain("Scope Discipline") expect(prompt).toContain("") + expect(prompt).toContain("Do not use `apply_patch`") }) test("GPT 5.3 Codex model uses GPT-5.3-codex prompt", () => { @@ -507,6 +535,7 @@ describe("buildSisyphusJuniorPrompt", () => { expect(prompt).toContain("Senior Engineer") expect(prompt).toContain("Scope Discipline") expect(prompt).toContain("") + expect(prompt).toContain("Do not use `apply_patch`") }) test("generic GPT model uses generic GPT prompt", () => { @@ -521,6 +550,7 @@ describe("buildSisyphusJuniorPrompt", () => { expect(prompt).toContain("Scope Discipline") expect(prompt).toContain("") expect(prompt).toContain("Progress Updates") + expect(prompt).toContain("Do not use `apply_patch`") }) test("Claude model prompt contains Claude-specific sections", () => { diff --git a/src/agents/sisyphus.ts b/src/agents/sisyphus.ts index f534a50fa..059f080e2 100644 --- a/src/agents/sisyphus.ts +++ b/src/agents/sisyphus.ts @@ -492,6 +492,7 @@ export function createSisyphusAgent( permission: { question: "allow", call_omo_agent: "deny", + apply_patch: "deny", } as AgentConfig["permission"], reasoningEffort: "medium", }; @@ -531,6 +532,7 @@ export function createSisyphusAgent( const permission = { question: "allow", call_omo_agent: "deny", + ...(isGptModel(model) ? { apply_patch: "deny" as const } : {}), } as AgentConfig["permission"]; const base = { description: diff --git a/src/agents/sisyphus/gpt-5-4.ts b/src/agents/sisyphus/gpt-5-4.ts index 3dcb12d0d..c111aa2e2 100644 --- a/src/agents/sisyphus/gpt-5-4.ts +++ b/src/agents/sisyphus/gpt-5-4.ts @@ -304,7 +304,7 @@ Every implementation task follows this cycle. No exceptions. Skills: if ANY available skill's domain overlaps with the task, load it NOW via \`skill\` tool and include it in \`load_skills\`. When the connection is even remotely plausible, load the skill - the cost of loading an irrelevant skill is near zero, the cost of missing a relevant one is high. 4. EXECUTE_OR_SUPERVISE - - If self: surgical changes, match existing patterns, minimal diff. Never suppress type errors. Never commit unless asked. Bugfix rule: fix minimally, never refactor while fixing. + If self: surgical changes, match existing patterns, minimal diff. Never suppress type errors. Never commit unless asked. Bugfix rule: fix minimally, never refactor while fixing. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification. If delegated: exhaustive 6-section prompt per \`\` protocol. Session continuity for follow-ups. 5. VERIFY - diff --git a/src/agents/tool-restrictions.test.ts b/src/agents/tool-restrictions.test.ts index 85facdc54..3ae7bfcfe 100644 --- a/src/agents/tool-restrictions.test.ts +++ b/src/agents/tool-restrictions.test.ts @@ -5,6 +5,7 @@ import { createExploreAgent } from "./explore" import { createMomusAgent } from "./momus" import { createMetisAgent } from "./metis" import { createAtlasAgent } from "./atlas" +import { createSisyphusAgent } from "./sisyphus" const TEST_MODEL = "anthropic/claude-sonnet-4-5" @@ -111,4 +112,23 @@ describe("read-only agent tool restrictions", () => { expect(permission["call_omo_agent"]).toBeUndefined() }) }) + + describe("Sisyphus GPT variants", () => { + test("deny apply_patch for GPT models but not Claude models", () => { + // given + const gpt54Agent = createSisyphusAgent("openai/gpt-5.4") + const gptGenericAgent = createSisyphusAgent("openai/gpt-5.2") + const claudeAgent = createSisyphusAgent(TEST_MODEL) + + // when + const gpt54Permission = (gpt54Agent.permission ?? {}) as Record + const gptGenericPermission = (gptGenericAgent.permission ?? {}) as Record + const claudePermission = (claudeAgent.permission ?? {}) as Record + + // then + expect(gpt54Permission["apply_patch"]).toBe("deny") + expect(gptGenericPermission["apply_patch"]).toBe("deny") + expect(claudePermission["apply_patch"]).toBeUndefined() + }) + }) })