fix(agents): deny apply_patch for GPT models to prevent verification hangs (#2935)

GPT models (5.3-codex, 5.4, etc.) frequently hang when using apply_patch
due to verification loops. This adds:

1. Tool restriction: apply_patch is denied for GPT variants of
   Hephaestus, Sisyphus-Junior, and Sisyphus agents
2. Prompt guidance: GPT-specific prompts now explicitly instruct using
   edit/write tools instead of apply_patch
3. Removed the 'Always use apply_patch' instruction from
   sisyphus-junior/gpt-5-4.ts that contradicted the fix

The deny is model-conditional — Claude variants retain apply_patch
access since it works reliably there.
This commit is contained in:
YeonGyu-Kim
2026-04-07 11:28:48 +09:00
parent 12106ffccc
commit 1140080927
14 changed files with 100 additions and 6 deletions

View File

@@ -114,6 +114,8 @@ describe("delegation trust prompt rules", () => {
expect(prompt).toContain("do only non-overlapping work simultaneously")
expect(prompt).toContain("Continue only with non-overlapping work")
expect(prompt).toContain("DO NOT perform the same search yourself")
expect(prompt).toContain("Do not use `apply_patch`")
expect(prompt).toContain("`edit` and `write`")
})
test("Sisyphus-Junior GPT-5.4 prompt forbids duplicate delegated exploration", () => {

View File

@@ -192,6 +192,8 @@ describe("createHephaestusAgent", () => {
expect(config.prompt).toContain("You build context by examining");
expect(config.prompt).toContain("Never chain together bash commands");
expect(config.prompt).toContain("<tool_usage_rules>");
expect(config.prompt).toContain("Do not use `apply_patch`");
expect(config.prompt).toContain("`edit` and `write`");
});
test("GPT 5.3-codex model includes GPT-5.3 specific prompt content", () => {
@@ -205,6 +207,8 @@ describe("createHephaestusAgent", () => {
expect(config.prompt).toContain("Senior Staff Engineer");
expect(config.prompt).toContain("Hard Constraints");
expect(config.prompt).toContain("<tool_usage_rules>");
expect(config.prompt).toContain("Do not use `apply_patch`");
expect(config.prompt).toContain("`edit` and `write`");
});
test("includes Hephaestus identity in prompt", () => {
@@ -219,6 +223,35 @@ describe("createHephaestusAgent", () => {
expect(config.prompt).toContain("autonomous deep worker");
});
test("generic GPT model includes apply_patch workaround guidance", () => {
// given
const model = "openai/gpt-4o";
// when
const config = createHephaestusAgent(model);
// then
expect(config.prompt).toContain("Do not use `apply_patch`");
expect(config.prompt).toContain("`edit` and `write`");
});
test("GPT models deny apply_patch while non-GPT models do not", () => {
// given
const gpt54Model = "openai/gpt-5.4";
const gptGenericModel = "openai/gpt-4o";
const claudeModel = "anthropic/claude-opus-4-6";
// when
const gpt54Config = createHephaestusAgent(gpt54Model);
const gptGenericConfig = createHephaestusAgent(gptGenericModel);
const claudeConfig = createHephaestusAgent(claudeModel);
// then
expect(gpt54Config.permission ?? {}).toHaveProperty("apply_patch", "deny");
expect(gptGenericConfig.permission ?? {}).toHaveProperty("apply_patch", "deny");
expect(claudeConfig.permission ?? {}).not.toHaveProperty("apply_patch");
});
test("useTaskSystem=true produces Task Discipline prompt", () => {
// given
const model = "openai/gpt-5.4";

View File

@@ -1,6 +1,6 @@
import type { AgentConfig } from "@opencode-ai/sdk";
import type { AgentMode, AgentPromptMetadata } from "../types";
import { isGpt5_4Model, isGpt5_3CodexModel } from "../types";
import { isGptModel, isGpt5_4Model, isGpt5_3CodexModel } from "../types";
import type {
AvailableAgent,
AvailableTool,
@@ -120,6 +120,7 @@ export function createHephaestusAgent(
permission: {
question: "allow",
call_omo_agent: "deny",
...(isGptModel(model) ? { apply_patch: "deny" as const } : {}),
} as AgentConfig["permission"],
reasoningEffort: "medium",
};

View File

@@ -448,6 +448,7 @@ ${oracleSection}
1. SEARCH existing codebase for similar patterns/styles
2. Match naming, indentation, import styles, error handling conventions
3. Default to ASCII. Add comments only for non-obvious blocks
4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification.
### After Implementation (MANDATORY - DO NOT SKIP)

View File

@@ -252,7 +252,7 @@ ${antiPatterns}
1. **Explore**: Fire 2-5 explore/librarian agents in parallel + direct tool reads. Goal: complete understanding, not just enough context.
2. **Plan**: List files to modify, specific changes, dependencies, complexity estimate.
3. **Decide**: Trivial (<10 lines, single file) -> self. Complex (multi-file, >100 lines) -> delegate.
4. **Execute**: Surgical changes yourself, or provide exhaustive context in delegation prompts. Match existing patterns. Minimal diff. Search the codebase for similar patterns before writing code. Default to ASCII. Add comments only for non-obvious blocks.
4. **Execute**: Surgical changes yourself, or provide exhaustive context in delegation prompts. Match existing patterns. Minimal diff. Search the codebase for similar patterns before writing code. Default to ASCII. Add comments only for non-obvious blocks. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification.
5. **Verify**: \`lsp_diagnostics\` on all modified files (zero errors) -> run related tests (\`foo.ts\` -> \`foo.test.ts\`) -> typecheck -> build if applicable (exit 0). Fix only issues your changes caused.
If verification fails, return to step 1 with a materially different approach. After three attempts: stop, revert to last working state, document what you tried, consult Oracle. If Oracle cannot resolve, ask the user.

View File

@@ -311,6 +311,7 @@ ${oracleSection}
1. SEARCH existing codebase for similar patterns/styles
2. Match naming, indentation, import styles, error handling conventions
3. Default to ASCII. Add comments only for non-obvious blocks
4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification.
### After Implementation (MANDATORY - DO NOT SKIP)

View File

@@ -30,6 +30,7 @@ const MODE: AgentMode = "subagent"
// Core tools that Sisyphus-Junior must NEVER have access to
// Note: call_omo_agent is ALLOWED so subagents can spawn explore/librarian
const BLOCKED_TOOLS = ["task"]
const GPT_BLOCKED_TOOLS = ["task", "apply_patch"]
export const SISYPHUS_JUNIOR_DEFAULTS = {
model: "anthropic/claude-sonnet-4-6",
@@ -91,13 +92,14 @@ export function createSisyphusJuniorAgentWithOverrides(
const promptAppend = override?.prompt_append
const prompt = buildSisyphusJuniorPrompt(model, useTaskSystem, promptAppend)
const blockedTools = isGptModel(model) ? GPT_BLOCKED_TOOLS : BLOCKED_TOOLS
const baseRestrictions = createAgentToolRestrictions(BLOCKED_TOOLS)
const baseRestrictions = createAgentToolRestrictions(blockedTools)
const userPermission = (override?.permission ?? {}) as Record<string, PermissionValue>
const basePermission = baseRestrictions.permission
const merged: Record<string, PermissionValue> = { ...userPermission }
for (const tool of BLOCKED_TOOLS) {
for (const tool of blockedTools) {
merged[tool] = "deny"
}
merged.call_omo_agent = "allow"

View File

@@ -92,6 +92,7 @@ Style:
1. SEARCH existing codebase for similar patterns/styles
2. Match naming, indentation, import styles, error handling conventions
3. Default to ASCII. Add comments only for non-obvious blocks
4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification.
### After Implementation (MANDATORY - DO NOT SKIP)

View File

@@ -96,7 +96,7 @@ Style:
1. SEARCH existing codebase for similar patterns/styles
2. Match naming, indentation, import styles, error handling conventions
3. Default to ASCII. Add comments only for non-obvious blocks
4. Always use apply_patch for manual code edits. Do not use cat or echo for file creation/editing. Formatting commands or bulk edits don't need apply_patch
4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification.
5. Do not chain bash commands with separators - each command should be a separate tool call
### After Implementation (MANDATORY - DO NOT SKIP)

View File

@@ -93,6 +93,7 @@ Style:
1. SEARCH existing codebase for similar patterns/styles
2. Match naming, indentation, import styles, error handling conventions
3. Default to ASCII. Add comments only for non-obvious blocks
4. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification.
### After Implementation (MANDATORY - DO NOT SKIP)

View File

@@ -350,6 +350,8 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
expect(result.prompt).toContain("Scope Discipline")
expect(result.prompt).toContain("<tool_usage_rules>")
expect(result.prompt).toContain("Progress Updates")
expect(result.prompt).toContain("Do not use `apply_patch`")
expect(result.prompt).toContain("`edit` and `write`")
})
test("GPT 5.4 model uses GPT-5.4 specific prompt", () => {
@@ -362,6 +364,9 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
// then
expect(result.prompt).toContain("expert coding agent")
expect(result.prompt).toContain("<tool_usage_rules>")
expect(result.prompt).toContain("Do not use `apply_patch`")
expect(result.prompt).toContain("`edit` and `write`")
expect(result.prompt).not.toContain("Always use apply_patch")
})
test("GPT 5.3 Codex model uses GPT-5.3-codex specific prompt", () => {
@@ -374,6 +379,28 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
// then
expect(result.prompt).toContain("Senior Engineer")
expect(result.prompt).toContain("<tool_usage_rules>")
expect(result.prompt).toContain("Do not use `apply_patch`")
expect(result.prompt).toContain("`edit` and `write`")
})
test("GPT variants deny apply_patch while Claude variants do not", () => {
// given
const gpt54Override = { model: "openai/gpt-5.4" }
const gpt53Override = { model: "openai/gpt-5.3-codex" }
const gptGenericOverride = { model: "openai/gpt-4o" }
const claudeOverride = { model: "anthropic/claude-sonnet-4-6" }
// when
const gpt54Result = createSisyphusJuniorAgentWithOverrides(gpt54Override)
const gpt53Result = createSisyphusJuniorAgentWithOverrides(gpt53Override)
const gptGenericResult = createSisyphusJuniorAgentWithOverrides(gptGenericOverride)
const claudeResult = createSisyphusJuniorAgentWithOverrides(claudeOverride)
// then
expect(gpt54Result.permission ?? {}).toHaveProperty("apply_patch", "deny")
expect(gpt53Result.permission ?? {}).toHaveProperty("apply_patch", "deny")
expect(gptGenericResult.permission ?? {}).toHaveProperty("apply_patch", "deny")
expect(claudeResult.permission ?? {}).not.toHaveProperty("apply_patch")
})
test("prompt_append is added after base prompt", () => {
@@ -494,6 +521,7 @@ describe("buildSisyphusJuniorPrompt", () => {
expect(prompt).toContain("expert coding agent")
expect(prompt).toContain("Scope Discipline")
expect(prompt).toContain("<tool_usage_rules>")
expect(prompt).toContain("Do not use `apply_patch`")
})
test("GPT 5.3 Codex model uses GPT-5.3-codex prompt", () => {
@@ -507,6 +535,7 @@ describe("buildSisyphusJuniorPrompt", () => {
expect(prompt).toContain("Senior Engineer")
expect(prompt).toContain("Scope Discipline")
expect(prompt).toContain("<tool_usage_rules>")
expect(prompt).toContain("Do not use `apply_patch`")
})
test("generic GPT model uses generic GPT prompt", () => {
@@ -521,6 +550,7 @@ describe("buildSisyphusJuniorPrompt", () => {
expect(prompt).toContain("Scope Discipline")
expect(prompt).toContain("<tool_usage_rules>")
expect(prompt).toContain("Progress Updates")
expect(prompt).toContain("Do not use `apply_patch`")
})
test("Claude model prompt contains Claude-specific sections", () => {

View File

@@ -492,6 +492,7 @@ export function createSisyphusAgent(
permission: {
question: "allow",
call_omo_agent: "deny",
apply_patch: "deny",
} as AgentConfig["permission"],
reasoningEffort: "medium",
};
@@ -531,6 +532,7 @@ export function createSisyphusAgent(
const permission = {
question: "allow",
call_omo_agent: "deny",
...(isGptModel(model) ? { apply_patch: "deny" as const } : {}),
} as AgentConfig["permission"];
const base = {
description:

View File

@@ -304,7 +304,7 @@ Every implementation task follows this cycle. No exceptions.
Skills: if ANY available skill's domain overlaps with the task, load it NOW via \`skill\` tool and include it in \`load_skills\`. When the connection is even remotely plausible, load the skill - the cost of loading an irrelevant skill is near zero, the cost of missing a relevant one is high.
4. EXECUTE_OR_SUPERVISE -
If self: surgical changes, match existing patterns, minimal diff. Never suppress type errors. Never commit unless asked. Bugfix rule: fix minimally, never refactor while fixing.
If self: surgical changes, match existing patterns, minimal diff. Never suppress type errors. Never commit unless asked. Bugfix rule: fix minimally, never refactor while fixing. Use the \`edit\` and \`write\` tools for file changes. Do not use \`apply_patch\` on GPT models - it is unreliable here and can hang during verification.
If delegated: exhaustive 6-section prompt per \`<delegation>\` protocol. Session continuity for follow-ups.
5. VERIFY -

View File

@@ -5,6 +5,7 @@ import { createExploreAgent } from "./explore"
import { createMomusAgent } from "./momus"
import { createMetisAgent } from "./metis"
import { createAtlasAgent } from "./atlas"
import { createSisyphusAgent } from "./sisyphus"
const TEST_MODEL = "anthropic/claude-sonnet-4-5"
@@ -111,4 +112,23 @@ describe("read-only agent tool restrictions", () => {
expect(permission["call_omo_agent"]).toBeUndefined()
})
})
describe("Sisyphus GPT variants", () => {
test("deny apply_patch for GPT models but not Claude models", () => {
// given
const gpt54Agent = createSisyphusAgent("openai/gpt-5.4")
const gptGenericAgent = createSisyphusAgent("openai/gpt-5.2")
const claudeAgent = createSisyphusAgent(TEST_MODEL)
// when
const gpt54Permission = (gpt54Agent.permission ?? {}) as Record<string, string>
const gptGenericPermission = (gptGenericAgent.permission ?? {}) as Record<string, string>
const claudePermission = (claudeAgent.permission ?? {}) as Record<string, string>
// then
expect(gpt54Permission["apply_patch"]).toBe("deny")
expect(gptGenericPermission["apply_patch"]).toBe("deny")
expect(claudePermission["apply_patch"]).toBeUndefined()
})
})
})