feat(codex): enable image generation for all Codex upstream requests

Codex CLI gates the built-in image_generation tool behind
AuthMode::Chatgpt (OAuth only). When clients instead authenticate
with an API key through the CPA reverse proxy, the tool is absent
from their requests, so image generation is unavailable.

Changes:

1. Inject image_generation tool (codex_executor.go):
   Add ensureImageGenerationTool() that appends
   {"type":"image_generation","output_format":"png"} to the tools
   array if not already present. Applied to all three execution
   paths: Execute, executeCompact, and ExecuteStream.

2. Route aliases for Codex CLI direct access (server.go):
   Add /backend-api/codex/responses routes that map to the same
   OpenAI Responses API handlers as /v1/responses. This allows
   Codex CLI to connect via chatgpt_base_url config while keeping
   AuthMode::Chatgpt, which enables the built-in image_generation
   tool on the client side.

3. Unit tests (codex_executor_imagegen_test.go):
   Cover no-tools, existing tools, already-present, empty array,
   and mixed built-in tool scenarios.
This commit is contained in:
MoYeRanQianZhi
2026-04-23 01:15:47 +08:00
parent a188159632
commit 31934ae04c
3 changed files with 119 additions and 0 deletions

View File

@@ -353,6 +353,15 @@ func (s *Server) setupRoutes() {
v1.POST("/responses/compact", openaiResponsesHandlers.Compact)
}
// Codex CLI direct route aliases (chatgpt_base_url compatible)
codexDirect := s.engine.Group("/backend-api/codex")
codexDirect.Use(AuthMiddleware(s.accessManager))
{
codexDirect.GET("/responses", openaiResponsesHandlers.ResponsesWebsocket)
codexDirect.POST("/responses", openaiResponsesHandlers.Responses)
codexDirect.POST("/responses/compact", openaiResponsesHandlers.Compact)
}
// Gemini compatible API routes
v1beta := s.engine.Group("/v1beta")
v1beta.Use(AuthMiddleware(s.accessManager))

View File

@@ -180,6 +180,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
body, _ = sjson.DeleteBytes(body, "safety_identifier")
body, _ = sjson.DeleteBytes(body, "stream_options")
body = normalizeCodexInstructions(body)
body = ensureImageGenerationTool(body)
url := strings.TrimSuffix(baseURL, "/") + "/responses"
httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -326,6 +327,7 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A
body, _ = sjson.SetBytes(body, "model", baseModel)
body, _ = sjson.DeleteBytes(body, "stream")
body = normalizeCodexInstructions(body)
body = ensureImageGenerationTool(body)
url := strings.TrimSuffix(baseURL, "/") + "/responses/compact"
httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -420,6 +422,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
body, _ = sjson.DeleteBytes(body, "stream_options")
body, _ = sjson.SetBytes(body, "model", baseModel)
body = normalizeCodexInstructions(body)
body = ensureImageGenerationTool(body)
url := strings.TrimSuffix(baseURL, "/") + "/responses"
httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -821,6 +824,24 @@ func normalizeCodexInstructions(body []byte) []byte {
return body
}
// Raw JSON for the built-in image_generation tool injected into Codex
// upstream requests: once as a bare tool object (for appending to an existing
// tools array) and once wrapped in a one-element array (for requests that
// carry no usable tools array).
var (
	imageGenToolJSON      = []byte(`{"type":"image_generation","output_format":"png"}`)
	imageGenToolArrayJSON = []byte(`[{"type":"image_generation","output_format":"png"}]`)
)

// ensureImageGenerationTool returns body with an image_generation entry
// present in its top-level "tools" array.
//
//   - If "tools" is missing or is not a JSON array, it is set to an array
//     containing only the image_generation tool.
//   - If any existing entry already has type "image_generation", body is
//     returned unchanged, so a caller-supplied configuration is preserved.
//   - Otherwise the image_generation tool is appended after the existing tools.
//
// The injection is best-effort: if the JSON edit fails, the original body is
// returned untouched instead of whatever value the failed edit produced.
func ensureImageGenerationTool(body []byte) []byte {
	tools := gjson.GetBytes(body, "tools")
	if !tools.Exists() || !tools.IsArray() {
		updated, err := sjson.SetRawBytes(body, "tools", imageGenToolArrayJSON)
		if err != nil {
			return body
		}
		return updated
	}

	for _, tool := range tools.Array() {
		if tool.Get("type").String() == "image_generation" {
			return body
		}
	}

	// The "-1" index asks sjson to append to the end of the array.
	updated, err := sjson.SetRawBytes(body, "tools.-1", imageGenToolJSON)
	if err != nil {
		return body
	}
	return updated
}
func isCodexModelCapacityError(errorBody []byte) bool {
if len(errorBody) == 0 {
return false

View File

@@ -0,0 +1,89 @@
package executor
import (
"testing"
"github.com/tidwall/gjson"
)
// TestEnsureImageGenerationTool_NoTools checks that a request with no "tools"
// field comes back with a one-element tools array holding the PNG
// image_generation tool.
func TestEnsureImageGenerationTool_NoTools(t *testing.T) {
	out := ensureImageGenerationTool([]byte(`{"model":"gpt-5.4","input":"draw a cat"}`))

	toolList := gjson.GetBytes(out, "tools")
	if !toolList.IsArray() {
		t.Fatalf("expected tools array, got %v", toolList.Type)
	}

	entries := toolList.Array()
	if count := len(entries); count != 1 {
		t.Fatalf("expected 1 tool, got %d", count)
	}

	injected := entries[0]
	if got := injected.Get("type").String(); got != "image_generation" {
		t.Fatalf("expected type=image_generation, got %s", got)
	}
	if got := injected.Get("output_format").String(); got != "png" {
		t.Fatalf("expected output_format=png, got %s", got)
	}
}
// TestEnsureImageGenerationTool_ExistingToolsWithoutImageGen checks that the
// image_generation tool is appended after an existing function tool, leaving
// the original entry in first position.
func TestEnsureImageGenerationTool_ExistingToolsWithoutImageGen(t *testing.T) {
	const request = `{"model":"gpt-5.4","tools":[{"type":"function","name":"get_weather","parameters":{}}]}`

	out := ensureImageGenerationTool([]byte(request))
	entries := gjson.GetBytes(out, "tools").Array()
	if count := len(entries); count != 2 {
		t.Fatalf("expected 2 tools, got %d", count)
	}

	if first := entries[0].Get("type").String(); first != "function" {
		t.Fatalf("expected first tool type=function, got %s", first)
	}
	if second := entries[1].Get("type").String(); second != "image_generation" {
		t.Fatalf("expected second tool type=image_generation, got %s", second)
	}
}
// TestEnsureImageGenerationTool_AlreadyPresent checks that a request which
// already carries an image_generation tool gets no duplicate and keeps its own
// output_format setting.
func TestEnsureImageGenerationTool_AlreadyPresent(t *testing.T) {
	const request = `{"model":"gpt-5.4","tools":[{"type":"image_generation","output_format":"webp"},{"type":"function","name":"f1"}]}`

	out := ensureImageGenerationTool([]byte(request))
	entries := gjson.GetBytes(out, "tools").Array()
	if count := len(entries); count != 2 {
		t.Fatalf("expected 2 tools (no duplicate), got %d", count)
	}

	if format := entries[0].Get("output_format").String(); format != "webp" {
		t.Fatalf("expected original output_format=webp preserved, got %s", format)
	}
}
// TestEnsureImageGenerationTool_EmptyToolsArray checks that an empty "tools"
// array ends up containing exactly the image_generation tool.
func TestEnsureImageGenerationTool_EmptyToolsArray(t *testing.T) {
	out := ensureImageGenerationTool([]byte(`{"model":"gpt-5.4","tools":[]}`))

	entries := gjson.GetBytes(out, "tools").Array()
	if count := len(entries); count != 1 {
		t.Fatalf("expected 1 tool, got %d", count)
	}

	if got := entries[0].Get("type").String(); got != "image_generation" {
		t.Fatalf("expected type=image_generation, got %s", got)
	}
}
// TestEnsureImageGenerationTool_WebSearchAndImageGen checks that the
// image_generation tool is appended after an existing built-in web_search
// tool, which stays first.
func TestEnsureImageGenerationTool_WebSearchAndImageGen(t *testing.T) {
	out := ensureImageGenerationTool([]byte(`{"model":"gpt-5.4","tools":[{"type":"web_search"}]}`))

	entries := gjson.GetBytes(out, "tools").Array()
	if count := len(entries); count != 2 {
		t.Fatalf("expected 2 tools, got %d", count)
	}

	if first := entries[0].Get("type").String(); first != "web_search" {
		t.Fatalf("expected first tool type=web_search, got %s", first)
	}
	if second := entries[1].Get("type").String(); second != "image_generation" {
		t.Fatalf("expected second tool type=image_generation, got %s", second)
	}
}