From 5ef2d59e0591bf9e5f683a0bc769b3ec59bbd973 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sat, 13 Dec 2025 08:18:06 +0800 Subject: [PATCH 1/9] fix(thinking): gate reasoning effort by model support Only map OpenAI reasoning effort to Claude thinking for models that support thinking and use budget tokens (not level-based thinking). Also add "xhigh" effort mapping and adjust minimal/low budgets, with new raw-payload conversion tests across protocols and models. --- .../chat-completions/claude_openai_request.go | 5 +- .../claude_openai-responses_request.go | 9 +- test/thinking_conversion_test.go | 273 +++++++++++++++++- 3 files changed, 281 insertions(+), 6 deletions(-) diff --git a/internal/translator/claude/openai/chat-completions/claude_openai_request.go b/internal/translator/claude/openai/chat-completions/claude_openai_request.go index b3384ecc6..9825c661a 100644 --- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go +++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go @@ -16,6 +16,7 @@ import ( "strings" "github.com/google/uuid" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -65,7 +66,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream root := gjson.ParseBytes(rawJSON) - if v := root.Get("reasoning_effort"); v.Exists() { + if v := root.Get("reasoning_effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { out, _ = sjson.Set(out, "thinking.type", "enabled") switch v.String() { @@ -77,6 +78,8 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream out, _ = sjson.Set(out, "thinking.budget_tokens", 8192) case "high": out, _ = sjson.Set(out, "thinking.budget_tokens", 24576) + case "xhigh": + out, _ = sjson.Set(out, "thinking.budget_tokens", 32768) } } diff --git 
a/internal/translator/claude/openai/responses/claude_openai-responses_request.go b/internal/translator/claude/openai/responses/claude_openai-responses_request.go index 764bb5c90..4a19bb928 100644 --- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go +++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go @@ -10,6 +10,7 @@ import ( "strings" "github.com/google/uuid" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -52,20 +53,22 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte root := gjson.ParseBytes(rawJSON) - if v := root.Get("reasoning.effort"); v.Exists() { + if v := root.Get("reasoning.effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { out, _ = sjson.Set(out, "thinking.type", "enabled") switch v.String() { case "none": out, _ = sjson.Set(out, "thinking.type", "disabled") case "minimal": - out, _ = sjson.Set(out, "thinking.budget_tokens", 1024) + out, _ = sjson.Set(out, "thinking.budget_tokens", 512) case "low": - out, _ = sjson.Set(out, "thinking.budget_tokens", 4096) + out, _ = sjson.Set(out, "thinking.budget_tokens", 1024) case "medium": out, _ = sjson.Set(out, "thinking.budget_tokens", 8192) case "high": out, _ = sjson.Set(out, "thinking.budget_tokens", 24576) + case "xhigh": + out, _ = sjson.Set(out, "thinking.budget_tokens", 32768) } } diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index c2f4aa8dd..a1462611b 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -81,8 +81,10 @@ func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any, return payload } if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if updated, err := sjson.SetBytes(payload, field, effort); err == nil { - return updated + if 
util.ModelUsesThinkingLevels(model) { + if updated, err := sjson.SetBytes(payload, field, effort); err == nil { + return updated + } } } if util.ModelUsesThinkingLevels(model) { @@ -523,6 +525,273 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) { } } +// buildRawPayloadWithThinking creates a payload with thinking parameters already in the body. +// This tests the path where thinking comes from the raw payload, not model suffix. +func buildRawPayloadWithThinking(fromProtocol, model string, thinkingParam any) []byte { + switch fromProtocol { + case "gemini": + base := fmt.Sprintf(`{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, model) + if budget, ok := thinkingParam.(int); ok { + base, _ = sjson.Set(base, "generationConfig.thinkingConfig.thinkingBudget", budget) + } + return []byte(base) + case "openai-response": + base := fmt.Sprintf(`{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`, model) + if effort, ok := thinkingParam.(string); ok && effort != "" { + base, _ = sjson.Set(base, "reasoning.effort", effort) + } + return []byte(base) + case "openai": + base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model) + if effort, ok := thinkingParam.(string); ok && effort != "" { + base, _ = sjson.Set(base, "reasoning_effort", effort) + } + return []byte(base) + case "claude": + base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model) + if budget, ok := thinkingParam.(int); ok && budget > 0 { + base, _ = sjson.Set(base, "thinking.type", "enabled") + base, _ = sjson.Set(base, "thinking.budget_tokens", budget) + } + return []byte(base) + default: + return []byte(fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model)) + } +} + +// buildBodyForProtocolWithRawThinking translates payload with raw thinking params. 
+func buildBodyForProtocolWithRawThinking(t *testing.T, fromProtocol, toProtocol, model string, thinkingParam any) ([]byte, error) { + t.Helper() + raw := buildRawPayloadWithThinking(fromProtocol, model, thinkingParam) + stream := fromProtocol != toProtocol + + body := sdktranslator.TranslateRequest( + sdktranslator.FromString(fromProtocol), + sdktranslator.FromString(toProtocol), + model, + raw, + stream, + ) + + var err error + switch toProtocol { + case "gemini": + body = util.ApplyDefaultThinkingIfNeeded(model, body) + body = util.NormalizeGeminiThinkingBudget(model, body) + body = util.StripThinkingConfigIfUnsupported(model, body) + case "claude": + // For raw payload, Claude thinking is passed through by translator + // No additional processing needed as thinking is already in body + case "openai": + body = normalizeThinkingConfigLocal(body, model) + err = validateThinkingConfigLocal(body, model) + case "codex": + body, err = normalizeCodexPayload(body, model) + } + + body, _ = sjson.SetBytes(body, "model", model) + body = filterThinkingBody(toProtocol, body, model, model) + return body, err +} + +func TestRawPayloadThinkingConversions(t *testing.T) { + cleanup := registerCoreModels(t) + defer cleanup() + + models := []string{ + "gpt-5", // supports levels (low/medium/high) + "gemini-2.5-pro", // supports numeric budget + "qwen3-coder-flash", // no thinking support + } + fromProtocols := []string{"openai", "claude", "gemini", "openai-response"} + toProtocols := []string{"gemini", "claude", "openai", "codex"} + + type scenario struct { + name string + thinkingParam any // int for budget, string for effort level + } + + for _, model := range models { + supportsThinking := util.ModelSupportsThinking(model) + usesLevels := util.ModelUsesThinkingLevels(model) + + for _, from := range fromProtocols { + var cases []scenario + switch from { + case "openai", "openai-response": + cases = []scenario{ + {name: "no-thinking", thinkingParam: nil}, + {name: "effort-low", 
thinkingParam: "low"}, + {name: "effort-medium", thinkingParam: "medium"}, + {name: "effort-high", thinkingParam: "high"}, + {name: "effort-invalid-xhigh", thinkingParam: "xhigh"}, + {name: "effort-invalid-foo", thinkingParam: "foo"}, + } + case "gemini": + cases = []scenario{ + {name: "no-thinking", thinkingParam: nil}, + {name: "budget-1024", thinkingParam: 1024}, + {name: "budget-8192", thinkingParam: 8192}, + {name: "budget-16384", thinkingParam: 16384}, + } + case "claude": + cases = []scenario{ + {name: "no-thinking", thinkingParam: nil}, + {name: "budget-1024", thinkingParam: 1024}, + {name: "budget-8192", thinkingParam: 8192}, + {name: "budget-16384", thinkingParam: 16384}, + } + } + + for _, to := range toProtocols { + if from == to { + continue + } + t.Logf("═══════════════════════════════════════════════════════════════════════════════") + t.Logf(" RAW PAYLOAD: %s -> %s | model: %s", from, to, model) + t.Logf("═══════════════════════════════════════════════════════════════════════════════") + + for _, cs := range cases { + from := from + to := to + cs := cs + testName := fmt.Sprintf("raw/%s->%s/%s/%s", from, to, model, cs.name) + t.Run(testName, func(t *testing.T) { + expectPresent, expectValue, expectErr := func() (bool, string, bool) { + if cs.thinkingParam == nil { + // No thinking param provided + if to == "codex" && from != "openai-response" { + // Codex translators default to medium + if supportsThinking && usesLevels { + return true, "medium", false + } + } + return false, "", false + } + if !supportsThinking { + return false, "", false + } + + switch to { + case "gemini": + // Gemini expects numeric budget + if budget, ok := cs.thinkingParam.(int); ok { + norm := util.NormalizeThinkingBudget(model, budget) + return true, fmt.Sprintf("%d", norm), false + } + if effort, ok := cs.thinkingParam.(string); ok && effort != "" { + if b, okB := util.ThinkingEffortToBudget(model, effort); okB { + return true, fmt.Sprintf("%d", b), false + } + } + return 
false, "", false + case "claude": + // Claude expects numeric budget + if budget, ok := cs.thinkingParam.(int); ok && budget > 0 { + norm := util.NormalizeThinkingBudget(model, budget) + return true, fmt.Sprintf("%d", norm), false + } + if effort, ok := cs.thinkingParam.(string); ok && effort != "" { + if b, okB := util.ThinkingEffortToBudget(model, effort); okB && b > 0 { + return true, fmt.Sprintf("%d", b), false + } + } + return false, "", false + case "openai": + if !usesLevels { + return false, "", false + } + if effort, ok := cs.thinkingParam.(string); ok && effort != "" { + if normalized, okN := util.NormalizeReasoningEffortLevel(model, effort); okN { + return true, normalized, false + } + return false, "", true // invalid level + } + if budget, ok := cs.thinkingParam.(int); ok { + if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" { + return true, mapped, false + } + } + return false, "", false + case "codex": + if !usesLevels { + return false, "", false + } + if effort, ok := cs.thinkingParam.(string); ok && effort != "" { + if normalized, okN := util.NormalizeReasoningEffortLevel(model, effort); okN { + return true, normalized, false + } + return false, "", true + } + if budget, ok := cs.thinkingParam.(int); ok { + if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" { + return true, mapped, false + } + } + // thinkingParam was non-nil but couldn't map - no default medium + return false, "", false + } + return false, "", false + }() + + body, err := buildBodyForProtocolWithRawThinking(t, from, to, model, cs.thinkingParam) + actualPresent, actualValue := func() (bool, string) { + path := "" + switch to { + case "gemini": + path = "generationConfig.thinkingConfig.thinkingBudget" + case "claude": + path = "thinking.budget_tokens" + case "openai": + path = "reasoning_effort" + case "codex": + path = "reasoning.effort" + } + if path == "" { + return false, "" + } + val := 
gjson.GetBytes(body, path) + if to == "codex" && !val.Exists() { + reasoning := gjson.GetBytes(body, "reasoning") + if reasoning.Exists() { + val = reasoning.Get("effort") + } + } + if !val.Exists() { + return false, "" + } + if val.Type == gjson.Number { + return true, fmt.Sprintf("%d", val.Int()) + } + return true, val.String() + }() + + t.Logf("from=%s to=%s model=%s param=%v present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s", + from, to, model, cs.thinkingParam, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body)) + + if expectErr { + if err == nil { + t.Fatalf("expected validation error but got none, body=%s", string(body)) + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v body=%s", err, string(body)) + } + + if expectPresent != actualPresent { + t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body)) + } + if expectPresent && expectValue != actualValue { + t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body)) + } + }) + } + } + } + } +} + func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) { cleanup := registerCoreModels(t) defer cleanup() From 8496cc24446d2ac51e9ce2d938a3768f978a5835 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 14 Dec 2025 13:00:34 +0800 Subject: [PATCH 2/9] test(thinking): cover openai-compat reasoning passthrough --- test/thinking_conversion_test.go | 166 ++++++++++++++++++++++--------- 1 file changed, 117 insertions(+), 49 deletions(-) diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index a1462611b..60f4a02ed 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -24,6 +24,13 @@ type statusErr struct { func (e statusErr) Error() string { return e.msg } +// isOpenAICompatModel returns true if the model is configured as an OpenAI-compatible +// model 
that should have reasoning effort passed through even if not in registry. +// This simulates the allowCompat behavior from OpenAICompatExecutor. +func isOpenAICompatModel(model string) bool { + return model == "custom-thinking-model" +} + // registerCoreModels loads representative models across providers into the registry // so NormalizeThinkingBudget and level validation use real ranges. func registerCoreModels(t *testing.T) func() { @@ -34,11 +41,28 @@ func registerCoreModels(t *testing.T) func() { reg.RegisterClient(uid+"-claude", "claude", registry.GetClaudeModels()) reg.RegisterClient(uid+"-openai", "codex", registry.GetOpenAIModels()) reg.RegisterClient(uid+"-qwen", "qwen", registry.GetQwenModels()) + // Custom openai-compatible model with forced thinking suffix passthrough. + // No Thinking field - simulates an external model added via openai-compat + // where the registry has no knowledge of its thinking capabilities. + // The allowCompat flag should preserve reasoning effort for such models. + customOpenAIModels := []*registry.ModelInfo{ + { + ID: "custom-thinking-model", + Object: "model", + Created: 1700000000, + OwnedBy: "custom-provider", + Type: "openai", + DisplayName: "Custom Thinking Model", + Description: "OpenAI-compatible model with forced thinking suffix support", + }, + } + reg.RegisterClient(uid+"-custom-openai", "codex", customOpenAIModels) return func() { reg.UnregisterClient(uid + "-gemini") reg.UnregisterClient(uid + "-claude") reg.UnregisterClient(uid + "-openai") reg.UnregisterClient(uid + "-qwen") + reg.UnregisterClient(uid + "-custom-openai") } } @@ -70,24 +94,24 @@ func applyThinkingMetadataLocal(payload []byte, metadata map[string]any, model s } // applyReasoningEffortMetadataLocal mirrors executor.applyReasoningEffortMetadata. 
-func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any, model, field string) []byte { +func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte { if len(metadata) == 0 { return payload } - if !util.ModelSupportsThinking(model) { - return payload - } if field == "" { return payload } + if !util.ModelSupportsThinking(model) && !allowCompat { + return payload + } if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if util.ModelUsesThinkingLevels(model) { + if util.ModelUsesThinkingLevels(model) || allowCompat { if updated, err := sjson.SetBytes(payload, field, effort); err == nil { return updated } } } - if util.ModelUsesThinkingLevels(model) { + if util.ModelUsesThinkingLevels(model) || allowCompat { if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { if effort, ok := util.OpenAIThinkingBudgetToEffort(model, *budget); ok && effort != "" { if updated, err := sjson.SetBytes(payload, field, effort); err == nil { @@ -100,12 +124,17 @@ func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any, } // normalizeThinkingConfigLocal mirrors executor.normalizeThinkingConfig. -func normalizeThinkingConfigLocal(payload []byte, model string) []byte { +// When allowCompat is true, reasoning fields are preserved even for models +// without thinking support (simulating openai-compat passthrough behavior). +func normalizeThinkingConfigLocal(payload []byte, model string, allowCompat bool) []byte { if len(payload) == 0 || model == "" { return payload } if !util.ModelSupportsThinking(model) { + if allowCompat { + return payload + } return stripThinkingFieldsLocal(payload, false) } @@ -187,8 +216,8 @@ func validateThinkingConfigLocal(payload []byte, model string) error { } // normalizeCodexPayload mirrors codex_executor's reasoning + streaming tweaks. 
-func normalizeCodexPayload(body []byte, upstreamModel string) ([]byte, error) { - body = normalizeThinkingConfigLocal(body, upstreamModel) +func normalizeCodexPayload(body []byte, upstreamModel string, allowCompat bool) ([]byte, error) { + body = normalizeThinkingConfigLocal(body, upstreamModel, allowCompat) if err := validateThinkingConfigLocal(body, upstreamModel); err != nil { return body, err } @@ -216,6 +245,7 @@ func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffi ) var err error + allowCompat := isOpenAICompatModel(normalizedModel) switch toProtocol { case "gemini": body = applyThinkingMetadataLocal(body, metadata, normalizedModel) @@ -227,13 +257,14 @@ func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffi body = util.ApplyClaudeThinkingConfig(body, budget) } case "openai": - body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning_effort") - body = normalizeThinkingConfigLocal(body, upstreamModel) + body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning_effort", allowCompat) + body = normalizeThinkingConfigLocal(body, upstreamModel, allowCompat) err = validateThinkingConfigLocal(body, upstreamModel) case "codex": // OpenAI responses / codex - body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning.effort") + // Codex does not support allowCompat; always use false. + body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning.effort", false) // Mirror CodexExecutor final normalization and model override so tests log the final body. 
- body, err = normalizeCodexPayload(body, upstreamModel) + body, err = normalizeCodexPayload(body, upstreamModel, false) default: } @@ -290,9 +321,10 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) { defer cleanup() models := []string{ - "gpt-5", // supports levels (low/medium/high) - "gemini-2.5-pro", // supports numeric budget - "qwen3-coder-flash", // no thinking support + "gpt-5", // supports levels (low/medium/high) + "gemini-2.5-pro", // supports numeric budget + "qwen3-coder-flash", // no thinking support + "custom-thinking-model", // openai-compatible model with forced thinking suffix } fromProtocols := []string{"openai", "claude", "gemini", "openai-response"} toProtocols := []string{"gemini", "claude", "openai", "codex"} @@ -404,7 +436,22 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) { } return true, fmt.Sprintf("%d", *budget), false case "openai": - if !util.ModelSupportsThinking(normalizedModel) { + allowCompat := isOpenAICompatModel(normalizedModel) + if !util.ModelSupportsThinking(normalizedModel) && !allowCompat { + return false, "", false + } + // For allowCompat models, pass through effort directly without validation + if allowCompat { + effort, ok := util.ReasoningEffortFromMetadata(metadata) + if ok && strings.TrimSpace(effort) != "" { + return true, strings.ToLower(strings.TrimSpace(effort)), false + } + // Check numeric budget fallback for allowCompat + if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { + if mapped, okMap := util.OpenAIThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" { + return true, mapped, false + } + } return false, "", false } if !util.ModelUsesThinkingLevels(normalizedModel) { @@ -429,14 +476,8 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) { } return false, "", true // validation would fail case "codex": - if !util.ModelSupportsThinking(normalizedModel) { - return false, "", false - } - if 
!util.ModelUsesThinkingLevels(normalizedModel) { - // Non-levels models don't support effort strings in codex - if from != "openai-response" { - return false, "", false - } + // Codex does not support allowCompat; require thinking-capable level models. + if !util.ModelSupportsThinking(normalizedModel) || !util.ModelUsesThinkingLevels(normalizedModel) { return false, "", false } effort, ok := util.ReasoningEffortFromMetadata(metadata) @@ -574,6 +615,7 @@ func buildBodyForProtocolWithRawThinking(t *testing.T, fromProtocol, toProtocol, ) var err error + allowCompat := isOpenAICompatModel(model) switch toProtocol { case "gemini": body = util.ApplyDefaultThinkingIfNeeded(model, body) @@ -583,10 +625,11 @@ func buildBodyForProtocolWithRawThinking(t *testing.T, fromProtocol, toProtocol, // For raw payload, Claude thinking is passed through by translator // No additional processing needed as thinking is already in body case "openai": - body = normalizeThinkingConfigLocal(body, model) + body = normalizeThinkingConfigLocal(body, model, allowCompat) err = validateThinkingConfigLocal(body, model) case "codex": - body, err = normalizeCodexPayload(body, model) + // Codex does not support allowCompat; always use false. 
+ body, err = normalizeCodexPayload(body, model, false) } body, _ = sjson.SetBytes(body, "model", model) @@ -599,9 +642,10 @@ func TestRawPayloadThinkingConversions(t *testing.T) { defer cleanup() models := []string{ - "gpt-5", // supports levels (low/medium/high) - "gemini-2.5-pro", // supports numeric budget - "qwen3-coder-flash", // no thinking support + "gpt-5", // supports levels (low/medium/high) + "gemini-2.5-pro", // supports numeric budget + "qwen3-coder-flash", // no thinking support + "custom-thinking-model", // openai-compatible model with forced thinking suffix } fromProtocols := []string{"openai", "claude", "gemini", "openai-response"} toProtocols := []string{"gemini", "claude", "openai", "codex"} @@ -614,6 +658,7 @@ func TestRawPayloadThinkingConversions(t *testing.T) { for _, model := range models { supportsThinking := util.ModelSupportsThinking(model) usesLevels := util.ModelUsesThinkingLevels(model) + allowCompat := isOpenAICompatModel(model) for _, from := range fromProtocols { var cases []scenario @@ -624,7 +669,7 @@ func TestRawPayloadThinkingConversions(t *testing.T) { {name: "effort-low", thinkingParam: "low"}, {name: "effort-medium", thinkingParam: "medium"}, {name: "effort-high", thinkingParam: "high"}, - {name: "effort-invalid-xhigh", thinkingParam: "xhigh"}, + {name: "effort-xhigh", thinkingParam: "xhigh"}, {name: "effort-invalid-foo", thinkingParam: "foo"}, } case "gemini": @@ -659,46 +704,65 @@ func TestRawPayloadThinkingConversions(t *testing.T) { t.Run(testName, func(t *testing.T) { expectPresent, expectValue, expectErr := func() (bool, string, bool) { if cs.thinkingParam == nil { - // No thinking param provided - if to == "codex" && from != "openai-response" { - // Codex translators default to medium - if supportsThinking && usesLevels { - return true, "medium", false - } + if to == "codex" && from != "openai-response" && supportsThinking && usesLevels { + // Codex translators default reasoning.effort to "medium" for thinking-capable 
level models + return true, "medium", false } return false, "", false } - if !supportsThinking { - return false, "", false - } switch to { case "gemini": - // Gemini expects numeric budget + if !supportsThinking || usesLevels { + return false, "", false + } + // Gemini expects numeric budget (only for non-level models) if budget, ok := cs.thinkingParam.(int); ok { norm := util.NormalizeThinkingBudget(model, budget) return true, fmt.Sprintf("%d", norm), false } + // Convert effort level to budget for non-level models only if effort, ok := cs.thinkingParam.(string); ok && effort != "" { - if b, okB := util.ThinkingEffortToBudget(model, effort); okB { - return true, fmt.Sprintf("%d", b), false + if budget, okB := util.ThinkingEffortToBudget(model, effort); okB { + // ThinkingEffortToBudget already returns normalized budget + return true, fmt.Sprintf("%d", budget), false } + // Invalid effort maps to default/fallback + return true, fmt.Sprintf("%d", -1), false } return false, "", false case "claude": - // Claude expects numeric budget + if !supportsThinking || usesLevels { + return false, "", false + } + // Claude expects numeric budget (only for non-level models) if budget, ok := cs.thinkingParam.(int); ok && budget > 0 { norm := util.NormalizeThinkingBudget(model, budget) return true, fmt.Sprintf("%d", norm), false } + // Convert effort level to budget for non-level models only if effort, ok := cs.thinkingParam.(string); ok && effort != "" { - if b, okB := util.ThinkingEffortToBudget(model, effort); okB && b > 0 { - return true, fmt.Sprintf("%d", b), false + if budget, okB := util.ThinkingEffortToBudget(model, effort); okB { + // ThinkingEffortToBudget already returns normalized budget + return true, fmt.Sprintf("%d", budget), false } + // Invalid effort - claude may still set thinking with type:enabled + return true, "", false } return false, "", false case "openai": - if !usesLevels { + if allowCompat { + if effort, ok := cs.thinkingParam.(string); ok && 
strings.TrimSpace(effort) != "" { + return true, strings.ToLower(strings.TrimSpace(effort)), false + } + if budget, ok := cs.thinkingParam.(int); ok { + if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" { + return true, mapped, false + } + } + return false, "", false + } + if !supportsThinking || !usesLevels { return false, "", false } if effort, ok := cs.thinkingParam.(string); ok && effort != "" { @@ -714,7 +778,8 @@ func TestRawPayloadThinkingConversions(t *testing.T) { } return false, "", false case "codex": - if !usesLevels { + // Codex does not support allowCompat; require thinking-capable level models. + if !supportsThinking || !usesLevels { return false, "", false } if effort, ok := cs.thinkingParam.(string); ok && effort != "" { @@ -728,7 +793,10 @@ func TestRawPayloadThinkingConversions(t *testing.T) { return true, mapped, false } } - // thinkingParam was non-nil but couldn't map - no default medium + if from != "openai-response" { + // Codex translators default reasoning.effort to "medium" for thinking-capable models + return true, "medium", false + } return false, "", false } return false, "", false From e8976f9898c4ad6c09690678748751e062926392 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 14 Dec 2025 13:18:35 +0800 Subject: [PATCH 3/9] fix(thinking): map budgets to effort for level models --- .../antigravity_openai_request.go | 5 +++-- .../claude/gemini/claude_gemini_request.go | 3 ++- .../codex/claude/codex_claude_request.go | 18 +++++++++++++++++- .../codex/gemini/codex_gemini_request.go | 17 ++++++++++++++++- .../gemini-cli_openai_request.go | 5 +++-- .../gemini/claude/gemini_claude_request.go | 3 ++- .../chat-completions/gemini_openai_request.go | 7 +++++-- .../gemini_openai-responses_request.go | 6 ++++-- .../openai/claude/openai_claude_request.go | 13 +++++++++++++ .../openai/gemini/openai_gemini_request.go | 13 +++++++++++++ 10 files changed, 78 insertions(+), 
12 deletions(-) diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index 717f88f7b..251357bb1 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -39,7 +39,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ // Note: OpenAI official fields take precedence over extra_body.google.thinking_config re := gjson.GetBytes(rawJSON, "reasoning_effort") hasOfficialThinking := re.Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) { + if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { switch re.String() { case "none": out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts") @@ -63,7 +63,8 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) - if !hasOfficialThinking && util.ModelSupportsThinking(modelName) { + // Only apply for models that use numeric budgets, not discrete levels. 
+ if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int diff --git a/internal/translator/claude/gemini/claude_gemini_request.go b/internal/translator/claude/gemini/claude_gemini_request.go index 302c7d66b..780dd5f4c 100644 --- a/internal/translator/claude/gemini/claude_gemini_request.go +++ b/internal/translator/claude/gemini/claude_gemini_request.go @@ -114,7 +114,8 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream } } // Include thoughts configuration for reasoning process visibility - if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { + // Only apply for models that use numeric budgets, not discrete levels. + if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() && !util.ModelUsesThinkingLevels(modelName) { if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() { if includeThoughts.Type == gjson.True { out, _ = sjson.Set(out, "thinking.type", "enabled") diff --git a/internal/translator/codex/claude/codex_claude_request.go b/internal/translator/codex/claude/codex_claude_request.go index 3c86e3cf7..414efa89c 100644 --- a/internal/translator/codex/claude/codex_claude_request.go +++ b/internal/translator/codex/claude/codex_claude_request.go @@ -12,6 +12,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -214,7 +215,22 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) // Add additional configuration parameters for the Codex API. 
template, _ = sjson.Set(template, "parallel_tool_calls", true) - template, _ = sjson.Set(template, "reasoning.effort", "medium") + + // Convert thinking.budget_tokens to reasoning.effort for level-based models + reasoningEffort := "medium" // default + if thinking := rootResult.Get("thinking"); thinking.Exists() && thinking.IsObject() { + if thinking.Get("type").String() == "enabled" { + if util.ModelUsesThinkingLevels(modelName) { + if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() { + budget := int(budgetTokens.Int()) + if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + reasoningEffort = effort + } + } + } + } + } + template, _ = sjson.Set(template, "reasoning.effort", reasoningEffort) template, _ = sjson.Set(template, "reasoning.summary", "auto") template, _ = sjson.Set(template, "stream", true) template, _ = sjson.Set(template, "store", false) diff --git a/internal/translator/codex/gemini/codex_gemini_request.go b/internal/translator/codex/gemini/codex_gemini_request.go index 427fd9ad5..c2dacd3e2 100644 --- a/internal/translator/codex/gemini/codex_gemini_request.go +++ b/internal/translator/codex/gemini/codex_gemini_request.go @@ -245,7 +245,22 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool) // Fixed flags aligning with Codex expectations out, _ = sjson.Set(out, "parallel_tool_calls", true) - out, _ = sjson.Set(out, "reasoning.effort", "medium") + + // Convert thinkingBudget to reasoning.effort for level-based models + reasoningEffort := "medium" // default + if genConfig := root.Get("generationConfig"); genConfig.Exists() { + if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { + if util.ModelUsesThinkingLevels(modelName) { + if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { + budget := int(thinkingBudget.Int()) + if effort, ok := 
util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + reasoningEffort = effort + } + } + } + } + } + out, _ = sjson.Set(out, "reasoning.effort", reasoningEffort) out, _ = sjson.Set(out, "reasoning.summary", "auto") out, _ = sjson.Set(out, "stream", true) out, _ = sjson.Set(out, "store", false) diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index b52bf224a..c7560d2fe 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -39,7 +39,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo // Note: OpenAI official fields take precedence over extra_body.google.thinking_config re := gjson.GetBytes(rawJSON, "reasoning_effort") hasOfficialThinking := re.Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) { + if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { switch re.String() { case "none": out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts") @@ -63,7 +63,8 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) - if !hasOfficialThinking && util.ModelSupportsThinking(modelName) { + // Only apply for models that use numeric budgets, not discrete levels. 
+ if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go index 45a5a88f4..f626a581b 100644 --- a/internal/translator/gemini/claude/gemini_claude_request.go +++ b/internal/translator/gemini/claude/gemini_claude_request.go @@ -154,7 +154,8 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) } // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled - if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) { + // Only apply for models that use numeric budgets, not discrete levels. + if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { if t.Get("type").String() == "enabled" { if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { budget := int(b.Int()) diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 8c48a5b3c..e754d0f16 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -37,9 +37,11 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) // Reasoning effort -> thinkingBudget/include_thoughts // Note: OpenAI official fields take precedence over extra_body.google.thinking_config + // Only convert for models that use numeric budgets (not discrete levels) to avoid + // incorrectly applying thinkingBudget for level-based models like gpt-5. 
re := gjson.GetBytes(rawJSON, "reasoning_effort") hasOfficialThinking := re.Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) { + if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { switch re.String() { case "none": out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts") @@ -63,7 +65,8 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) - if !hasOfficialThinking && util.ModelSupportsThinking(modelName) { + // Only apply for models that use numeric budgets, not discrete levels. + if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index bdf597853..b6f471d9e 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -389,8 +389,9 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte } // OpenAI official reasoning fields take precedence + // Only convert for models that use numeric budgets (not discrete levels). 
hasOfficialThinking := root.Get("reasoning.effort").Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) { + if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { reasoningEffort := root.Get("reasoning.effort") switch reasoningEffort.String() { case "none": @@ -418,7 +419,8 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte } // Cherry Studio extension (applies only when official fields are missing) - if !hasOfficialThinking && util.ModelSupportsThinking(modelName) { + // Only apply for models that use numeric budgets, not discrete levels. + if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int diff --git a/internal/translator/openai/claude/openai_claude_request.go b/internal/translator/openai/claude/openai_claude_request.go index 3521b2e5f..0ee8c2253 100644 --- a/internal/translator/openai/claude/openai_claude_request.go +++ b/internal/translator/openai/claude/openai_claude_request.go @@ -10,6 +10,7 @@ import ( "encoding/json" "strings" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -60,6 +61,18 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream // Stream out, _ = sjson.Set(out, "stream", stream) + // Thinking: Convert Claude thinking.budget_tokens to OpenAI reasoning_effort + if thinking := root.Get("thinking"); thinking.Exists() && thinking.IsObject() { + if thinkingType := thinking.Get("type"); thinkingType.Exists() && thinkingType.String() == "enabled" { + if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() { + budget := int(budgetTokens.Int()) + if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + out, _ = 
sjson.Set(out, "reasoning_effort", effort) + } + } + } + } + // Process messages and system var messagesJSON = "[]" diff --git a/internal/translator/openai/gemini/openai_gemini_request.go b/internal/translator/openai/gemini/openai_gemini_request.go index deedf96a3..1fd20f823 100644 --- a/internal/translator/openai/gemini/openai_gemini_request.go +++ b/internal/translator/openai/gemini/openai_gemini_request.go @@ -13,6 +13,7 @@ import ( "math/big" "strings" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -76,6 +77,18 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream out, _ = sjson.Set(out, "stop", stops) } } + + // Convert thinkingBudget to reasoning_effort for level-based models + if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { + if util.ModelUsesThinkingLevels(modelName) { + if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { + budget := int(thinkingBudget.Int()) + if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + out, _ = sjson.Set(out, "reasoning_effort", effort) + } + } + } + } } // Stream parameter From 716aa71f6ef54ab26efb8fc6231cbb0ddab2a267 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 14 Dec 2025 14:45:01 +0800 Subject: [PATCH 4/9] fix(thinking): centralize reasoning_effort mapping Move OpenAI `reasoning_effort` -> Gemini `thinkingConfig` budget logic into shared helpers used by Gemini, Gemini CLI, and antigravity translators. Normalize Claude thinking handling by preferring positive budgets, applying budget token normalization, and gating by model support. Always convert Gemini `thinkingBudget` back to OpenAI `reasoning_effort` to support allowCompat models, and update tests for normalization behavior. 
--- .../antigravity_openai_request.go | 21 +------- .../claude/gemini/claude_gemini_request.go | 19 +++---- .../gemini-cli_openai_request.go | 21 +------- .../chat-completions/gemini_openai_request.go | 21 +------- .../gemini_openai-responses_request.go | 24 +-------- .../openai/gemini/openai_gemini_request.go | 13 +++-- internal/util/gemini_thinking.go | 53 +++++++++++++++++++ test/thinking_conversion_test.go | 13 +++-- 8 files changed, 83 insertions(+), 102 deletions(-) diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index 251357bb1..2a4684e23 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -40,26 +40,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ re := gjson.GetBytes(rawJSON, "reasoning_effort") hasOfficialThinking := re.Exists() if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - switch re.String() { - case "none": - out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts") - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0) - case "auto": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - case "low": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - case "medium": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192) - out, _ = sjson.SetBytes(out, 
"request.generationConfig.thinkingConfig.include_thoughts", true) - case "high": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - default: - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - } + out = util.ApplyReasoningEffortToGeminiCLI(out, re.String()) } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) diff --git a/internal/translator/claude/gemini/claude_gemini_request.go b/internal/translator/claude/gemini/claude_gemini_request.go index 780dd5f4c..6518947bd 100644 --- a/internal/translator/claude/gemini/claude_gemini_request.go +++ b/internal/translator/claude/gemini/claude_gemini_request.go @@ -114,15 +114,16 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream } } // Include thoughts configuration for reasoning process visibility - // Only apply for models that use numeric budgets, not discrete levels. - if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() && !util.ModelUsesThinkingLevels(modelName) { - if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() { - if includeThoughts.Type == gjson.True { - out, _ = sjson.Set(out, "thinking.type", "enabled") - if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { - out, _ = sjson.Set(out, "thinking.budget_tokens", thinkingBudget.Int()) - } - } + // Only apply for models that support thinking and use numeric budgets, not discrete levels. 
+ if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + // Check for thinkingBudget first - if present, enable thinking with budget + if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 { + out, _ = sjson.Set(out, "thinking.type", "enabled") + normalizedBudget := util.NormalizeThinkingBudget(modelName, int(thinkingBudget.Int())) + out, _ = sjson.Set(out, "thinking.budget_tokens", normalizedBudget) + } else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True { + // Fallback to include_thoughts if no budget specified + out, _ = sjson.Set(out, "thinking.type", "enabled") } } } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index c7560d2fe..dc5cf935a 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -40,26 +40,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo re := gjson.GetBytes(rawJSON, "reasoning_effort") hasOfficialThinking := re.Exists() if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - switch re.String() { - case "none": - out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts") - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0) - case "auto": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - case "low": - out, _ 
= sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - case "medium": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - case "high": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - default: - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - } + out = util.ApplyReasoningEffortToGeminiCLI(out, re.String()) } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index e754d0f16..54843f0d7 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -42,26 +42,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) re := gjson.GetBytes(rawJSON, "reasoning_effort") hasOfficialThinking := re.Exists() if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - switch re.String() { - case "none": - out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts") - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 0) - case "auto": - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1) - out, _ = sjson.SetBytes(out, 
"generationConfig.thinkingConfig.include_thoughts", true) - case "low": - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024) - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) - case "medium": - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192) - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) - case "high": - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 32768) - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) - default: - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1) - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) - } + out = util.ApplyReasoningEffortToGemini(out, re.String()) } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index b6f471d9e..1bf67e7f5 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -393,29 +393,7 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte hasOfficialThinking := root.Get("reasoning.effort").Exists() if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { reasoningEffort := root.Get("reasoning.effort") - switch reasoningEffort.String() { - case "none": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0) - case "auto": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 
-1) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) - case "minimal": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) - case "low": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) - case "medium": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) - case "high": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 32768) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) - default: - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) - } + out = string(util.ApplyReasoningEffortToGemini([]byte(out), reasoningEffort.String())) } // Cherry Studio extension (applies only when official fields are missing) diff --git a/internal/translator/openai/gemini/openai_gemini_request.go b/internal/translator/openai/gemini/openai_gemini_request.go index 1fd20f823..cca6ebf74 100644 --- a/internal/translator/openai/gemini/openai_gemini_request.go +++ b/internal/translator/openai/gemini/openai_gemini_request.go @@ -78,14 +78,13 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream } } - // Convert thinkingBudget to reasoning_effort for level-based models + // Convert thinkingBudget to reasoning_effort + // Always perform conversion to support allowCompat models that may not be in registry if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { - if util.ModelUsesThinkingLevels(modelName) { - if thinkingBudget := thinkingConfig.Get("thinkingBudget"); 
thinkingBudget.Exists() { - budget := int(thinkingBudget.Int()) - if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" { - out, _ = sjson.Set(out, "reasoning_effort", effort) - } + if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { + budget := int(thinkingBudget.Int()) + if effort, ok := util.OpenAIThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + out, _ = sjson.Set(out, "reasoning_effort", effort) } } } diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go index a89aba26e..661982cd5 100644 --- a/internal/util/gemini_thinking.go +++ b/internal/util/gemini_thinking.go @@ -152,6 +152,59 @@ func NormalizeGeminiCLIThinkingBudget(model string, body []byte) []byte { return updated } +// ReasoningEffortBudgetMapping defines the thinkingBudget values for each reasoning effort level. +var ReasoningEffortBudgetMapping = map[string]int{ + "none": 0, + "auto": -1, + "minimal": 512, + "low": 1024, + "medium": 8192, + "high": 24576, + "xhigh": 32768, +} + +// ApplyReasoningEffortToGemini applies OpenAI reasoning_effort to Gemini thinkingConfig +// for standard Gemini API format (generationConfig.thinkingConfig path). +// Returns the modified body with thinkingBudget and include_thoughts set. 
+func ApplyReasoningEffortToGemini(body []byte, effort string) []byte { + budget, ok := ReasoningEffortBudgetMapping[effort] + if !ok { + budget = -1 // default to auto + } + + budgetPath := "generationConfig.thinkingConfig.thinkingBudget" + includePath := "generationConfig.thinkingConfig.include_thoughts" + + if effort == "none" { + body, _ = sjson.DeleteBytes(body, "generationConfig.thinkingConfig") + } else { + body, _ = sjson.SetBytes(body, budgetPath, budget) + body, _ = sjson.SetBytes(body, includePath, true) + } + return body +} + +// ApplyReasoningEffortToGeminiCLI applies OpenAI reasoning_effort to Gemini CLI thinkingConfig +// for Gemini CLI API format (request.generationConfig.thinkingConfig path). +// Returns the modified body with thinkingBudget and include_thoughts set. +func ApplyReasoningEffortToGeminiCLI(body []byte, effort string) []byte { + budget, ok := ReasoningEffortBudgetMapping[effort] + if !ok { + budget = -1 // default to auto + } + + budgetPath := "request.generationConfig.thinkingConfig.thinkingBudget" + includePath := "request.generationConfig.thinkingConfig.include_thoughts" + + if effort == "none" { + body, _ = sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig") + } else { + body, _ = sjson.SetBytes(body, budgetPath, budget) + body, _ = sjson.SetBytes(body, includePath, true) + } + return body +} + // ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel" // and converts it to "thinkingBudget". 
// "high" -> 32768 diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index 60f4a02ed..34b344f06 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -746,14 +746,21 @@ func TestRawPayloadThinkingConversions(t *testing.T) { // ThinkingEffortToBudget already returns normalized budget return true, fmt.Sprintf("%d", budget), false } - // Invalid effort - claude may still set thinking with type:enabled - return true, "", false + // Invalid effort - claude sets thinking.type:enabled but no budget_tokens + return false, "", false } return false, "", false case "openai": if allowCompat { if effort, ok := cs.thinkingParam.(string); ok && strings.TrimSpace(effort) != "" { - return true, strings.ToLower(strings.TrimSpace(effort)), false + // For allowCompat models, invalid effort values are normalized to "auto" + normalized := strings.ToLower(strings.TrimSpace(effort)) + switch normalized { + case "none", "auto", "low", "medium", "high", "xhigh": + return true, normalized, false + default: + return true, "auto", false + } } if budget, ok := cs.thinkingParam.(int); ok { if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" { From a4a3274a5525170d49f02957608b46a20e20f63a Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 14 Dec 2025 18:32:20 +0800 Subject: [PATCH 5/9] test(thinking): expand conversion edge case coverage --- test/thinking_conversion_test.go | 113 +++++++++++++++++-------------- 1 file changed, 63 insertions(+), 50 deletions(-) diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index 34b344f06..839fc3759 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -28,7 +28,7 @@ func (e statusErr) Error() string { return e.msg } // model that should have reasoning effort passed through even if not in registry. 
// This simulates the allowCompat behavior from OpenAICompatExecutor. func isOpenAICompatModel(model string) bool { - return model == "custom-thinking-model" + return model == "openai-compat" } // registerCoreModels loads representative models across providers into the registry @@ -47,12 +47,12 @@ func registerCoreModels(t *testing.T) func() { // The allowCompat flag should preserve reasoning effort for such models. customOpenAIModels := []*registry.ModelInfo{ { - ID: "custom-thinking-model", + ID: "openai-compat", Object: "model", Created: 1700000000, OwnedBy: "custom-provider", Type: "openai", - DisplayName: "Custom Thinking Model", + DisplayName: "OpenAI Compatible Model", Description: "OpenAI-compatible model with forced thinking suffix support", }, } @@ -321,10 +321,10 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) { defer cleanup() models := []string{ - "gpt-5", // supports levels (low/medium/high) - "gemini-2.5-pro", // supports numeric budget - "qwen3-coder-flash", // no thinking support - "custom-thinking-model", // openai-compatible model with forced thinking suffix + "gpt-5", // supports levels (level-based thinking) + "gemini-2.5-pro", // supports numeric budget + "qwen3-code-plus", // no thinking support + "openai-compat", // openai-compatible channel (allowCompat=true) } fromProtocols := []string{"openai", "claude", "gemini", "openai-response"} toProtocols := []string{"gemini", "claude", "openai", "codex"} @@ -357,12 +357,7 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) { } for _, model := range models { - info := registry.GetGlobalRegistry().GetModelInfo(model) - min, max := 0, 0 - if info != nil && info.Thinking != nil { - min = info.Thinking.Min - max = info.Thinking.Max - } + _ = registry.GetGlobalRegistry().GetModelInfo(model) for _, from := range fromProtocols { // Scenario selection follows protocol semantics: @@ -372,29 +367,29 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) { 
{name: "no-suffix", modelSuffix: model, expectFn: func(_ *registry.ModelInfo) (bool, int64) { return false, 0 }}, } if from == "openai" || from == "openai-response" { + // Level-based test cases: auto, none, minimal, low, medium, high, xhigh, foo(invalid) + // Maps to numeric: -1, 0, 512, 1024, 8192, 24576, 32768, invalid cases = append(cases, - scenario{name: "level-low", modelSuffix: fmt.Sprintf("%s(low)", model), expectFn: levelBudgetFn("low")}, - scenario{name: "level-high", modelSuffix: fmt.Sprintf("%s(high)", model), expectFn: levelBudgetFn("high")}, scenario{name: "level-auto", modelSuffix: fmt.Sprintf("%s(auto)", model), expectFn: levelBudgetFn("auto")}, + scenario{name: "level-none", modelSuffix: fmt.Sprintf("%s(none)", model), expectFn: levelBudgetFn("none")}, + scenario{name: "level-minimal", modelSuffix: fmt.Sprintf("%s(minimal)", model), expectFn: levelBudgetFn("minimal")}, + scenario{name: "level-low", modelSuffix: fmt.Sprintf("%s(low)", model), expectFn: levelBudgetFn("low")}, + scenario{name: "level-medium", modelSuffix: fmt.Sprintf("%s(medium)", model), expectFn: levelBudgetFn("medium")}, + scenario{name: "level-high", modelSuffix: fmt.Sprintf("%s(high)", model), expectFn: levelBudgetFn("high")}, + scenario{name: "level-xhigh", modelSuffix: fmt.Sprintf("%s(xhigh)", model), expectFn: levelBudgetFn("xhigh")}, + scenario{name: "level-invalid", modelSuffix: fmt.Sprintf("%s(invalid)", model), expectFn: levelBudgetFn("invalid")}, ) } else { // claude or gemini - if util.ModelUsesThinkingLevels(model) { - // Numeric budgets for level-based models are mapped into levels when needed. 
- cases = append(cases, - scenario{name: "numeric-0", modelSuffix: fmt.Sprintf("%s(0)", model), expectFn: buildBudgetFn(0)}, - scenario{name: "numeric-1024", modelSuffix: fmt.Sprintf("%s(1024)", model), expectFn: buildBudgetFn(1024)}, - scenario{name: "numeric-1025", modelSuffix: fmt.Sprintf("%s(1025)", model), expectFn: buildBudgetFn(1025)}, - scenario{name: "numeric-8192", modelSuffix: fmt.Sprintf("%s(8192)", model), expectFn: buildBudgetFn(8192)}, - scenario{name: "numeric-8193", modelSuffix: fmt.Sprintf("%s(8193)", model), expectFn: buildBudgetFn(8193)}, - scenario{name: "numeric-24576", modelSuffix: fmt.Sprintf("%s(24576)", model), expectFn: buildBudgetFn(24576)}, - scenario{name: "numeric-24577", modelSuffix: fmt.Sprintf("%s(24577)", model), expectFn: buildBudgetFn(24577)}, - ) - } else { - cases = append(cases, - scenario{name: "numeric-below-min", modelSuffix: fmt.Sprintf("%s(%d)", model, min-10), expectFn: buildBudgetFn(min - 10)}, - scenario{name: "numeric-above-max", modelSuffix: fmt.Sprintf("%s(%d)", model, max+10), expectFn: buildBudgetFn(max + 10)}, - ) - } + // Numeric test cases: -1, 0, 1023, 1025, 8193, 24577 + // Maps to levels: auto, none, low, medium, high, xhigh + cases = append(cases, + scenario{name: "numeric-neg1", modelSuffix: fmt.Sprintf("%s(-1)", model), expectFn: buildBudgetFn(-1)}, + scenario{name: "numeric-0", modelSuffix: fmt.Sprintf("%s(0)", model), expectFn: buildBudgetFn(0)}, + scenario{name: "numeric-1023", modelSuffix: fmt.Sprintf("%s(1023)", model), expectFn: buildBudgetFn(1023)}, + scenario{name: "numeric-1025", modelSuffix: fmt.Sprintf("%s(1025)", model), expectFn: buildBudgetFn(1025)}, + scenario{name: "numeric-8193", modelSuffix: fmt.Sprintf("%s(8193)", model), expectFn: buildBudgetFn(8193)}, + scenario{name: "numeric-24577", modelSuffix: fmt.Sprintf("%s(24577)", model), expectFn: buildBudgetFn(24577)}, + ) } for _, to := range toProtocols { @@ -642,10 +637,10 @@ func TestRawPayloadThinkingConversions(t *testing.T) { defer 
cleanup() models := []string{ - "gpt-5", // supports levels (low/medium/high) - "gemini-2.5-pro", // supports numeric budget - "qwen3-coder-flash", // no thinking support - "custom-thinking-model", // openai-compatible model with forced thinking suffix + "gpt-5", // supports levels (level-based thinking) + "gemini-2.5-pro", // supports numeric budget + "qwen3-code-plus", // no thinking support + "openai-compat", // openai-compatible channel (allowCompat=true) } fromProtocols := []string{"openai", "claude", "gemini", "openai-response"} toProtocols := []string{"gemini", "claude", "openai", "codex"} @@ -664,27 +659,28 @@ func TestRawPayloadThinkingConversions(t *testing.T) { var cases []scenario switch from { case "openai", "openai-response": + // Level-based test cases: auto, none, minimal, low, medium, high, xhigh, foo(invalid) cases = []scenario{ {name: "no-thinking", thinkingParam: nil}, + {name: "effort-auto", thinkingParam: "auto"}, + {name: "effort-none", thinkingParam: "none"}, + {name: "effort-minimal", thinkingParam: "minimal"}, {name: "effort-low", thinkingParam: "low"}, {name: "effort-medium", thinkingParam: "medium"}, {name: "effort-high", thinkingParam: "high"}, {name: "effort-xhigh", thinkingParam: "xhigh"}, - {name: "effort-invalid-foo", thinkingParam: "foo"}, + {name: "effort-invalid", thinkingParam: "invalid"}, } - case "gemini": + case "gemini", "claude": + // Numeric test cases: -1, 0, 1023, 1025, 8193, 24577 cases = []scenario{ {name: "no-thinking", thinkingParam: nil}, - {name: "budget-1024", thinkingParam: 1024}, - {name: "budget-8192", thinkingParam: 8192}, - {name: "budget-16384", thinkingParam: 16384}, - } - case "claude": - cases = []scenario{ - {name: "no-thinking", thinkingParam: nil}, - {name: "budget-1024", thinkingParam: 1024}, - {name: "budget-8192", thinkingParam: 8192}, - {name: "budget-16384", thinkingParam: 16384}, + {name: "budget-neg1", thinkingParam: -1}, + {name: "budget-0", thinkingParam: 0}, + {name: "budget-1023", 
thinkingParam: 1023}, + {name: "budget-1025", thinkingParam: 1025}, + {name: "budget-8193", thinkingParam: 8193}, + {name: "budget-24577", thinkingParam: 24577}, } } @@ -723,12 +719,16 @@ func TestRawPayloadThinkingConversions(t *testing.T) { } // Convert effort level to budget for non-level models only if effort, ok := cs.thinkingParam.(string); ok && effort != "" { + // "none" disables thinking - no thinkingBudget in output + if strings.ToLower(effort) == "none" { + return false, "", false + } if budget, okB := util.ThinkingEffortToBudget(model, effort); okB { // ThinkingEffortToBudget already returns normalized budget return true, fmt.Sprintf("%d", budget), false } - // Invalid effort maps to default/fallback - return true, fmt.Sprintf("%d", -1), false + // Invalid effort maps to default auto (-1) + return true, "-1", false } return false, "", false case "claude": @@ -742,6 +742,11 @@ func TestRawPayloadThinkingConversions(t *testing.T) { } // Convert effort level to budget for non-level models only if effort, ok := cs.thinkingParam.(string); ok && effort != "" { + // "none" and "auto" don't produce budget_tokens + lower := strings.ToLower(effort) + if lower == "none" || lower == "auto" { + return false, "", false + } if budget, okB := util.ThinkingEffortToBudget(model, effort); okB { // ThinkingEffortToBudget already returns normalized budget return true, fmt.Sprintf("%d", budget), false @@ -780,6 +785,10 @@ func TestRawPayloadThinkingConversions(t *testing.T) { } if budget, ok := cs.thinkingParam.(int); ok { if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" { + // Check if the mapped effort is valid for this model + if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel { + return true, mapped, true // expect validation error + } return true, mapped, false } } @@ -797,6 +806,10 @@ func TestRawPayloadThinkingConversions(t *testing.T) { } if budget, ok := cs.thinkingParam.(int); ok { if mapped, 
okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" { + // Check if the mapped effort is valid for this model + if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel { + return true, mapped, true // expect validation error + } return true, mapped, false } } From 712ce9f78189a3a8a5c77ba425f936bbb351964f Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 14 Dec 2025 18:45:03 +0800 Subject: [PATCH 6/9] fix(thinking): drop unsupported none effort When budget 0 maps to "none" for models that use thinking levels but don't support that effort level, strip thinking fields instead of setting an invalid reasoning_effort value. Tests now expect removal for this edge case. --- internal/runtime/executor/payload_helpers.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 6e352c519..667f29da1 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -73,6 +73,12 @@ func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model if util.ModelUsesThinkingLevels(baseModel) || allowCompat { if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { if effort, ok := util.OpenAIThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" { + if *budget == 0 && effort == "none" && util.ModelUsesThinkingLevels(baseModel) { + if _, supported := util.NormalizeReasoningEffortLevel(baseModel, effort); !supported { + return stripThinkingFields(payload, false) + } + } + if updated, err := sjson.SetBytes(payload, field, effort); err == nil { return updated } From d20b71deb97eb3c14411612e19c9603f8eb0f3a9 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 14 Dec 2025 20:11:30 +0800 Subject: [PATCH 7/9] fix(thinking): normalize effort mapping Route 
OpenAI reasoning effort through ThinkingEffortToBudget for Claude translators, preserve "minimal" when translating OpenAI Responses, and treat blank/unknown efforts as no-ops for Gemini thinking configs. Also map budget -1 to "auto" and expand cross-protocol thinking tests. --- .../chat-completions/claude_openai_request.go | 29 +-- .../claude_openai-responses_request.go | 31 +-- .../openai_openai-responses_request.go | 2 +- internal/util/gemini_thinking.go | 40 ++-- internal/util/openai_thinking.go | 7 +- test/thinking_conversion_test.go | 193 +++++++++--------- 6 files changed, 160 insertions(+), 142 deletions(-) diff --git a/internal/translator/claude/openai/chat-completions/claude_openai_request.go b/internal/translator/claude/openai/chat-completions/claude_openai_request.go index 9825c661a..9122b97e7 100644 --- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go +++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go @@ -67,19 +67,22 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream root := gjson.ParseBytes(rawJSON) if v := root.Get("reasoning_effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - out, _ = sjson.Set(out, "thinking.type", "enabled") - - switch v.String() { - case "none": - out, _ = sjson.Set(out, "thinking.type", "disabled") - case "low": - out, _ = sjson.Set(out, "thinking.budget_tokens", 1024) - case "medium": - out, _ = sjson.Set(out, "thinking.budget_tokens", 8192) - case "high": - out, _ = sjson.Set(out, "thinking.budget_tokens", 24576) - case "xhigh": - out, _ = sjson.Set(out, "thinking.budget_tokens", 32768) + effort := strings.ToLower(strings.TrimSpace(v.String())) + if effort != "" { + budget, ok := util.ThinkingEffortToBudget(modelName, effort) + if ok { + switch budget { + case 0: + out, _ = sjson.Set(out, "thinking.type", "disabled") + case -1: + out, _ = sjson.Set(out, "thinking.type", 
"enabled") + default: + if budget > 0 { + out, _ = sjson.Set(out, "thinking.type", "enabled") + out, _ = sjson.Set(out, "thinking.budget_tokens", budget) + } + } + } } } diff --git a/internal/translator/claude/openai/responses/claude_openai-responses_request.go b/internal/translator/claude/openai/responses/claude_openai-responses_request.go index 4a19bb928..b3654ca09 100644 --- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go +++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go @@ -54,21 +54,22 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte root := gjson.ParseBytes(rawJSON) if v := root.Get("reasoning.effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - out, _ = sjson.Set(out, "thinking.type", "enabled") - - switch v.String() { - case "none": - out, _ = sjson.Set(out, "thinking.type", "disabled") - case "minimal": - out, _ = sjson.Set(out, "thinking.budget_tokens", 512) - case "low": - out, _ = sjson.Set(out, "thinking.budget_tokens", 1024) - case "medium": - out, _ = sjson.Set(out, "thinking.budget_tokens", 8192) - case "high": - out, _ = sjson.Set(out, "thinking.budget_tokens", 24576) - case "xhigh": - out, _ = sjson.Set(out, "thinking.budget_tokens", 32768) + effort := strings.ToLower(strings.TrimSpace(v.String())) + if effort != "" { + budget, ok := util.ThinkingEffortToBudget(modelName, effort) + if ok { + switch budget { + case 0: + out, _ = sjson.Set(out, "thinking.type", "disabled") + case -1: + out, _ = sjson.Set(out, "thinking.type", "enabled") + default: + if budget > 0 { + out, _ = sjson.Set(out, "thinking.type", "enabled") + out, _ = sjson.Set(out, "thinking.budget_tokens", budget) + } + } + } } } diff --git a/internal/translator/openai/openai/responses/openai_openai-responses_request.go b/internal/translator/openai/openai/responses/openai_openai-responses_request.go index 1b1f071fa..8bac0485f 
100644 --- a/internal/translator/openai/openai/responses/openai_openai-responses_request.go +++ b/internal/translator/openai/openai/responses/openai_openai-responses_request.go @@ -195,7 +195,7 @@ func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inpu case "auto": out, _ = sjson.Set(out, "reasoning_effort", "auto") case "minimal": - out, _ = sjson.Set(out, "reasoning_effort", "low") + out, _ = sjson.Set(out, "reasoning_effort", "minimal") case "low": out, _ = sjson.Set(out, "reasoning_effort", "low") case "medium": diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go index 661982cd5..9e349d99d 100644 --- a/internal/util/gemini_thinking.go +++ b/internal/util/gemini_thinking.go @@ -167,20 +167,26 @@ var ReasoningEffortBudgetMapping = map[string]int{ // for standard Gemini API format (generationConfig.thinkingConfig path). // Returns the modified body with thinkingBudget and include_thoughts set. func ApplyReasoningEffortToGemini(body []byte, effort string) []byte { - budget, ok := ReasoningEffortBudgetMapping[effort] - if !ok { - budget = -1 // default to auto + normalized := strings.ToLower(strings.TrimSpace(effort)) + if normalized == "" { + return body } budgetPath := "generationConfig.thinkingConfig.thinkingBudget" includePath := "generationConfig.thinkingConfig.include_thoughts" - if effort == "none" { + if normalized == "none" { body, _ = sjson.DeleteBytes(body, "generationConfig.thinkingConfig") - } else { - body, _ = sjson.SetBytes(body, budgetPath, budget) - body, _ = sjson.SetBytes(body, includePath, true) + return body } + + budget, ok := ReasoningEffortBudgetMapping[normalized] + if !ok { + return body + } + + body, _ = sjson.SetBytes(body, budgetPath, budget) + body, _ = sjson.SetBytes(body, includePath, true) return body } @@ -188,20 +194,26 @@ func ApplyReasoningEffortToGemini(body []byte, effort string) []byte { // for Gemini CLI API format (request.generationConfig.thinkingConfig path). 
// Returns the modified body with thinkingBudget and include_thoughts set. func ApplyReasoningEffortToGeminiCLI(body []byte, effort string) []byte { - budget, ok := ReasoningEffortBudgetMapping[effort] - if !ok { - budget = -1 // default to auto + normalized := strings.ToLower(strings.TrimSpace(effort)) + if normalized == "" { + return body } budgetPath := "request.generationConfig.thinkingConfig.thinkingBudget" includePath := "request.generationConfig.thinkingConfig.include_thoughts" - if effort == "none" { + if normalized == "none" { body, _ = sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig") - } else { - body, _ = sjson.SetBytes(body, budgetPath, budget) - body, _ = sjson.SetBytes(body, includePath, true) + return body } + + budget, ok := ReasoningEffortBudgetMapping[normalized] + if !ok { + return body + } + + body, _ = sjson.SetBytes(body, budgetPath, budget) + body, _ = sjson.SetBytes(body, includePath, true) return body } diff --git a/internal/util/openai_thinking.go b/internal/util/openai_thinking.go index 4dda38f69..5ce7e6bf0 100644 --- a/internal/util/openai_thinking.go +++ b/internal/util/openai_thinking.go @@ -5,15 +5,18 @@ package util // // Ranges: // - 0 -> "none" +// - -1 -> "auto" // - 1..1024 -> "low" // - 1025..8192 -> "medium" // - 8193..24576 -> "high" // - 24577.. -> highest supported level for the model (defaults to "xhigh") // -// Negative values (except the dynamic -1 handled elsewhere) are treated as unsupported. +// Negative values other than -1 are treated as unsupported. 
func OpenAIThinkingBudgetToEffort(model string, budget int) (string, bool) { switch { - case budget < 0: + case budget == -1: + return "auto", true + case budget < -1: return "", false case budget == 0: return "none", true diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index 839fc3759..9d15e1281 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -66,6 +66,37 @@ func registerCoreModels(t *testing.T) func() { } } +var ( + thinkingTestModels = []string{ + "gpt-5", // level-based thinking model + "gemini-2.5-pro", // numeric-budget thinking model + "qwen3-code-plus", // no thinking support + "openai-compat", // allowCompat=true (OpenAI-compatible channel) + } + thinkingTestFromProtocols = []string{"openai", "claude", "gemini", "openai-response"} + thinkingTestToProtocols = []string{"gemini", "claude", "openai", "codex"} + + // Numeric budgets and their level equivalents: + // -1 -> auto + // 0 -> none + // 1..1024 -> low + // 1025..8192 -> medium + // 8193..24576 -> high + // >24576 -> model highest level (right-most in Levels) + thinkingNumericSamples = []int{-1, 0, 1023, 1025, 8193, 64000} + + // Levels and their numeric equivalents: + // auto -> -1 + // none -> 0 + // minimal -> 512 + // low -> 1024 + // medium -> 8192 + // high -> 24576 + // xhigh -> 32768 + // invalid -> invalid (no mapping) + thinkingLevelSamples = []string{"auto", "none", "minimal", "low", "medium", "high", "xhigh", "invalid"} +) + func buildRawPayload(fromProtocol, modelWithSuffix string) []byte { switch fromProtocol { case "gemini": @@ -101,19 +132,30 @@ func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any, if field == "" { return payload } - if !util.ModelSupportsThinking(model) && !allowCompat { + baseModel := util.ResolveOriginalModel(model, metadata) + if baseModel == "" { + baseModel = model + } + if !util.ModelSupportsThinking(baseModel) && !allowCompat { return payload } if effort, ok := 
util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if util.ModelUsesThinkingLevels(model) || allowCompat { + if util.ModelUsesThinkingLevels(baseModel) || allowCompat { if updated, err := sjson.SetBytes(payload, field, effort); err == nil { return updated } } } - if util.ModelUsesThinkingLevels(model) || allowCompat { + // Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models. + if util.ModelUsesThinkingLevels(baseModel) || allowCompat { if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if effort, ok := util.OpenAIThinkingBudgetToEffort(model, *budget); ok && effort != "" { + if effort, ok := util.OpenAIThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" { + if *budget == 0 && effort == "none" && util.ModelUsesThinkingLevels(baseModel) { + if _, supported := util.NormalizeReasoningEffortLevel(baseModel, effort); !supported { + return stripThinkingFieldsLocal(payload, false) + } + } + if updated, err := sjson.SetBytes(payload, field, effort); err == nil { return updated } @@ -320,79 +362,46 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) { cleanup := registerCoreModels(t) defer cleanup() - models := []string{ - "gpt-5", // supports levels (level-based thinking) - "gemini-2.5-pro", // supports numeric budget - "qwen3-code-plus", // no thinking support - "openai-compat", // openai-compatible channel (allowCompat=true) - } - fromProtocols := []string{"openai", "claude", "gemini", "openai-response"} - toProtocols := []string{"gemini", "claude", "openai", "codex"} - type scenario struct { name string modelSuffix string - expectFn func(info *registry.ModelInfo) (present bool, budget int64) } - buildBudgetFn := func(raw int) func(info *registry.ModelInfo) (bool, int64) { - return func(info *registry.ModelInfo) (bool, int64) { - if info == nil || info.Thinking == nil { - return false, 0 - } - return true, int64(util.NormalizeThinkingBudget(info.ID, raw)) + 
numericName := func(budget int) string { + if budget < 0 { + return "numeric-neg1" } + return fmt.Sprintf("numeric-%d", budget) } - levelBudgetFn := func(level string) func(info *registry.ModelInfo) (bool, int64) { - return func(info *registry.ModelInfo) (bool, int64) { - if info == nil || info.Thinking == nil { - return false, 0 - } - if b, ok := util.ThinkingEffortToBudget(info.ID, level); ok { - return true, int64(b) - } - return false, 0 - } - } - - for _, model := range models { + for _, model := range thinkingTestModels { _ = registry.GetGlobalRegistry().GetModelInfo(model) - for _, from := range fromProtocols { + for _, from := range thinkingTestFromProtocols { // Scenario selection follows protocol semantics: // - OpenAI-style protocols (openai/openai-response) express thinking as levels. // - Claude/Gemini-style protocols express thinking as numeric budgets. cases := []scenario{ - {name: "no-suffix", modelSuffix: model, expectFn: func(_ *registry.ModelInfo) (bool, int64) { return false, 0 }}, + {name: "no-suffix", modelSuffix: model}, } if from == "openai" || from == "openai-response" { - // Level-based test cases: auto, none, minimal, low, medium, high, xhigh, foo(invalid) - // Maps to numeric: -1, 0, 512, 1024, 8192, 24576, 32768, invalid - cases = append(cases, - scenario{name: "level-auto", modelSuffix: fmt.Sprintf("%s(auto)", model), expectFn: levelBudgetFn("auto")}, - scenario{name: "level-none", modelSuffix: fmt.Sprintf("%s(none)", model), expectFn: levelBudgetFn("none")}, - scenario{name: "level-minimal", modelSuffix: fmt.Sprintf("%s(minimal)", model), expectFn: levelBudgetFn("minimal")}, - scenario{name: "level-low", modelSuffix: fmt.Sprintf("%s(low)", model), expectFn: levelBudgetFn("low")}, - scenario{name: "level-medium", modelSuffix: fmt.Sprintf("%s(medium)", model), expectFn: levelBudgetFn("medium")}, - scenario{name: "level-high", modelSuffix: fmt.Sprintf("%s(high)", model), expectFn: levelBudgetFn("high")}, - scenario{name: "level-xhigh", 
modelSuffix: fmt.Sprintf("%s(xhigh)", model), expectFn: levelBudgetFn("xhigh")}, - scenario{name: "level-invalid", modelSuffix: fmt.Sprintf("%s(invalid)", model), expectFn: levelBudgetFn("invalid")}, - ) + for _, lvl := range thinkingLevelSamples { + cases = append(cases, scenario{ + name: "level-" + lvl, + modelSuffix: fmt.Sprintf("%s(%s)", model, lvl), + }) + } } else { // claude or gemini - // Numeric test cases: -1, 0, 1023, 1025, 8193, 24577 - // Maps to levels: auto, none, low, medium, high, xhigh - cases = append(cases, - scenario{name: "numeric-neg1", modelSuffix: fmt.Sprintf("%s(-1)", model), expectFn: buildBudgetFn(-1)}, - scenario{name: "numeric-0", modelSuffix: fmt.Sprintf("%s(0)", model), expectFn: buildBudgetFn(0)}, - scenario{name: "numeric-1023", modelSuffix: fmt.Sprintf("%s(1023)", model), expectFn: buildBudgetFn(1023)}, - scenario{name: "numeric-1025", modelSuffix: fmt.Sprintf("%s(1025)", model), expectFn: buildBudgetFn(1025)}, - scenario{name: "numeric-8193", modelSuffix: fmt.Sprintf("%s(8193)", model), expectFn: buildBudgetFn(8193)}, - scenario{name: "numeric-24577", modelSuffix: fmt.Sprintf("%s(24577)", model), expectFn: buildBudgetFn(24577)}, - ) + for _, budget := range thinkingNumericSamples { + budget := budget + cases = append(cases, scenario{ + name: numericName(budget), + modelSuffix: fmt.Sprintf("%s(%d)", model, budget), + }) + } } - for _, to := range toProtocols { + for _, to := range thinkingTestToProtocols { if from == to { continue } @@ -585,7 +594,7 @@ func buildRawPayloadWithThinking(fromProtocol, model string, thinkingParam any) return []byte(base) case "claude": base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model) - if budget, ok := thinkingParam.(int); ok && budget > 0 { + if budget, ok := thinkingParam.(int); ok { base, _ = sjson.Set(base, "thinking.type", "enabled") base, _ = sjson.Set(base, "thinking.budget_tokens", budget) } @@ -636,55 +645,50 @@ func TestRawPayloadThinkingConversions(t 
*testing.T) { cleanup := registerCoreModels(t) defer cleanup() - models := []string{ - "gpt-5", // supports levels (level-based thinking) - "gemini-2.5-pro", // supports numeric budget - "qwen3-code-plus", // no thinking support - "openai-compat", // openai-compatible channel (allowCompat=true) - } - fromProtocols := []string{"openai", "claude", "gemini", "openai-response"} - toProtocols := []string{"gemini", "claude", "openai", "codex"} - type scenario struct { name string thinkingParam any // int for budget, string for effort level } - for _, model := range models { + numericName := func(budget int) string { + if budget < 0 { + return "budget-neg1" + } + return fmt.Sprintf("budget-%d", budget) + } + + for _, model := range thinkingTestModels { supportsThinking := util.ModelSupportsThinking(model) usesLevels := util.ModelUsesThinkingLevels(model) allowCompat := isOpenAICompatModel(model) - for _, from := range fromProtocols { + for _, from := range thinkingTestFromProtocols { var cases []scenario switch from { case "openai", "openai-response": - // Level-based test cases: auto, none, minimal, low, medium, high, xhigh, foo(invalid) cases = []scenario{ {name: "no-thinking", thinkingParam: nil}, - {name: "effort-auto", thinkingParam: "auto"}, - {name: "effort-none", thinkingParam: "none"}, - {name: "effort-minimal", thinkingParam: "minimal"}, - {name: "effort-low", thinkingParam: "low"}, - {name: "effort-medium", thinkingParam: "medium"}, - {name: "effort-high", thinkingParam: "high"}, - {name: "effort-xhigh", thinkingParam: "xhigh"}, - {name: "effort-invalid", thinkingParam: "invalid"}, + } + for _, lvl := range thinkingLevelSamples { + cases = append(cases, scenario{ + name: "effort-" + lvl, + thinkingParam: lvl, + }) } case "gemini", "claude": - // Numeric test cases: -1, 0, 1023, 1025, 8193, 24577 cases = []scenario{ {name: "no-thinking", thinkingParam: nil}, - {name: "budget-neg1", thinkingParam: -1}, - {name: "budget-0", thinkingParam: 0}, - {name: 
"budget-1023", thinkingParam: 1023}, - {name: "budget-1025", thinkingParam: 1025}, - {name: "budget-8193", thinkingParam: 8193}, - {name: "budget-24577", thinkingParam: 24577}, + } + for _, budget := range thinkingNumericSamples { + budget := budget + cases = append(cases, scenario{ + name: numericName(budget), + thinkingParam: budget, + }) } } - for _, to := range toProtocols { + for _, to := range thinkingTestToProtocols { if from == to { continue } @@ -727,8 +731,8 @@ func TestRawPayloadThinkingConversions(t *testing.T) { // ThinkingEffortToBudget already returns normalized budget return true, fmt.Sprintf("%d", budget), false } - // Invalid effort maps to default auto (-1) - return true, "-1", false + // Invalid effort does not map to a budget + return false, "", false } return false, "", false case "claude": @@ -758,14 +762,8 @@ func TestRawPayloadThinkingConversions(t *testing.T) { case "openai": if allowCompat { if effort, ok := cs.thinkingParam.(string); ok && strings.TrimSpace(effort) != "" { - // For allowCompat models, invalid effort values are normalized to "auto" normalized := strings.ToLower(strings.TrimSpace(effort)) - switch normalized { - case "none", "auto", "low", "medium", "high", "xhigh": - return true, normalized, false - default: - return true, "auto", false - } + return true, normalized, false } if budget, ok := cs.thinkingParam.(int); ok { if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" { @@ -891,6 +889,7 @@ func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) { want string ok bool }{ + {name: "dynamic-auto", model: "gpt-5", budget: -1, want: "auto", ok: true}, {name: "zero-none", model: "gpt-5", budget: 0, want: "none", ok: true}, {name: "low-min", model: "gpt-5", budget: 1, want: "low", ok: true}, {name: "low-max", model: "gpt-5", budget: 1024, want: "low", ok: true}, @@ -898,7 +897,7 @@ func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) { {name: "medium-max", model: "gpt-5", budget: 
8192, want: "medium", ok: true}, {name: "high-min", model: "gpt-5", budget: 8193, want: "high", ok: true}, {name: "high-max", model: "gpt-5", budget: 24576, want: "high", ok: true}, - {name: "over-max-clamps-to-highest", model: "gpt-5", budget: 24577, want: "high", ok: true}, + {name: "over-max-clamps-to-highest", model: "gpt-5", budget: 64000, want: "high", ok: true}, {name: "over-max-xhigh-model", model: "gpt-5.2", budget: 50000, want: "xhigh", ok: true}, {name: "negative-unsupported", model: "gpt-5", budget: -5, want: "", ok: false}, } From 367a05bdf63645b678ee91e0a63ad08d0c1d6043 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 14 Dec 2025 20:12:54 +0800 Subject: [PATCH 8/9] refactor(thinking): export thinking helpers Expose thinking/effort normalization helpers from the executor package so conversion tests use production code and stay aligned with runtime validation behavior. --- .../runtime/executor/aistudio_executor.go | 2 +- internal/runtime/executor/codex_executor.go | 14 +- internal/runtime/executor/gemini_executor.go | 6 +- internal/runtime/executor/iflow_executor.go | 12 +- .../executor/openai_compat_executor.go | 12 +- internal/runtime/executor/payload_helpers.go | 32 ++-- internal/runtime/executor/qwen_executor.go | 12 +- test/thinking_conversion_test.go | 177 +----------------- 8 files changed, 55 insertions(+), 212 deletions(-) diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index 221fb6481..ada0af395 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -322,7 +322,7 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c from := opts.SourceFormat to := sdktranslator.FromString("gemini") payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) - payload = applyThinkingMetadata(payload, req.Metadata, req.Model) + 
payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model) payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload) payload = util.ConvertThinkingLevelToBudget(payload) payload = util.NormalizeGeminiThinkingBudget(req.Model, payload) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 2e9662372..c3e14701e 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -54,9 +54,9 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) - body = normalizeThinkingConfig(body, upstreamModel, false) - if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) + body = NormalizeThinkingConfig(body, upstreamModel, false) + if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate } body = applyPayloadConfig(e.cfg, req.Model, body) @@ -152,9 +152,9 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) - body = normalizeThinkingConfig(body, upstreamModel, false) - if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) + body = NormalizeThinkingConfig(body, upstreamModel, false) + if errValidate := ValidateThinkingConfig(body, 
upstreamModel); errValidate != nil { return nil, errValidate } body = applyPayloadConfig(e.cfg, req.Model, body) @@ -254,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth modelForCounting := req.Model - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "stream", false) diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index 8dd3dc3b1..f211ba62a 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -83,7 +83,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r from := opts.SourceFormat to := sdktranslator.FromString("gemini") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyThinkingMetadata(body, req.Metadata, req.Model) + body = ApplyThinkingMetadata(body, req.Metadata, req.Model) body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) @@ -178,7 +178,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A from := opts.SourceFormat to := sdktranslator.FromString("gemini") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyThinkingMetadata(body, req.Metadata, req.Model) + body = ApplyThinkingMetadata(body, req.Metadata, req.Model) body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) @@ -290,7 +290,7 @@ func (e 
*GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut from := opts.SourceFormat to := sdktranslator.FromString("gemini") translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - translatedReq = applyThinkingMetadata(translatedReq, req.Metadata, req.Model) + translatedReq = ApplyThinkingMetadata(translatedReq, req.Metadata, req.Model) translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) respCtx := context.WithValue(ctx, "alt", opts.Alt) diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index 03df1be0a..ad0b4d2a3 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -57,13 +57,13 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } - body = normalizeThinkingConfig(body, upstreamModel, false) - if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + body = NormalizeThinkingConfig(body, upstreamModel, false) + if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate } body = applyPayloadConfig(e.cfg, req.Model, body) @@ -148,13 +148,13 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("openai") body := 
sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } - body = normalizeThinkingConfig(body, upstreamModel, false) - if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + body = NormalizeThinkingConfig(body, upstreamModel, false) + if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { return nil, errValidate } // Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour. diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 1bbd0c8eb..1c57c9b7b 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -60,13 +60,13 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) allowCompat := e.allowCompatReasoningEffort(req.Model, auth) - translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) + translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" && modelOverride == "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } - translated = normalizeThinkingConfig(translated, upstreamModel, allowCompat) - if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil { + translated = NormalizeThinkingConfig(translated, upstreamModel, 
allowCompat) + if errValidate := ValidateThinkingConfig(translated, upstreamModel); errValidate != nil { return resp, errValidate } @@ -156,13 +156,13 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) allowCompat := e.allowCompatReasoningEffort(req.Model, auth) - translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) + translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" && modelOverride == "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } - translated = normalizeThinkingConfig(translated, upstreamModel, allowCompat) - if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil { + translated = NormalizeThinkingConfig(translated, upstreamModel, allowCompat) + if errValidate := ValidateThinkingConfig(translated, upstreamModel); errValidate != nil { return nil, errValidate } diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 667f29da1..b0eafbb7a 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -11,9 +11,9 @@ import ( "github.com/tidwall/sjson" ) -// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192)) +// ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192)) // for standard Gemini format payloads. It normalizes the budget when the model supports thinking. 
-func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte { +func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte { budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata) if !ok || (budgetOverride == nil && includeOverride == nil) { return payload @@ -45,10 +45,10 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) } -// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path. +// ApplyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path. // Metadata values take precedence over any existing field when the model supports thinking, intentionally // overwriting caller-provided values to honor suffix/default metadata priority. -func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte { +func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte { if len(metadata) == 0 { return payload } @@ -75,7 +75,7 @@ func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model if effort, ok := util.OpenAIThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" { if *budget == 0 && effort == "none" && util.ModelUsesThinkingLevels(baseModel) { if _, supported := util.NormalizeReasoningEffortLevel(baseModel, effort); !supported { - return stripThinkingFields(payload, false) + return StripThinkingFields(payload, false) } } @@ -238,12 +238,12 @@ func matchModelPattern(pattern, model string) bool { return pi == len(pattern) } -// normalizeThinkingConfig normalizes thinking-related fields in the payload +// NormalizeThinkingConfig normalizes thinking-related fields in the payload // based on model capabilities. 
For models without thinking support, it strips // reasoning fields. For models with level-based thinking, it validates and // normalizes the reasoning effort level. For models with numeric budget thinking, // it strips the effort string fields. -func normalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte { +func NormalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte { if len(payload) == 0 || model == "" { return payload } @@ -252,22 +252,22 @@ func normalizeThinkingConfig(payload []byte, model string, allowCompat bool) []b if allowCompat { return payload } - return stripThinkingFields(payload, false) + return StripThinkingFields(payload, false) } if util.ModelUsesThinkingLevels(model) { - return normalizeReasoningEffortLevel(payload, model) + return NormalizeReasoningEffortLevel(payload, model) } // Model supports thinking but uses numeric budgets, not levels. // Strip effort string fields since they are not applicable. - return stripThinkingFields(payload, true) + return StripThinkingFields(payload, true) } -// stripThinkingFields removes thinking-related fields from the payload for +// StripThinkingFields removes thinking-related fields from the payload for // models that do not support thinking. If effortOnly is true, only removes // effort string fields (for models using numeric budgets). -func stripThinkingFields(payload []byte, effortOnly bool) []byte { +func StripThinkingFields(payload []byte, effortOnly bool) []byte { fieldsToRemove := []string{ "reasoning_effort", "reasoning.effort", @@ -284,9 +284,9 @@ func stripThinkingFields(payload []byte, effortOnly bool) []byte { return out } -// normalizeReasoningEffortLevel validates and normalizes the reasoning_effort +// NormalizeReasoningEffortLevel validates and normalizes the reasoning_effort // or reasoning.effort field for level-based thinking models. 
-func normalizeReasoningEffortLevel(payload []byte, model string) []byte { +func NormalizeReasoningEffortLevel(payload []byte, model string) []byte { out := payload if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() { @@ -304,10 +304,10 @@ func normalizeReasoningEffortLevel(payload []byte, model string) []byte { return out } -// validateThinkingConfig checks for unsupported reasoning levels on level-based models. +// ValidateThinkingConfig checks for unsupported reasoning levels on level-based models. // Returns a statusErr with 400 when an unsupported level is supplied to avoid silently // downgrading requests. -func validateThinkingConfig(payload []byte, model string) error { +func ValidateThinkingConfig(payload []byte, model string) error { if len(payload) == 0 || model == "" { return nil } diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index 3bd610217..1d4ef52d5 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -51,13 +51,13 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req from := opts.SourceFormat to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } - body = normalizeThinkingConfig(body, upstreamModel, false) - if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + body = NormalizeThinkingConfig(body, upstreamModel, false) + if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate } body = 
applyPayloadConfig(e.cfg, req.Model, body) @@ -131,13 +131,13 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } - body = normalizeThinkingConfig(body, upstreamModel, false) - if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + body = NormalizeThinkingConfig(body, upstreamModel, false) + if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { return nil, errValidate } toolsResult := gjson.GetBytes(body, "tools") diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index 9d15e1281..6d1569545 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -2,7 +2,6 @@ package test import ( "fmt" - "net/http" "strings" "testing" "time" @@ -10,20 +9,13 @@ import ( _ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) -// statusErr mirrors executor.statusErr to keep validation behavior aligned. 
-type statusErr struct { - code int - msg string -} - -func (e statusErr) Error() string { return e.msg } - // isOpenAICompatModel returns true if the model is configured as an OpenAI-compatible // model that should have reasoning effort passed through even if not in registry. // This simulates the allowCompat behavior from OpenAICompatExecutor. @@ -108,159 +100,10 @@ func buildRawPayload(fromProtocol, modelWithSuffix string) []byte { } } -// applyThinkingMetadataLocal mirrors executor.applyThinkingMetadata. -func applyThinkingMetadataLocal(payload []byte, metadata map[string]any, model string) []byte { - budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata) - if !ok || (budgetOverride == nil && includeOverride == nil) { - return payload - } - if !util.ModelSupportsThinking(model) { - return payload - } - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(model, *budgetOverride) - budgetOverride = &norm - } - return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) -} - -// applyReasoningEffortMetadataLocal mirrors executor.applyReasoningEffortMetadata. -func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte { - if len(metadata) == 0 { - return payload - } - if field == "" { - return payload - } - baseModel := util.ResolveOriginalModel(model, metadata) - if baseModel == "" { - baseModel = model - } - if !util.ModelSupportsThinking(baseModel) && !allowCompat { - return payload - } - if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if util.ModelUsesThinkingLevels(baseModel) || allowCompat { - if updated, err := sjson.SetBytes(payload, field, effort); err == nil { - return updated - } - } - } - // Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models. 
- if util.ModelUsesThinkingLevels(baseModel) || allowCompat { - if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if effort, ok := util.OpenAIThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" { - if *budget == 0 && effort == "none" && util.ModelUsesThinkingLevels(baseModel) { - if _, supported := util.NormalizeReasoningEffortLevel(baseModel, effort); !supported { - return stripThinkingFieldsLocal(payload, false) - } - } - - if updated, err := sjson.SetBytes(payload, field, effort); err == nil { - return updated - } - } - } - } - return payload -} - -// normalizeThinkingConfigLocal mirrors executor.normalizeThinkingConfig. -// When allowCompat is true, reasoning fields are preserved even for models -// without thinking support (simulating openai-compat passthrough behavior). -func normalizeThinkingConfigLocal(payload []byte, model string, allowCompat bool) []byte { - if len(payload) == 0 || model == "" { - return payload - } - - if !util.ModelSupportsThinking(model) { - if allowCompat { - return payload - } - return stripThinkingFieldsLocal(payload, false) - } - - if util.ModelUsesThinkingLevels(model) { - return normalizeReasoningEffortLevelLocal(payload, model) - } - - // Model supports thinking but uses numeric budgets, not levels. - // Strip effort string fields since they are not applicable. - return stripThinkingFieldsLocal(payload, true) -} - -// stripThinkingFieldsLocal mirrors executor.stripThinkingFields. -func stripThinkingFieldsLocal(payload []byte, effortOnly bool) []byte { - fieldsToRemove := []string{ - "reasoning_effort", - "reasoning.effort", - } - if !effortOnly { - fieldsToRemove = append([]string{"reasoning"}, fieldsToRemove...) - } - out := payload - for _, field := range fieldsToRemove { - if gjson.GetBytes(out, field).Exists() { - out, _ = sjson.DeleteBytes(out, field) - } - } - return out -} - -// normalizeReasoningEffortLevelLocal mirrors executor.normalizeReasoningEffortLevel. 
-func normalizeReasoningEffortLevelLocal(payload []byte, model string) []byte { - out := payload - - if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() { - if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { - out, _ = sjson.SetBytes(out, "reasoning_effort", normalized) - } - } - - if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() { - if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { - out, _ = sjson.SetBytes(out, "reasoning.effort", normalized) - } - } - - return out -} - -// validateThinkingConfigLocal mirrors executor.validateThinkingConfig. -func validateThinkingConfigLocal(payload []byte, model string) error { - if len(payload) == 0 || model == "" { - return nil - } - if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) { - return nil - } - - levels := util.GetModelThinkingLevels(model) - checkField := func(path string) error { - if effort := gjson.GetBytes(payload, path); effort.Exists() { - if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok { - return statusErr{ - code: http.StatusBadRequest, - msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")), - } - } - } - return nil - } - - if err := checkField("reasoning_effort"); err != nil { - return err - } - if err := checkField("reasoning.effort"); err != nil { - return err - } - return nil -} - // normalizeCodexPayload mirrors codex_executor's reasoning + streaming tweaks. 
func normalizeCodexPayload(body []byte, upstreamModel string, allowCompat bool) ([]byte, error) { - body = normalizeThinkingConfigLocal(body, upstreamModel, allowCompat) - if err := validateThinkingConfigLocal(body, upstreamModel); err != nil { + body = executor.NormalizeThinkingConfig(body, upstreamModel, allowCompat) + if err := executor.ValidateThinkingConfig(body, upstreamModel); err != nil { return body, err } body, _ = sjson.SetBytes(body, "model", upstreamModel) @@ -290,7 +133,7 @@ func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffi allowCompat := isOpenAICompatModel(normalizedModel) switch toProtocol { case "gemini": - body = applyThinkingMetadataLocal(body, metadata, normalizedModel) + body = executor.ApplyThinkingMetadata(body, metadata, normalizedModel) body = util.ApplyDefaultThinkingIfNeeded(normalizedModel, body) body = util.NormalizeGeminiThinkingBudget(normalizedModel, body) body = util.StripThinkingConfigIfUnsupported(normalizedModel, body) @@ -299,12 +142,12 @@ func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffi body = util.ApplyClaudeThinkingConfig(body, budget) } case "openai": - body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning_effort", allowCompat) - body = normalizeThinkingConfigLocal(body, upstreamModel, allowCompat) - err = validateThinkingConfigLocal(body, upstreamModel) + body = executor.ApplyReasoningEffortMetadata(body, metadata, normalizedModel, "reasoning_effort", allowCompat) + body = executor.NormalizeThinkingConfig(body, upstreamModel, allowCompat) + err = executor.ValidateThinkingConfig(body, upstreamModel) case "codex": // OpenAI responses / codex // Codex does not support allowCompat; always use false. 
- body = applyReasoningEffortMetadataLocal(body, metadata, normalizedModel, "reasoning.effort", false) + body = executor.ApplyReasoningEffortMetadata(body, metadata, normalizedModel, "reasoning.effort", false) // Mirror CodexExecutor final normalization and model override so tests log the final body. body, err = normalizeCodexPayload(body, upstreamModel, false) default: @@ -629,8 +472,8 @@ func buildBodyForProtocolWithRawThinking(t *testing.T, fromProtocol, toProtocol, // For raw payload, Claude thinking is passed through by translator // No additional processing needed as thinking is already in body case "openai": - body = normalizeThinkingConfigLocal(body, model, allowCompat) - err = validateThinkingConfigLocal(body, model) + body = executor.NormalizeThinkingConfig(body, model, allowCompat) + err = executor.ValidateThinkingConfig(body, model) case "codex": // Codex does not support allowCompat; always use false. body, err = normalizeCodexPayload(body, model, false) From 09c339953dd08b4fec070b5a6434266631eaa5ba Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 14 Dec 2025 20:30:26 +0800 Subject: [PATCH 9/9] fix(openai): forward reasoning.effort value Drop the hardcoded effort mapping in request conversion so unknown values are preserved instead of being coerced to `auto`. --- .../openai_openai-responses_request.go | 21 ++++--------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/internal/translator/openai/openai/responses/openai_openai-responses_request.go b/internal/translator/openai/openai/responses/openai_openai-responses_request.go index 8bac0485f..f8bcb7b1e 100644 --- a/internal/translator/openai/openai/responses/openai_openai-responses_request.go +++ b/internal/translator/openai/openai/responses/openai_openai-responses_request.go @@ -2,6 +2,7 @@ package responses import ( "bytes" + "strings" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -189,23 +190,9 @@ func
ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inpu } if reasoningEffort := root.Get("reasoning.effort"); reasoningEffort.Exists() { - switch reasoningEffort.String() { - case "none": - out, _ = sjson.Set(out, "reasoning_effort", "none") - case "auto": - out, _ = sjson.Set(out, "reasoning_effort", "auto") - case "minimal": - out, _ = sjson.Set(out, "reasoning_effort", "minimal") - case "low": - out, _ = sjson.Set(out, "reasoning_effort", "low") - case "medium": - out, _ = sjson.Set(out, "reasoning_effort", "medium") - case "high": - out, _ = sjson.Set(out, "reasoning_effort", "high") - case "xhigh": - out, _ = sjson.Set(out, "reasoning_effort", "xhigh") - default: - out, _ = sjson.Set(out, "reasoning_effort", "auto") + effort := strings.ToLower(strings.TrimSpace(reasoningEffort.String())) + if effort != "" { + out, _ = sjson.Set(out, "reasoning_effort", effort) } }