diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d5d4af54d8..2c5f4b6b930 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -190,6 +190,7 @@ Docs: https://docs.openclaw.ai - Setup/config/install: stop setup, config dry-runs, and daemon install from eagerly booting auth-profile and plugin repair runtime when those paths are not needed, so onboarding and local service setup avoid long cold-start stalls. Thanks @vincentkoc. - Cron/direct delivery: slim isolated-agent delivery cold paths so direct channel delivery and related cron execution spend less time loading unrelated auth, plugin, and channel runtime. Thanks @vincentkoc. - Channels/replay dedupe: standardize replay claims, retryable-failure release, and post-success commit behavior across Telegram, Discord, Slack, Mattermost, WhatsApp, Matrix, LINE, Feishu, Zalo, Nextcloud Talk, TLON, Nostr, Voice Call, and shared plugin interactive callbacks so duplicate deliveries stay reply-once after success but retry cleanly after pre-delivery failures. Thanks @vincentkoc. +- Agents/OpenAI mini reasoning: remap unsupported `low` and `minimal` reasoning effort to `medium` for affected OpenAI mini models, and add a live regression lane to keep the compatibility fix covered. (#65478) Thanks @vincentkoc. ## 2026.4.11 diff --git a/src/agents/openai-reasoning-compat.live.test.ts b/src/agents/openai-reasoning-compat.live.test.ts new file mode 100644 index 00000000000..09670fb6ff5 --- /dev/null +++ b/src/agents/openai-reasoning-compat.live.test.ts @@ -0,0 +1,172 @@ +import { completeSimple, type Api, type Model } from "@mariozechner/pi-ai"; +import { describe, expect, it } from "vitest"; +import { loadConfig } from "../config/config.js"; +import { resolveOpenClawAgentDir } from "./agent-paths.js"; +import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js"; +import { getApiKeyForModel, requireApiKey } from "./model-auth.js"; +import { ensureOpenClawModelsJson } from "./models-config.js"; +import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js"; + +const LIVE = isLiveTestEnabled(); +const REQUIRE_PROFILE_KEYS = isLiveProfileKeyModeEnabled(); +const LIVE_CREDENTIAL_PRECEDENCE = REQUIRE_PROFILE_KEYS ? "profile-first" : "env-first"; +const DEFAULT_TARGET_MODEL_REF = "openai-codex/gpt-5.1-codex-mini"; +const TARGET_MODEL_REF = + process.env.OPENCLAW_LIVE_OPENAI_REASONING_COMPAT_MODEL?.trim() || DEFAULT_TARGET_MODEL_REF; +const describeLive = LIVE ? describe : describe.skip; + +function logProgress(message: string): void { + process.stderr.write(`[live] ${message}\n`); +} + +async function completeSimpleWithTimeout( + model: Model, + context: Parameters>[1], + options: Parameters>[2], + timeoutMs: number, +): Promise>>> { + const controller = new AbortController(); + const abortTimer = setTimeout(() => { + controller.abort(); + }, timeoutMs); + abortTimer.unref?.(); + try { + return await Promise.race([ + completeSimple(model, context, { + ...options, + signal: controller.signal, + }), + new Promise((_, reject) => { + const hardTimer = setTimeout(() => { + reject(new Error(`model call timed out after ${timeoutMs}ms`)); + }, timeoutMs); + hardTimer.unref?.(); + }), + ]); + } finally { + clearTimeout(abortTimer); + } +} + +async function completeReplyWithRetry(params: { + model: Model; + apiKey: string; + message: string; +}): Promise<{ text: string; errorMessage?: string }> { + const runOnce = async (maxTokens: number) => { + const response = await completeSimpleWithTimeout( + params.model, + { + systemPrompt: "You are a concise assistant. Follow the user's instruction exactly.", + messages: [ + { + role: "user", + content: params.message, + timestamp: Date.now(), + }, + ], + }, + { + apiKey: params.apiKey, + reasoning: "low", + maxTokens, + }, + 120_000, + ); + const text = response.content + .filter((block) => block.type === "text") + .map((block) => block.text.trim()) + .join(" ") + .trim(); + return { + text, + errorMessage: + typeof (response as { errorMessage?: unknown }).errorMessage === "string" + ? ((response as { errorMessage?: string }).errorMessage ?? undefined) + : undefined, + }; + }; + + const first = await runOnce(64); + if (first.text.length > 0 || first.errorMessage) { + return first; + } + return await runOnce(256); +} + +function isKnownLiveBlocker(errorMessage: string): boolean { + return ( + /not supported when using codex with a chatgpt account/i.test(errorMessage) || + /hit your chatgpt usage limit/i.test(errorMessage) + ); +} + +function resolveTargetModelRef(): { provider: string; modelId: string } { + const [provider, ...rest] = TARGET_MODEL_REF.split("/"); + const modelId = rest.join("/").trim(); + if (!provider?.trim() || !modelId) { + throw new Error( + `Invalid OPENCLAW_LIVE_OPENAI_REASONING_COMPAT_MODEL: ${JSON.stringify(TARGET_MODEL_REF)}`, + ); + } + return { + provider: provider.trim(), + modelId, + }; +} + +describeLive("openai reasoning compat live", () => { + it( + "remaps low reasoning for the configured OpenAI mini target", + async () => { + const { provider, modelId } = resolveTargetModelRef(); + const cfg = loadConfig(); + await ensureOpenClawModelsJson(cfg); + + const agentDir = resolveOpenClawAgentDir(); + const authStorage = discoverAuthStorage(agentDir); + const modelRegistry = discoverModels(authStorage, agentDir); + const model = modelRegistry.find(provider, modelId) as Model | null; + + if (!model) { + logProgress(`[openai-reasoning-compat] model missing from registry: ${TARGET_MODEL_REF}`); + return; + } + + let apiKeyInfo; + try { + apiKeyInfo = await getApiKeyForModel({ + model, + cfg, + credentialPrecedence: LIVE_CREDENTIAL_PRECEDENCE, + }); + } catch (error) { + logProgress(`[openai-reasoning-compat] skip (${String(error)})`); + return; + } + + if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) { + logProgress( + `[openai-reasoning-compat] skip (non-profile credential source: ${apiKeyInfo.source})`, + ); + return; + } + + logProgress( + `[openai-reasoning-compat] target=${TARGET_MODEL_REF} auth source=${apiKeyInfo.source}`, + ); + const result = await completeReplyWithRetry({ + model, + apiKey: requireApiKey(apiKeyInfo, model.provider), + message: "Reply with exactly: low reasoning ok.", + }); + if (result.errorMessage && isKnownLiveBlocker(result.errorMessage)) { + logProgress(`[openai-reasoning-compat] skip (${result.errorMessage})`); + return; + } + + expect(result.text).toMatch(/^low reasoning ok\.?$/i); + }, + 3 * 60 * 1000, + ); +}); diff --git a/src/agents/openai-reasoning-compat.ts b/src/agents/openai-reasoning-compat.ts new file mode 100644 index 00000000000..3d994b773fe --- /dev/null +++ b/src/agents/openai-reasoning-compat.ts @@ -0,0 +1,56 @@ +import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js"; + +type OpenAIReasoningCompatModel = { + provider?: string | null; + id?: string | null; + compat?: unknown; +}; + +const OPENAI_MEDIUM_ONLY_REASONING_MODEL_IDS = new Set(["gpt-5.1-codex-mini", "gpt-5.4-mini"]); + +function readCompatReasoningEffortMap(compat: unknown): Record { + if (!compat || typeof compat !== "object") { + return {}; + } + const rawMap = (compat as { reasoningEffortMap?: unknown }).reasoningEffortMap; + if (!rawMap || typeof rawMap !== "object") { + return {}; + } + return Object.fromEntries( + Object.entries(rawMap).filter( + (entry): entry is [string, string] => + typeof entry[0] === "string" && typeof entry[1] === "string", + ), + ); +} + +export function resolveOpenAIReasoningEffortMap( + model: OpenAIReasoningCompatModel, + fallbackMap: Record = {}, +): Record { + const provider = normalizeLowercaseStringOrEmpty(model.provider ?? ""); + const id = normalizeLowercaseStringOrEmpty(model.id ?? ""); + const builtinMap = + (provider === "openai" || provider === "openai-codex") && + OPENAI_MEDIUM_ONLY_REASONING_MODEL_IDS.has(id) + ? { minimal: "medium", low: "medium" } + : {}; + return { + ...fallbackMap, + ...builtinMap, + ...readCompatReasoningEffortMap(model.compat), + }; +} + +export function mapOpenAIReasoningEffortForModel(params: { + model: OpenAIReasoningCompatModel; + effort?: string; + fallbackMap?: Record; +}): string | undefined { + const { effort } = params; + if (effort === undefined || effort === "none") { + return effort; + } + const reasoningEffortMap = resolveOpenAIReasoningEffortMap(params.model, params.fallbackMap); + return reasoningEffortMap[effort] ?? effort; +} diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts index 30479b5adee..71eadc9a7ba 100644 --- a/src/agents/openai-transport-stream.test.ts +++ b/src/agents/openai-transport-stream.test.ts @@ -615,6 +615,33 @@ describe("openai transport stream", () => { expect(params.reasoning).toEqual({ effort: "low", summary: "auto" }); }); + it("maps low reasoning to medium for Codex mini responses models", () => { + const params = buildOpenAIResponsesParams( + { + id: "gpt-5.1-codex-mini", + name: "gpt-5.1-codex-mini", + api: "openai-codex-responses", + provider: "openai-codex", + baseUrl: "https://chatgpt.com/backend-api", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-codex-responses">, + { + systemPrompt: "system", + messages: [], + tools: [], + } as never, + { + reasoning: "low", + } as never, + ) as { reasoning?: unknown }; + + expect(params.reasoning).toEqual({ effort: "medium", summary: "auto" }); + }); + it.each([ { label: "openai", diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts index b1c99aec48e..c52d820f515 100644 --- a/src/agents/openai-transport-stream.ts +++ b/src/agents/openai-transport-stream.ts @@ -24,6 +24,10 @@ import { resolveProviderTransportTurnStateWithPlugin } from "../plugins/provider import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./copilot-dynamic-headers.js"; import { detectOpenAICompletionsCompat } from "./openai-completions-compat.js"; import { flattenCompletionMessagesToStringContent } from "./openai-completions-string-content.js"; +import { + mapOpenAIReasoningEffortForModel, + resolveOpenAIReasoningEffortMap, +} from "./openai-reasoning-compat.js"; import { normalizeOpenAIReasoningEffort, type OpenAIApiReasoningEffort, @@ -795,8 +799,12 @@ export function buildOpenAIResponsesParams( } if (model.reasoning) { if (options?.reasoningEffort || options?.reasoning || options?.reasoningSummary) { - params.reasoning = { + const reasoningEffort = mapOpenAIReasoningEffortForModel({ + model, effort: resolveOpenAIReasoningEffort(options), + }); + params.reasoning = { + effort: reasoningEffort ?? "high", summary: options?.reasoningSummary || "auto", }; params.include = ["reasoning.encrypted_content"]; @@ -1209,9 +1217,7 @@ function getCompat(model: OpenAIModeModel): { supportsDeveloperRole: (compat.supportsDeveloperRole as boolean | undefined) ?? detected.supportsDeveloperRole, supportsReasoningEffort, - reasoningEffortMap: - (compat.reasoningEffortMap as Record | undefined) ?? - detected.reasoningEffortMap, + reasoningEffortMap: resolveOpenAIReasoningEffortMap(model, detected.reasoningEffortMap), supportsUsageInStreaming: (compat.supportsUsageInStreaming as boolean | undefined) ?? detected.supportsUsageInStreaming, maxTokensField: (compat.maxTokensField as string | undefined) ?? detected.maxTokensField, diff --git a/src/agents/openai-ws-request.ts b/src/agents/openai-ws-request.ts index 711a1e72704..0792e70e5f9 100644 --- a/src/agents/openai-ws-request.ts +++ b/src/agents/openai-ws-request.ts @@ -1,5 +1,6 @@ import type { StreamFn } from "@mariozechner/pi-agent-core"; import { readStringValue } from "../shared/string-coerce.js"; +import { mapOpenAIReasoningEffortForModel } from "./openai-reasoning-compat.js"; import { normalizeOpenAIReasoningEffort } from "./openai-reasoning-effort.js"; import type { FunctionToolDefinition, @@ -71,10 +72,13 @@ export function buildOpenAIWebSocketResponseCreatePayload(params: { extraParams.tool_choice = streamOpts.toolChoice; } - const reasoningEffort = - streamOpts?.reasoningEffort ?? - streamOpts?.reasoning ?? - (params.model.reasoning ? "high" : undefined); + const reasoningEffort = mapOpenAIReasoningEffortForModel({ + model: params.model, + effort: + streamOpts?.reasoningEffort ?? + streamOpts?.reasoning ?? + (params.model.reasoning ? "high" : undefined), + }); if (reasoningEffort !== "none" && (reasoningEffort || streamOpts?.reasoningSummary)) { const reasoning: { effort?: string; summary?: string } = {}; if (reasoningEffort !== undefined) { diff --git a/src/agents/openai-ws-stream.test.ts b/src/agents/openai-ws-stream.test.ts index e723a460a78..df50ae32f28 100644 --- a/src/agents/openai-ws-stream.test.ts +++ b/src/agents/openai-ws-stream.test.ts @@ -3146,6 +3146,44 @@ describe("createOpenAIWebSocketStreamFn", () => { expect(sent.reasoning).toEqual({ effort: "low" }); }); + it("maps low reasoning to medium for Codex mini websocket requests", async () => { + const streamFn = createOpenAIWebSocketStreamFn("sk-test", "sess-reason-codex-mini"); + const opts = { reasoning: "low" }; + const stream = streamFn( + { + ...modelStub, + id: "gpt-5.1-codex-mini", + name: "gpt-5.1-codex-mini", + provider: "openai-codex", + api: "openai-codex-responses", + baseUrl: "https://chatgpt.com/backend-api", + reasoning: true, + } as Parameters[0], + contextStub as Parameters[1], + opts as unknown as Parameters[2], + ); + await new Promise((resolve, reject) => { + queueMicrotask(async () => { + try { + await new Promise((r) => setImmediate(r)); + MockManager.lastInstance!.simulateEvent({ + type: "response.completed", + response: makeResponseObject("resp-reason-codex-mini", "Mini thought"), + }); + for await (const _ of await resolveStream(stream)) { + /* consume */ + } + resolve(); + } catch (e) { + reject(e); + } + }); + }); + const sent = MockManager.lastInstance!.sentEvents[0] as Record; + expect(sent.type).toBe("response.create"); + expect(sent.reasoning).toEqual({ effort: "medium" }); + }); + it("omits response.create reasoning when reasoningEffort is none", async () => { const streamFn = createOpenAIWebSocketStreamFn("sk-test", "sess-reason-none"); const opts = { reasoningEffort: "none" };