diff --git a/extensions/qa-lab/src/cli.runtime.test.ts b/extensions/qa-lab/src/cli.runtime.test.ts index 75629b1afec..29732d7dea3 100644 --- a/extensions/qa-lab/src/cli.runtime.test.ts +++ b/extensions/qa-lab/src/cli.runtime.test.ts @@ -14,6 +14,7 @@ const { writeQaDockerHarnessFiles, buildQaDockerHarnessImage, runQaDockerUp, + defaultQaRuntimeModelForMode, } = vi.hoisted(() => ({ runQaManualLane: vi.fn(), runQaSuiteFromRuntime: vi.fn(), @@ -25,6 +26,14 @@ const { writeQaDockerHarnessFiles: vi.fn(), buildQaDockerHarnessImage: vi.fn(), runQaDockerUp: vi.fn(), + defaultQaRuntimeModelForMode: vi.fn<(mode: string, options?: { alternate?: boolean }) => string>( + (mode, options) => + mode === "live-frontier" + ? "openai/gpt-5.4" + : options?.alternate + ? "mock-openai/gpt-5.4-alt" + : "mock-openai/gpt-5.4", + ), })); vi.mock("./manual-lane.runtime.js", () => ({ @@ -64,6 +73,10 @@ vi.mock("./docker-up.runtime.js", () => ({ runQaDockerUp, })); +vi.mock("./model-selection.runtime.js", () => ({ + defaultQaRuntimeModelForMode, +})); + import { resolveRepoRelativeOutputDir } from "./cli-paths.js"; import { runQaLabSelfCheckCommand, @@ -93,6 +106,14 @@ describe("qa cli runtime", () => { writeQaDockerHarnessFiles.mockReset(); buildQaDockerHarnessImage.mockReset(); runQaDockerUp.mockReset(); + defaultQaRuntimeModelForMode.mockImplementation( + (mode: string, options?: { alternate?: boolean }) => + mode === "live-frontier" + ? "openai/gpt-5.4" + : options?.alternate + ? "mock-openai/gpt-5.4-alt" + : "mock-openai/gpt-5.4", + ); runQaSuiteFromRuntime.mockResolvedValue({ watchUrl: "http://127.0.0.1:43124", reportPath: "/tmp/report.md", @@ -690,6 +711,32 @@ describe("qa cli runtime", () => { ); }); + it("defaults manual frontier runs onto Codex OAuth when the runtime resolver prefers it", async () => { + defaultQaRuntimeModelForMode.mockImplementation((mode, options) => + mode === "live-frontier" + ? "openai-codex/gpt-5.4" + : options?.alternate + ? "mock-openai/gpt-5.4-alt" + : "mock-openai/gpt-5.4", + ); + + await runQaManualLaneCommand({ + repoRoot: "/tmp/openclaw-repo", + message: "read qa kickoff and reply short", + }); + + expect(runQaManualLane).toHaveBeenCalledWith({ + repoRoot: path.resolve("/tmp/openclaw-repo"), + transportId: "qa-channel", + providerMode: "live-frontier", + primaryModel: "openai-codex/gpt-5.4", + alternateModel: "openai-codex/gpt-5.4", + fastMode: undefined, + message: "read qa kickoff and reply short", + timeoutMs: undefined, + }); + }); + it("resolves self-check repo-root-relative paths before starting the lab server", async () => { await runQaLabSelfCheckCommand({ repoRoot: "/tmp/openclaw-repo", diff --git a/extensions/qa-lab/src/model-selection.runtime.test.ts b/extensions/qa-lab/src/model-selection.runtime.test.ts new file mode 100644 index 00000000000..8377f82b867 --- /dev/null +++ b/extensions/qa-lab/src/model-selection.runtime.test.ts @@ -0,0 +1,60 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const { resolveEnvApiKey, loadAuthProfileStoreForRuntime, listProfilesForProvider } = vi.hoisted( + () => ({ + resolveEnvApiKey: vi.fn(), + loadAuthProfileStoreForRuntime: vi.fn(), + listProfilesForProvider: vi.fn(), + }), +); + +vi.mock("openclaw/plugin-sdk/provider-auth", () => ({ + resolveEnvApiKey, +})); + +vi.mock("openclaw/plugin-sdk/agent-runtime", () => ({ + loadAuthProfileStoreForRuntime, + listProfilesForProvider, +})); + +import { + defaultQaRuntimeModelForMode, + resolveQaPreferredLiveModel, +} from "./model-selection.runtime.js"; + +describe("qa model selection runtime", () => { + beforeEach(() => { + vi.clearAllMocks(); + resolveEnvApiKey.mockReturnValue(undefined); + loadAuthProfileStoreForRuntime.mockReturnValue({ profiles: {} }); + listProfilesForProvider.mockReturnValue([]); + }); + + it("keeps the OpenAI live default when an API key is configured", () => { + resolveEnvApiKey.mockReturnValue({ apiKey: "sk-test" }); + + expect(resolveQaPreferredLiveModel()).toBeUndefined(); + expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai/gpt-5.4"); + expect(loadAuthProfileStoreForRuntime).not.toHaveBeenCalled(); + }); + + it("prefers the Codex OAuth live default when only Codex auth profiles are available", () => { + listProfilesForProvider.mockImplementation((_store: unknown, provider: string) => + provider === "openai-codex" ? ["openai-codex:user@example.com"] : [], + ); + + expect(resolveQaPreferredLiveModel()).toBe("openai-codex/gpt-5.4"); + expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai-codex/gpt-5.4"); + }); + + it("leaves mock-openai defaults unchanged", () => { + listProfilesForProvider.mockImplementation((_store: unknown, provider: string) => + provider === "openai-codex" ? ["openai-codex:user@example.com"] : [], + ); + + expect(defaultQaRuntimeModelForMode("mock-openai")).toBe("mock-openai/gpt-5.4"); + expect(defaultQaRuntimeModelForMode("mock-openai", { alternate: true })).toBe( + "mock-openai/gpt-5.4-alt", + ); + }); +}); diff --git a/extensions/qa-lab/src/model-selection.runtime.ts b/extensions/qa-lab/src/model-selection.runtime.ts new file mode 100644 index 00000000000..a9a6469cd72 --- /dev/null +++ b/extensions/qa-lab/src/model-selection.runtime.ts @@ -0,0 +1,38 @@ +import { + listProfilesForProvider, + loadAuthProfileStoreForRuntime, +} from "openclaw/plugin-sdk/agent-runtime"; +import { resolveEnvApiKey } from "openclaw/plugin-sdk/provider-auth"; +import { defaultQaModelForMode, type QaProviderModeInput } from "./model-selection.js"; + +const QA_CODEX_OAUTH_LIVE_MODEL = "openai-codex/gpt-5.4"; + +export function resolveQaPreferredLiveModel() { + if (resolveEnvApiKey("openai")?.apiKey) { + return undefined; + } + try { + const store = loadAuthProfileStoreForRuntime(undefined, { + readOnly: true, + allowKeychainPrompt: false, + }); + return listProfilesForProvider(store, "openai-codex").length > 0 + ? QA_CODEX_OAUTH_LIVE_MODEL + : undefined; + } catch { + return undefined; + } +} + +export function defaultQaRuntimeModelForMode( + mode: QaProviderModeInput, + options?: { + alternate?: boolean; + preferredLiveModel?: string; + }, +) { + return defaultQaModelForMode(mode, { + ...options, + preferredLiveModel: options?.preferredLiveModel ?? resolveQaPreferredLiveModel(), + }); +} diff --git a/extensions/qa-lab/src/run-config.test.ts b/extensions/qa-lab/src/run-config.test.ts index 5bf4b534356..de2d70695dc 100644 --- a/extensions/qa-lab/src/run-config.test.ts +++ b/extensions/qa-lab/src/run-config.test.ts @@ -1,4 +1,19 @@ -import { describe, expect, it } from "vitest"; +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const { defaultQaRuntimeModelForMode } = vi.hoisted(() => ({ + defaultQaRuntimeModelForMode: vi.fn<(mode: string, options?: { alternate?: boolean }) => string>( + (mode, options) => + mode === "live-frontier" + ? "openai/gpt-5.4" + : options?.alternate + ? "mock-openai/gpt-5.4-alt" + : "mock-openai/gpt-5.4", + ), +})); + +vi.mock("./model-selection.runtime.js", () => ({ + defaultQaRuntimeModelForMode, +})); import { createDefaultQaRunSelection, createIdleQaRunnerSnapshot, @@ -24,6 +39,17 @@ const scenarios = [ ]; describe("qa run config", () => { + beforeEach(() => { + defaultQaRuntimeModelForMode.mockImplementation( + (mode: string, options?: { alternate?: boolean }) => + mode === "live-frontier" + ? "openai/gpt-5.4" + : options?.alternate + ? "mock-openai/gpt-5.4-alt" + : "mock-openai/gpt-5.4", + ); + }); + it("creates a live-by-default selection that arms every scenario", () => { expect(createDefaultQaRunSelection(scenarios)).toEqual({ providerMode: "live-frontier", @@ -73,4 +99,22 @@ describe("qa run config", () => { const outputDir = createQaRunOutputDir("/tmp/openclaw-repo"); expect(outputDir.startsWith("/tmp/openclaw-repo/.artifacts/qa-e2e/lab-")).toBe(true); }); + + it("prefers the Codex OAuth default when the runtime resolver says it is available", () => { + defaultQaRuntimeModelForMode.mockImplementation((mode, options) => + mode === "live-frontier" + ? "openai-codex/gpt-5.4" + : options?.alternate + ? "mock-openai/gpt-5.4-alt" + : "mock-openai/gpt-5.4", + ); + + expect(createDefaultQaRunSelection(scenarios)).toEqual({ + providerMode: "live-frontier", + primaryModel: "openai-codex/gpt-5.4", + alternateModel: "openai-codex/gpt-5.4", + fastMode: true, + scenarioIds: ["dm-chat-baseline", "thread-lifecycle"], + }); + }); }); diff --git a/extensions/qa-lab/src/run-config.ts b/extensions/qa-lab/src/run-config.ts index fca4fd24f87..0185f816509 100644 --- a/extensions/qa-lab/src/run-config.ts +++ b/extensions/qa-lab/src/run-config.ts @@ -1,9 +1,9 @@ import path from "node:path"; import { - defaultQaModelForMode as resolveDefaultQaModelForMode, normalizeQaProviderMode as normalizeQaProviderModeInput, type QaProviderMode, } from "./model-selection.js"; +import { defaultQaRuntimeModelForMode } from "./model-selection.runtime.js"; import type { QaSeedScenario } from "./scenario-catalog.js"; export type { QaProviderMode } from "./model-selection.js"; @@ -34,7 +34,7 @@ export type QaLabRunnerSnapshot = { }; export function defaultQaModelForMode(mode: QaProviderMode, alternate = false) { - return resolveDefaultQaModelForMode(mode, alternate ? { alternate: true } : undefined); + return defaultQaRuntimeModelForMode(mode, alternate ? { alternate: true } : undefined); } export function createDefaultQaRunSelection(scenarios: QaSeedScenario[]): QaLabRunSelection { diff --git a/extensions/qa-lab/src/suite.ts b/extensions/qa-lab/src/suite.ts index d9f916eac21..77a327f6698 100644 --- a/extensions/qa-lab/src/suite.ts +++ b/extensions/qa-lab/src/suite.ts @@ -40,7 +40,6 @@ import type { import { resolveQaLiveTurnTimeoutMs } from "./live-timeout.js"; import { startQaMockOpenAiServer } from "./mock-openai-server.js"; import { - defaultQaModelForMode, isQaFastModeEnabled, normalizeQaProviderMode, type QaProviderMode, @@ -63,6 +62,7 @@ import { } from "./qa-transport.js"; import { extractQaFailureReplyText } from "./reply-failure.js"; import { renderQaMarkdownReport, type QaReportCheck, type QaReportScenario } from "./report.js"; +import { defaultQaModelForMode } from "./run-config.js"; import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js"; import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js"; import { runScenarioFlow } from "./scenario-flow-runner.js"; @@ -1495,8 +1495,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise