qa: prefer codex auth for live defaults

This commit is contained in:
pashpashpash
2026-04-12 23:17:54 -07:00
parent 666f48d9b8
commit ae4b997a00
6 changed files with 194 additions and 6 deletions

View File

@@ -14,6 +14,7 @@ const {
writeQaDockerHarnessFiles,
buildQaDockerHarnessImage,
runQaDockerUp,
defaultQaRuntimeModelForMode,
} = vi.hoisted(() => ({
runQaManualLane: vi.fn(),
runQaSuiteFromRuntime: vi.fn(),
@@ -25,6 +26,14 @@ const {
writeQaDockerHarnessFiles: vi.fn(),
buildQaDockerHarnessImage: vi.fn(),
runQaDockerUp: vi.fn(),
defaultQaRuntimeModelForMode: vi.fn<(mode: string, options?: { alternate?: boolean }) => string>(
(mode, options) =>
mode === "live-frontier"
? "openai/gpt-5.4"
: options?.alternate
? "mock-openai/gpt-5.4-alt"
: "mock-openai/gpt-5.4",
),
}));
vi.mock("./manual-lane.runtime.js", () => ({
@@ -64,6 +73,10 @@ vi.mock("./docker-up.runtime.js", () => ({
runQaDockerUp,
}));
vi.mock("./model-selection.runtime.js", () => ({
defaultQaRuntimeModelForMode,
}));
import { resolveRepoRelativeOutputDir } from "./cli-paths.js";
import {
runQaLabSelfCheckCommand,
@@ -93,6 +106,14 @@ describe("qa cli runtime", () => {
writeQaDockerHarnessFiles.mockReset();
buildQaDockerHarnessImage.mockReset();
runQaDockerUp.mockReset();
defaultQaRuntimeModelForMode.mockImplementation(
(mode: string, options?: { alternate?: boolean }) =>
mode === "live-frontier"
? "openai/gpt-5.4"
: options?.alternate
? "mock-openai/gpt-5.4-alt"
: "mock-openai/gpt-5.4",
);
runQaSuiteFromRuntime.mockResolvedValue({
watchUrl: "http://127.0.0.1:43124",
reportPath: "/tmp/report.md",
@@ -690,6 +711,32 @@ describe("qa cli runtime", () => {
);
});
it("defaults manual frontier runs onto Codex OAuth when the runtime resolver prefers it", async () => {
defaultQaRuntimeModelForMode.mockImplementation((mode, options) =>
mode === "live-frontier"
? "openai-codex/gpt-5.4"
: options?.alternate
? "mock-openai/gpt-5.4-alt"
: "mock-openai/gpt-5.4",
);
await runQaManualLaneCommand({
repoRoot: "/tmp/openclaw-repo",
message: "read qa kickoff and reply short",
});
expect(runQaManualLane).toHaveBeenCalledWith({
repoRoot: path.resolve("/tmp/openclaw-repo"),
transportId: "qa-channel",
providerMode: "live-frontier",
primaryModel: "openai-codex/gpt-5.4",
alternateModel: "openai-codex/gpt-5.4",
fastMode: undefined,
message: "read qa kickoff and reply short",
timeoutMs: undefined,
});
});
it("resolves self-check repo-root-relative paths before starting the lab server", async () => {
await runQaLabSelfCheckCommand({
repoRoot: "/tmp/openclaw-repo",

View File

@@ -0,0 +1,60 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
// Mock fns are created via vi.hoisted so the vi.mock factories below -- which
// vitest hoists to the very top of the module -- can close over them before
// this file's own const declarations have executed.
const { resolveEnvApiKey, loadAuthProfileStoreForRuntime, listProfilesForProvider } = vi.hoisted(
  () => ({
    resolveEnvApiKey: vi.fn(),
    loadAuthProfileStoreForRuntime: vi.fn(),
    listProfilesForProvider: vi.fn(),
  }),
);
// Replace the SDK auth helpers so tests can simulate the three auth states:
// env API key present, Codex OAuth profiles present, or neither.
vi.mock("openclaw/plugin-sdk/provider-auth", () => ({
  resolveEnvApiKey,
}));
vi.mock("openclaw/plugin-sdk/agent-runtime", () => ({
  loadAuthProfileStoreForRuntime,
  listProfilesForProvider,
}));
import {
  defaultQaRuntimeModelForMode,
  resolveQaPreferredLiveModel,
} from "./model-selection.runtime.js";
describe("qa model selection runtime", () => {
  beforeEach(() => {
    // Baseline for every test: no OpenAI env key, an empty auth-profile
    // store, and no profiles for any provider. Individual tests override
    // the specific mock they exercise.
    vi.clearAllMocks();
    resolveEnvApiKey.mockReturnValue(undefined);
    loadAuthProfileStoreForRuntime.mockReturnValue({ profiles: {} });
    listProfilesForProvider.mockReturnValue([]);
  });
  it("keeps the OpenAI live default when an API key is configured", () => {
    resolveEnvApiKey.mockReturnValue({ apiKey: "sk-test" });
    // An env API key short-circuits the resolver: no preferred override,
    // and the profile store must not even be loaded.
    expect(resolveQaPreferredLiveModel()).toBeUndefined();
    expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai/gpt-5.4");
    expect(loadAuthProfileStoreForRuntime).not.toHaveBeenCalled();
  });
  it("prefers the Codex OAuth live default when only Codex auth profiles are available", () => {
    listProfilesForProvider.mockImplementation((_store: unknown, provider: string) =>
      provider === "openai-codex" ? ["openai-codex:user@example.com"] : [],
    );
    expect(resolveQaPreferredLiveModel()).toBe("openai-codex/gpt-5.4");
    expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai-codex/gpt-5.4");
  });
  it("leaves mock-openai defaults unchanged", () => {
    // Even with Codex profiles available, only the live mode is redirected;
    // mock modes keep their deterministic mock models.
    listProfilesForProvider.mockImplementation((_store: unknown, provider: string) =>
      provider === "openai-codex" ? ["openai-codex:user@example.com"] : [],
    );
    expect(defaultQaRuntimeModelForMode("mock-openai")).toBe("mock-openai/gpt-5.4");
    expect(defaultQaRuntimeModelForMode("mock-openai", { alternate: true })).toBe(
      "mock-openai/gpt-5.4-alt",
    );
  });
});

View File

@@ -0,0 +1,38 @@
import {
listProfilesForProvider,
loadAuthProfileStoreForRuntime,
} from "openclaw/plugin-sdk/agent-runtime";
import { resolveEnvApiKey } from "openclaw/plugin-sdk/provider-auth";
import { defaultQaModelForMode, type QaProviderModeInput } from "./model-selection.js";
// Live-mode model to prefer when the only usable OpenAI auth is a Codex OAuth profile.
const QA_CODEX_OAUTH_LIVE_MODEL = "openai-codex/gpt-5.4";

/**
 * Work out whether QA live runs should be redirected onto the Codex OAuth
 * model. Returns the Codex model id when no OpenAI env API key is set but at
 * least one "openai-codex" auth profile exists; returns undefined otherwise
 * (including when the profile store cannot be read), leaving the standard
 * live default in place.
 */
export function resolveQaPreferredLiveModel() {
  // A configured OpenAI API key wins outright -- keep the stock live default.
  const envAuth = resolveEnvApiKey("openai");
  if (envAuth?.apiKey) {
    return undefined;
  }
  try {
    // Read-only, prompt-free store load: resolving a default must never
    // trigger an interactive keychain prompt.
    const store = loadAuthProfileStoreForRuntime(undefined, {
      readOnly: true,
      allowKeychainPrompt: false,
    });
    const codexProfiles = listProfilesForProvider(store, "openai-codex");
    if (codexProfiles.length > 0) {
      return QA_CODEX_OAUTH_LIVE_MODEL;
    }
    return undefined;
  } catch {
    // Best-effort: any store failure simply means "no preference".
    return undefined;
  }
}
export function defaultQaRuntimeModelForMode(
mode: QaProviderModeInput,
options?: {
alternate?: boolean;
preferredLiveModel?: string;
},
) {
return defaultQaModelForMode(mode, {
...options,
preferredLiveModel: options?.preferredLiveModel ?? resolveQaPreferredLiveModel(),
});
}

View File

@@ -1,4 +1,19 @@
import { describe, expect, it } from "vitest";
import { beforeEach, describe, expect, it, vi } from "vitest";
const { defaultQaRuntimeModelForMode } = vi.hoisted(() => ({
defaultQaRuntimeModelForMode: vi.fn<(mode: string, options?: { alternate?: boolean }) => string>(
(mode, options) =>
mode === "live-frontier"
? "openai/gpt-5.4"
: options?.alternate
? "mock-openai/gpt-5.4-alt"
: "mock-openai/gpt-5.4",
),
}));
vi.mock("./model-selection.runtime.js", () => ({
defaultQaRuntimeModelForMode,
}));
import {
createDefaultQaRunSelection,
createIdleQaRunnerSnapshot,
@@ -24,6 +39,17 @@ const scenarios = [
];
describe("qa run config", () => {
beforeEach(() => {
defaultQaRuntimeModelForMode.mockImplementation(
(mode: string, options?: { alternate?: boolean }) =>
mode === "live-frontier"
? "openai/gpt-5.4"
: options?.alternate
? "mock-openai/gpt-5.4-alt"
: "mock-openai/gpt-5.4",
);
});
it("creates a live-by-default selection that arms every scenario", () => {
expect(createDefaultQaRunSelection(scenarios)).toEqual({
providerMode: "live-frontier",
@@ -73,4 +99,22 @@ describe("qa run config", () => {
const outputDir = createQaRunOutputDir("/tmp/openclaw-repo");
expect(outputDir.startsWith("/tmp/openclaw-repo/.artifacts/qa-e2e/lab-")).toBe(true);
});
it("prefers the Codex OAuth default when the runtime resolver says it is available", () => {
defaultQaRuntimeModelForMode.mockImplementation((mode, options) =>
mode === "live-frontier"
? "openai-codex/gpt-5.4"
: options?.alternate
? "mock-openai/gpt-5.4-alt"
: "mock-openai/gpt-5.4",
);
expect(createDefaultQaRunSelection(scenarios)).toEqual({
providerMode: "live-frontier",
primaryModel: "openai-codex/gpt-5.4",
alternateModel: "openai-codex/gpt-5.4",
fastMode: true,
scenarioIds: ["dm-chat-baseline", "thread-lifecycle"],
});
});
});

View File

@@ -1,9 +1,9 @@
import path from "node:path";
import {
defaultQaModelForMode as resolveDefaultQaModelForMode,
normalizeQaProviderMode as normalizeQaProviderModeInput,
type QaProviderMode,
} from "./model-selection.js";
import { defaultQaRuntimeModelForMode } from "./model-selection.runtime.js";
import type { QaSeedScenario } from "./scenario-catalog.js";
export type { QaProviderMode } from "./model-selection.js";
@@ -34,7 +34,7 @@ export type QaLabRunnerSnapshot = {
};
export function defaultQaModelForMode(mode: QaProviderMode, alternate = false) {
return resolveDefaultQaModelForMode(mode, alternate ? { alternate: true } : undefined);
return defaultQaRuntimeModelForMode(mode, alternate ? { alternate: true } : undefined);
}
export function createDefaultQaRunSelection(scenarios: QaSeedScenario[]): QaLabRunSelection {

View File

@@ -40,7 +40,6 @@ import type {
import { resolveQaLiveTurnTimeoutMs } from "./live-timeout.js";
import { startQaMockOpenAiServer } from "./mock-openai-server.js";
import {
defaultQaModelForMode,
isQaFastModeEnabled,
normalizeQaProviderMode,
type QaProviderMode,
@@ -63,6 +62,7 @@ import {
} from "./qa-transport.js";
import { extractQaFailureReplyText } from "./reply-failure.js";
import { renderQaMarkdownReport, type QaReportCheck, type QaReportScenario } from "./report.js";
import { defaultQaModelForMode } from "./run-config.js";
import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js";
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
import { runScenarioFlow } from "./scenario-flow-runner.js";
@@ -1495,8 +1495,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
const providerMode = normalizeQaProviderMode(params?.providerMode ?? "live-frontier");
const transportId = normalizeQaTransportId(params?.transportId);
const primaryModel = params?.primaryModel ?? defaultQaModelForMode(providerMode);
const alternateModel =
params?.alternateModel ?? defaultQaModelForMode(providerMode, { alternate: true });
const alternateModel = params?.alternateModel ?? defaultQaModelForMode(providerMode, true);
const fastMode =
typeof params?.fastMode === "boolean"
? params.fastMode