From 42dddbbe785aff0b521efbeba3b4e529185c34d2 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 27 Apr 2026 23:02:26 +0100 Subject: [PATCH] fix(cli): streamline local model probes --- CHANGELOG.md | 1 + docs/cli/infer.md | 19 ++- docs/gateway/local-models.md | 21 ++- docs/providers/ollama.md | 27 +++- extensions/ollama/index.test.ts | 51 ++++++++ extensions/ollama/index.ts | 73 +++++++++++ extensions/ollama/ollama.live.test.ts | 131 +++++++++++++++++++ src/agents/model-auth.test.ts | 41 ++++++ src/agents/model-auth.ts | 16 +++ src/agents/pi-embedded-runner/model.test.ts | 25 +++- src/agents/simple-completion-runtime.test.ts | 109 ++++++++++++++- src/agents/simple-completion-runtime.ts | 15 ++- src/cli/capability-cli.test.ts | 64 ++++++--- src/cli/capability-cli.ts | 68 ++++++---- 14 files changed, 605 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4cb8692eb9..d041fbee119 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- CLI/Ollama: run local `infer model run` through the lean provider completion path and skip global model discovery for one-shot local probes, so Ollama smoke tests no longer pay full chat-agent/tool startup cost or hang before the native `/api/chat` request. Fixes #72851. Thanks @TotalRes2020. - Channels/commands: make generated `/dock-*` commands switch the active session reply route through `session.identityLinks` instead of falling through to normal chat. Fixes #69206; carries forward #73033. Thanks @clawbones and @michaelatamuk. - Providers/Cloudflare AI Gateway: strip assistant prefill turns from Anthropic Messages payloads when thinking is enabled, so Claude requests through Cloudflare AI Gateway no longer fail Anthropic conversation-ending validation. Fixes #72905; carries forward #73005. Thanks @AaronFaby and @sahilsatralkar. - Gateway/startup: keep primary-model startup prewarm on scoped metadata preparation, let native approval bootstraps retry outside channel startup, and skip the global hook runner when no `gateway_start` hook is registered, so clean post-ready sidecar work stays off the critical path. Refs #72846. Thanks @RayWoo, @livekm0309, and @mrz1836. diff --git a/docs/cli/infer.md b/docs/cli/infer.md index 8ab8faa01ed..fe82af9be73 100644 --- a/docs/cli/infer.md +++ b/docs/cli/infer.md @@ -130,7 +130,8 @@ This table maps common inference tasks to the corresponding infer command. - Stateless execution commands default to local. - Gateway-managed state commands default to gateway. - The normal local path does not require the gateway to be running. -- `model run` is one-shot. MCP servers opened through the agent runtime for that command are retired after the reply for both local and `--gateway` execution, so repeated scripted invocations do not keep stdio MCP child processes alive. +- Local `model run` is a lean one-shot provider completion. It resolves the configured agent model and auth, but does not start a chat-agent turn, load tools, or open bundled MCP servers. +- `model run --gateway` still uses the Gateway agent runtime so it can exercise the same routed runtime path as a normal Gateway-backed turn. MCP servers opened through that runtime are retired after the reply, so repeated scripted invocations do not keep stdio MCP child processes alive. 
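+
+For example, the two execution paths described in the notes above can be exercised
+with the same probe; only the transport flag changes. This is a usage sketch using
+the flags documented on this page, and the model ref is purely illustrative:
+
+```bash
+# Lean local path: resolves the configured model and auth, sends one completion,
+# and does not start a chat-agent turn or require the Gateway to be running.
+openclaw infer model run --local --model openai/gpt-4.1 --prompt "Reply with exactly: pong" --json
+
+# Gateway path: routes the same probe through the Gateway agent runtime.
+openclaw infer model run --gateway --model openai/gpt-4.1 --prompt "Reply with exactly: pong" --json
+```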
 ## Model
 
@@ -143,10 +144,22 @@ openclaw infer model providers --json
 openclaw infer model inspect --name gpt-5.5 --json
 ```
 
+Use full `<provider>/<model>` refs to smoke-test a specific provider without
+starting the Gateway or loading the full agent tool surface:
+
+```bash
+openclaw infer model run --local --model anthropic/claude-sonnet-4-6 --prompt "Reply with exactly: pong" --json
+openclaw infer model run --local --model cerebras/zai-glm-4.7 --prompt "Reply with exactly: pong" --json
+openclaw infer model run --local --model google/gemini-2.5-flash --prompt "Reply with exactly: pong" --json
+openclaw infer model run --local --model groq/llama-3.1-8b-instant --prompt "Reply with exactly: pong" --json
+openclaw infer model run --local --model mistral/mistral-small-latest --prompt "Reply with exactly: pong" --json
+openclaw infer model run --local --model openai/gpt-4.1 --prompt "Reply with exactly: pong" --json
+```
+
 Notes:
 
-- `model run` reuses the agent runtime so provider/model overrides behave like normal agent execution.
-- Because `model run` is intended for headless automation, it does not retain per-session bundled MCP runtimes after the command finishes.
+- Local `model run` is the narrowest CLI smoke test for provider/model/auth health because it sends only the supplied prompt to the selected model.
+- Use `model run --gateway` when you need to test Gateway routing, agent-runtime setup, or Gateway-managed provider state instead of the lean local completion path.
 - `model auth login`, `model auth logout`, and `model auth status` manage saved provider auth state.
 
 ## Image
diff --git a/docs/gateway/local-models.md b/docs/gateway/local-models.md
index 842f9a081b7..a5480dd1279 100644
--- a/docs/gateway/local-models.md
+++ b/docs/gateway/local-models.md
@@ -239,14 +239,20 @@ Compatibility notes for stricter OpenAI-compatible backends:
   ```
 
 - Some smaller or stricter local backends are unstable with OpenClaw's full
-  agent-runtime prompt shape, especially when tool schemas are included. If the
-  backend works for tiny direct `/v1/chat/completions` calls but fails on normal
-  OpenClaw agent turns, first try
+  agent-runtime prompt shape, especially when tool schemas are included. First
+  verify the provider path with the lean local probe:
+
+  ```bash
+  openclaw infer model run --local --model <provider>/<model> --prompt "Reply with exactly: pong" --json
+  ```
+
+  If that succeeds but normal OpenClaw agent turns fail, first try
   `agents.defaults.experimental.localModelLean: true` to drop heavyweight
   default tools like `browser`, `cron`, and `message`; this is an experimental
   flag, not a stable default-mode setting. See
   [Experimental Features](/concepts/experimental-features). If that still
   fails, try `models.providers.<id>.models[].compat.supportsTools: false`.
+
 - If the backend still fails only on larger OpenClaw runs, the remaining issue
   is usually upstream model/server capacity or a backend bug, not OpenClaw's
   transport layer.
@@ -264,10 +270,11 @@ Compatibility notes for stricter OpenAI-compatible backends:
 - Context errors? Lower `contextWindow` or raise your server limit.
 - OpenAI-compatible server returns `messages[].content ... expected a string`?
   Add `compat.requiresStringContent: true` on that model entry.
-- Direct tiny `/v1/chat/completions` calls work, but `openclaw infer model run`
-  fails on Gemma or another local model? Disable tool schemas first with
-  `compat.supportsTools: false`, then retest. If the server still crashes only
-  on larger OpenClaw prompts, treat it as an upstream server/model limitation.
+- Direct tiny `/v1/chat/completions` calls work, but `openclaw infer model run --local`
+  fails on Gemma or another local model? Check the provider URL, model ref, auth
+  marker, and server logs first; local `model run` does not include agent tools.
+  If local `model run` succeeds but larger agent turns fail, reduce the agent
+  tool surface with `localModelLean` or `compat.supportsTools: false`.
 - Tool calls show up as raw JSON/XML/ReAct text, or the provider returns an
   empty `tool_calls` array? Do not add a proxy that blindly converts assistant
   text into tool execution. Fix the server chat template/parser first. If the
diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md
index c189c976eb4..93e26b5e7dc 100644
--- a/docs/providers/ollama.md
+++ b/docs/providers/ollama.md
@@ -185,7 +185,7 @@ When you set `OLLAMA_API_KEY` (or an auth profile) and **do not** define `models
 | Token limits | Sets `maxTokens` to the default Ollama max-token cap used by OpenClaw |
 | Costs | Sets all costs to `0` |
 
-This avoids manual model entries while keeping the catalog aligned with the local Ollama instance.
+This avoids manual model entries while keeping the catalog aligned with the local Ollama instance. You can use a full ref such as `ollama/<model>:latest` in local `infer model run`; OpenClaw resolves that installed model from Ollama's live catalog without requiring a hand-written `models.json` entry.
 
 ```bash
 # See what models are available
@@ -193,6 +193,31 @@
 ollama list
 openclaw models list
 ```
 
+For a narrow text-generation smoke test that avoids the full agent tool surface,
+use local `infer model run` with a full Ollama model ref:
+
+```bash
+OLLAMA_API_KEY=ollama-local \
+  openclaw infer model run \
+  --local \
+  --model ollama/llama3.2:latest \
+  --prompt "Reply with exactly: pong" \
+  --json
+```
+
+That path still uses OpenClaw's configured provider, auth, and native Ollama
+transport, but it does not start a chat-agent turn or load MCP/tool context. If
+this succeeds while normal agent replies fail, troubleshoot the model's agent
+prompt/tool capacity next.
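+
+In scripts, the `--json` envelope can be checked directly: `ok`, `provider`, and
+`model` echo what was resolved, and the reply text lands in `outputs[0].text`. A
+small sketch, assuming `jq` is available on the host:
+
+```bash
+OLLAMA_API_KEY=ollama-local \
+  openclaw infer model run --local --model ollama/llama3.2:latest \
+  --prompt "Reply with exactly: pong" --json \
+  | jq -r '.outputs[0].text'
+```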
+ +Live-verify the local text path, native stream path, and embeddings against +local Ollama with: + +```bash +OPENCLAW_LIVE_TEST=1 OPENCLAW_LIVE_OLLAMA=1 OPENCLAW_LIVE_OLLAMA_WEB_SEARCH=0 \ + pnpm test:live -- extensions/ollama/ollama.live.test.ts +``` + To add a new model, simply pull it with Ollama: ```bash diff --git a/extensions/ollama/index.test.ts b/extensions/ollama/index.test.ts index 198965abb37..d4b4560b37c 100644 --- a/extensions/ollama/index.test.ts +++ b/extensions/ollama/index.test.ts @@ -369,6 +369,57 @@ describe("ollama plugin", () => { }); }); + it("resolves dynamic local models from Ollama without generating PI models.json", async () => { + const provider = registerProvider(); + const previous = process.env.OLLAMA_API_KEY; + process.env.OLLAMA_API_KEY = "ollama-local"; + buildOllamaProviderMock.mockResolvedValueOnce({ + baseUrl: "http://127.0.0.1:11434", + api: "ollama", + models: [ + { + id: "llama3.2:latest", + name: "llama3.2:latest", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 8192, + maxTokens: 2048, + }, + ], + }); + + try { + await provider.prepareDynamicModel?.({ + config: {}, + provider: "ollama", + modelId: "llama3.2:latest", + modelRegistry: { find: vi.fn(() => null) }, + } as never); + + expect( + provider.resolveDynamicModel?.({ + config: {}, + provider: "ollama", + modelId: "llama3.2:latest", + modelRegistry: { find: vi.fn(() => null) }, + } as never), + ).toMatchObject({ + provider: "ollama", + id: "llama3.2:latest", + api: "ollama", + baseUrl: "http://127.0.0.1:11434", + }); + expect(buildOllamaProviderMock).toHaveBeenCalledWith(undefined, { quiet: true }); + } finally { + if (previous === undefined) { + delete process.env.OLLAMA_API_KEY; + } else { + process.env.OLLAMA_API_KEY = previous; + } + } + }); + it("skips implicit localhost discovery when a custom remote Ollama provider is configured", async () => { const provider = registerProvider(); diff --git a/extensions/ollama/index.ts b/extensions/ollama/index.ts index d303e921ec8..46b55691e2b 100644 --- a/extensions/ollama/index.ts +++ b/extensions/ollama/index.ts @@ -7,8 +7,13 @@ import { type ProviderAuthMethodNonInteractiveContext, type ProviderAuthResult, type ProviderDiscoveryContext, + type ProviderRuntimeModel, } from "openclaw/plugin-sdk/plugin-entry"; import { buildApiKeyCredential } from "openclaw/plugin-sdk/provider-auth"; +import type { + ModelDefinitionConfig, + ModelProviderConfig, +} from "openclaw/plugin-sdk/provider-model-shared"; import { buildOpenAICompatibleReplayPolicy, OPENAI_COMPATIBLE_REPLAY_HOOKS, @@ -57,6 +62,44 @@ function usesOllamaOpenAICompatTransport(model: { ); } +const dynamicModelCache = new Map(); + +function buildDynamicCacheKey(provider: string, baseUrl: string | undefined): string { + return `${provider}\0${baseUrl ?? ""}`; +} + +function hasOllamaDiscoverySignal(providerConfig: ModelProviderConfig | undefined): boolean { + return ( + Boolean(process.env.OLLAMA_API_KEY?.trim()) || + shouldUseSyntheticOllamaAuth(providerConfig) || + Boolean(providerConfig?.apiKey) + ); +} + +function toDynamicOllamaModel(params: { + provider: string; + providerConfig: ModelProviderConfig; + model: ModelDefinitionConfig; +}): ProviderRuntimeModel { + const input = (params.model.input ?? ["text"]).filter( + (value): value is "text" | "image" => value === "text" || value === "image", + ); + return { + id: params.model.id, + name: params.model.name ?? 
params.model.id, + provider: params.provider, + api: "ollama", + baseUrl: readProviderBaseUrl(params.providerConfig) ?? "", + reasoning: params.model.reasoning ?? false, + input: input.length > 0 ? input : ["text"], + cost: params.model.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: params.model.contextWindow ?? 8192, + maxTokens: params.model.maxTokens ?? 8192, + ...(params.model.compat ? { compat: params.model.compat as never } : {}), + ...(params.model.params ? { params: params.model.params } : {}), + }; +} + export default definePluginEntry({ id: "ollama", name: "Ollama Provider", @@ -215,6 +258,36 @@ export default definePluginEntry({ }, shouldDeferSyntheticProfileAuth: ({ resolvedApiKey }) => resolvedApiKey?.trim() === OLLAMA_DEFAULT_API_KEY, + prepareDynamicModel: async (ctx) => { + const providerConfig = resolveConfiguredOllamaProviderConfig({ + config: ctx.config, + providerId: ctx.provider, + }); + if (!hasOllamaDiscoverySignal(providerConfig)) { + return; + } + const baseUrl = readProviderBaseUrl(providerConfig); + const provider = await buildOllamaProvider(baseUrl, { quiet: true }); + dynamicModelCache.set( + buildDynamicCacheKey(ctx.provider, baseUrl), + (provider.models ?? []).map((model) => + toDynamicOllamaModel({ + provider: ctx.provider, + providerConfig: provider, + model, + }), + ), + ); + }, + resolveDynamicModel: (ctx) => { + const providerConfig = resolveConfiguredOllamaProviderConfig({ + config: ctx.config, + providerId: ctx.provider, + }); + return dynamicModelCache + .get(buildDynamicCacheKey(ctx.provider, readProviderBaseUrl(providerConfig))) + ?.find((model) => model.id === ctx.modelId); + }, buildUnknownModelHint: () => "Ollama requires authentication to be registered as a provider. " + 'Set OLLAMA_API_KEY="ollama-local" (any value works) or run "openclaw configure". 
' + diff --git a/extensions/ollama/ollama.live.test.ts b/extensions/ollama/ollama.live.test.ts index fa955b85431..9c2e9eb1179 100644 --- a/extensions/ollama/ollama.live.test.ts +++ b/extensions/ollama/ollama.live.test.ts @@ -1,3 +1,8 @@ +import { spawnSync } from "node:child_process"; +import * as fsSync from "node:fs"; +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; import { describe, expect, it } from "vitest"; import { createOllamaEmbeddingProvider } from "./src/embedding-provider.js"; import { createOllamaStreamFn } from "./src/stream.js"; @@ -20,7 +25,133 @@ async function collectStreamEvents(stream: AsyncIterable): Promise { return events; } +async function withTempOpenClawState(run: (paths: { root: string }) => Promise): Promise { + const root = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-ollama-cli-live-")); + try { + await fs.writeFile( + path.join(root, "openclaw.json"), + JSON.stringify( + { + models: { + providers: { + ollama: { + api: "ollama", + baseUrl: OLLAMA_BASE_URL, + apiKey: "ollama-local", + models: [], + }, + }, + }, + }, + null, + 2, + ), + ); + return await run({ root }); + } finally { + await fs.rm(root, { recursive: true, force: true }); + } +} + +async function runOpenClawCli(args: string[], env: NodeJS.ProcessEnv) { + const outputRoot = fsSync.mkdtempSync(path.join(os.tmpdir(), "openclaw-ollama-cli-output-")); + const stdoutPath = path.join(outputRoot, "stdout.txt"); + const stderrPath = path.join(outputRoot, "stderr.txt"); + const stdoutFd = fsSync.openSync(stdoutPath, "w"); + const stderrFd = fsSync.openSync(stderrPath, "w"); + let stdoutClosed = false; + let stderrClosed = false; + try { + const result = spawnSync(process.execPath, ["openclaw.mjs", ...args], { + cwd: process.cwd(), + env, + timeout: 90_000, + stdio: ["ignore", stdoutFd, stderrFd], + }); + fsSync.closeSync(stdoutFd); + stdoutClosed = true; + fsSync.closeSync(stderrFd); + stderrClosed = true; + return { + exitCode: result.status ?? (result.error ? 1 : 0), + stdout: fsSync.readFileSync(stdoutPath, "utf8"), + stderr: fsSync.readFileSync(stderrPath, "utf8"), + }; + } finally { + if (!stdoutClosed) { + fsSync.closeSync(stdoutFd); + } + if (!stderrClosed) { + fsSync.closeSync(stderrFd); + } + fsSync.rmSync(outputRoot, { recursive: true, force: true }); + } +} + +function parseJsonEnvelope(stdout: string): Record { + const trimmed = stdout.trim(); + const jsonStart = trimmed.lastIndexOf("\n{"); + const rawJson = jsonStart >= 0 ? 
trimmed.slice(jsonStart + 1) : trimmed; + return JSON.parse(rawJson) as Record; +} + +function buildCliEnv(root: string): NodeJS.ProcessEnv { + return { + PATH: process.env.PATH, + HOME: process.env.HOME, + USER: process.env.USER, + TMPDIR: process.env.TMPDIR, + NODE_PATH: process.env.NODE_PATH, + NODE_OPTIONS: process.env.NODE_OPTIONS, + OPENCLAW_LIVE_TEST: "1", + OPENCLAW_LIVE_OLLAMA: "1", + OPENCLAW_LIVE_OLLAMA_WEB_SEARCH: "0", + OPENCLAW_STATE_DIR: path.join(root, "state"), + OPENCLAW_CONFIG_PATH: path.join(root, "openclaw.json"), + OPENCLAW_NO_RESPAWN: "1", + OPENCLAW_TEST_FAST: "1", + OLLAMA_API_KEY: "ollama-local", + }; +} + describe.skipIf(!LIVE)("ollama live", () => { + it("runs infer model run through the local CLI path without PI model discovery", async () => { + await withTempOpenClawState(async ({ root }) => { + const result = await runOpenClawCli( + [ + "infer", + "model", + "run", + "--local", + "--model", + `ollama/${CHAT_MODEL}`, + "--prompt", + "Reply with exactly one word: pong", + "--json", + ], + buildCliEnv(root), + ); + + expect(result.exitCode).toBe(0); + expect(result.stderr).not.toContain("[agents/auth-profiles]"); + expect(result.stdout.trim(), result.stderr).not.toHaveLength(0); + const payload = parseJsonEnvelope(result.stdout) as { + ok?: boolean; + transport?: string; + provider?: string; + model?: string; + outputs?: Array<{ text?: string }>; + }; + expect(payload).toMatchObject({ + ok: true, + transport: "local", + provider: "ollama", + model: CHAT_MODEL, + }); + expect(payload.outputs?.[0]?.text?.trim().length ?? 0).toBeGreaterThan(0); + }); + }, 120_000); + it("runs native chat with a custom provider prefix and normalized tool schemas", async () => { const streamFn = createOllamaStreamFn(OLLAMA_BASE_URL); let payload: diff --git a/src/agents/model-auth.test.ts b/src/agents/model-auth.test.ts index 654067ee9c2..a1812cacf40 100644 --- a/src/agents/model-auth.test.ts +++ b/src/agents/model-auth.test.ts @@ -15,6 +15,9 @@ vi.mock("../plugins/plugin-registry.js", () => ({ { origin: "bundled", nonSecretAuthMarkers: ["gcp-vertex-credentials", "ollama-local"], + providerAuthEnvVars: { + ollama: ["OLLAMA_API_KEY"], + }, }, ], }), @@ -163,6 +166,20 @@ async function withoutEnv(key: string, fn: () => Promise): Promise { } } +async function withEnv(key: string, value: string, fn: () => Promise): Promise { + const previous = process.env[key]; + process.env[key] = value; + try { + return await fn(); + } finally { + if (previous === undefined) { + delete process.env[key]; + } else { + process.env[key] = previous; + } + } +} + function createCustomProviderConfig( baseUrl: string, modelId = "llama3", @@ -809,6 +826,30 @@ describe("resolveApiKeyForProvider", () => { mode: "api-key", }); }); + + it("prefers non-secret local env markers over ambient profiles", async () => { + const resolved = await withEnv("OLLAMA_API_KEY", "ollama-local", () => + resolveApiKeyForProvider({ + provider: "ollama", + store: { + version: 1, + profiles: { + "ollama:default": { + type: "api_key", + provider: "ollama", + key: "ollama-cloud-profile", // pragma: allowlist secret + }, + }, + }, + }), + ); + + expect(resolved).toMatchObject({ + apiKey: "ollama-local", + mode: "api-key", + }); + expect(resolved.source).toContain("OLLAMA_API_KEY"); + }); }); describe("resolveApiKeyForProvider – synthetic local auth for custom providers", () => { diff --git a/src/agents/model-auth.ts b/src/agents/model-auth.ts index 65046974c9c..285964b3322 100644 --- a/src/agents/model-auth.ts +++ 
b/src/agents/model-auth.ts @@ -523,6 +523,22 @@ export async function resolveApiKeyForProvider(params: { } const providerConfig = resolveProviderConfig(cfg, provider); + const configuredLocalKey = resolveUsableCustomProviderApiKey({ cfg, provider }); + if (configuredLocalKey && isNonSecretApiKeyMarker(configuredLocalKey.apiKey)) { + return { + apiKey: configuredLocalKey.apiKey, + source: configuredLocalKey.source, + mode: "api-key", + }; + } + const localMarkerEnv = resolveEnvApiKey(provider); + if (localMarkerEnv && isNonSecretApiKeyMarker(localMarkerEnv.apiKey)) { + return { + apiKey: localMarkerEnv.apiKey, + source: localMarkerEnv.source, + mode: "api-key", + }; + } const store = params.store ?? ensureAuthProfileStore(params.agentDir); const order = resolveAuthProfileOrder({ cfg, diff --git a/src/agents/pi-embedded-runner/model.test.ts b/src/agents/pi-embedded-runner/model.test.ts index 3f1ece55438..f1913cc9656 100644 --- a/src/agents/pi-embedded-runner/model.test.ts +++ b/src/agents/pi-embedded-runner/model.test.ts @@ -1,5 +1,5 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; -import { discoverModels } from "../pi-model-discovery.js"; +import { discoverAuthStorage, discoverModels } from "../pi-model-discovery.js"; import { createProviderRuntimeTestMock } from "./model.provider-runtime.test-support.js"; vi.mock("../model-suppression.js", () => ({ @@ -55,6 +55,8 @@ import { beforeEach(() => { resetMockDiscoverModels(discoverModels); + vi.mocked(discoverModels).mockClear(); + vi.mocked(discoverAuthStorage).mockClear(); mockGetOpenRouterModelCapabilities.mockReset(); mockGetOpenRouterModelCapabilities.mockReturnValue(undefined); mockLoadOpenRouterModelCapabilities.mockReset(); @@ -110,6 +112,27 @@ function resolveModelAsyncForTest( } describe("resolveModel", () => { + it("skips PI auth and model discovery during dynamic model resolution", async () => { + const result = await resolveModelAsync( + "openrouter", + "openrouter/auto", + "/tmp/agent", + undefined, + { + runtimeHooks: createRuntimeHooks(), + skipPiDiscovery: true, + }, + ); + + expect(result.error).toBeUndefined(); + expect(result.model).toMatchObject({ + provider: "openrouter", + id: "openrouter/auto", + }); + expect(discoverAuthStorage).not.toHaveBeenCalled(); + expect(discoverModels).not.toHaveBeenCalled(); + }); + it("defaults model input to text when discovery omits input", () => { mockDiscoveredModel(discoverModels, { provider: "custom", diff --git a/src/agents/simple-completion-runtime.test.ts b/src/agents/simple-completion-runtime.test.ts index 925d3be6428..06eebe6f682 100644 --- a/src/agents/simple-completion-runtime.test.ts +++ b/src/agents/simple-completion-runtime.test.ts @@ -1,16 +1,29 @@ +import type { Model } from "@mariozechner/pi-ai"; import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; const hoisted = vi.hoisted(() => ({ resolveModelMock: vi.fn(), + resolveModelAsyncMock: vi.fn(), getApiKeyForModelMock: vi.fn(), applyLocalNoAuthHeaderOverrideMock: vi.fn(), setRuntimeApiKeyMock: vi.fn(), resolveCopilotApiTokenMock: vi.fn(), prepareProviderRuntimeAuthMock: vi.fn(), + prepareModelForSimpleCompletionMock: vi.fn((params: { model: unknown }) => params.model), + completeMock: vi.fn(), +})); + +vi.mock("@mariozechner/pi-ai", () => ({ + complete: hoisted.completeMock, })); vi.mock("./pi-embedded-runner/model.js", () => ({ resolveModel: hoisted.resolveModelMock, + resolveModelAsync: hoisted.resolveModelAsyncMock, +})); + +vi.mock("./simple-completion-transport.js", () => ({ + 
prepareModelForSimpleCompletion: hoisted.prepareModelForSimpleCompletionMock, })); vi.mock("./model-auth.js", () => ({ @@ -26,21 +39,30 @@ vi.mock("../plugins/provider-runtime.runtime.js", () => ({ prepareProviderRuntimeAuth: hoisted.prepareProviderRuntimeAuthMock, })); +let completeWithPreparedSimpleCompletionModel: typeof import("./simple-completion-runtime.js").completeWithPreparedSimpleCompletionModel; let prepareSimpleCompletionModel: typeof import("./simple-completion-runtime.js").prepareSimpleCompletionModel; beforeAll(async () => { - ({ prepareSimpleCompletionModel } = await import("./simple-completion-runtime.js")); + ({ completeWithPreparedSimpleCompletionModel, prepareSimpleCompletionModel } = + await import("./simple-completion-runtime.js")); }); beforeEach(() => { hoisted.resolveModelMock.mockReset(); + hoisted.resolveModelAsyncMock.mockReset(); hoisted.getApiKeyForModelMock.mockReset(); hoisted.applyLocalNoAuthHeaderOverrideMock.mockReset(); hoisted.setRuntimeApiKeyMock.mockReset(); hoisted.resolveCopilotApiTokenMock.mockReset(); hoisted.prepareProviderRuntimeAuthMock.mockReset(); + hoisted.prepareModelForSimpleCompletionMock.mockReset(); + hoisted.completeMock.mockReset(); hoisted.applyLocalNoAuthHeaderOverrideMock.mockImplementation((model: unknown) => model); + hoisted.prepareModelForSimpleCompletionMock.mockImplementation( + (params: { model: unknown }) => params.model, + ); + hoisted.completeMock.mockResolvedValue({ content: [{ type: "text", text: "ok" }] }); hoisted.resolveModelMock.mockReturnValue({ model: { @@ -52,6 +74,9 @@ beforeEach(() => { }, modelRegistry: {}, }); + hoisted.resolveModelAsyncMock.mockImplementation((...args: unknown[]) => + Promise.resolve(hoisted.resolveModelMock(...args)), + ); hoisted.getApiKeyForModelMock.mockResolvedValue({ apiKey: "sk-test", source: "env:TEST_API_KEY", @@ -405,4 +430,86 @@ describe("prepareSimpleCompletionModel", () => { }), ); }); + + it("can skip Pi model/auth discovery for config-scoped one-shot completions", async () => { + hoisted.resolveModelAsyncMock.mockResolvedValueOnce({ + model: { + provider: "ollama", + id: "llama3.2:latest", + }, + authStorage: { + setRuntimeApiKey: hoisted.setRuntimeApiKeyMock, + }, + modelRegistry: {}, + }); + hoisted.getApiKeyForModelMock.mockResolvedValueOnce({ + apiKey: "ollama-local", + source: "models.json (local marker)", + mode: "api-key", + }); + + const result = await prepareSimpleCompletionModel({ + cfg: undefined, + provider: "ollama", + modelId: "llama3.2:latest", + skipPiDiscovery: true, + }); + + expect(result).not.toHaveProperty("error"); + expect(hoisted.resolveModelMock).not.toHaveBeenCalled(); + expect(hoisted.resolveModelAsyncMock).toHaveBeenCalledWith( + "ollama", + "llama3.2:latest", + undefined, + undefined, + { + skipPiDiscovery: true, + }, + ); + }); +}); + +describe("completeWithPreparedSimpleCompletionModel", () => { + it("prepares provider-owned stream APIs before running a completion", async () => { + const model = { + provider: "ollama", + id: "llama3.2:latest", + name: "llama3.2:latest", + api: "ollama", + baseUrl: "http://127.0.0.1:11434", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 8192, + maxTokens: 1024, + } satisfies Model<"ollama">; + const preparedModel = { + ...model, + api: "openclaw-ollama-simple-test", + }; + hoisted.prepareModelForSimpleCompletionMock.mockReturnValueOnce(preparedModel); + + await completeWithPreparedSimpleCompletionModel({ + model, + auth: { + apiKey: 
"ollama-local", + source: "models.json (local marker)", + mode: "api-key", + }, + context: { + messages: [{ role: "user", content: "pong", timestamp: 1 }], + }, + }); + + expect(hoisted.prepareModelForSimpleCompletionMock).toHaveBeenCalledWith({ model }); + expect(hoisted.completeMock).toHaveBeenCalledWith( + preparedModel, + { + messages: [{ role: "user", content: "pong", timestamp: 1 }], + }, + { + apiKey: "ollama-local", + }, + ); + }); }); diff --git a/src/agents/simple-completion-runtime.ts b/src/agents/simple-completion-runtime.ts index 6bb66dc0675..04ee880b5e7 100644 --- a/src/agents/simple-completion-runtime.ts +++ b/src/agents/simple-completion-runtime.ts @@ -15,7 +15,8 @@ import { resolveDefaultModelForAgent, resolveModelRefFromString, } from "./model-selection.js"; -import { resolveModel } from "./pi-embedded-runner/model.js"; +import { resolveModel, resolveModelAsync } from "./pi-embedded-runner/model.js"; +import { prepareModelForSimpleCompletion } from "./simple-completion-transport.js"; type SimpleCompletionAuthStorage = { setRuntimeApiKey: (provider: string, apiKey: string) => void; @@ -158,8 +159,13 @@ export async function prepareSimpleCompletionModel(params: { profileId?: string; preferredProfile?: string; allowMissingApiKeyModes?: ReadonlyArray; + skipPiDiscovery?: boolean; }): Promise { - const resolved = resolveModel(params.provider, params.modelId, params.agentDir, params.cfg); + const resolved = params.skipPiDiscovery + ? await resolveModelAsync(params.provider, params.modelId, params.agentDir, params.cfg, { + skipPiDiscovery: true, + }) + : resolveModel(params.provider, params.modelId, params.agentDir, params.cfg); if (!resolved.model) { return { error: resolved.error ?? `Unknown model: ${params.provider}/${params.modelId}`, @@ -233,6 +239,7 @@ export async function prepareSimpleCompletionModelForAgent(params: { modelRef?: string; preferredProfile?: string; allowMissingApiKeyModes?: ReadonlyArray; + skipPiDiscovery?: boolean; }): Promise { const selection = resolveSimpleCompletionSelectionForAgent({ cfg: params.cfg, @@ -252,6 +259,7 @@ export async function prepareSimpleCompletionModelForAgent(params: { profileId: selection.profileId, preferredProfile: params.preferredProfile, allowMissingApiKeyModes: params.allowMissingApiKeyModes, + skipPiDiscovery: params.skipPiDiscovery, }); if ("error" in prepared) { return { @@ -272,7 +280,8 @@ export async function completeWithPreparedSimpleCompletionModel(params: { context: Parameters[1]; options?: SimpleCompletionModelOptions; }) { - return await complete(params.model, params.context, { + const completionModel = prepareModelForSimpleCompletion({ model: params.model }); + return await complete(completionModel, params.context, { ...params.options, apiKey: params.auth.apiKey, }); diff --git a/src/cli/capability-cli.test.ts b/src/cli/capability-cli.test.ts index 059e75639d8..fbadc4e7e8d 100644 --- a/src/cli/capability-cli.test.ts +++ b/src/cli/capability-cli.test.ts @@ -34,9 +34,25 @@ const mocks = vi.hoisted(() => ({ ), resolveMemorySearchConfig: vi.fn(() => null), loadModelCatalog: vi.fn(async () => []), - agentCommand: vi.fn(async () => ({ - payloads: [{ text: "local reply" }], - meta: { agentMeta: { provider: "openai", model: "gpt-5.4" } }, + prepareSimpleCompletionModelForAgent: vi.fn(async () => ({ + selection: { + provider: "openai", + modelId: "gpt-5.4", + agentDir: "/tmp/agent", + }, + model: { + provider: "openai", + id: "gpt-5.4", + maxTokens: 128, + }, + auth: { + apiKey: "sk-test", + source: "env:TEST_API_KEY", + 
mode: "api-key", + }, + })), + completeWithPreparedSimpleCompletionModel: vi.fn(async () => ({ + content: [{ type: "text", text: "local reply" }], })), callGateway: vi.fn(async ({ method }: { method: string }) => { if (method === "tts.status") { @@ -131,11 +147,6 @@ vi.mock("../config/config.js", () => ({ loadConfig: mocks.loadConfig as typeof import("../config/config.js").loadConfig, })); -vi.mock("../agents/agent-command.js", () => ({ - agentCommand: - mocks.agentCommand as unknown as typeof import("../agents/agent-command.js").agentCommand, -})); - vi.mock("../agents/agent-scope.js", () => ({ resolveDefaultAgentId: () => "main", resolveAgentDir: () => "/tmp/agent", @@ -146,6 +157,13 @@ vi.mock("../agents/model-catalog.js", () => ({ mocks.loadModelCatalog as typeof import("../agents/model-catalog.js").loadModelCatalog, })); +vi.mock("../agents/simple-completion-runtime.js", () => ({ + prepareSimpleCompletionModelForAgent: + mocks.prepareSimpleCompletionModelForAgent as unknown as typeof import("../agents/simple-completion-runtime.js").prepareSimpleCompletionModelForAgent, + completeWithPreparedSimpleCompletionModel: + mocks.completeWithPreparedSimpleCompletionModel as unknown as typeof import("../agents/simple-completion-runtime.js").completeWithPreparedSimpleCompletionModel, +})); + vi.mock("../agents/auth-profiles.js", () => ({ loadAuthProfileStoreForRuntime: mocks.loadAuthProfileStoreForRuntime as unknown as typeof import("../agents/auth-profiles.js").loadAuthProfileStoreForRuntime, @@ -291,7 +309,8 @@ describe("capability cli", () => { return store; }); mocks.resolveMemorySearchConfig.mockReset().mockReturnValue(null); - mocks.agentCommand.mockClear(); + mocks.prepareSimpleCompletionModelForAgent.mockClear(); + mocks.completeWithPreparedSimpleCompletionModel.mockClear(); mocks.callGateway.mockClear().mockImplementation((async ({ method }: { method: string }) => { if (method === "tts.status") { return { enabled: true, provider: "openai" }; @@ -362,7 +381,8 @@ describe("capability cli", () => { argv: ["capability", "model", "run", "--prompt", "hello", "--json"], }); - expect(mocks.agentCommand).toHaveBeenCalledTimes(1); + expect(mocks.prepareSimpleCompletionModelForAgent).toHaveBeenCalledTimes(1); + expect(mocks.completeWithPreparedSimpleCompletionModel).toHaveBeenCalledTimes(1); expect(mocks.callGateway).not.toHaveBeenCalled(); expect(mocks.runtime.writeJson).toHaveBeenCalledWith( expect.objectContaining({ @@ -372,20 +392,30 @@ describe("capability cli", () => { ); }); - it("runs local model probes without chat-agent prompt policy or tools", async () => { + it("runs local model probes through the lean completion path", async () => { await runRegisteredCli({ register: registerCapabilityCli as (program: Command) => void, argv: ["capability", "model", "run", "--prompt", "hello", "--json"], }); - expect(mocks.agentCommand).toHaveBeenCalledWith( + expect(mocks.prepareSimpleCompletionModelForAgent).toHaveBeenCalledWith( expect.objectContaining({ - cleanupBundleMcpOnRunEnd: true, - modelRun: true, - promptMode: "none", + agentId: "main", + allowMissingApiKeyModes: ["aws-sdk"], + skipPiDiscovery: true, + }), + ); + expect(mocks.completeWithPreparedSimpleCompletionModel).toHaveBeenCalledWith( + expect.objectContaining({ + context: { + messages: [ + expect.objectContaining({ + role: "user", + content: "hello", + }), + ], + }, }), - expect.anything(), - expect.anything(), ); }); diff --git a/src/cli/capability-cli.ts b/src/cli/capability-cli.ts index 1873f49bfa4..d72fa71c48f 100644 --- 
a/src/cli/capability-cli.ts +++ b/src/cli/capability-cli.ts @@ -4,7 +4,6 @@ import path from "node:path"; import { Readable } from "node:stream"; import { pipeline } from "node:stream/promises"; import type { Command } from "commander"; -import { agentCommand } from "../agents/agent-command.js"; import { resolveAgentDir, resolveDefaultAgentId } from "../agents/agent-scope.js"; import { listProfilesForProvider, @@ -13,6 +12,10 @@ import { import { updateAuthProfileStoreWithLock } from "../agents/auth-profiles/store.js"; import { resolveMemorySearchConfig } from "../agents/memory-search.js"; import { loadModelCatalog } from "../agents/model-catalog.js"; +import { + completeWithPreparedSimpleCompletionModel, + prepareSimpleCompletionModelForAgent, +} from "../agents/simple-completion-runtime.js"; import { getRuntimeConfig } from "../config/config.js"; import { resolveAgentModelPrimaryValue } from "../config/model-input.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; @@ -79,7 +82,6 @@ import { runWebSearch, } from "../web-search/runtime.js"; import { runCommandWithRuntime } from "./cli-utils.js"; -import { createDefaultDeps } from "./deps.js"; import { removeCommandByName } from "./program/command-tree.js"; import { collectOption } from "./program/helpers.js"; @@ -576,34 +578,54 @@ async function runModelRun(params: { const cfg = getRuntimeConfig(); const agentId = resolveDefaultAgentId(cfg); if (params.transport === "local") { - const result = await agentCommand( - { - message: params.prompt, - agentId, - model: params.model, - json: false, - modelRun: true, - promptMode: "none", - cleanupBundleMcpOnRunEnd: true, + const prepared = await prepareSimpleCompletionModelForAgent({ + cfg, + agentId, + modelRef: params.model, + allowMissingApiKeyModes: ["aws-sdk"], + skipPiDiscovery: true, + }); + if ("error" in prepared) { + throw new Error(prepared.error); + } + const result = await completeWithPreparedSimpleCompletionModel({ + model: prepared.model, + auth: prepared.auth, + context: { + messages: [ + { + role: "user", + content: params.prompt, + timestamp: Date.now(), + }, + ], }, - { - ...defaultRuntime, - log: () => {}, + options: { + maxTokens: + typeof prepared.model.maxTokens === "number" && Number.isFinite(prepared.model.maxTokens) + ? prepared.model.maxTokens + : undefined, }, - createDefaultDeps(), - ); + }); + const text = result.content + .map((block) => (block.type === "text" ? block.text : "")) + .join("") + .trim(); return { ok: true, capability: "model.run", transport: "local" as const, - provider: result?.meta?.agentMeta?.provider, - model: result?.meta?.agentMeta?.model, + provider: prepared.selection.provider, + model: prepared.selection.modelId, attempts: [], - outputs: (result?.payloads ?? []).map((payload) => ({ - text: payload.text, - mediaUrl: payload.mediaUrl, - mediaUrls: payload.mediaUrls, - })), + outputs: text + ? [ + { + text, + mediaUrl: null, + }, + ] + : [], } satisfies CapabilityEnvelope; }