fix(image): resolve configured image models

This commit is contained in:
zhang-guiping
2026-04-26 06:19:28 +08:00
committed by GitHub
parent 8f78932059
commit 91adb69c57
3 changed files with 156 additions and 2 deletions

View File

@@ -67,6 +67,9 @@ Docs: https://docs.openclaw.ai
- CLI/plugins: keep `message` startup, `channels logs`, `agents delete`, and
`agents set-identity` off broad plugin preloading; message delivery still
loads plugins when the action actually runs.
- Image understanding: resolve configured image models such as local LM Studio
vision entries before reporting `Unknown model` when the discovery registry
has not registered that provider. Fixes #66486. Thanks @zhanggpcsu.
- CLI/agents: keep `agents bind`, `agents unbind`, and `agents bindings` on
setup-safe channel metadata paths so they do not preload bundled plugin
runtimes or stage runtime dependencies. Fixes #71743.

View File

@@ -18,6 +18,8 @@ const hoisted = vi.hoisted(() => ({
discoverModelsMock: vi.fn(),
fetchMock: vi.fn(),
registerProviderStreamForModelMock: vi.fn(),
prepareProviderDynamicModelMock: vi.fn(async () => {}),
resolveModelWithRegistryMock: vi.fn(),
}));
const {
completeMock,
@@ -29,8 +31,16 @@ const {
discoverModelsMock,
fetchMock,
registerProviderStreamForModelMock,
prepareProviderDynamicModelMock,
resolveModelWithRegistryMock,
} = hoisted;
// Shape of the argument object that the resolveModelWithRegistry mock receives
// in these tests. Mirrors only the subset of the real call site that the mock
// implementations below actually read (registry lookup + provider/model ids).
type ResolveModelWithRegistryTestParams = {
modelRegistry: { find: (provider: string, modelId: string) => unknown };
provider: string;
modelId: string;
};
vi.mock("@mariozechner/pi-ai", async () => {
const actual = await vi.importActual<typeof import("@mariozechner/pi-ai")>("@mariozechner/pi-ai");
return {
@@ -63,6 +73,17 @@ vi.mock("../agents/pi-model-discovery-runtime.js", () => ({
discoverModels: discoverModelsMock,
}));
// Partial mock: keep every real export of provider-runtime, but replace
// prepareProviderDynamicModel with a spy so tests can assert whether the
// dynamic-model preparation path was (or was not) taken.
vi.mock("../plugins/provider-runtime.js", async () => ({
...(await vi.importActual<typeof import("../plugins/provider-runtime.js")>(
"../plugins/provider-runtime.js",
)),
prepareProviderDynamicModel: prepareProviderDynamicModelMock,
}));
// Replace the registry-aware model resolver entirely; each test configures
// resolveModelWithRegistryMock with whatever model shape it needs.
vi.mock("../agents/pi-embedded-runner/model.js", () => ({
resolveModelWithRegistry: resolveModelWithRegistryMock,
}));

// Dynamic import after the vi.mock registrations so image.js binds to the
// mocked modules rather than the real ones.
const { describeImageWithModel } = await import("./image.js");
describe("describeImageWithModel", () => {
@@ -93,6 +114,12 @@ describe("describeImageWithModel", () => {
baseUrl: "https://api.minimax.io/anthropic",
})),
});
resolveModelWithRegistryMock.mockImplementation(
// Default behavior: delegate to modelRegistry.find so tests that only
// override discoverModelsMock automatically get the matching model back
// through resolveModelWithRegistry without extra per-test wiring.
({ modelRegistry, provider, modelId }: ResolveModelWithRegistryTestParams) =>
modelRegistry.find(provider, modelId),
);
});
it("routes minimax-portal image models through the MiniMax VLM endpoint", async () => {
@@ -188,6 +215,84 @@ describe("describeImageWithModel", () => {
expect(fetchMock).not.toHaveBeenCalled();
});
it("resolves configured image models when discovery has not registered the provider", async () => {
  // Discovery knows nothing about this provider: the registry's find always
  // misses, so the model must come from the user-configured providers instead.
  const discoveryFind = vi.fn().mockReturnValue(null);
  discoverModelsMock.mockReturnValue({ find: discoveryFind });
  resolveModelWithRegistryMock.mockImplementationOnce(
    ({ provider, modelId }: ResolveModelWithRegistryTestParams) => ({
      provider,
      id: modelId,
      api: "anthropic-messages",
      input: ["text", "image"],
      baseUrl: "http://127.0.0.1:1234",
    }),
  );
  completeMock.mockResolvedValue({
    role: "assistant",
    api: "anthropic-messages",
    provider: "lmstudio",
    model: "google/gemma-4-e2b",
    stopReason: "stop",
    timestamp: Date.now(),
    content: [{ type: "text", text: "local vision ok" }],
  });

  const result = await describeImageWithModel({
    cfg: {
      models: {
        providers: {
          lmstudio: {
            api: "anthropic-messages",
            baseUrl: "http://127.0.0.1:1234",
            models: [
              {
                id: "google/gemma-4-e2b",
                name: "google/gemma-4-e2b",
                input: ["text", "image"],
                reasoning: false,
                cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
                contextWindow: 131_072,
                maxTokens: 4096,
              },
            ],
          },
        },
      },
    },
    agentDir: "/tmp/openclaw-agent",
    provider: "lmstudio",
    model: "google/gemma-4-e2b",
    buffer: Buffer.from("png-bytes"),
    fileName: "image.png",
    mime: "image/png",
    prompt: "Describe the image.",
    timeoutMs: 1000,
  });

  expect(result).toEqual({ text: "local vision ok", model: "google/gemma-4-e2b" });
  // The configured-model path must bypass the discovery registry entirely…
  expect(discoveryFind).not.toHaveBeenCalled();
  // …and the resolver must have been handed the lmstudio config so it can
  // synthesize the model from configuration.
  expect(resolveModelWithRegistryMock).toHaveBeenCalledWith(
    expect.objectContaining({
      provider: "lmstudio",
      modelId: "google/gemma-4-e2b",
      cfg: expect.objectContaining({
        models: expect.objectContaining({
          providers: expect.objectContaining({
            lmstudio: expect.objectContaining({ baseUrl: "http://127.0.0.1:1234" }),
          }),
        }),
      }),
    }),
  );
  // The fast path succeeded, so dynamic preparation should never run.
  expect(prepareProviderDynamicModelMock).not.toHaveBeenCalled();
  expect(completeMock).toHaveBeenCalledOnce();
});
it("passes image prompt as system instructions for codex image requests", async () => {
discoverModelsMock.mockReturnValue({
find: vi.fn(() => ({

View File

@@ -6,14 +6,16 @@ import {
requireApiKey,
resolveApiKeyForProvider,
} from "../agents/model-auth.js";
import { normalizeModelRef } from "../agents/model-selection.js";
import { findNormalizedProviderValue, normalizeModelRef } from "../agents/model-selection.js";
import { ensureOpenClawModelsJson } from "../agents/models-config.js";
import { resolveModelWithRegistry } from "../agents/pi-embedded-runner/model.js";
import { resolveProviderRequestCapabilities } from "../agents/provider-attribution.js";
import { registerProviderStreamForModel } from "../agents/provider-stream.js";
import {
coerceImageAssistantText,
hasImageReasoningOnlyResponse,
} from "../agents/tools/image-tool.helpers.js";
import { prepareProviderDynamicModel } from "../plugins/provider-runtime.js";
import type {
ImageDescriptionRequest,
ImageDescriptionResult,
@@ -141,11 +143,55 @@ async function resolveImageRuntime(params: {
// Resolve credential storage and the discovery-backed model registry for the
// agent directory under inspection.
const authStorage = discoverAuthStorage(params.agentDir);
const modelRegistry = discoverModels(authStorage, params.agentDir);
// Canonicalize provider/model aliases before any lookups.
const resolvedRef = normalizeModelRef(params.provider, params.model);
// (diff view: removed pre-change line — the registry-only lookup that this
// change replaces with resolveModelWithRegistry below.)
const model = modelRegistry.find(resolvedRef.provider, resolvedRef.model) as Model<Api> | null;
// Look up the provider's entry in the user config; retry with a normalized
// key so provider-name formatting differences still match.
const configuredProviders = params.cfg.models?.providers;
const providerConfig =
configuredProviders?.[resolvedRef.provider] ??
findNormalizedProviderValue(configuredProviders, resolvedRef.provider);
// Fast path: resolve without dynamic model preparation first.
// This avoids unnecessary prepare hooks (e.g. OpenRouter catalog fetch)
// for models that are already explicitly resolvable.
let model = resolveModelWithRegistry({
provider: resolvedRef.provider,
modelId: resolvedRef.model,
modelRegistry,
cfg: params.cfg,
agentDir: params.agentDir,
}) as Model<Api> | null;
// If the model is not in the registry yet, prepare dynamic provider models
// and retry (needed for provider-runtime-backed dynamic models).
if (!model) {
await prepareProviderDynamicModel({
provider: resolvedRef.provider,
config: params.cfg,
context: {
config: params.cfg,
agentDir: params.agentDir,
provider: resolvedRef.provider,
modelId: resolvedRef.model,
modelRegistry,
providerConfig,
},
});
// Second attempt, after the provider had a chance to register its models.
model = resolveModelWithRegistry({
provider: resolvedRef.provider,
modelId: resolvedRef.model,
modelRegistry,
cfg: params.cfg,
agentDir: params.agentDir,
}) as Model<Api> | null;
}
// Still unresolved after dynamic preparation: the model genuinely is unknown.
if (!model) {
throw new Error(`Unknown model: ${resolvedRef.provider}/${resolvedRef.model}`);
}
if (!model.input?.includes("image")) {
// resolveModelWithRegistry may synthesize a text-only fallback for configured
// providers, which would change "Unknown model" → "Model does not support images"
// and skip the MiniMax VLM recovery path. Throw Unknown model for MiniMax VLM
// models so the caller can attempt the fallback.
if (isMinimaxVlmModel(resolvedRef.provider, resolvedRef.model)) {
throw new Error(`Unknown model: ${resolvedRef.provider}/${resolvedRef.model}`);
}
throw new Error(`Model does not support images: ${params.provider}/${params.model}`);
}
const apiKeyInfo = await getApiKeyForModel({