mirror of
https://fastgit.cc/github.com/openclaw/openclaw
synced 2026-05-01 06:36:23 +08:00
fix(image): resolve configured image models
This commit is contained in:
@@ -67,6 +67,9 @@ Docs: https://docs.openclaw.ai
|
||||
- CLI/plugins: keep `message` startup, `channels logs`, `agents delete`, and
|
||||
`agents set-identity` off broad plugin preloading; message delivery still
|
||||
loads plugins when the action actually runs.
|
||||
- Image understanding: resolve configured image models such as local LM Studio
|
||||
vision entries before reporting `Unknown model` when the discovery registry
|
||||
has not registered that provider. Fixes #66486. Thanks @zhanggpcsu.
|
||||
- CLI/agents: keep `agents bind`, `agents unbind`, and `agents bindings` on
|
||||
setup-safe channel metadata paths so they do not preload bundled plugin
|
||||
runtimes or stage runtime dependencies. Fixes #71743.
|
||||
|
||||
@@ -18,6 +18,8 @@ const hoisted = vi.hoisted(() => ({
|
||||
discoverModelsMock: vi.fn(),
|
||||
fetchMock: vi.fn(),
|
||||
registerProviderStreamForModelMock: vi.fn(),
|
||||
prepareProviderDynamicModelMock: vi.fn(async () => {}),
|
||||
resolveModelWithRegistryMock: vi.fn(),
|
||||
}));
|
||||
const {
|
||||
completeMock,
|
||||
@@ -29,8 +31,16 @@ const {
|
||||
discoverModelsMock,
|
||||
fetchMock,
|
||||
registerProviderStreamForModelMock,
|
||||
prepareProviderDynamicModelMock,
|
||||
resolveModelWithRegistryMock,
|
||||
} = hoisted;
|
||||
|
||||
type ResolveModelWithRegistryTestParams = {
|
||||
modelRegistry: { find: (provider: string, modelId: string) => unknown };
|
||||
provider: string;
|
||||
modelId: string;
|
||||
};
|
||||
|
||||
vi.mock("@mariozechner/pi-ai", async () => {
|
||||
const actual = await vi.importActual<typeof import("@mariozechner/pi-ai")>("@mariozechner/pi-ai");
|
||||
return {
|
||||
@@ -63,6 +73,17 @@ vi.mock("../agents/pi-model-discovery-runtime.js", () => ({
|
||||
discoverModels: discoverModelsMock,
|
||||
}));
|
||||
|
||||
vi.mock("../plugins/provider-runtime.js", async () => ({
|
||||
...(await vi.importActual<typeof import("../plugins/provider-runtime.js")>(
|
||||
"../plugins/provider-runtime.js",
|
||||
)),
|
||||
prepareProviderDynamicModel: prepareProviderDynamicModelMock,
|
||||
}));
|
||||
|
||||
vi.mock("../agents/pi-embedded-runner/model.js", () => ({
|
||||
resolveModelWithRegistry: resolveModelWithRegistryMock,
|
||||
}));
|
||||
|
||||
const { describeImageWithModel } = await import("./image.js");
|
||||
|
||||
describe("describeImageWithModel", () => {
|
||||
@@ -93,6 +114,12 @@ describe("describeImageWithModel", () => {
|
||||
baseUrl: "https://api.minimax.io/anthropic",
|
||||
})),
|
||||
});
|
||||
resolveModelWithRegistryMock.mockImplementation(
|
||||
// Delegate to modelRegistry.find so tests that override discoverModelsMock
|
||||
// automatically get the right model through resolveModelWithRegistry.
|
||||
({ modelRegistry, provider, modelId }: ResolveModelWithRegistryTestParams) =>
|
||||
modelRegistry.find(provider, modelId),
|
||||
);
|
||||
});
|
||||
|
||||
it("routes minimax-portal image models through the MiniMax VLM endpoint", async () => {
|
||||
@@ -188,6 +215,84 @@ describe("describeImageWithModel", () => {
|
||||
expect(fetchMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("resolves configured image models when discovery has not registered the provider", async () => {
|
||||
const registryFind = vi.fn(() => null);
|
||||
discoverModelsMock.mockReturnValue({ find: registryFind });
|
||||
resolveModelWithRegistryMock.mockImplementationOnce(
|
||||
({ provider, modelId }: ResolveModelWithRegistryTestParams) => ({
|
||||
provider,
|
||||
id: modelId,
|
||||
api: "anthropic-messages",
|
||||
input: ["text", "image"],
|
||||
baseUrl: "http://127.0.0.1:1234",
|
||||
}),
|
||||
);
|
||||
completeMock.mockResolvedValue({
|
||||
role: "assistant",
|
||||
api: "anthropic-messages",
|
||||
provider: "lmstudio",
|
||||
model: "google/gemma-4-e2b",
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
content: [{ type: "text", text: "local vision ok" }],
|
||||
});
|
||||
|
||||
const result = await describeImageWithModel({
|
||||
cfg: {
|
||||
models: {
|
||||
providers: {
|
||||
lmstudio: {
|
||||
api: "anthropic-messages",
|
||||
baseUrl: "http://127.0.0.1:1234",
|
||||
models: [
|
||||
{
|
||||
id: "google/gemma-4-e2b",
|
||||
name: "google/gemma-4-e2b",
|
||||
input: ["text", "image"],
|
||||
reasoning: false,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 131_072,
|
||||
maxTokens: 4096,
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
agentDir: "/tmp/openclaw-agent",
|
||||
provider: "lmstudio",
|
||||
model: "google/gemma-4-e2b",
|
||||
buffer: Buffer.from("png-bytes"),
|
||||
fileName: "image.png",
|
||||
mime: "image/png",
|
||||
prompt: "Describe the image.",
|
||||
timeoutMs: 1000,
|
||||
});
|
||||
|
||||
expect(result).toEqual({
|
||||
text: "local vision ok",
|
||||
model: "google/gemma-4-e2b",
|
||||
});
|
||||
expect(registryFind).not.toHaveBeenCalled();
|
||||
expect(resolveModelWithRegistryMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
provider: "lmstudio",
|
||||
modelId: "google/gemma-4-e2b",
|
||||
cfg: expect.objectContaining({
|
||||
models: expect.objectContaining({
|
||||
providers: expect.objectContaining({
|
||||
lmstudio: expect.objectContaining({
|
||||
baseUrl: "http://127.0.0.1:1234",
|
||||
}),
|
||||
}),
|
||||
}),
|
||||
}),
|
||||
}),
|
||||
);
|
||||
expect(prepareProviderDynamicModelMock).not.toHaveBeenCalled();
|
||||
expect(completeMock).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it("passes image prompt as system instructions for codex image requests", async () => {
|
||||
discoverModelsMock.mockReturnValue({
|
||||
find: vi.fn(() => ({
|
||||
|
||||
@@ -6,14 +6,16 @@ import {
|
||||
requireApiKey,
|
||||
resolveApiKeyForProvider,
|
||||
} from "../agents/model-auth.js";
|
||||
import { normalizeModelRef } from "../agents/model-selection.js";
|
||||
import { findNormalizedProviderValue, normalizeModelRef } from "../agents/model-selection.js";
|
||||
import { ensureOpenClawModelsJson } from "../agents/models-config.js";
|
||||
import { resolveModelWithRegistry } from "../agents/pi-embedded-runner/model.js";
|
||||
import { resolveProviderRequestCapabilities } from "../agents/provider-attribution.js";
|
||||
import { registerProviderStreamForModel } from "../agents/provider-stream.js";
|
||||
import {
|
||||
coerceImageAssistantText,
|
||||
hasImageReasoningOnlyResponse,
|
||||
} from "../agents/tools/image-tool.helpers.js";
|
||||
import { prepareProviderDynamicModel } from "../plugins/provider-runtime.js";
|
||||
import type {
|
||||
ImageDescriptionRequest,
|
||||
ImageDescriptionResult,
|
||||
@@ -141,11 +143,55 @@ async function resolveImageRuntime(params: {
|
||||
const authStorage = discoverAuthStorage(params.agentDir);
|
||||
const modelRegistry = discoverModels(authStorage, params.agentDir);
|
||||
const resolvedRef = normalizeModelRef(params.provider, params.model);
|
||||
const model = modelRegistry.find(resolvedRef.provider, resolvedRef.model) as Model<Api> | null;
|
||||
const configuredProviders = params.cfg.models?.providers;
|
||||
const providerConfig =
|
||||
configuredProviders?.[resolvedRef.provider] ??
|
||||
findNormalizedProviderValue(configuredProviders, resolvedRef.provider);
|
||||
// Fast path: resolve without dynamic model preparation first.
|
||||
// This avoids unnecessary prepare hooks (e.g. OpenRouter catalog fetch)
|
||||
// for models that are already explicitly resolvable.
|
||||
let model = resolveModelWithRegistry({
|
||||
provider: resolvedRef.provider,
|
||||
modelId: resolvedRef.model,
|
||||
modelRegistry,
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
}) as Model<Api> | null;
|
||||
|
||||
// If the model is not in the registry yet, prepare dynamic provider models
|
||||
// and retry (needed for provider-runtime-backed dynamic models).
|
||||
if (!model) {
|
||||
await prepareProviderDynamicModel({
|
||||
provider: resolvedRef.provider,
|
||||
config: params.cfg,
|
||||
context: {
|
||||
config: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
provider: resolvedRef.provider,
|
||||
modelId: resolvedRef.model,
|
||||
modelRegistry,
|
||||
providerConfig,
|
||||
},
|
||||
});
|
||||
model = resolveModelWithRegistry({
|
||||
provider: resolvedRef.provider,
|
||||
modelId: resolvedRef.model,
|
||||
modelRegistry,
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
}) as Model<Api> | null;
|
||||
}
|
||||
if (!model) {
|
||||
throw new Error(`Unknown model: ${resolvedRef.provider}/${resolvedRef.model}`);
|
||||
}
|
||||
if (!model.input?.includes("image")) {
|
||||
// resolveModelWithRegistry may synthesize a text-only fallback for configured
|
||||
// providers, which would change "Unknown model" → "Model does not support images"
|
||||
// and skip the MiniMax VLM recovery path. Throw Unknown model for MiniMax VLM
|
||||
// models so the caller can attempt the fallback.
|
||||
if (isMinimaxVlmModel(resolvedRef.provider, resolvedRef.model)) {
|
||||
throw new Error(`Unknown model: ${resolvedRef.provider}/${resolvedRef.model}`);
|
||||
}
|
||||
throw new Error(`Model does not support images: ${params.provider}/${params.model}`);
|
||||
}
|
||||
const apiKeyInfo = await getApiKeyForModel({
|
||||
|
||||
Reference in New Issue
Block a user