refactor: centralize realtime voice resolution

This commit is contained in:
Peter Steinberger
2026-04-24 01:42:29 +01:00
parent 57e139100b
commit b164bb3717
8 changed files with 229 additions and 37 deletions

View File

@@ -296,7 +296,7 @@ Current bundled provider examples:
| `plugin-sdk/speech` | Speech helpers | Speech provider types plus provider-facing directive, registry, and validation helpers |
| `plugin-sdk/speech-core` | Shared speech core | Speech provider types, registry, directives, normalization |
| `plugin-sdk/realtime-transcription` | Realtime transcription helpers | Provider types, registry helpers, and shared WebSocket session helper |
| `plugin-sdk/realtime-voice` | Realtime voice helpers | Provider types, registry helpers, and bridge session helpers |
| `plugin-sdk/realtime-voice` | Realtime voice helpers | Provider types, registry/resolution helpers, and bridge session helpers |
| `plugin-sdk/image-generation-core` | Shared image-generation core | Image-generation types, failover, auth, and registry helpers |
| `plugin-sdk/music-generation` | Music-generation helpers | Music-generation provider/request/result types |
| `plugin-sdk/music-generation-core` | Shared music-generation core | Music-generation types, failover helpers, provider lookup, and model-ref parsing |

View File

@@ -1,4 +1,5 @@
export {
getRealtimeVoiceProvider,
listRealtimeVoiceProviders,
resolveConfiguredRealtimeVoiceProvider,
} from "openclaw/plugin-sdk/realtime-voice";

View File

@@ -1,10 +1,6 @@
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
import { resolveConfiguredCapabilityProvider } from "openclaw/plugin-sdk/provider-selection-runtime";
import type {
RealtimeVoiceProviderConfig,
RealtimeVoiceProviderPlugin,
} from "openclaw/plugin-sdk/realtime-voice";
import type { ResolvedRealtimeVoiceProvider } from "openclaw/plugin-sdk/realtime-voice";
import type { VoiceCallConfig } from "./config.js";
import { resolveVoiceCallConfig, validateProviderConfig } from "./config.js";
import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
@@ -34,10 +30,7 @@ type Logger = {
debug?: (message: string) => void;
};
type ResolvedRealtimeProvider = {
provider: RealtimeVoiceProviderPlugin;
providerConfig: RealtimeVoiceProviderConfig;
};
type ResolvedRealtimeProvider = ResolvedRealtimeVoiceProvider;
type TelnyxProviderModule = typeof import("./providers/telnyx.js");
type TwilioProviderModule = typeof import("./providers/twilio.js");
@@ -197,36 +190,12 @@ async function resolveRealtimeProvider(params: {
config: VoiceCallConfig;
fullConfig: OpenClawConfig;
}): Promise<ResolvedRealtimeProvider> {
const { getRealtimeVoiceProvider, listRealtimeVoiceProviders } = await loadRealtimeVoiceRuntime();
const resolution = resolveConfiguredCapabilityProvider({
const { resolveConfiguredRealtimeVoiceProvider } = await loadRealtimeVoiceRuntime();
return resolveConfiguredRealtimeVoiceProvider({
configuredProviderId: params.config.realtime.provider,
providerConfigs: params.config.realtime.providers,
cfg: params.fullConfig,
cfgForResolve: params.fullConfig,
getConfiguredProvider: (providerId) => getRealtimeVoiceProvider(providerId, params.fullConfig),
listProviders: () => listRealtimeVoiceProviders(params.fullConfig),
resolveProviderConfig: ({ provider, cfg, rawConfig }) =>
provider.resolveConfig?.({ cfg, rawConfig }) ?? rawConfig,
isProviderConfigured: ({ provider, cfg, providerConfig }) =>
provider.isConfigured({ cfg, providerConfig }),
});
if (!resolution.ok && resolution.code === "missing-configured-provider") {
throw new Error(
`Realtime voice provider "${resolution.configuredProviderId}" is not registered`,
);
}
if (!resolution.ok && resolution.code === "no-registered-provider") {
throw new Error("No realtime voice provider registered");
}
if (!resolution.ok) {
throw new Error(`Realtime voice provider "${resolution.provider?.id}" is not configured`);
}
const provider = resolution.provider;
return {
provider,
providerConfig: resolution.providerConfig,
};
}
export async function createVoiceCallRuntime(params: {

View File

@@ -18,9 +18,15 @@ export {
listRealtimeVoiceProviders,
normalizeRealtimeVoiceProviderId,
} from "../realtime-voice/provider-registry.js";
export {
resolveConfiguredRealtimeVoiceProvider,
type ResolvedRealtimeVoiceProvider,
type ResolveConfiguredRealtimeVoiceProviderParams,
} from "../realtime-voice/provider-resolver.js";
export {
createRealtimeVoiceBridgeSession,
type RealtimeVoiceAudioSink,
type RealtimeVoiceBridgeSession,
type RealtimeVoiceBridgeSessionParams,
type RealtimeVoiceMarkStrategy,
} from "../realtime-voice/session-runtime.js";

View File

@@ -0,0 +1,89 @@
import { describe, expect, it } from "vitest";
import type { RealtimeVoiceProviderPlugin } from "../plugins/types.js";
import { resolveConfiguredRealtimeVoiceProvider } from "./provider-resolver.js";
describe("realtime voice provider resolver", () => {
  // The resolver only selects a provider and resolves its config; it must never
  // create a bridge, so the stub fails loudly if it is ever invoked.
  const createUnusedBridge = (): never => {
    throw new Error("unused");
  };

  // Registered first in auto-select order, but always reports itself as unconfigured.
  const unconfiguredProvider: RealtimeVoiceProviderPlugin = {
    id: "first",
    label: "First",
    autoSelectOrder: 1,
    isConfigured: () => false,
    createBridge: createUnusedBridge,
  };

  // Tags resolved configs with `resolved: true` and is configured only when `enabled` is true.
  const configurableProvider: RealtimeVoiceProviderPlugin = {
    id: "second",
    label: "Second",
    autoSelectOrder: 2,
    resolveConfig: ({ rawConfig }) => ({ ...rawConfig, resolved: true }),
    isConfigured: ({ providerConfig }) => providerConfig.enabled === true,
    createBridge: createUnusedBridge,
  };

  const providers: RealtimeVoiceProviderPlugin[] = [unconfiguredProvider, configurableProvider];

  it("auto-selects the first configured realtime voice provider", () => {
    const resolved = resolveConfiguredRealtimeVoiceProvider({
      cfg: {},
      providers,
      providerConfigs: {
        second: { enabled: true },
      },
    });

    expect(resolved).toMatchObject({
      provider: configurableProvider,
      providerConfig: {
        enabled: true,
        resolved: true,
      },
    });
  });

  it("applies a default model before provider config resolution", () => {
    const { providerConfig } = resolveConfiguredRealtimeVoiceProvider({
      cfg: {},
      configuredProviderId: "second",
      defaultModel: "gpt-realtime",
      providers,
      providerConfigs: {
        second: { enabled: true },
      },
    });

    // `resolved: true` alongside `model` shows the default was injected before resolveConfig ran.
    expect(providerConfig).toMatchObject({
      enabled: true,
      model: "gpt-realtime",
      resolved: true,
    });
  });

  it("keeps explicit provider model over the default model", () => {
    const { providerConfig } = resolveConfiguredRealtimeVoiceProvider({
      cfg: {},
      configuredProviderId: "second",
      defaultModel: "gpt-realtime",
      providers,
      providerConfigs: {
        second: { enabled: true, model: "custom-realtime" },
      },
    });

    expect(providerConfig).toMatchObject({
      model: "custom-realtime",
    });
  });

  it("throws a caller-specified message when no providers exist", () => {
    const customMessage = "No configured realtime voice provider registered";
    const resolveWithoutProviders = () =>
      resolveConfiguredRealtimeVoiceProvider({
        cfg: {},
        providers: [],
        noRegisteredProviderMessage: customMessage,
      });

    expect(resolveWithoutProviders).toThrow(customMessage);
  });
});

View File

@@ -0,0 +1,63 @@
import type { OpenClawConfig } from "../config/types.openclaw.js";
import { resolveConfiguredCapabilityProvider } from "../plugin-sdk/provider-selection-runtime.js";
import type { RealtimeVoiceProviderPlugin } from "../plugins/types.js";
import { getRealtimeVoiceProvider, listRealtimeVoiceProviders } from "./provider-registry.js";
import type { RealtimeVoiceProviderConfig } from "./provider-types.js";
/**
 * Successful result of {@link resolveConfiguredRealtimeVoiceProvider}: the selected
 * provider plugin paired with the config that was resolved for it.
 */
export type ResolvedRealtimeVoiceProvider = {
/** The realtime voice provider plugin that was selected. */
provider: RealtimeVoiceProviderPlugin;
/** Provider config after default-model injection and the provider's optional `resolveConfig` hook. */
providerConfig: RealtimeVoiceProviderConfig;
};
/** Inputs accepted by {@link resolveConfiguredRealtimeVoiceProvider}. All fields are optional. */
export type ResolveConfiguredRealtimeVoiceProviderParams = {
/** Explicitly requested provider id; forwarded unchanged to `resolveConfiguredCapabilityProvider`. */
configuredProviderId?: string;
/** Raw per-provider config objects; forwarded unchanged to `resolveConfiguredCapabilityProvider`. */
providerConfigs?: Record<string, Record<string, unknown> | undefined>;
/** Config passed to the provider registry lookups and to the capability resolver. */
cfg?: OpenClawConfig;
/** Config used for the resolve step; falls back to `cfg`, then to an empty config object. */
cfgForResolve?: OpenClawConfig;
/**
 * Explicit provider list used instead of `listRealtimeVoiceProviders(cfg)`.
 * Lookups by id check this list first and fall back to the registry when no entry matches.
 */
providers?: RealtimeVoiceProviderPlugin[];
/** Model written to the raw config as `model` when it is non-empty and the raw config has no `model`. */
defaultModel?: string;
/** Replaces the default "No realtime voice provider registered" error message. */
noRegisteredProviderMessage?: string;
};
/**
 * Selects a realtime voice provider and resolves its config, throwing instead of
 * returning a failure result.
 *
 * Selection itself is delegated to `resolveConfiguredCapabilityProvider`; this wrapper
 * supplies the realtime-voice registry lookups, injects `defaultModel` into raw configs
 * that have no `model`, and converts failure codes into errors.
 *
 * @param params - Selection inputs; see {@link ResolveConfiguredRealtimeVoiceProviderParams}.
 * @returns The selected provider together with its resolved config.
 * @throws Error when the configured provider id is not registered, when no provider is
 *   registered at all, or when the selected provider is not configured.
 */
export function resolveConfiguredRealtimeVoiceProvider(
  params: ResolveConfiguredRealtimeVoiceProviderParams,
): ResolvedRealtimeVoiceProvider {
  const { defaultModel, providers: explicitProviders } = params;
  const resolveCfg = params.cfgForResolve ?? params.cfg ?? ({} as OpenClawConfig);
  // An explicit provider list takes the place of the registry listing.
  const candidates = explicitProviders ?? listRealtimeVoiceProviders(params.cfg);

  const outcome = resolveConfiguredCapabilityProvider({
    configuredProviderId: params.configuredProviderId,
    providerConfigs: params.providerConfigs,
    cfg: params.cfg,
    cfgForResolve: resolveCfg,
    getConfiguredProvider: (providerId) => {
      // Prefer an entry from the explicit list; otherwise ask the registry.
      const explicitMatch = explicitProviders?.find((entry) => entry.id === providerId);
      return explicitMatch ?? getRealtimeVoiceProvider(providerId, params.cfg);
    },
    listProviders: () => candidates,
    resolveProviderConfig: ({ provider, cfg, rawConfig }) => {
      // Seed the default model before the provider's own resolveConfig hook runs,
      // so an explicit `model` in the raw config always wins.
      const seededRawConfig =
        !defaultModel || rawConfig.model !== undefined
          ? rawConfig
          : { ...rawConfig, model: defaultModel };
      const resolvedConfig = provider.resolveConfig?.({ cfg, rawConfig: seededRawConfig });
      return resolvedConfig ?? seededRawConfig;
    },
    isProviderConfigured: ({ provider, cfg, providerConfig }) =>
      provider.isConfigured({ cfg, providerConfig }),
  });

  if (!outcome.ok) {
    if (outcome.code === "missing-configured-provider") {
      throw new Error(
        `Realtime voice provider "${outcome.configuredProviderId}" is not registered`,
      );
    }
    if (outcome.code === "no-registered-provider") {
      throw new Error(params.noRegisteredProviderMessage ?? "No realtime voice provider registered");
    }
    // Any remaining failure is reported as the selected provider not being configured.
    throw new Error(`Realtime voice provider "${outcome.provider?.id}" is not configured`);
  }

  const { provider, providerConfig } = outcome;
  return { provider, providerConfig };
}

View File

@@ -54,6 +54,60 @@ describe("realtime voice bridge session runtime", () => {
expect(sendMark).toHaveBeenCalledWith("mark-1");
});
// With markStrategy "ack-immediately", a provider mark is acknowledged on the bridge
// and is not forwarded to the audio sink's sendMark.
it("can acknowledge provider marks without transport mark support", () => {
// Captures the request handed to createBridge so the test can fire provider callbacks.
let callbacks: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0] | undefined;
const bridge = makeBridge();
const provider: RealtimeVoiceProviderPlugin = {
id: "test",
label: "Test",
isConfigured: () => true,
createBridge: (request) => {
callbacks = request;
return bridge;
},
};
const sendMark = vi.fn();
createRealtimeVoiceBridgeSession({
provider,
providerConfig: {},
audioSink: { sendAudio: vi.fn(), sendMark },
markStrategy: "ack-immediately",
});
// Simulate the provider emitting a mark through the captured callbacks.
callbacks?.onMark?.("mark-1");
expect(sendMark).not.toHaveBeenCalled();
expect(bridge.acknowledgeMark).toHaveBeenCalled();
});
// With markStrategy "ignore", a provider mark is neither forwarded to the audio sink
// nor acknowledged on the bridge.
it("can ignore provider marks", () => {
// Captures the request handed to createBridge so the test can fire provider callbacks.
let callbacks: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0] | undefined;
const bridge = makeBridge();
const provider: RealtimeVoiceProviderPlugin = {
id: "test",
label: "Test",
isConfigured: () => true,
createBridge: (request) => {
callbacks = request;
return bridge;
},
};
const sendMark = vi.fn();
createRealtimeVoiceBridgeSession({
provider,
providerConfig: {},
audioSink: { sendAudio: vi.fn(), sendMark },
markStrategy: "ignore",
});
// Simulate the provider emitting a mark through the captured callbacks.
callbacks?.onMark?.("mark-1");
expect(sendMark).not.toHaveBeenCalled();
expect(bridge.acknowledgeMark).not.toHaveBeenCalled();
});
it("passes tool calls the active session and triggers initial greeting on ready", () => {
let callbacks: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0] | undefined;
const bridge = makeBridge();

View File

@@ -15,6 +15,8 @@ export type RealtimeVoiceAudioSink = {
sendMark?: (markName: string) => void;
};
export type RealtimeVoiceMarkStrategy = "transport" | "ack-immediately" | "ignore";
export type RealtimeVoiceBridgeSession = {
bridge: RealtimeVoiceBridge;
acknowledgeMark(): void;
@@ -33,6 +35,7 @@ export type RealtimeVoiceBridgeSessionParams = {
audioSink: RealtimeVoiceAudioSink;
instructions?: string;
initialGreetingInstructions?: string;
markStrategy?: RealtimeVoiceMarkStrategy;
triggerGreetingOnReady?: boolean;
tools?: RealtimeVoiceTool[];
onTranscript?: (role: RealtimeVoiceRole, text: string, isFinal: boolean) => void;
@@ -81,7 +84,14 @@ export function createRealtimeVoiceBridgeSession(
}
},
onMark: (markName) => {
if (canSendAudio()) {
if (!canSendAudio() || params.markStrategy === "ignore") {
return;
}
if (params.markStrategy === "ack-immediately") {
bridge?.acknowledgeMark();
return;
}
if (params.markStrategy === undefined || params.markStrategy === "transport") {
params.audioSink.sendMark?.(markName);
}
},