diff --git a/docs/plugins/sdk-migration.md b/docs/plugins/sdk-migration.md index de9c4ef912c..f6d18fc4475 100644 --- a/docs/plugins/sdk-migration.md +++ b/docs/plugins/sdk-migration.md @@ -296,7 +296,7 @@ Current bundled provider examples: | `plugin-sdk/speech` | Speech helpers | Speech provider types plus provider-facing directive, registry, and validation helpers | | `plugin-sdk/speech-core` | Shared speech core | Speech provider types, registry, directives, normalization | | `plugin-sdk/realtime-transcription` | Realtime transcription helpers | Provider types, registry helpers, and shared WebSocket session helper | - | `plugin-sdk/realtime-voice` | Realtime voice helpers | Provider types, registry helpers, and bridge session helpers | + | `plugin-sdk/realtime-voice` | Realtime voice helpers | Provider types, registry/resolution helpers, and bridge session helpers | | `plugin-sdk/image-generation-core` | Shared image-generation core | Image-generation types, failover, auth, and registry helpers | | `plugin-sdk/music-generation` | Music-generation helpers | Music-generation provider/request/result types | | `plugin-sdk/music-generation-core` | Shared music-generation core | Music-generation types, failover helpers, provider lookup, and model-ref parsing | diff --git a/extensions/voice-call/src/realtime-voice.runtime.ts b/extensions/voice-call/src/realtime-voice.runtime.ts index 6f169676adf..99ddb7ae7e1 100644 --- a/extensions/voice-call/src/realtime-voice.runtime.ts +++ b/extensions/voice-call/src/realtime-voice.runtime.ts @@ -1,4 +1,5 @@ export { getRealtimeVoiceProvider, listRealtimeVoiceProviders, + resolveConfiguredRealtimeVoiceProvider, } from "openclaw/plugin-sdk/realtime-voice"; diff --git a/extensions/voice-call/src/runtime.ts b/extensions/voice-call/src/runtime.ts index d9fdbea8d0c..2df69c4e75f 100644 --- a/extensions/voice-call/src/runtime.ts +++ b/extensions/voice-call/src/runtime.ts @@ -1,10 +1,6 @@ import type { OpenClawConfig } from
"openclaw/plugin-sdk/config-runtime"; import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; -import { resolveConfiguredCapabilityProvider } from "openclaw/plugin-sdk/provider-selection-runtime"; -import type { - RealtimeVoiceProviderConfig, - RealtimeVoiceProviderPlugin, -} from "openclaw/plugin-sdk/realtime-voice"; +import type { ResolvedRealtimeVoiceProvider } from "openclaw/plugin-sdk/realtime-voice"; import type { VoiceCallConfig } from "./config.js"; import { resolveVoiceCallConfig, validateProviderConfig } from "./config.js"; import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js"; @@ -34,10 +30,7 @@ type Logger = { debug?: (message: string) => void; }; -type ResolvedRealtimeProvider = { - provider: RealtimeVoiceProviderPlugin; - providerConfig: RealtimeVoiceProviderConfig; -}; +type ResolvedRealtimeProvider = ResolvedRealtimeVoiceProvider; type TelnyxProviderModule = typeof import("./providers/telnyx.js"); type TwilioProviderModule = typeof import("./providers/twilio.js"); @@ -197,36 +190,12 @@ async function resolveRealtimeProvider(params: { config: VoiceCallConfig; fullConfig: OpenClawConfig; }): Promise<ResolvedRealtimeProvider> { - const { getRealtimeVoiceProvider, listRealtimeVoiceProviders } = await loadRealtimeVoiceRuntime(); - const resolution = resolveConfiguredCapabilityProvider({ + const { resolveConfiguredRealtimeVoiceProvider } = await loadRealtimeVoiceRuntime(); + return resolveConfiguredRealtimeVoiceProvider({ configuredProviderId: params.config.realtime.provider, providerConfigs: params.config.realtime.providers, cfg: params.fullConfig, - cfgForResolve: params.fullConfig, - getConfiguredProvider: (providerId) => getRealtimeVoiceProvider(providerId, params.fullConfig), - listProviders: () => listRealtimeVoiceProviders(params.fullConfig), - resolveProviderConfig: ({ provider, cfg, rawConfig }) => - provider.resolveConfig?.({ cfg, rawConfig }) ??
rawConfig, - isProviderConfigured: ({ provider, cfg, providerConfig }) => - provider.isConfigured({ cfg, providerConfig }), }); - if (!resolution.ok && resolution.code === "missing-configured-provider") { - throw new Error( - `Realtime voice provider "${resolution.configuredProviderId}" is not registered`, - ); - } - if (!resolution.ok && resolution.code === "no-registered-provider") { - throw new Error("No realtime voice provider registered"); - } - if (!resolution.ok) { - throw new Error(`Realtime voice provider "${resolution.provider?.id}" is not configured`); - } - - const provider = resolution.provider; - return { - provider, - providerConfig: resolution.providerConfig, - }; } export async function createVoiceCallRuntime(params: { diff --git a/src/plugin-sdk/realtime-voice.ts b/src/plugin-sdk/realtime-voice.ts index 720b48d4e5a..4dcfb7c9c76 100644 --- a/src/plugin-sdk/realtime-voice.ts +++ b/src/plugin-sdk/realtime-voice.ts @@ -18,9 +18,15 @@ export { listRealtimeVoiceProviders, normalizeRealtimeVoiceProviderId, } from "../realtime-voice/provider-registry.js"; +export { + resolveConfiguredRealtimeVoiceProvider, + type ResolvedRealtimeVoiceProvider, + type ResolveConfiguredRealtimeVoiceProviderParams, +} from "../realtime-voice/provider-resolver.js"; export { createRealtimeVoiceBridgeSession, type RealtimeVoiceAudioSink, type RealtimeVoiceBridgeSession, type RealtimeVoiceBridgeSessionParams, + type RealtimeVoiceMarkStrategy, } from "../realtime-voice/session-runtime.js"; diff --git a/src/realtime-voice/provider-resolver.test.ts b/src/realtime-voice/provider-resolver.test.ts new file mode 100644 index 00000000000..5ce495229d7 --- /dev/null +++ b/src/realtime-voice/provider-resolver.test.ts @@ -0,0 +1,89 @@ +import { describe, expect, it } from "vitest"; +import type { RealtimeVoiceProviderPlugin } from "../plugins/types.js"; +import { resolveConfiguredRealtimeVoiceProvider } from "./provider-resolver.js"; + +describe("realtime voice provider resolver", () => { + 
const providers: RealtimeVoiceProviderPlugin[] = [ + { + id: "first", + label: "First", + autoSelectOrder: 1, + isConfigured: () => false, + createBridge: () => { + throw new Error("unused"); + }, + }, + { + id: "second", + label: "Second", + autoSelectOrder: 2, + resolveConfig: ({ rawConfig }) => ({ ...rawConfig, resolved: true }), + isConfigured: ({ providerConfig }) => providerConfig.enabled === true, + createBridge: () => { + throw new Error("unused"); + }, + }, + ]; + + it("auto-selects the first configured realtime voice provider", () => { + const resolution = resolveConfiguredRealtimeVoiceProvider({ + cfg: {}, + providers, + providerConfigs: { + second: { enabled: true }, + }, + }); + + expect(resolution).toMatchObject({ + provider: providers[1], + providerConfig: { + enabled: true, + resolved: true, + }, + }); + }); + + it("applies a default model before provider config resolution", () => { + const resolution = resolveConfiguredRealtimeVoiceProvider({ + cfg: {}, + configuredProviderId: "second", + defaultModel: "gpt-realtime", + providers, + providerConfigs: { + second: { enabled: true }, + }, + }); + + expect(resolution.providerConfig).toMatchObject({ + enabled: true, + model: "gpt-realtime", + resolved: true, + }); + }); + + it("keeps explicit provider model over the default model", () => { + const resolution = resolveConfiguredRealtimeVoiceProvider({ + cfg: {}, + configuredProviderId: "second", + defaultModel: "gpt-realtime", + providers, + providerConfigs: { + second: { enabled: true, model: "custom-realtime" }, + }, + }); + + expect(resolution.providerConfig).toMatchObject({ + model: "custom-realtime", + }); + }); + + it("throws a caller-specified message when no providers exist", () => { + expect(() => + resolveConfiguredRealtimeVoiceProvider({ + cfg: {}, + providers: [], + noRegisteredProviderMessage: "No configured realtime voice provider registered", + }), + ).toThrow("No configured realtime voice provider registered"); + }); +}); diff --git 
a/src/realtime-voice/provider-resolver.ts b/src/realtime-voice/provider-resolver.ts new file mode 100644 index 00000000000..e749568b364 --- /dev/null +++ b/src/realtime-voice/provider-resolver.ts @@ -0,0 +1,63 @@ +import type { OpenClawConfig } from "../config/types.openclaw.js"; +import { resolveConfiguredCapabilityProvider } from "../plugin-sdk/provider-selection-runtime.js"; +import type { RealtimeVoiceProviderPlugin } from "../plugins/types.js"; +import { getRealtimeVoiceProvider, listRealtimeVoiceProviders } from "./provider-registry.js"; +import type { RealtimeVoiceProviderConfig } from "./provider-types.js"; + +export type ResolvedRealtimeVoiceProvider = { + provider: RealtimeVoiceProviderPlugin; + providerConfig: RealtimeVoiceProviderConfig; +}; + +export type ResolveConfiguredRealtimeVoiceProviderParams = { + configuredProviderId?: string; + providerConfigs?: Record<string, Record<string, unknown> | undefined>; + cfg?: OpenClawConfig; + cfgForResolve?: OpenClawConfig; + providers?: RealtimeVoiceProviderPlugin[]; + defaultModel?: string; + noRegisteredProviderMessage?: string; +}; + +export function resolveConfiguredRealtimeVoiceProvider( + params: ResolveConfiguredRealtimeVoiceProviderParams, +): ResolvedRealtimeVoiceProvider { + const cfgForResolve = params.cfgForResolve ?? params.cfg ?? ({} as OpenClawConfig); + const providers = params.providers ?? listRealtimeVoiceProviders(params.cfg); + const resolution = resolveConfiguredCapabilityProvider({ + configuredProviderId: params.configuredProviderId, + providerConfigs: params.providerConfigs, + cfg: params.cfg, + cfgForResolve, + getConfiguredProvider: (providerId) => + params.providers?.find((entry) => entry.id === providerId) ?? + getRealtimeVoiceProvider(providerId, params.cfg), + listProviders: () => providers, + resolveProviderConfig: ({ provider, cfg, rawConfig }) => { + const rawConfigWithModel = + params.defaultModel && rawConfig.model === undefined + ?
{ ...rawConfig, model: params.defaultModel } + : rawConfig; + return provider.resolveConfig?.({ cfg, rawConfig: rawConfigWithModel }) ?? rawConfigWithModel; + }, + isProviderConfigured: ({ provider, cfg, providerConfig }) => + provider.isConfigured({ cfg, providerConfig }), + }); + + if (!resolution.ok && resolution.code === "missing-configured-provider") { + throw new Error( + `Realtime voice provider "${resolution.configuredProviderId}" is not registered`, + ); + } + if (!resolution.ok && resolution.code === "no-registered-provider") { + throw new Error(params.noRegisteredProviderMessage ?? "No realtime voice provider registered"); + } + if (!resolution.ok) { + throw new Error(`Realtime voice provider "${resolution.provider?.id}" is not configured`); + } + + return { + provider: resolution.provider, + providerConfig: resolution.providerConfig, + }; +} diff --git a/src/realtime-voice/session-runtime.test.ts b/src/realtime-voice/session-runtime.test.ts index 6afd1378f9e..65b79c6764d 100644 --- a/src/realtime-voice/session-runtime.test.ts +++ b/src/realtime-voice/session-runtime.test.ts @@ -54,6 +54,60 @@ describe("realtime voice bridge session runtime", () => { expect(sendMark).toHaveBeenCalledWith("mark-1"); }); + it("can acknowledge provider marks without transport mark support", () => { + let callbacks: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0] | undefined; + const bridge = makeBridge(); + const provider: RealtimeVoiceProviderPlugin = { + id: "test", + label: "Test", + isConfigured: () => true, + createBridge: (request) => { + callbacks = request; + return bridge; + }, + }; + const sendMark = vi.fn(); + + createRealtimeVoiceBridgeSession({ + provider, + providerConfig: {}, + audioSink: { sendAudio: vi.fn(), sendMark }, + markStrategy: "ack-immediately", + }); + + callbacks?.onMark?.("mark-1"); + + expect(sendMark).not.toHaveBeenCalled(); + expect(bridge.acknowledgeMark).toHaveBeenCalled(); + }); + + it("can ignore provider marks", () => { + let callbacks: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0] | undefined; + const
bridge = makeBridge(); + const provider: RealtimeVoiceProviderPlugin = { + id: "test", + label: "Test", + isConfigured: () => true, + createBridge: (request) => { + callbacks = request; + return bridge; + }, + }; + const sendMark = vi.fn(); + + createRealtimeVoiceBridgeSession({ + provider, + providerConfig: {}, + audioSink: { sendAudio: vi.fn(), sendMark }, + markStrategy: "ignore", + }); + + callbacks?.onMark?.("mark-1"); + + expect(sendMark).not.toHaveBeenCalled(); + expect(bridge.acknowledgeMark).not.toHaveBeenCalled(); + }); + it("passes tool calls the active session and triggers initial greeting on ready", () => { let callbacks: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0] | undefined; const bridge = makeBridge(); diff --git a/src/realtime-voice/session-runtime.ts b/src/realtime-voice/session-runtime.ts index b79feedcfdd..3887e799876 100644 --- a/src/realtime-voice/session-runtime.ts +++ b/src/realtime-voice/session-runtime.ts @@ -15,6 +15,8 @@ export type RealtimeVoiceAudioSink = { sendMark?: (markName: string) => void; }; +export type RealtimeVoiceMarkStrategy = "transport" | "ack-immediately" | "ignore"; + export type RealtimeVoiceBridgeSession = { bridge: RealtimeVoiceBridge; acknowledgeMark(): void; @@ -33,6 +35,7 @@ export type RealtimeVoiceBridgeSessionParams = { audioSink: RealtimeVoiceAudioSink; instructions?: string; initialGreetingInstructions?: string; + markStrategy?: RealtimeVoiceMarkStrategy; triggerGreetingOnReady?: boolean; tools?: RealtimeVoiceTool[]; onTranscript?: (role: RealtimeVoiceRole, text: string, isFinal: boolean) => void; @@ -81,7 +84,14 @@ export function createRealtimeVoiceBridgeSession( } }, onMark: (markName) => { - if (canSendAudio()) { + if (!canSendAudio() || params.markStrategy === "ignore") { + return; + } + if (params.markStrategy === "ack-immediately") { + bridge?.acknowledgeMark(); + return; + } + if (params.markStrategy === undefined || params.markStrategy === "transport") { params.audioSink.sendMark?.(markName); } },