mirror of
https://fastgit.cc/github.com/openclaw/openclaw
synced 2026-04-30 22:12:32 +08:00
refactor: centralize realtime voice resolution
This commit is contained in:
@@ -296,7 +296,7 @@ Current bundled provider examples:
|
||||
| `plugin-sdk/speech` | Speech helpers | Speech provider types plus provider-facing directive, registry, and validation helpers |
|
||||
| `plugin-sdk/speech-core` | Shared speech core | Speech provider types, registry, directives, normalization |
|
||||
| `plugin-sdk/realtime-transcription` | Realtime transcription helpers | Provider types, registry helpers, and shared WebSocket session helper |
|
||||
| `plugin-sdk/realtime-voice` | Realtime voice helpers | Provider types, registry helpers, and bridge session helpers |
|
||||
| `plugin-sdk/realtime-voice` | Realtime voice helpers | Provider types, registry/resolution helpers, and bridge session helpers |
|
||||
| `plugin-sdk/image-generation-core` | Shared image-generation core | Image-generation types, failover, auth, and registry helpers |
|
||||
| `plugin-sdk/music-generation` | Music-generation helpers | Music-generation provider/request/result types |
|
||||
| `plugin-sdk/music-generation-core` | Shared music-generation core | Music-generation types, failover helpers, provider lookup, and model-ref parsing |
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
export {
|
||||
getRealtimeVoiceProvider,
|
||||
listRealtimeVoiceProviders,
|
||||
resolveConfiguredRealtimeVoiceProvider,
|
||||
} from "openclaw/plugin-sdk/realtime-voice";
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
|
||||
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
||||
import { resolveConfiguredCapabilityProvider } from "openclaw/plugin-sdk/provider-selection-runtime";
|
||||
import type {
|
||||
RealtimeVoiceProviderConfig,
|
||||
RealtimeVoiceProviderPlugin,
|
||||
} from "openclaw/plugin-sdk/realtime-voice";
|
||||
import type { ResolvedRealtimeVoiceProvider } from "openclaw/plugin-sdk/realtime-voice";
|
||||
import type { VoiceCallConfig } from "./config.js";
|
||||
import { resolveVoiceCallConfig, validateProviderConfig } from "./config.js";
|
||||
import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
|
||||
@@ -34,10 +30,7 @@ type Logger = {
|
||||
debug?: (message: string) => void;
|
||||
};
|
||||
|
||||
type ResolvedRealtimeProvider = {
|
||||
provider: RealtimeVoiceProviderPlugin;
|
||||
providerConfig: RealtimeVoiceProviderConfig;
|
||||
};
|
||||
type ResolvedRealtimeProvider = ResolvedRealtimeVoiceProvider;
|
||||
|
||||
type TelnyxProviderModule = typeof import("./providers/telnyx.js");
|
||||
type TwilioProviderModule = typeof import("./providers/twilio.js");
|
||||
@@ -197,36 +190,12 @@ async function resolveRealtimeProvider(params: {
|
||||
config: VoiceCallConfig;
|
||||
fullConfig: OpenClawConfig;
|
||||
}): Promise<ResolvedRealtimeProvider> {
|
||||
const { getRealtimeVoiceProvider, listRealtimeVoiceProviders } = await loadRealtimeVoiceRuntime();
|
||||
const resolution = resolveConfiguredCapabilityProvider({
|
||||
const { resolveConfiguredRealtimeVoiceProvider } = await loadRealtimeVoiceRuntime();
|
||||
return resolveConfiguredRealtimeVoiceProvider({
|
||||
configuredProviderId: params.config.realtime.provider,
|
||||
providerConfigs: params.config.realtime.providers,
|
||||
cfg: params.fullConfig,
|
||||
cfgForResolve: params.fullConfig,
|
||||
getConfiguredProvider: (providerId) => getRealtimeVoiceProvider(providerId, params.fullConfig),
|
||||
listProviders: () => listRealtimeVoiceProviders(params.fullConfig),
|
||||
resolveProviderConfig: ({ provider, cfg, rawConfig }) =>
|
||||
provider.resolveConfig?.({ cfg, rawConfig }) ?? rawConfig,
|
||||
isProviderConfigured: ({ provider, cfg, providerConfig }) =>
|
||||
provider.isConfigured({ cfg, providerConfig }),
|
||||
});
|
||||
if (!resolution.ok && resolution.code === "missing-configured-provider") {
|
||||
throw new Error(
|
||||
`Realtime voice provider "${resolution.configuredProviderId}" is not registered`,
|
||||
);
|
||||
}
|
||||
if (!resolution.ok && resolution.code === "no-registered-provider") {
|
||||
throw new Error("No realtime voice provider registered");
|
||||
}
|
||||
if (!resolution.ok) {
|
||||
throw new Error(`Realtime voice provider "${resolution.provider?.id}" is not configured`);
|
||||
}
|
||||
|
||||
const provider = resolution.provider;
|
||||
return {
|
||||
provider,
|
||||
providerConfig: resolution.providerConfig,
|
||||
};
|
||||
}
|
||||
|
||||
export async function createVoiceCallRuntime(params: {
|
||||
|
||||
@@ -18,9 +18,15 @@ export {
|
||||
listRealtimeVoiceProviders,
|
||||
normalizeRealtimeVoiceProviderId,
|
||||
} from "../realtime-voice/provider-registry.js";
|
||||
export {
|
||||
resolveConfiguredRealtimeVoiceProvider,
|
||||
type ResolvedRealtimeVoiceProvider,
|
||||
type ResolveConfiguredRealtimeVoiceProviderParams,
|
||||
} from "../realtime-voice/provider-resolver.js";
|
||||
export {
|
||||
createRealtimeVoiceBridgeSession,
|
||||
type RealtimeVoiceAudioSink,
|
||||
type RealtimeVoiceBridgeSession,
|
||||
type RealtimeVoiceBridgeSessionParams,
|
||||
type RealtimeVoiceMarkStrategy,
|
||||
} from "../realtime-voice/session-runtime.js";
|
||||
|
||||
89
src/realtime-voice/provider-resolver.test.ts
Normal file
89
src/realtime-voice/provider-resolver.test.ts
Normal file
@@ -0,0 +1,89 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { RealtimeVoiceProviderPlugin } from "../plugins/types.js";
|
||||
import { resolveConfiguredRealtimeVoiceProvider } from "./provider-resolver.js";
|
||||
|
||||
describe("realtime voice provider resolver", () => {
  // Resolution never opens a bridge, so every stub shares one throwing factory.
  const createUnusedBridge = (): never => {
    throw new Error("unused");
  };

  // Ordered first for auto-selection, but never reports itself as configured.
  const firstProvider: RealtimeVoiceProviderPlugin = {
    id: "first",
    label: "First",
    autoSelectOrder: 1,
    isConfigured: () => false,
    createBridge: createUnusedBridge,
  };

  // Tags every config it resolves and counts as configured only when the
  // resolved config has `enabled: true`.
  const secondProvider: RealtimeVoiceProviderPlugin = {
    id: "second",
    label: "Second",
    autoSelectOrder: 2,
    resolveConfig: ({ rawConfig }) => ({ ...rawConfig, resolved: true }),
    isConfigured: ({ providerConfig }) => providerConfig.enabled === true,
    createBridge: createUnusedBridge,
  };

  const providers: RealtimeVoiceProviderPlugin[] = [firstProvider, secondProvider];

  it("auto-selects the first configured realtime voice provider", () => {
    const resolved = resolveConfiguredRealtimeVoiceProvider({
      cfg: {},
      providers,
      providerConfigs: { second: { enabled: true } },
    });

    expect(resolved).toMatchObject({
      provider: secondProvider,
      providerConfig: { enabled: true, resolved: true },
    });
  });

  it("applies a default model before provider config resolution", () => {
    const { providerConfig } = resolveConfiguredRealtimeVoiceProvider({
      cfg: {},
      configuredProviderId: "second",
      defaultModel: "gpt-realtime",
      providers,
      providerConfigs: { second: { enabled: true } },
    });

    expect(providerConfig).toMatchObject({
      enabled: true,
      model: "gpt-realtime",
      resolved: true,
    });
  });

  it("keeps explicit provider model over the default model", () => {
    const { providerConfig } = resolveConfiguredRealtimeVoiceProvider({
      cfg: {},
      configuredProviderId: "second",
      defaultModel: "gpt-realtime",
      providers,
      providerConfigs: { second: { enabled: true, model: "custom-realtime" } },
    });

    expect(providerConfig).toMatchObject({ model: "custom-realtime" });
  });

  it("throws a caller-specified message when no providers exist", () => {
    const resolveWithoutProviders = () =>
      resolveConfiguredRealtimeVoiceProvider({
        cfg: {},
        providers: [],
        noRegisteredProviderMessage: "No configured realtime voice provider registered",
      });

    expect(resolveWithoutProviders).toThrow("No configured realtime voice provider registered");
  });
});
|
||||
63
src/realtime-voice/provider-resolver.ts
Normal file
63
src/realtime-voice/provider-resolver.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||
import { resolveConfiguredCapabilityProvider } from "../plugin-sdk/provider-selection-runtime.js";
|
||||
import type { RealtimeVoiceProviderPlugin } from "../plugins/types.js";
|
||||
import { getRealtimeVoiceProvider, listRealtimeVoiceProviders } from "./provider-registry.js";
|
||||
import type { RealtimeVoiceProviderConfig } from "./provider-types.js";
|
||||
|
||||
/**
 * Result of a successful realtime voice provider resolution: the selected
 * provider plugin paired with the config resolved for it.
 */
export type ResolvedRealtimeVoiceProvider = {
  /** Provider plugin chosen by the resolution. */
  provider: RealtimeVoiceProviderPlugin;
  /** Config produced for the chosen provider during resolution. */
  providerConfig: RealtimeVoiceProviderConfig;
};
|
||||
|
||||
/** Inputs accepted by `resolveConfiguredRealtimeVoiceProvider`. */
export type ResolveConfiguredRealtimeVoiceProviderParams = {
  /** Provider id requested by the caller; forwarded to provider selection. */
  configuredProviderId?: string;
  /** Raw per-provider config objects, keyed by provider id. */
  providerConfigs?: Record<string, Record<string, unknown> | undefined>;
  /** Config handed to registry lookups and to provider selection. */
  cfg?: OpenClawConfig;
  /** Config used while resolving provider config; falls back to `cfg`, then to an empty config. */
  cfgForResolve?: OpenClawConfig;
  /**
   * Explicit provider list. When given it replaces the registry listing and is
   * searched by id before the registry lookup.
   */
  providers?: RealtimeVoiceProviderPlugin[];
  /** Model written into a provider's raw config when that config has no `model`. */
  defaultModel?: string;
  /** Error message used when no provider is registered; a built-in message applies when omitted. */
  noRegisteredProviderMessage?: string;
};
|
||||
|
||||
/**
 * Selects the realtime voice provider to use and resolves its config.
 *
 * Candidate providers come from `params.providers` when supplied, otherwise
 * from the realtime voice provider registry. Selection itself is delegated to
 * `resolveConfiguredCapabilityProvider`; this wrapper wires in the realtime
 * voice lookups, seeds the default model, and turns failed resolutions into
 * thrown errors.
 *
 * @param params - Provider selection inputs; every field is optional.
 * @returns The selected provider together with its resolved config.
 * @throws Error when the configured provider id is not registered, when no
 *   provider is registered at all, or when the candidate provider is not
 *   configured.
 */
export function resolveConfiguredRealtimeVoiceProvider(
  params: ResolveConfiguredRealtimeVoiceProviderParams,
): ResolvedRealtimeVoiceProvider {
  const { cfg, defaultModel, providers: explicitProviders } = params;
  const candidates = explicitProviders ?? listRealtimeVoiceProviders(cfg);

  const resolution = resolveConfiguredCapabilityProvider({
    configuredProviderId: params.configuredProviderId,
    providerConfigs: params.providerConfigs,
    cfg,
    cfgForResolve: params.cfgForResolve ?? cfg ?? ({} as OpenClawConfig),
    getConfiguredProvider: (providerId) => {
      // An explicitly supplied provider wins; otherwise consult the registry.
      const explicitMatch = explicitProviders?.find((entry) => entry.id === providerId);
      return explicitMatch ?? getRealtimeVoiceProvider(providerId, cfg);
    },
    listProviders: () => candidates,
    resolveProviderConfig: ({ provider, cfg: resolveCfg, rawConfig }) => {
      // Seed the default model only when the raw config carries no model, so
      // the provider's own resolveConfig sees it and explicit models are kept.
      const seededConfig =
        defaultModel && rawConfig.model === undefined
          ? { ...rawConfig, model: defaultModel }
          : rawConfig;
      return provider.resolveConfig?.({ cfg: resolveCfg, rawConfig: seededConfig }) ?? seededConfig;
    },
    isProviderConfigured: ({ provider, cfg: resolveCfg, providerConfig }) =>
      provider.isConfigured({ cfg: resolveCfg, providerConfig }),
  });

  if (resolution.ok) {
    return {
      provider: resolution.provider,
      providerConfig: resolution.providerConfig,
    };
  }

  if (resolution.code === "missing-configured-provider") {
    throw new Error(
      `Realtime voice provider "${resolution.configuredProviderId}" is not registered`,
    );
  }
  if (resolution.code === "no-registered-provider") {
    throw new Error(params.noRegisteredProviderMessage ?? "No realtime voice provider registered");
  }
  // Any remaining failure means a provider was found but is not configured.
  throw new Error(`Realtime voice provider "${resolution.provider?.id}" is not configured`);
}
|
||||
@@ -54,6 +54,60 @@ describe("realtime voice bridge session runtime", () => {
|
||||
expect(sendMark).toHaveBeenCalledWith("mark-1");
|
||||
});
|
||||
|
||||
// With the "ack-immediately" strategy a provider mark is acknowledged on the
// bridge directly and never forwarded to the audio sink.
it("can acknowledge provider marks without transport mark support", () => {
  const bridge = makeBridge();
  const sendMark = vi.fn();
  let bridgeRequest: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0] | undefined;

  const provider: RealtimeVoiceProviderPlugin = {
    id: "test",
    label: "Test",
    isConfigured: () => true,
    // Capture the callbacks the session hands to the provider bridge.
    createBridge: (request) => {
      bridgeRequest = request;
      return bridge;
    },
  };

  createRealtimeVoiceBridgeSession({
    provider,
    providerConfig: {},
    audioSink: { sendAudio: vi.fn(), sendMark },
    markStrategy: "ack-immediately",
  });

  bridgeRequest?.onMark?.("mark-1");

  expect(sendMark).not.toHaveBeenCalled();
  expect(bridge.acknowledgeMark).toHaveBeenCalled();
});
|
||||
|
||||
// With the "ignore" strategy a provider mark is neither forwarded to the audio
// sink nor acknowledged on the bridge.
it("can ignore provider marks", () => {
  const bridge = makeBridge();
  const sendMark = vi.fn();
  let bridgeRequest: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0] | undefined;

  const provider: RealtimeVoiceProviderPlugin = {
    id: "test",
    label: "Test",
    isConfigured: () => true,
    // Capture the callbacks the session hands to the provider bridge.
    createBridge: (request) => {
      bridgeRequest = request;
      return bridge;
    },
  };

  createRealtimeVoiceBridgeSession({
    provider,
    providerConfig: {},
    audioSink: { sendAudio: vi.fn(), sendMark },
    markStrategy: "ignore",
  });

  bridgeRequest?.onMark?.("mark-1");

  expect(sendMark).not.toHaveBeenCalled();
  expect(bridge.acknowledgeMark).not.toHaveBeenCalled();
});
|
||||
|
||||
it("passes tool calls the active session and triggers initial greeting on ready", () => {
|
||||
let callbacks: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0] | undefined;
|
||||
const bridge = makeBridge();
|
||||
|
||||
@@ -15,6 +15,8 @@ export type RealtimeVoiceAudioSink = {
|
||||
sendMark?: (markName: string) => void;
|
||||
};
|
||||
|
||||
/**
 * How a bridge session reacts when the provider bridge reports a mark.
 *
 * - `"transport"`: forward the mark name to the audio sink's optional
 *   `sendMark`. This is also what happens when no strategy is given.
 * - `"ack-immediately"`: acknowledge the mark on the bridge right away without
 *   involving the audio sink.
 * - `"ignore"`: drop the mark; nothing is sent or acknowledged.
 */
export type RealtimeVoiceMarkStrategy = "transport" | "ack-immediately" | "ignore";
|
||||
|
||||
export type RealtimeVoiceBridgeSession = {
|
||||
bridge: RealtimeVoiceBridge;
|
||||
acknowledgeMark(): void;
|
||||
@@ -33,6 +35,7 @@ export type RealtimeVoiceBridgeSessionParams = {
|
||||
audioSink: RealtimeVoiceAudioSink;
|
||||
instructions?: string;
|
||||
initialGreetingInstructions?: string;
|
||||
markStrategy?: RealtimeVoiceMarkStrategy;
|
||||
triggerGreetingOnReady?: boolean;
|
||||
tools?: RealtimeVoiceTool[];
|
||||
onTranscript?: (role: RealtimeVoiceRole, text: string, isFinal: boolean) => void;
|
||||
@@ -81,7 +84,14 @@ export function createRealtimeVoiceBridgeSession(
|
||||
}
|
||||
},
|
||||
onMark: (markName) => {
|
||||
if (canSendAudio()) {
|
||||
if (!canSendAudio() || params.markStrategy === "ignore") {
|
||||
return;
|
||||
}
|
||||
if (params.markStrategy === "ack-immediately") {
|
||||
bridge?.acknowledgeMark();
|
||||
return;
|
||||
}
|
||||
if (params.markStrategy === undefined || params.markStrategy === "transport") {
|
||||
params.audioSink.sendMark?.(markName);
|
||||
}
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user