refactor(tts): resolve voice delivery from channel capabilities

Peter Steinberger
2026-04-26 06:51:19 +01:00
parent 2784710f4d
commit d613c8e29b
20 changed files with 349 additions and 71 deletions

View File

@@ -150,6 +150,8 @@ Docs: https://docs.openclaw.ai
same inbound audio twice. Fixes #70580.
- TTS/BlueBubbles: deliver compatible auto-TTS audio as iMessage voice memo
bubbles instead of plain MP3/CAF file attachments. Fixes #16848.
- TTS: resolve voice-note and voice-memo routing from channel plugin
capabilities instead of speech-core-owned channel id lists.
- ACP: send subagent and async-task completion wakes to external ACP harnesses as
plain prompts instead of OpenClaw internal runtime-context envelopes, while
keeping those envelopes out of ACP transcripts.

View File

@@ -1,2 +1,2 @@
690c1cd4c0c2c3d31577958120e14ac0bf555af529e03aa5e7965b1d04659c49 plugin-sdk-api-baseline.json
a0e6ba472ddd3acea34c0a8fda8cbb7d1172b1671a671d5fef5a9f42d749ce0d plugin-sdk-api-baseline.jsonl
a81b6ddeb1fd24bf234a3b7ba1d51d18d7060afa49378dd92988f326e140db13 plugin-sdk-api-baseline.json
90a6e45404c2c017c23ab9ee75e71503ec683a680f64266504fdab69e43f288b plugin-sdk-api-baseline.jsonl

View File

@@ -646,6 +646,44 @@ or `messages.tts.prefsPath`.
These override the effective config from `messages.tts` plus the active
`agents.list[].tts` block for that host.
## Output formats (fixed)
TTS voice delivery is channel-capability driven. Channel plugins advertise
whether voice-style TTS should ask providers for a native `voice-note` target or
keep normal `audio-file` synthesis and only mark compatible output for voice
delivery.
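For reference, the capability block a channel plugin advertises has the following shape (a sketch based on the `ChannelTtsVoiceDeliveryCapabilities` type; the values shown mirror the shipped Feishu and BlueBubbles plugins):

```ts
// Sketch: voice-delivery capabilities a channel plugin can advertise.
type ChannelTtsVoiceDeliveryCapabilities = {
  synthesisTarget: "audio-file" | "voice-note";
  transcodesAudio?: boolean; // the channel converts compatible audio to Opus itself
  audioFileFormats?: readonly string[]; // outputs eligible for voice-memo delivery
};

// Feishu asks providers for a native voice-note and transcodes other audio itself.
const feishuVoice: ChannelTtsVoiceDeliveryCapabilities = {
  synthesisTarget: "voice-note",
  transcodesAudio: true,
};

// BlueBubbles keeps audio-file synthesis and marks MP3/CAF output for voice-memo delivery.
const bluebubblesVoice: ChannelTtsVoiceDeliveryCapabilities = {
  synthesisTarget: "audio-file",
  audioFileFormats: ["mp3", "caf", "audio/mpeg", "audio/x-caf"],
};
```

Speech-core reads this block through `resolveChannelTtsVoiceDelivery(channel)` and falls back to plain `audio-file` synthesis when a channel advertises nothing; the per-channel output formats below build on these declarations.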
- **Voice-note capable channels**: voice-note replies prefer Opus (`opus_48000_64` from ElevenLabs, `opus` from OpenAI).
- 48kHz / 64kbps is a good voice message tradeoff.
- **Feishu / WhatsApp**: when a voice-note reply is produced as MP3/WebM/WAV/M4A
or another common audio container, the channel plugin transcodes it to 48kHz
Ogg/Opus with `ffmpeg` before sending the native voice message (a sketch of this
conversion follows the list). WhatsApp sends the result through the Baileys
`audio` payload with `ptt: true` and `audio/ogg; codecs=opus`. If conversion
fails, Feishu receives the original file as an attachment, while the WhatsApp
send fails rather than posting an incompatible PTT payload.
- **BlueBubbles**: keeps provider synthesis on the normal audio-file path; MP3
and CAF outputs are marked for iMessage voice memo delivery.
- **Other channels**: MP3 (`mp3_44100_128` from ElevenLabs, `mp3` from OpenAI).
- 44.1kHz / 128kbps is the default balance for speech clarity.
- **MiniMax**: MP3 (`speech-2.8-hd` model, 32kHz sample rate) for normal audio attachments. For channel-advertised voice-note targets, OpenClaw transcodes the MiniMax MP3 to 48kHz Opus with `ffmpeg` before delivery when the channel advertises transcoding.
- **Xiaomi MiMo**: MP3 by default, or WAV when configured. For channel-advertised voice-note targets, OpenClaw transcodes Xiaomi output to 48kHz Opus with `ffmpeg` before delivery when the channel advertises transcoding.
- **Local CLI**: uses the configured `outputFormat`. Voice-note targets are
converted to Ogg/Opus and telephony output is converted to raw 16 kHz mono PCM
with `ffmpeg`.
- **Google Gemini**: Gemini API TTS returns raw 24kHz PCM. OpenClaw wraps it as WAV for audio attachments, transcodes it to 48kHz Opus for voice-note targets, and returns PCM directly for Talk/telephony.
- **Gradium**: WAV for audio attachments, Opus for voice-note targets, and `ulaw_8000` at 8 kHz for telephony.
- **Inworld**: MP3 for normal audio attachments, native `OGG_OPUS` for voice-note targets, and raw `PCM` at 22050 Hz for Talk/telephony.
- **xAI**: MP3 by default; `responseFormat` may be `mp3`, `wav`, `pcm`, `mulaw`, or `alaw`. OpenClaw uses xAI's batch REST TTS endpoint and returns a complete audio attachment; xAI's streaming TTS WebSocket is not used by this provider path. Native Opus voice-note format is not supported by this path.
- **Microsoft**: uses `microsoft.outputFormat` (default `audio-24khz-48kbitrate-mono-mp3`).
- The bundled transport accepts an `outputFormat`, but not all formats are available from the service.
- Output format values follow Microsoft Speech output formats (including Ogg/WebM Opus).
- Telegram `sendVoice` accepts OGG/MP3/M4A; use OpenAI/ElevenLabs if you need
guaranteed Opus voice messages.
- If the configured Microsoft output format fails, OpenClaw retries with MP3.
OpenAI/ElevenLabs output formats are fixed per channel (see above).
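Where a channel or provider path advertises transcoding, the conversion into a voice-note payload is roughly equivalent to the following `ffmpeg` invocation (a sketch only; the helper name and exact flags are illustrative, not OpenClaw's actual implementation):

```ts
import { execFile } from "node:child_process";
import { promisify } from "node:util";

const run = promisify(execFile);

// Sketch: turn a synthesized MP3 into the 48kHz Ogg/Opus payload (~64 kbps)
// that Telegram/WhatsApp/Feishu expect for native voice messages.
async function transcodeToVoiceNote(inputPath: string, outputPath: string): Promise<void> {
  await run("ffmpeg", [
    "-y", // overwrite any existing output file
    "-i", inputPath,
    "-c:a", "libopus",
    "-b:a", "64k",
    "-ar", "48000",
    outputPath, // e.g. reply.ogg
  ]);
}
```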
## Auto-TTS behavior
When `messages.tts.auto` is enabled, OpenClaw:

View File

@@ -1,13 +1,16 @@
import { readFileSync } from "node:fs";
import { resolve } from "node:path";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../src/config/config.js";
import { buildPluginApi } from "../../src/plugins/api-builder.js";
import type { PluginRuntime } from "../../src/plugins/runtime/types.js";
import { registerSingleProviderPlugin } from "../../test/helpers/plugins/plugin-registration.js";
import { resetBedrockDiscoveryCacheForTest } from "./discovery.js";
import amazonBedrockPlugin from "./index.js";
import { resetBedrockAppProfileCacheEligibilityForTest } from "./register.sync.runtime.js";
import {
resetBedrockAppProfileCacheEligibilityForTest,
setBedrockAppProfileControlPlaneForTest,
} from "./register.sync.runtime.js";
type BedrockClientResult =
| {
@@ -211,6 +214,19 @@ describe("amazon-bedrock provider plugin", () => {
sendBedrockCommand.mockClear();
resetBedrockDiscoveryCacheForTest();
resetBedrockAppProfileCacheEligibilityForTest();
setBedrockAppProfileControlPlaneForTest((region) => ({
async getInferenceProfile(input) {
class GetInferenceProfileCommand {
constructor(readonly input: Record<string, unknown> = {}) {}
}
bedrockClientConfigs.push(region ? { region } : {});
return await sendBedrockCommand(new GetInferenceProfileCommand(input));
},
}));
});
afterEach(() => {
setBedrockAppProfileControlPlaneForTest(undefined);
});
it("marks Claude 4.6 Bedrock models as adaptive by default", async () => {

View File

@@ -153,10 +153,42 @@ function resolvedModelSupportsCaching(modelArn: string): boolean {
*/
const appProfileCacheEligibleCache = new Map<string, boolean>();
type BedrockGetInferenceProfileResponse = {
models?: Array<{ modelArn?: string }>;
};
type BedrockControlPlane = {
getInferenceProfile: (input: {
inferenceProfileIdentifier: string;
}) => Promise<BedrockGetInferenceProfileResponse>;
};
type BedrockControlPlaneFactory = (region: string | undefined) => BedrockControlPlane;
let bedrockControlPlaneOverride: BedrockControlPlaneFactory | undefined;
export function resetBedrockAppProfileCacheEligibilityForTest(): void {
appProfileCacheEligibleCache.clear();
}
export function setBedrockAppProfileControlPlaneForTest(
controlPlane: BedrockControlPlaneFactory | undefined,
): void {
bedrockControlPlaneOverride = controlPlane;
resetBedrockAppProfileCacheEligibilityForTest();
}
async function createBedrockControlPlane(region: string | undefined): Promise<BedrockControlPlane> {
if (bedrockControlPlaneOverride) {
return bedrockControlPlaneOverride(region);
}
const { BedrockClient, GetInferenceProfileCommand } = await import("@aws-sdk/client-bedrock");
const client = new BedrockClient(region ? { region } : {});
return {
getInferenceProfile: async (input) => await client.send(new GetInferenceProfileCommand(input)),
};
}
async function resolveAppProfileCacheEligible(
modelId: string,
fallbackRegion: string | undefined,
@@ -165,12 +197,9 @@ async function resolveAppProfileCacheEligible(
return appProfileCacheEligibleCache.get(modelId)!;
}
try {
const { BedrockClient, GetInferenceProfileCommand } = await import("@aws-sdk/client-bedrock");
const region = extractRegionFromArn(modelId) ?? fallbackRegion;
const client = new BedrockClient(region ? { region } : {});
const resp = await client.send(
new GetInferenceProfileCommand({ inferenceProfileIdentifier: modelId }),
);
const controlPlane = await createBedrockControlPlane(region);
const resp = await controlPlane.getInferenceProfile({ inferenceProfileIdentifier: modelId });
const models = resp.models ?? [];
const eligible =
models.length > 0 &&

View File

@@ -31,6 +31,12 @@ export const bluebubblesMeta = {
export const bluebubblesCapabilities: ChannelPlugin<ResolvedBlueBubblesAccount>["capabilities"] = {
chatTypes: ["direct", "group"],
media: true,
tts: {
voice: {
synthesisTarget: "audio-file",
audioFileFormats: ["mp3", "caf", "audio/mpeg", "audio/x-caf"],
},
},
reactions: true,
edit: true,
unsend: true,

View File

@@ -96,6 +96,11 @@ export function createDiscordPluginBase(params: {
reactions: true,
threads: true,
media: true,
tts: {
voice: {
synthesisTarget: "voice-note",
},
},
nativeCommands: true,
},
commands: {

View File

@@ -588,6 +588,12 @@ export const feishuPlugin: ChannelPlugin<ResolvedFeishuAccount, FeishuProbeResul
polls: false,
threads: true,
media: true,
tts: {
voice: {
synthesisTarget: "voice-note",
transcodesAudio: true,
},
},
reactions: true,
edit: true,
reply: true,

View File

@@ -334,6 +334,11 @@ export const matrixPlugin: ChannelPlugin<ResolvedMatrixAccount, MatrixProbe> =
reactions: true,
threads: true,
media: true,
tts: {
voice: {
synthesisTarget: "voice-note",
},
},
},
reload: { configPrefixes: ["channels.matrix"] },
configSchema: buildChannelConfigSchema(MatrixConfigSchema),

View File

@@ -21,12 +21,6 @@ const runtimeMocks = vi.hoisted(() => ({
refreshOpenAICodexToken: vi.fn(),
}));
type OpenAIRefreshDelegateGlobal = typeof globalThis & {
__OPENCLAW_TEST_REFRESH_OPENAI_CODEX_TOKEN__?: (...args: unknown[]) => unknown;
};
const openAIRefreshDelegateGlobal = () => globalThis as OpenAIRefreshDelegateGlobal;
vi.mock("openclaw/plugin-sdk/runtime-env", async () => {
const actual = await vi.importActual<typeof import("openclaw/plugin-sdk/runtime-env")>(
"openclaw/plugin-sdk/runtime-env",
@@ -41,12 +35,10 @@ vi.mock("@mariozechner/pi-ai/oauth", () => ({
getOAuthApiKey: vi.fn(),
getOAuthProviders: () => [],
loginOpenAICodex: vi.fn(),
refreshOpenAICodexToken: vi.fn((...args: unknown[]) =>
openAIRefreshDelegateGlobal().__OPENCLAW_TEST_REFRESH_OPENAI_CODEX_TOKEN__?.(...args),
),
refreshOpenAICodexToken: vi.fn(),
}));
import { refreshOpenAICodexToken } from "./openai-codex-provider.runtime.js";
import { createOpenAICodexProviderRuntime } from "./openai-codex-provider.runtime.js";
const _registerOpenAIPlugin = async () =>
registerProviderPlugin({
@@ -312,19 +304,19 @@ describe("openai plugin", () => {
expires: Date.now() + 60_000,
};
runtimeMocks.refreshOpenAICodexToken.mockResolvedValue(refreshed);
openAIRefreshDelegateGlobal().__OPENCLAW_TEST_REFRESH_OPENAI_CODEX_TOKEN__ =
runtimeMocks.refreshOpenAICodexToken;
try {
await expect(refreshOpenAICodexToken("refresh-token")).resolves.toBe(refreshed);
const runtime = createOpenAICodexProviderRuntime({
ensureGlobalUndiciEnvProxyDispatcher: runtimeMocks.ensureGlobalUndiciEnvProxyDispatcher,
getOAuthApiKey: vi.fn(),
refreshOpenAICodexToken: runtimeMocks.refreshOpenAICodexToken,
});
expect(runtimeMocks.ensureGlobalUndiciEnvProxyDispatcher).toHaveBeenCalledOnce();
expect(runtimeMocks.refreshOpenAICodexToken).toHaveBeenCalledOnce();
expect(
runtimeMocks.ensureGlobalUndiciEnvProxyDispatcher.mock.invocationCallOrder[0],
).toBeLessThan(runtimeMocks.refreshOpenAICodexToken.mock.invocationCallOrder[0]);
} finally {
delete openAIRefreshDelegateGlobal().__OPENCLAW_TEST_REFRESH_OPENAI_CODEX_TOKEN__;
}
await expect(runtime.refreshOpenAICodexToken("refresh-token")).resolves.toBe(refreshed);
expect(runtimeMocks.ensureGlobalUndiciEnvProxyDispatcher).toHaveBeenCalledOnce();
expect(runtimeMocks.refreshOpenAICodexToken).toHaveBeenCalledOnce();
expect(
runtimeMocks.ensureGlobalUndiciEnvProxyDispatcher.mock.invocationCallOrder[0],
).toBeLessThan(runtimeMocks.refreshOpenAICodexToken.mock.invocationCallOrder[0]);
});
it("registers provider-owned OpenAI tool compat hooks for openai and codex", async () => {

View File

@@ -4,16 +4,42 @@ import {
} from "@mariozechner/pi-ai/oauth";
import { ensureGlobalUndiciEnvProxyDispatcher } from "openclaw/plugin-sdk/runtime-env";
type OpenAICodexProviderRuntimeDeps = {
ensureGlobalUndiciEnvProxyDispatcher: typeof ensureGlobalUndiciEnvProxyDispatcher;
getOAuthApiKey: typeof getOAuthApiKeyFromPi;
refreshOpenAICodexToken: typeof refreshOpenAICodexTokenFromPi;
};
export function createOpenAICodexProviderRuntime(deps: OpenAICodexProviderRuntimeDeps): {
getOAuthApiKey: typeof getOAuthApiKey;
refreshOpenAICodexToken: typeof refreshOpenAICodexToken;
} {
return {
async getOAuthApiKey(...args) {
deps.ensureGlobalUndiciEnvProxyDispatcher();
return await deps.getOAuthApiKey(...args);
},
async refreshOpenAICodexToken(...args) {
deps.ensureGlobalUndiciEnvProxyDispatcher();
return await deps.refreshOpenAICodexToken(...args);
},
};
}
const runtime = createOpenAICodexProviderRuntime({
ensureGlobalUndiciEnvProxyDispatcher,
getOAuthApiKey: getOAuthApiKeyFromPi,
refreshOpenAICodexToken: refreshOpenAICodexTokenFromPi,
});
export async function getOAuthApiKey(
...args: Parameters<typeof getOAuthApiKeyFromPi>
): Promise<Awaited<ReturnType<typeof getOAuthApiKeyFromPi>>> {
ensureGlobalUndiciEnvProxyDispatcher();
return await getOAuthApiKeyFromPi(...args);
return await runtime.getOAuthApiKey(...args);
}
export async function refreshOpenAICodexToken(
...args: Parameters<typeof refreshOpenAICodexTokenFromPi>
): Promise<Awaited<ReturnType<typeof refreshOpenAICodexTokenFromPi>>> {
ensureGlobalUndiciEnvProxyDispatcher();
return await refreshOpenAICodexTokenFromPi(...args);
return await runtime.refreshOpenAICodexToken(...args);
}

View File

@@ -30,6 +30,22 @@ const getSpeechProviderMock = vi.hoisted(() => vi.fn());
vi.mock("openclaw/plugin-sdk/channel-targets", () => ({
normalizeChannelId: (channel: string | undefined) => channel?.trim().toLowerCase() ?? null,
resolveChannelTtsVoiceDelivery: (channel: string | undefined) => {
const normalized = channel?.trim().toLowerCase();
if (normalized === "bluebubbles") {
return {
synthesisTarget: "audio-file",
audioFileFormats: ["mp3", "caf", "audio/mpeg", "audio/x-caf"],
};
}
if (normalized === "feishu" || normalized === "whatsapp") {
return { synthesisTarget: "voice-note", transcodesAudio: true };
}
if (normalized === "discord" || normalized === "matrix" || normalized === "telegram") {
return { synthesisTarget: "voice-note" };
}
return undefined;
},
}));
vi.mock("../api.js", async () => {
@@ -152,7 +168,7 @@ describe("speech-core native voice-note routing", () => {
installSpeechProviders([createMockSpeechProvider()]);
});
it("keeps native voice-note channel support centralized", () => {
it("resolves voice delivery support from channel capabilities", () => {
for (const channel of nativeVoiceNoteChannels) {
expect(_test.supportsNativeVoiceNoteTts(channel)).toBe(true);
expect(_test.supportsNativeVoiceNoteTts(channel.toUpperCase())).toBe(true);

View File

@@ -9,7 +9,7 @@ import {
unlinkSync,
} from "node:fs";
import path from "node:path";
import { normalizeChannelId, type ChannelId } from "openclaw/plugin-sdk/channel-targets";
import { resolveChannelTtsVoiceDelivery } from "openclaw/plugin-sdk/channel-targets";
import type {
OpenClawConfig,
ResolvedTtsPersona,
@@ -738,52 +738,34 @@ export function setLastTtsAttempt(entry: TtsStatusEntry | undefined): void {
lastTtsAttempt = entry;
}
const VOICE_DELIVERY_CHANNELS = new Set([
"bluebubbles",
"telegram",
"feishu",
"whatsapp",
"matrix",
"discord",
]);
const OPUS_CHANNELS = new Set(["telegram", "feishu", "whatsapp", "matrix", "discord"]);
const TRANSCODED_VOICE_NOTE_CHANNELS = new Set(["feishu", "whatsapp"]);
const AUDIO_FILE_VOICE_MEMO_CHANNELS = new Set(["bluebubbles"]);
function resolveChannelId(channel: string | undefined): ChannelId | null {
return channel ? normalizeChannelId(channel) : null;
}
function supportsNativeVoiceNoteTts(channel: string | undefined): boolean {
const channelId = resolveChannelId(channel);
return channelId !== null && VOICE_DELIVERY_CHANNELS.has(channelId);
return resolveChannelTtsVoiceDelivery(channel) !== undefined;
}
function supportsTranscodedVoiceNoteTts(channel: string | undefined): boolean {
const channelId = resolveChannelId(channel);
return channelId !== null && TRANSCODED_VOICE_NOTE_CHANNELS.has(channelId);
const delivery = resolveChannelTtsVoiceDelivery(channel);
return delivery?.synthesisTarget === "voice-note" && delivery.transcodesAudio === true;
}
function resolveTtsSynthesisTarget(channel: string | undefined): "audio-file" | "voice-note" {
const channelId = resolveChannelId(channel);
return channelId !== null && OPUS_CHANNELS.has(channelId) ? "voice-note" : "audio-file";
return resolveChannelTtsVoiceDelivery(channel)?.synthesisTarget ?? "audio-file";
}
function supportsAudioFileVoiceMemoOutput(params: {
fileExtension?: string;
outputFormat?: string;
audioFileFormats?: readonly string[];
}): boolean {
const formats = new Set(params.audioFileFormats?.map((format) => format.trim().toLowerCase()));
if (formats.size === 0) {
return false;
}
const extension = params.fileExtension?.trim().toLowerCase();
if (extension === ".mp3" || extension === ".caf") {
if (extension && formats.has(extension.replace(/^\./, ""))) {
return true;
}
const outputFormat = params.outputFormat?.trim().toLowerCase();
return (
outputFormat === "mp3" ||
outputFormat === "caf" ||
outputFormat === "audio/mpeg" ||
outputFormat === "audio/x-caf"
);
return outputFormat ? formats.has(outputFormat) : false;
}
function shouldDeliverTtsAsVoice(params: {
@@ -793,17 +775,24 @@ function shouldDeliverTtsAsVoice(params: {
fileExtension?: string;
outputFormat?: string;
}): boolean {
const channelId = resolveChannelId(params.channel);
if (channelId === null || !supportsNativeVoiceNoteTts(channelId)) {
const delivery = resolveChannelTtsVoiceDelivery(params.channel);
if (!delivery) {
return false;
}
if (AUDIO_FILE_VOICE_MEMO_CHANNELS.has(channelId)) {
return params.target === "audio-file" && supportsAudioFileVoiceMemoOutput(params);
if (delivery.synthesisTarget === "audio-file") {
return (
params.target === "audio-file" &&
supportsAudioFileVoiceMemoOutput({
fileExtension: params.fileExtension,
outputFormat: params.outputFormat,
audioFileFormats: delivery.audioFileFormats,
})
);
}
if (params.target !== "voice-note") {
return false;
}
return params.voiceCompatible === true || supportsTranscodedVoiceNoteTts(params.channel);
return params.voiceCompatible === true || delivery.transcodesAudio === true;
}
export function resolveTtsProviderOrder(primary: TtsProvider, cfg?: OpenClawConfig): TtsProvider[] {

View File

@@ -142,6 +142,11 @@ export function createTelegramPluginBase(params: {
reactions: true,
threads: true,
media: true,
tts: {
voice: {
synthesisTarget: "voice-note",
},
},
polls: true,
nativeCommands: true,
blockStreaming: true,

View File

@@ -212,6 +212,12 @@ export function createWhatsAppPluginBase(params: {
polls: true,
reactions: true,
media: true,
tts: {
voice: {
synthesisTarget: "voice-note",
transcodesAudio: true,
},
},
},
reload: { configPrefixes: ["web"], noopPrefixes: ["channels.whatsapp"] },
gatewayMethods: ["web.login.start", "web.login.wait"],

View File

@@ -0,0 +1,111 @@
import { afterEach, describe, expect, it } from "vitest";
import { createEmptyPluginRegistry } from "../../plugins/registry-empty.js";
import { setActivePluginRegistry } from "../../plugins/runtime.js";
import {
createChannelTestPluginBase,
createTestRegistry,
} from "../../test-utils/channel-plugins.js";
import { resolveChannelTtsVoiceDelivery } from "./tts-capabilities.js";
import type { ChannelPlugin } from "./types.js";
function createChannelPlugin(
id: string,
capabilities: ChannelPlugin["capabilities"],
): ChannelPlugin {
return createChannelTestPluginBase({
id,
label: id,
capabilities,
config: {
listAccountIds: () => ["default"],
},
});
}
describe("resolveChannelTtsVoiceDelivery", () => {
afterEach(() => {
setActivePluginRegistry(createEmptyPluginRegistry());
});
it("reads voice delivery behavior from channel plugin capabilities", () => {
setActivePluginRegistry(
createTestRegistry([
{
pluginId: "bluebubbles",
plugin: createChannelPlugin("bluebubbles", {
chatTypes: ["direct"],
tts: {
voice: {
synthesisTarget: "audio-file",
audioFileFormats: ["mp3", "caf", "audio/mpeg", "audio/x-caf"],
},
},
}),
source: "test",
},
{
pluginId: "discord",
plugin: createChannelPlugin("discord", {
chatTypes: ["direct"],
tts: { voice: { synthesisTarget: "voice-note" } },
}),
source: "test",
},
{
pluginId: "feishu",
plugin: createChannelPlugin("feishu", {
chatTypes: ["direct"],
tts: { voice: { synthesisTarget: "voice-note", transcodesAudio: true } },
}),
source: "test",
},
{
pluginId: "matrix",
plugin: createChannelPlugin("matrix", {
chatTypes: ["direct"],
tts: { voice: { synthesisTarget: "voice-note" } },
}),
source: "test",
},
{
pluginId: "telegram",
plugin: createChannelPlugin("telegram", {
chatTypes: ["direct"],
tts: { voice: { synthesisTarget: "voice-note" } },
}),
source: "test",
},
{
pluginId: "whatsapp",
plugin: createChannelPlugin("whatsapp", {
chatTypes: ["direct"],
tts: { voice: { synthesisTarget: "voice-note", transcodesAudio: true } },
}),
source: "test",
},
]),
);
expect(resolveChannelTtsVoiceDelivery("bluebubbles")).toEqual({
synthesisTarget: "audio-file",
audioFileFormats: ["mp3", "caf", "audio/mpeg", "audio/x-caf"],
});
expect(resolveChannelTtsVoiceDelivery("discord")).toEqual({
synthesisTarget: "voice-note",
});
expect(resolveChannelTtsVoiceDelivery("feishu")).toEqual({
synthesisTarget: "voice-note",
transcodesAudio: true,
});
expect(resolveChannelTtsVoiceDelivery("matrix")).toEqual({
synthesisTarget: "voice-note",
});
expect(resolveChannelTtsVoiceDelivery("telegram")).toEqual({
synthesisTarget: "voice-note",
});
expect(resolveChannelTtsVoiceDelivery("whatsapp")).toEqual({
synthesisTarget: "voice-note",
transcodesAudio: true,
});
expect(resolveChannelTtsVoiceDelivery("slack")).toBeUndefined();
});
});

View File

@@ -0,0 +1,13 @@
import { normalizeChannelId } from "./registry.js";
import { getChannelPlugin } from "./registry.js";
import type { ChannelTtsVoiceDeliveryCapabilities } from "./types.core.js";

export function resolveChannelTtsVoiceDelivery(
  channel: string | undefined,
): ChannelTtsVoiceDeliveryCapabilities | undefined {
  const channelId = normalizeChannelId(channel);
  if (!channelId) {
    return undefined;
  }
  return getChannelPlugin(channelId)?.capabilities.tts?.voice;
}

View File

@@ -272,6 +272,13 @@ export type ChannelGroupContext = {
senderE164?: string | null;
};
/** TTS voice delivery behavior advertised by a channel plugin. */
export type ChannelTtsVoiceDeliveryCapabilities = {
synthesisTarget: "audio-file" | "voice-note";
transcodesAudio?: boolean;
audioFileFormats?: readonly string[];
};
/** Static capability flags advertised by a channel plugin. */
export type ChannelCapabilities = {
chatTypes: Array<ChatType | "thread">;
@@ -284,6 +291,9 @@ export type ChannelCapabilities = {
groupManagement?: boolean;
threads?: boolean;
media?: boolean;
tts?: {
voice?: ChannelTtsVoiceDeliveryCapabilities;
};
nativeCommands?: boolean;
blockStreaming?: boolean;
};

View File

@@ -39,6 +39,7 @@ export {
} from "../channels/plugins/chat-target-prefixes.js";
export type { ChannelId } from "../channels/plugins/types.public.js";
export { normalizeChannelId } from "../channels/plugins/registry.js";
export { resolveChannelTtsVoiceDelivery } from "../channels/plugins/tts-capabilities.js";
export {
buildUnresolvedTargetResults,
resolveTargetsWithOptionalToken,

View File

@@ -112,6 +112,8 @@ export type TtsTestFacade = {
channel: string | undefined;
target: TtsSpeechTarget | undefined;
voiceCompatible: boolean | undefined;
fileExtension?: string;
outputFormat?: string;
}) => boolean;
summarizeText: (...args: unknown[]) => Promise<SummarizeResult>;
getResolvedSpeechProviderConfig: (