fix(talk): honor configured speech locale

Peter Steinberger
2026-04-25 21:05:15 +01:00
parent 8fb24ac3ce
commit 02f3e9cfa2
25 changed files with 297 additions and 11 deletions

View File

@@ -59,6 +59,8 @@ Docs: https://docs.openclaw.ai
### Fixes
- iOS/macOS Talk Mode: allow `talk.speechLocale` to set the speech
recognition locale for non-English voice conversations (see the sketch below). Fixes #44688.
- Plugins/Voice Call: treat missing provider credentials as setup-incomplete
during Gateway startup and log the missing keys as a warning instead of
raising a startup error; explicit command/tool errors still surface when the
feature is actually used. Thanks
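
A minimal sketch of the app-side override, assuming only what the diffs below show (`TalkSpeechLocale.storageKey` is `"talk.speechLocale"` and `SettingsTab` binds it via `@AppStorage`): the device-local picker value lives in `UserDefaults`, an explicit pick wins over the gateway key of the same name, and `"auto"` defers to the gateway value.

```swift
import Foundation

// Hypothetical usage, not a public API: pin German speech recognition on this
// device. The Settings picker writes the same key; "auto" would defer to the
// gateway-configured talk.speechLocale instead.
UserDefaults.standard.set("de-DE", forKey: "talk.speechLocale") // TalkSpeechLocale.storageKey
```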

View File

@@ -21,6 +21,7 @@ struct SettingsTab: View {
@AppStorage("node.instanceId") private var instanceId: String = UUID().uuidString
@AppStorage("voiceWake.enabled") private var voiceWakeEnabled: Bool = false
@AppStorage("talk.enabled") private var talkEnabled: Bool = false
@AppStorage(TalkSpeechLocale.storageKey) private var talkSpeechLocale: String = TalkSpeechLocale.automaticID
@AppStorage("talk.button.enabled") private var talkButtonEnabled: Bool = true
@AppStorage("talk.background.enabled") private var talkBackgroundEnabled: Bool = false
@AppStorage("camera.enabled") private var cameraEnabled: Bool = true
@@ -278,6 +279,11 @@ struct SettingsTab: View {
help: "Enables voice conversation mode with your connected OpenClaw agent.") { newValue in
self.appModel.setTalkEnabled(newValue)
}
Picker("Speech Language", selection: self.$talkSpeechLocale) {
ForEach(TalkSpeechLocale.supportedOptions()) { option in
Text(option.label).tag(option.id)
}
}
self.featureToggle(
"Background Listening",
isOn: self.$talkBackgroundEnabled,

View File

@@ -12,6 +12,7 @@ struct TalkModeGatewayConfigState {
let rawConfigApiKey: String?
let interruptOnSpeech: Bool?
let silenceTimeoutMs: Int
let speechLocaleID: String?
}
enum TalkModeGatewayConfigParser {
@@ -53,6 +54,7 @@ enum TalkModeGatewayConfigParser {
let silenceTimeoutMs = TalkConfigParsing.resolvedSilenceTimeoutMs(
talk,
fallback: defaultSilenceTimeoutMs)
let speechLocaleID = TalkConfigParsing.resolvedSpeechLocaleID(talk)
return TalkModeGatewayConfigState(
activeProvider: activeProvider,
@@ -64,6 +66,7 @@ enum TalkModeGatewayConfigParser {
defaultOutputFormat: defaultOutputFormat,
rawConfigApiKey: rawConfigApiKey,
interruptOnSpeech: interruptOnSpeech,
silenceTimeoutMs: silenceTimeoutMs)
silenceTimeoutMs: silenceTimeoutMs,
speechLocaleID: speechLocaleID)
}
}

View File

@@ -87,6 +87,7 @@ final class TalkModeManager: NSObject {
private var apiKey: String?
private var voiceAliases: [String: String] = [:]
private var interruptOnSpeech: Bool = true
private var gatewaySpeechLocaleID: String?
private var mainSessionKey: String = "main"
private var fallbackVoiceId: String?
private var lastPlaybackWasPCM: Bool = false
@@ -500,12 +501,17 @@ final class TalkModeManager: NSObject {
#endif
self.stopRecognition()
self.speechRecognizer = SFSpeechRecognizer()
let localSpeechLocale = UserDefaults.standard.string(forKey: TalkSpeechLocale.storageKey)
let resolvedSpeech = TalkSpeechLocale.makeRecognizer(
localSelection: localSpeechLocale,
gatewaySelection: self.gatewaySpeechLocaleID)
self.speechRecognizer = resolvedSpeech.recognizer
guard let recognizer = self.speechRecognizer else {
throw NSError(domain: "TalkMode", code: 1, userInfo: [
NSLocalizedDescriptionKey: "Speech recognizer unavailable",
])
}
GatewayDiagnostics.log("talk speech: locale=\(resolvedSpeech.localeID ?? "default")")
self.recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
self.recognitionRequest?.shouldReportPartialResults = true
@@ -2027,6 +2033,7 @@ extension TalkModeManager {
if let interrupt = parsed.interruptOnSpeech {
self.interruptOnSpeech = interrupt
}
self.gatewaySpeechLocaleID = parsed.speechLocaleID
self.silenceWindow = TimeInterval(parsed.silenceTimeoutMs) / 1000
if parsed.normalizedPayload || parsed.defaultVoiceId != nil || parsed.rawConfigApiKey != nil {
GatewayDiagnostics.log(
@@ -2041,6 +2048,7 @@ extension TalkModeManager {
self.gatewayTalkDefaultModelId = nil
self.gatewayTalkApiKeyConfigured = false
self.gatewayTalkConfigLoaded = false
self.gatewaySpeechLocaleID = nil
self.silenceWindow = TimeInterval(Self.defaultSilenceTimeoutMs) / 1000
}
}

View File

@@ -0,0 +1,100 @@
import Foundation
import OpenClawKit
import Speech
enum TalkSpeechLocale {
static let storageKey = "talk.speechLocale"
static let automaticID = "auto"
static let fallbackLocaleID = "en-US"
struct Option: Identifiable {
let id: String
let label: String
}
static func supportedOptions(
supportedLocales: Set<Locale> = SFSpeechRecognizer.supportedLocales()
) -> [Option] {
var seen = Set<String>()
let dynamic: [Option] = supportedLocales
.compactMap { locale in
let id = self.canonicalID(locale.identifier)
guard seen.insert(id).inserted else { return nil }
return Option(id: id, label: self.friendlyName(for: locale))
}
.sorted { (lhs: Option, rhs: Option) in
lhs.label.localizedCaseInsensitiveCompare(rhs.label) == .orderedAscending
}
return [Option(id: self.automaticID, label: "Automatic")] + dynamic
}
static func resolvedLocaleID(
localSelection: String?,
gatewaySelection: String?,
deviceLocaleID: String = Locale.autoupdatingCurrent.identifier,
fallbackLocaleID: String = Self.fallbackLocaleID,
supportedLocaleIDs: Set<String>
) -> String? {
TalkConfigParsing.resolvedSpeechRecognitionLocaleID(
preferredLocaleIDs: [
TalkConfigParsing.normalizedExplicitSpeechLocaleID(localSelection),
TalkConfigParsing.normalizedExplicitSpeechLocaleID(gatewaySelection),
deviceLocaleID,
],
fallbackLocaleID: fallbackLocaleID,
supportedLocaleIDs: supportedLocaleIDs)
}
static func makeRecognizer(
localSelection: String?,
gatewaySelection: String?,
supportedLocales: Set<Locale> = SFSpeechRecognizer.supportedLocales()
) -> (recognizer: SFSpeechRecognizer?, localeID: String?) {
let supportedIDs = Set(supportedLocales.map(\.identifier))
guard let localeID = self.resolvedLocaleID(
localSelection: localSelection,
gatewaySelection: gatewaySelection,
supportedLocaleIDs: supportedIDs)
else {
let recognizer = SFSpeechRecognizer()
return (recognizer, recognizer?.locale.identifier)
}
if let recognizer = SFSpeechRecognizer(locale: Locale(identifier: localeID)) {
return (recognizer, localeID)
}
let recognizer = SFSpeechRecognizer()
return (recognizer, recognizer?.locale.identifier)
}
static func normalizedExplicitLocaleID(_ raw: String?) -> String? {
TalkConfigParsing.normalizedExplicitSpeechLocaleID(raw, automaticID: self.automaticID)
}
private static func normalizedLocaleID(_ raw: String?) -> String? {
TalkConfigParsing.normalizedSpeechLocaleID(raw)
}
private static func canonicalID(_ raw: String) -> String {
raw.replacingOccurrences(of: "_", with: "-")
}
private static func friendlyName(for locale: Locale) -> String {
let id = self.canonicalID(locale.identifier)
let cleanLocale = Locale(identifier: id)
if let langCode = cleanLocale.language.languageCode?.identifier,
let lang = cleanLocale.localizedString(forLanguageCode: langCode),
let regionCode = cleanLocale.region?.identifier,
let region = cleanLocale.localizedString(forRegionCode: regionCode)
{
return "\(lang) (\(region))"
}
if let langCode = cleanLocale.language.languageCode?.identifier,
let lang = cleanLocale.localizedString(forLanguageCode: langCode)
{
return lang
}
return cleanLocale.localizedString(forIdentifier: id) ?? id
}
}
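
As a reading aid, an illustrative walk through the fallback chain (hypothetical locale values; the calls match the `resolvedLocaleID` signature above and mirror the tests added later in this commit): explicit local pick first, then the gateway value, then the device locale, then `en-US`.

```swift
// Illustrative only; hypothetical locale values.
let supported: Set<String> = ["de-DE", "ru-RU", "fr-FR", "en-US"]

// 1. An explicit local selection wins outright.
_ = TalkSpeechLocale.resolvedLocaleID(
    localSelection: "de-DE", gatewaySelection: "ru-RU",
    deviceLocaleID: "fr-FR", supportedLocaleIDs: supported) // "de-DE"

// 2. "auto" locally defers to the gateway value; underscores are normalized.
_ = TalkSpeechLocale.resolvedLocaleID(
    localSelection: TalkSpeechLocale.automaticID, gatewaySelection: "ru_RU",
    deviceLocaleID: "fr-FR", supportedLocaleIDs: supported) // "ru-RU"

// 3. Nothing configured and an unsupported device locale falls back to en-US.
_ = TalkSpeechLocale.resolvedLocaleID(
    localSelection: nil, gatewaySelection: nil,
    deviceLocaleID: "zz-ZZ", supportedLocaleIDs: supported) // "en-US"
```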

View File

@@ -47,6 +47,16 @@ private let iOSSilenceTimeoutMs = 900
fallback: iOSSilenceTimeoutMs) == 1500)
}
@Test func readsConfiguredSpeechLocale() {
let talk: [String: Any] = [
"speechLocale": " ru-RU ",
]
#expect(
TalkConfigParsing.resolvedSpeechLocaleID(
TalkConfigParsing.bridgeFoundationDictionary(talk)) == "ru-RU")
}
@Test func defaultsSilenceTimeoutMsWhenMissing() {
#expect(TalkConfigParsing.resolvedSilenceTimeoutMs(nil, fallback: iOSSilenceTimeoutMs) == iOSSilenceTimeoutMs)
}

View File

@@ -0,0 +1,41 @@
import Foundation
import Testing
@testable import OpenClaw
@Suite struct TalkSpeechLocaleTests {
@Test func localSelectionOverridesGatewayConfig() {
let locale = TalkSpeechLocale.resolvedLocaleID(
localSelection: "de-DE",
gatewaySelection: "ru-RU",
deviceLocaleID: "en-US",
supportedLocaleIDs: ["de-DE", "ru-RU", "en-US"])
#expect(locale == "de-DE")
}
@Test func automaticLocalSelectionAllowsGatewayConfig() {
let locale = TalkSpeechLocale.resolvedLocaleID(
localSelection: TalkSpeechLocale.automaticID,
gatewaySelection: "ru_RU",
deviceLocaleID: "en-US",
supportedLocaleIDs: ["ru-RU", "en-US"])
#expect(locale == "ru-RU")
}
@Test func unsupportedConfiguredLocaleFallsBackToDeviceThenEnglish() {
let deviceLocale = TalkSpeechLocale.resolvedLocaleID(
localSelection: "zz-ZZ",
gatewaySelection: nil,
deviceLocaleID: "fr-FR",
supportedLocaleIDs: ["fr-FR", "en-US"])
let english = TalkSpeechLocale.resolvedLocaleID(
localSelection: "zz-ZZ",
gatewaySelection: nil,
deviceLocaleID: "yy-YY",
supportedLocaleIDs: ["en-US"])
#expect(deviceLocale == "fr-FR")
#expect(english == "en-US")
}
}

View File

@@ -11,6 +11,7 @@ struct TalkModeGatewayConfigState {
let outputFormat: String?
let interruptOnSpeech: Bool
let silenceTimeoutMs: Int
let speechLocaleID: String?
let apiKey: String?
let seamColorHex: String?
}
@@ -53,6 +54,7 @@ enum TalkModeGatewayConfigParser {
}
let outputFormat = activeConfig?["outputFormat"]?.stringValue
let interrupt = talk?["interruptOnSpeech"]?.boolValue
let speechLocaleID = TalkConfigParsing.resolvedSpeechLocaleID(talk)
let apiKey = activeConfig?["apiKey"]?.stringValue
let resolvedVoice: String? = if activeProvider == defaultProvider {
(voice?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? voice : nil) ??
@@ -78,6 +80,7 @@ enum TalkModeGatewayConfigParser {
outputFormat: outputFormat,
interruptOnSpeech: interrupt ?? true,
silenceTimeoutMs: silenceTimeoutMs,
speechLocaleID: speechLocaleID,
apiKey: resolvedApiKey,
seamColorHex: rawSeam.isEmpty ? nil : rawSeam)
}
@@ -104,6 +107,7 @@ enum TalkModeGatewayConfigParser {
outputFormat: nil,
interruptOnSpeech: true,
silenceTimeoutMs: defaultSilenceTimeoutMs,
speechLocaleID: nil,
apiKey: resolvedApiKey,
seamColorHex: nil)
}

View File

@@ -70,6 +70,7 @@ actor TalkModeRuntime {
private var defaultOutputFormat: String?
private var interruptOnSpeech: Bool = true
private var activeTalkProvider = TalkModeRuntime.defaultTalkProvider
private var speechLocaleID: String?
private var lastInterruptedAtSeconds: Double?
private var voiceAliases: [String: String] = [:]
private var lastSpokenText: String?
@@ -186,12 +187,23 @@ actor TalkModeRuntime {
self.recognitionGeneration &+= 1
let generation = self.recognitionGeneration
let locale = await MainActor.run { AppStateStore.shared.voiceWakeLocaleID }
self.recognizer = SFSpeechRecognizer(locale: Locale(identifier: locale))
let voiceWakeLocale = await MainActor.run { AppStateStore.shared.voiceWakeLocaleID }
let supportedLocaleIDs = Set(SFSpeechRecognizer.supportedLocales().map(\.identifier))
let localeID = TalkConfigParsing.resolvedSpeechRecognitionLocaleID(
preferredLocaleIDs: [
self.speechLocaleID,
voiceWakeLocale,
Locale.autoupdatingCurrent.identifier,
],
supportedLocaleIDs: supportedLocaleIDs)
self.recognizer = localeID
.map { SFSpeechRecognizer(locale: Locale(identifier: $0)) }
?? SFSpeechRecognizer()
guard let recognizer, recognizer.isAvailable else {
self.logger.error("talk recognizer unavailable")
return
}
self.logger.debug("talk recognizer locale=\(recognizer.locale.identifier, privacy: .public)")
let request = SFSpeechAudioBufferRecognitionRequest()
Self.configureRecognitionRequest(request)
@@ -1010,6 +1022,7 @@ extension TalkModeRuntime {
self.interruptOnSpeech = cfg.interruptOnSpeech
self.activeTalkProvider = cfg.activeProvider
self.silenceWindow = TimeInterval(cfg.silenceTimeoutMs) / 1000
self.speechLocaleID = cfg.speechLocaleID
self.apiKey = cfg.apiKey
let hasApiKey = (cfg.apiKey?.isEmpty == false)
let voiceLabel = (cfg.voiceId?.isEmpty == false) ? cfg.voiceId! : "none"
@@ -1021,7 +1034,8 @@ extension TalkModeRuntime {
"modelId=\(modelLabel, privacy: .public) " +
"apiKey=\(hasApiKey, privacy: .public) " +
"interrupt=\(cfg.interruptOnSpeech, privacy: .public) " +
"silenceTimeoutMs=\(cfg.silenceTimeoutMs, privacy: .public)")
"silenceTimeoutMs=\(cfg.silenceTimeoutMs, privacy: .public) " +
"speechLocale=\(cfg.speechLocaleID ?? "device", privacy: .public)")
}
static func selectTalkProviderConfig(

View File

@@ -25,10 +25,10 @@ struct TalkModeGatewayConfigTests {
"voiceId": "unused-voice",
],
],
"speechLocale": "ru-RU",
]),
],
issues: nil
)
issues: nil)
let parsed = TalkModeGatewayConfigParser.parse(
snapshot: snapshot,
@@ -37,12 +37,12 @@ struct TalkModeGatewayConfigTests {
defaultSilenceTimeoutMs: TalkDefaults.silenceTimeoutMs,
envVoice: "env-voice",
sagVoice: "sag-voice",
envApiKey: "env-key"
)
envApiKey: "env-key")
#expect(parsed.activeProvider == "mlx")
#expect(parsed.modelId == nil)
#expect(parsed.apiKey == nil)
#expect(parsed.voiceId == "unused-voice")
#expect(parsed.speechLocaleID == "ru-RU")
}
}

View File

@@ -56,6 +56,46 @@ public enum TalkConfigParsing {
self.resolvedPositiveInt(talk?["silenceTimeoutMs"], fallback: fallback)
}
public static func normalizedSpeechLocaleID(_ value: String?) -> String? {
let trimmed = (value ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
return trimmed.isEmpty ? nil : trimmed.replacingOccurrences(of: "_", with: "-")
}
public static func resolvedSpeechLocaleID(
_ talk: [String: AnyCodable]?,
fallback: String? = nil
) -> String? {
self.normalizedSpeechLocaleID(talk?["speechLocale"]?.stringValue)
?? self.normalizedSpeechLocaleID(fallback)
}
public static func normalizedExplicitSpeechLocaleID(
_ value: String?,
automaticID: String = "auto"
) -> String? {
let normalized = self.normalizedSpeechLocaleID(value)
return normalized == automaticID ? nil : normalized
}
public static func resolvedSpeechRecognitionLocaleID(
preferredLocaleIDs: [String?],
fallbackLocaleID: String = "en-US",
supportedLocaleIDs: Set<String>
) -> String? {
let supported = Set(supportedLocaleIDs.compactMap(self.normalizedSpeechLocaleID))
var seen = Set<String>()
let candidates = (preferredLocaleIDs + [fallbackLocaleID])
.compactMap(self.normalizedSpeechLocaleID)
for candidate in candidates {
guard seen.insert(candidate).inserted else { continue }
if supported.isEmpty || supported.contains(candidate) {
return candidate
}
}
return nil
}
private static func normalizedTalkProviderID(_ raw: String?) -> String? {
let trimmed = (raw ?? "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
return trimmed.isEmpty ? nil : trimmed
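
The `TalkConfigParsingTests` below exercise these helpers; as a quick sketch of the intended behavior (hypothetical values, assuming the `OpenClawKit` module from the imports above): trimming plus `_` to `-` normalization, the `auto` sentinel mapping to unset, and first-supported-candidate resolution.

```swift
import OpenClawKit

// Illustrative expectations, mirroring the tests added in this commit.
_ = TalkConfigParsing.normalizedSpeechLocaleID(" ru_RU ")      // "ru-RU"
_ = TalkConfigParsing.normalizedSpeechLocaleID("   ")          // nil
_ = TalkConfigParsing.normalizedExplicitSpeechLocaleID("auto") // nil (treated as unset)
_ = TalkConfigParsing.resolvedSpeechRecognitionLocaleID(
    preferredLocaleIDs: ["zz-ZZ", "fr-FR"],
    supportedLocaleIDs: ["fr-FR", "en-US"])                    // "fr-FR"
```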

View File

@@ -116,4 +116,21 @@ struct TalkConfigParsingTests {
#expect(TalkConfigParsing.resolvedPositiveInt(AnyCodable(true), fallback: 700) == 700)
#expect(TalkConfigParsing.resolvedPositiveInt(AnyCodable("1500"), fallback: 700) == 700)
}
@Test func resolvesSpeechLocaleID() {
#expect(TalkConfigParsing.resolvedSpeechLocaleID(["speechLocale": AnyCodable(" ru_RU ")]) == "ru-RU")
#expect(TalkConfigParsing.resolvedSpeechLocaleID(["speechLocale": AnyCodable("")], fallback: "en-US") == "en-US")
}
@Test func resolvesSpeechRecognitionLocaleFromSupportedFallbacks() {
let locale = TalkConfigParsing.resolvedSpeechRecognitionLocaleID(
preferredLocaleIDs: ["zz-ZZ", "fr-FR"],
supportedLocaleIDs: ["fr-FR", "en-US"])
let fallback = TalkConfigParsing.resolvedSpeechRecognitionLocaleID(
preferredLocaleIDs: ["zz-ZZ", "yy-YY"],
supportedLocaleIDs: ["en-US"])
#expect(locale == "fr-FR")
#expect(fallback == "en-US")
}
}

View File

@@ -1,4 +1,4 @@
6ed33ef102e7c92816243bfabc3626222a679c3270c12ec5ea47b28b66204b3b config-baseline.json
f86cb4d57ec1f5fd75008be0ab86151194945eb013a47ab4bdeaddafd3780da7 config-baseline.core.json
9ac3d271f9bfa9611557f0b52e4d0a600693bdd1de75cc1bafc320fc4d4f0075 config-baseline.json
271fdf1d6652927e0fc160a6f25276bf6dccb8f1b27fab15e0fc2620e8cacab4 config-baseline.core.json
7cd9c908f066c143eab2a201efbc9640f483ab28bba92ddeca1d18cc2b528bc3 config-baseline.channel.json
7825b56a5b3fcdbe2e09ef8fe5d9f12ac3598435afebe20413051e45b0d1968e config-baseline.plugin.json

View File

@@ -1332,6 +1332,7 @@ Defaults for Talk mode (macOS/iOS/Android).
},
system: {},
},
speechLocale: "ru-RU",
silenceTimeoutMs: 1500,
interruptOnSpeech: true,
},
@@ -1346,6 +1347,7 @@ Defaults for Talk mode (macOS/iOS/Android).
- `providers.*.voiceAliases` lets Talk directives use friendly names.
- `providers.mlx.modelId` selects the Hugging Face repo used by the macOS local MLX helper. If omitted, macOS uses `mlx-community/Soprano-80M-bf16`.
- macOS MLX playback runs through the bundled `openclaw-mlx-tts` helper when present, or an executable on `PATH`; `OPENCLAW_MLX_TTS_BIN` overrides the helper path for development.
- `speechLocale` sets the BCP 47 locale id used by iOS/macOS Talk speech recognition. Leave unset to use the device default.
- `silenceTimeoutMs` controls how long Talk mode waits after user silence before it sends the transcript. Unset keeps the platform default pause window (`700 ms on macOS and Android, 900 ms on iOS`).
---

View File

@@ -43,6 +43,7 @@ Moved to a dedicated page — see
- `session.*` (session lifecycle, compaction, pruning)
- `messages.*` (message delivery, TTS, markdown rendering)
- `talk.*` (Talk mode)
- `talk.speechLocale`: optional BCP 47 locale id for Talk speech recognition on iOS/macOS
- `talk.silenceTimeoutMs`: when unset, Talk keeps the platform default pause window before sending the transcript (`700 ms on macOS and Android, 900 ms on iOS`)
## Tools and custom providers

View File

@@ -63,6 +63,7 @@ Supported keys:
},
system: {},
},
speechLocale: "ru-RU",
silenceTimeoutMs: 1500,
interruptOnSpeech: true,
},
@@ -78,6 +79,7 @@ Defaults:
- `providers.elevenlabs.modelId`: defaults to `eleven_v3` when unset.
- `providers.mlx.modelId`: defaults to `mlx-community/Soprano-80M-bf16` when unset.
- `providers.elevenlabs.apiKey`: falls back to `ELEVENLABS_API_KEY` (or gateway shell profile if available).
- `speechLocale`: optional BCP 47 locale id for on-device Talk speech recognition on iOS/macOS. Leave unset to use the device default.
- `outputFormat`: defaults to `pcm_44100` on macOS/iOS and `pcm_24000` on Android (set `mp3_*` to force MP3 streaming)
## macOS UI

View File

@@ -21075,6 +21075,12 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
description:
"Provider-specific Talk settings keyed by provider id. During migration, prefer this over legacy talk.* keys.",
},
speechLocale: {
type: "string",
title: "Talk Speech Locale",
description:
'BCP 47 locale id for Talk speech recognition on device nodes, for example "ru-RU". Leave unset to use the default locale of each device.',
},
interruptOnSpeech: {
type: "boolean",
title: "Talk Interrupt on Speech",
@@ -27273,6 +27279,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
help: "Enables automatic live-reload behavior for canvas assets during development workflows. Keep disabled in production-like environments where deterministic output is preferred.",
tags: ["reliability"],
},
"talk.speechLocale": {
label: "Talk Speech Locale",
help: 'BCP 47 locale id for Talk speech recognition on device nodes, for example "ru-RU". Leave unset to use the default locale of each device.',
tags: ["media"],
},
"talk.interruptOnSpeech": {
label: "Talk Interrupt on Speech",
help: "If true (default), stop assistant speech when the user starts speaking in Talk mode. Keep enabled for conversational turn-taking.",

View File

@@ -148,6 +148,8 @@ export const FIELD_HELP: Record<string, string> = {
"Provider-specific Talk settings keyed by provider id. During migration, prefer this over legacy talk.* keys.",
"talk.providers.*": "Provider-owned Talk config fields for the matching provider id.",
"talk.providers.*.apiKey": "Provider API key for Talk mode.", // pragma: allowlist secret
"talk.speechLocale":
'BCP 47 locale id for Talk speech recognition on device nodes, for example "ru-RU". Leave unset to use the default locale of each device.',
"talk.interruptOnSpeech":
"If true (default), stop assistant speech when the user starts speaking in Talk mode. Keep enabled for conversational turn-taking.",
"talk.silenceTimeoutMs": `Milliseconds of user silence before Talk mode finalizes and sends the current transcript. Leave unset to keep the platform default pause window (${describeTalkSilenceTimeoutDefaults()}).`,

View File

@@ -786,6 +786,7 @@ export const FIELD_LABELS: Record<string, string> = {
"canvasHost.port": "Canvas Host Port",
"canvasHost.liveReload": "Canvas Host Live Reload",
talk: "Talk",
"talk.speechLocale": "Talk Speech Locale",
"talk.interruptOnSpeech": "Talk Interrupt on Speech",
"talk.silenceTimeoutMs": "Talk Silence Timeout (ms)",
messages: "Messages",

View File

@@ -10,11 +10,13 @@ describe("talk normalization", () => {
modelId: "eleven_v3",
outputFormat: "pcm_44100",
apiKey: "secret-key", // pragma: allowlist secret
speechLocale: " ru-RU ",
interruptOnSpeech: false,
silenceTimeoutMs: 1500,
} as unknown as never);
expect(normalized).toEqual({
speechLocale: "ru-RU",
interruptOnSpeech: false,
silenceTimeoutMs: 1500,
});
@@ -77,6 +79,7 @@ describe("talk normalization", () => {
modelId: "acme-model",
},
},
speechLocale: "ru-RU",
interruptOnSpeech: true,
});
@@ -95,6 +98,7 @@ describe("talk normalization", () => {
modelId: "acme-model",
},
},
speechLocale: "ru-RU",
interruptOnSpeech: true,
});
});

View File

@@ -105,6 +105,10 @@ export function normalizeTalkSection(value: TalkConfig | undefined): TalkConfig
const source = value as Record<string, unknown>;
const normalized: TalkConfig = {};
const speechLocale = normalizeOptionalString(source.speechLocale);
if (speechLocale) {
normalized.speechLocale = speechLocale;
}
if (typeof source.interruptOnSpeech === "boolean") {
normalized.interruptOnSpeech = source.interruptOnSpeech;
}
@@ -172,6 +176,9 @@ export function buildTalkConfigResponse(value: unknown): TalkConfigResponse | un
if (typeof normalized?.silenceTimeoutMs === "number") {
payload.silenceTimeoutMs = normalized.silenceTimeoutMs;
}
if (typeof normalized?.speechLocale === "string") {
payload.speechLocale = normalized.speechLocale;
}
if (normalized?.providers && Object.keys(normalized.providers).length > 0) {
payload.providers = normalized.providers;
}

View File

@@ -67,6 +67,8 @@ export type TalkConfig = {
provider?: string;
/** Provider-specific Talk config keyed by provider id. */
providers?: Record<string, TalkProviderConfig>;
/** BCP 47 locale id used for Talk speech recognition on device nodes. */
speechLocale?: string;
/** Stop speaking when user starts talking (default: true). */
interruptOnSpeech?: boolean;
/** Milliseconds of user silence before Talk mode sends the transcript after a pause. */

View File

@@ -186,6 +186,7 @@ const TalkSchema = z
.object({
provider: z.string().optional(),
providers: z.record(z.string(), TalkProviderEntrySchema).optional(),
speechLocale: z.string().optional(),
interruptOnSpeech: z.boolean().optional(),
silenceTimeoutMs: z.number().int().positive().optional(),
})

View File

@@ -78,6 +78,7 @@ const TalkConfigSchema = Type.Object(
provider: Type.Optional(Type.String()),
providers: Type.Optional(Type.Record(Type.String(), TalkProviderConfigSchema)),
resolved: ResolvedTalkConfigSchema,
speechLocale: Type.Optional(Type.String()),
interruptOnSpeech: Type.Optional(Type.Boolean()),
silenceTimeoutMs: Type.Optional(Type.Integer({ minimum: 1 })),
},

View File

@@ -34,6 +34,7 @@ type TalkConfigPayload = {
provider?: string;
config?: { voiceId?: string; apiKey?: string | SecretRef };
};
speechLocale?: string;
silenceTimeoutMs?: number;
};
session?: { mainKey?: string };
@@ -144,6 +145,7 @@ function expectTalkConfig(
apiKey?: string | SecretRef;
providerApiKey?: string | SecretRef;
resolvedApiKey?: string | SecretRef;
speechLocale?: string;
silenceTimeoutMs?: number;
},
) {
@@ -162,6 +164,9 @@ function expectTalkConfig(
if ("resolvedApiKey" in expected) {
expect(talk?.resolved?.config?.apiKey).toEqual(expected.resolvedApiKey);
}
if ("speechLocale" in expected) {
expect(talk?.speechLocale).toBe(expected.speechLocale);
}
if ("silenceTimeoutMs" in expected) {
expect(talk?.silenceTimeoutMs).toBe(expected.silenceTimeoutMs);
}
@@ -179,6 +184,7 @@ describe("gateway talk.config", () => {
apiKey: "secret-key-abc", // pragma: allowlist secret
},
},
speechLocale: "ru-RU",
silenceTimeoutMs: 1500,
},
session: {
@@ -196,6 +202,7 @@ describe("gateway talk.config", () => {
provider: GENERIC_TALK_PROVIDER_ID,
voiceId: "voice-123",
apiKey: "__OPENCLAW_REDACTED__",
speechLocale: "ru-RU",
silenceTimeoutMs: 1500,
});
expect(res.payload?.config?.session?.mainKey).toBe("main-test");