diff --git a/CHANGELOG.md b/CHANGELOG.md index 604e0a710cc..bd902366bf7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,7 @@ Docs: https://docs.openclaw.ai - Discord/subagents: preserve thread-bound completion delivery by keeping the requester-agent announce path primary and falling back to direct thread sends only when the announce produces no visible output. (#71064) Thanks @DolencLuka. - Browser/tool: give Chrome MCP existing-session manage calls a longer default timeout, pass explicit tool timeouts through tab management, and recover stale selected-page MCP sessions instead of forcing a manual reset. Thanks @steipete. +- Plugins/Voice Call: pin voice response sessions to `responseModel` before embedded agent runs, avoiding live-session model switch failures when the global default model differs. Fixes #60118. Thanks @xinbenlv. - Gateway/sessions: recover main-agent turns interrupted by a gateway restart from stale transcript-lock evidence, avoiding stuck `status: "running"` sessions without broad post-boot transcript scans. Fixes #70555. Thanks @bitloi. - Codex approvals: keep command approval responses within Codex app-server `availableDecisions`, including deny/cancel fallbacks for prompts that do not offer `decline`. (#71338) Thanks @Lucenx9. - Plugins/Google Meet: include live Chrome-node readiness in `googlemeet setup` and document the Parallels recovery checks, so stale node tokens or disconnected VM browsers are visible before an agent opens a meeting. Thanks @steipete. 
diff --git a/extensions/voice-call/src/response-generator.test.ts b/extensions/voice-call/src/response-generator.test.ts index 214b4f1a80c..07ad6eecba4 100644 --- a/extensions/voice-call/src/response-generator.test.ts +++ b/extensions/voice-call/src/response-generator.test.ts @@ -4,6 +4,8 @@ import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js"; import { generateVoiceResponse } from "./response-generator.js"; function createAgentRuntime(payloads: Array<Record<string, unknown>>) { + const sessionStore: Record<string, unknown> = {}; + const saveSessionStore = vi.fn(async () => {}); const runEmbeddedPiAgent = vi.fn(async () => ({ payloads, meta: { durationMs: 12, aborted: false }, @@ -23,13 +25,13 @@ function createAgentRuntime(payloads: Array<Record<string, unknown>>) { runEmbeddedPiAgent, session: { resolveStorePath: () => "/tmp/openclaw/sessions.json", - loadSessionStore: () => ({}), - saveSessionStore: async () => {}, + loadSessionStore: () => sessionStore, + saveSessionStore, resolveSessionFilePath: () => "/tmp/openclaw/sessions/session.jsonl", }, } as unknown as CoreAgentDeps; - return { runtime, runEmbeddedPiAgent }; + return { runtime, runEmbeddedPiAgent, saveSessionStore, sessionStore }; } function requireEmbeddedAgentArgs(runEmbeddedPiAgent: ReturnType<typeof vi.fn>) { @@ -126,4 +128,39 @@ describe("generateVoiceResponse", () => { expect(result.text).toBe("Absolutely. 
Tell me what you want to do next."); }); + + it("pins the voice session to responseModel before running the embedded agent", async () => { + const { runtime, runEmbeddedPiAgent, saveSessionStore, sessionStore } = createAgentRuntime([ + { text: '{"spoken":"Pinned model works."}' }, + ]); + const voiceConfig = VoiceCallConfigSchema.parse({ + responseModel: "openai/gpt-4.1-nano", + responseTimeoutMs: 5000, + }); + + const result = await generateVoiceResponse({ + voiceConfig, + coreConfig: {} as CoreConfig, + agentRuntime: runtime, + callId: "call-123", + from: "+15550001111", + transcript: [{ speaker: "user", text: "hello there" }], + userMessage: "hello there", + }); + + expect(result.text).toBe("Pinned model works."); + expect(sessionStore["voice:15550001111"]).toMatchObject({ + providerOverride: "openai", + modelOverride: "gpt-4.1-nano", + modelOverrideSource: "auto", + }); + expect(saveSessionStore).toHaveBeenCalledWith("/tmp/openclaw/sessions.json", sessionStore); + expect(runEmbeddedPiAgent).toHaveBeenCalledWith( + expect.objectContaining({ + provider: "openai", + model: "gpt-4.1-nano", + sessionKey: "voice:15550001111", + }), + ); + }); }); diff --git a/extensions/voice-call/src/response-generator.ts b/extensions/voice-call/src/response-generator.ts index 85e16891bea..f91d3184bd6 100644 --- a/extensions/voice-call/src/response-generator.ts +++ b/extensions/voice-call/src/response-generator.ts @@ -4,6 +4,7 @@ */ import crypto from "node:crypto"; +import { applyModelOverrideToSessionEntry } from "openclaw/plugin-sdk/config-runtime"; import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime"; import type { SessionEntry } from "../api.js"; import type { VoiceCallConfig } from "./config.js"; @@ -202,6 +203,7 @@ export async function generateVoiceResponse( const sessionStore = agentRuntime.session.loadSessionStore(storePath); const now = Date.now(); let sessionEntry = sessionStore[sessionKey] as SessionEntry | undefined; + let 
sessionEntryUpdated = false; if (!sessionEntry) { sessionEntry = { @@ -209,16 +211,29 @@ export async function generateVoiceResponse( updatedAt: now, }; sessionStore[sessionKey] = sessionEntry; - await agentRuntime.session.saveSessionStore(storePath, sessionStore); + sessionEntryUpdated = true; } const sessionId = sessionEntry.sessionId; - const sessionFile = agentRuntime.session.resolveSessionFilePath(sessionId, sessionEntry, { - agentId, - }); // Resolve model from config const { provider, model } = resolveVoiceResponseModel({ voiceConfig, agentRuntime }); + if (voiceConfig.responseModel) { + sessionEntryUpdated = + applyModelOverrideToSessionEntry({ + entry: sessionEntry, + selection: { provider, model }, + selectionSource: "auto", + }).updated || sessionEntryUpdated; + } + + if (sessionEntryUpdated) { + await agentRuntime.session.saveSessionStore(storePath, sessionStore); + } + + const sessionFile = agentRuntime.session.resolveSessionFilePath(sessionId, sessionEntry, { + agentId, + }); // Resolve thinking level const thinkLevel = agentRuntime.resolveThinkingDefault({ cfg, provider, model });