fix(voice-call): pin response model sessions

This commit is contained in:
Peter Steinberger
2026-04-25 02:52:19 +01:00
parent 5d4931cc3f
commit a5ab488691
3 changed files with 60 additions and 7 deletions

View File

@@ -66,6 +66,7 @@ Docs: https://docs.openclaw.ai
- Discord/subagents: preserve thread-bound completion delivery by keeping the requester-agent announce path primary and falling back to direct thread sends only when the announce produces no visible output. (#71064) Thanks @DolencLuka.
- Browser/tool: give Chrome MCP existing-session manage calls a longer default timeout, pass explicit tool timeouts through tab management, and recover stale selected-page MCP sessions instead of forcing a manual reset. Thanks @steipete.
- Plugins/Voice Call: pin voice response sessions to `responseModel` before embedded agent runs, avoiding live-session model switch failures when the global default model differs. Fixes #60118. Thanks @xinbenlv.
- Gateway/sessions: recover main-agent turns interrupted by a gateway restart from stale transcript-lock evidence, avoiding stuck `status: "running"` sessions without broad post-boot transcript scans. Fixes #70555. Thanks @bitloi.
- Codex approvals: keep command approval responses within Codex app-server `availableDecisions`, including deny/cancel fallbacks for prompts that do not offer `decline`. (#71338) Thanks @Lucenx9.
- Plugins/Google Meet: include live Chrome-node readiness in `googlemeet setup` and document the Parallels recovery checks, so stale node tokens or disconnected VM browsers are visible before an agent opens a meeting. Thanks @steipete.

View File

@@ -4,6 +4,8 @@ import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
import { generateVoiceResponse } from "./response-generator.js";
function createAgentRuntime(payloads: Array<Record<string, unknown>>) {
const sessionStore: Record<string, { sessionId: string; updatedAt: number }> = {};
const saveSessionStore = vi.fn(async () => {});
const runEmbeddedPiAgent = vi.fn(async () => ({
payloads,
meta: { durationMs: 12, aborted: false },
@@ -23,13 +25,13 @@ function createAgentRuntime(payloads: Array<Record<string, unknown>>) {
runEmbeddedPiAgent,
session: {
resolveStorePath: () => "/tmp/openclaw/sessions.json",
loadSessionStore: () => ({}),
saveSessionStore: async () => {},
loadSessionStore: () => sessionStore,
saveSessionStore,
resolveSessionFilePath: () => "/tmp/openclaw/sessions/session.jsonl",
},
} as unknown as CoreAgentDeps;
return { runtime, runEmbeddedPiAgent };
return { runtime, runEmbeddedPiAgent, saveSessionStore, sessionStore };
}
function requireEmbeddedAgentArgs(runEmbeddedPiAgent: ReturnType<typeof vi.fn>) {
@@ -126,4 +128,39 @@ describe("generateVoiceResponse", () => {
expect(result.text).toBe("Absolutely. Tell me what you want to do next.");
});
// Regression test for #60118: when voiceConfig.responseModel is set, the voice
// session entry must be pinned to that model before the embedded agent runs,
// and the updated session store must be persisted.
it("pins the voice session to responseModel before running the embedded agent", async () => {
// Stubbed runtime yielding one agent payload; exposes the in-memory session
// store and the saveSessionStore spy so persistence can be asserted below.
const { runtime, runEmbeddedPiAgent, saveSessionStore, sessionStore } = createAgentRuntime([
{ text: '{"spoken":"Pinned model works."}' },
]);
// Explicit "provider/model" response model; the slug is expected to be split
// into providerOverride/modelOverride on the session entry below.
const voiceConfig = VoiceCallConfigSchema.parse({
responseModel: "openai/gpt-4.1-nano",
responseTimeoutMs: 5000,
});
const result = await generateVoiceResponse({
voiceConfig,
coreConfig: {} as CoreConfig,
agentRuntime: runtime,
callId: "call-123",
from: "+15550001111",
transcript: [{ speaker: "user", text: "hello there" }],
userMessage: "hello there",
});
// Spoken text comes from the stubbed agent payload above.
expect(result.text).toBe("Pinned model works.");
// Session entry is keyed by the caller number (leading "+" stripped) and
// carries the parsed provider/model override, marked as an automatic pin.
expect(sessionStore["voice:15550001111"]).toMatchObject({
providerOverride: "openai",
modelOverride: "gpt-4.1-nano",
modelOverrideSource: "auto",
});
// The mutated store must be written back to the resolved store path.
expect(saveSessionStore).toHaveBeenCalledWith("/tmp/openclaw/sessions.json", sessionStore);
// The embedded agent run must receive the same pinned provider/model and
// the matching session key.
expect(runEmbeddedPiAgent).toHaveBeenCalledWith(
expect.objectContaining({
provider: "openai",
model: "gpt-4.1-nano",
sessionKey: "voice:15550001111",
}),
);
});
});

View File

@@ -4,6 +4,7 @@
*/
import crypto from "node:crypto";
import { applyModelOverrideToSessionEntry } from "openclaw/plugin-sdk/config-runtime";
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
import type { SessionEntry } from "../api.js";
import type { VoiceCallConfig } from "./config.js";
@@ -202,6 +203,7 @@ export async function generateVoiceResponse(
const sessionStore = agentRuntime.session.loadSessionStore(storePath);
const now = Date.now();
let sessionEntry = sessionStore[sessionKey] as SessionEntry | undefined;
let sessionEntryUpdated = false;
if (!sessionEntry) {
sessionEntry = {
@@ -209,16 +211,29 @@ export async function generateVoiceResponse(
updatedAt: now,
};
sessionStore[sessionKey] = sessionEntry;
await agentRuntime.session.saveSessionStore(storePath, sessionStore);
sessionEntryUpdated = true;
}
const sessionId = sessionEntry.sessionId;
const sessionFile = agentRuntime.session.resolveSessionFilePath(sessionId, sessionEntry, {
agentId,
});
// Resolve model from config
const { provider, model } = resolveVoiceResponseModel({ voiceConfig, agentRuntime });
if (voiceConfig.responseModel) {
sessionEntryUpdated =
applyModelOverrideToSessionEntry({
entry: sessionEntry,
selection: { provider, model },
selectionSource: "auto",
}).updated || sessionEntryUpdated;
}
if (sessionEntryUpdated) {
await agentRuntime.session.saveSessionStore(storePath, sessionStore);
}
const sessionFile = agentRuntime.session.resolveSessionFilePath(sessionId, sessionEntry, {
agentId,
});
// Resolve thinking level
const thinkLevel = agentRuntime.resolveThinkingDefault({ cfg, provider, model });