fix: refresh Google Meet speech retry readiness

This commit is contained in:
Peter Steinberger
2026-04-30 02:45:44 +01:00
parent cab86dc325
commit f94d970cee
7 changed files with 299 additions and 8 deletions

View File

@@ -32,6 +32,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- CLI/status: resolve read-only channel setup runtime fallback from the packaged OpenClaw dist root, so `status --all`, `status --deep`, channel, and doctor paths do not crash when an external channel plugin needs setup metadata. Fixes #74693. Thanks @giangthb.
- Google Meet: block managed Chrome intro/test speech until browser health proves the participant is in-call, and expose `speechReady` diagnostics so login, admission, permission, and audio-bridge blockers no longer look like successful speech. Refs #72478. Thanks @DougButdorf.
- CLI/update: scope packaged Node compile caches by OpenClaw version and install metadata, so global installs no longer reuse stale compiled chunks after package updates. Thanks @pashpashpash.
- Channels/Voice call: keep pre-auth webhook in-flight limiting active when socket remote address metadata is missing, so slow-body requests from stripped-IP proxy paths still share the fallback bucket. (#74453) Thanks @davidangularme.
- Plugin SDK/testing: lazy-load TypeScript from the plugin test-contract runtime and add release checks for critical SDK contract entrypoint imports and bundle size, so published packages fail preflight before shipping ESM-incompatible or oversized contract helpers. Thanks @vincentkoc.

View File

@@ -166,7 +166,10 @@ health such as `inCall`, `manualActionRequired`, `providerConnected`,
timestamps, byte counters, and bridge closed state. If a safe Meet page prompt
appears, browser automation handles it when it can. Login, host admission, and
browser/OS permission prompts are reported as manual action with a reason and
message for the agent to relay.
message for the agent to relay. Managed Chrome sessions only emit the intro or
test phrase after browser health reports `inCall: true`; otherwise status reports
`speechReady: false` and the speech attempt is blocked instead of pretending the
agent spoke into the meeting.
Local Chrome joins through the signed-in OpenClaw browser profile. Realtime mode
requires `BlackHole 2ch` for the microphone/speaker path used by OpenClaw. For
@@ -1006,6 +1009,9 @@ a session ended.
- `manualActionRequired` / `manualActionReason` / `manualActionMessage`: the
browser profile needs manual login, Meet host admission, permissions, or
browser-control repair before speech can work
- `speechReady` / `speechBlockedReason` / `speechBlockedMessage`: whether
managed Chrome speech is allowed now. `speechReady: false` means OpenClaw did
not send the intro/test phrase into the audio bridge.
- `providerConnected` / `realtimeReady`: realtime voice bridge state
- `lastInputAt` / `lastOutputAt`: last audio seen from or sent to the bridge

View File

@@ -1998,6 +1998,9 @@ describe("google-meet plugin", () => {
details: {
manualActionRequired?: boolean;
manualActionReason?: string;
speechReady?: boolean;
speechBlockedReason?: string;
spoken?: boolean;
session?: { chrome?: { health?: { manualActionRequired?: boolean } } };
};
}>;
@@ -2012,17 +2015,157 @@ describe("google-meet plugin", () => {
expect(result.details).toMatchObject({
manualActionRequired: true,
manualActionReason: "google-login-required",
spoken: false,
speechReady: false,
speechBlockedReason: "google-login-required",
session: {
chrome: {
health: {
manualActionRequired: true,
manualActionReason: "google-login-required",
speechReady: false,
speechBlockedReason: "google-login-required",
},
},
},
});
});
it("refreshes browser health before blocking an explicit speech retry", async () => {
let openedTab = false;
let browserReady = false;
const { methods, nodesInvoke } = setup(
{
defaultTransport: "chrome-node",
defaultMode: "realtime",
},
{
nodesInvokeHandler: async ({ command, params }) => {
const raw = params as { path?: string; body?: { url?: string; targetId?: string } };
if (command === "browser.proxy") {
if (raw.path === "/tabs") {
return {
payload: {
result: {
running: true,
tabs: openedTab
? [
{
targetId: "tab-1",
title: "Meet",
url: "https://meet.google.com/abc-defg-hij",
},
]
: [],
},
},
};
}
if (raw.path === "/tabs/open") {
openedTab = true;
return {
payload: {
result: {
targetId: "tab-1",
title: "Meet",
url: raw.body?.url ?? "https://meet.google.com/abc-defg-hij",
},
},
};
}
if (raw.path === "/tabs/focus" || raw.path === "/permissions/grant") {
return { payload: { result: { ok: true } } };
}
if (raw.path === "/act") {
return {
payload: {
result: {
ok: true,
targetId: raw.body?.targetId ?? "tab-1",
result: JSON.stringify(
browserReady
? {
inCall: true,
micMuted: false,
manualActionRequired: false,
title: "Meet call",
url: "https://meet.google.com/abc-defg-hij",
}
: {
inCall: false,
manualActionRequired: true,
manualActionReason: "google-login-required",
manualActionMessage:
"Sign in to Google in the OpenClaw browser profile, then retry the Meet join.",
title: "Sign in - Google Accounts",
url: "https://accounts.google.com/signin",
},
),
},
},
};
}
}
if (command === "googlemeet.chrome") {
return { payload: { launched: true } };
}
throw new Error(`unexpected invoke ${command}`);
},
},
);
const join = (await invokeGoogleMeetGatewayMethodForTest(methods, "googlemeet.join", {
url: "https://meet.google.com/abc-defg-hij",
message: "Say exactly: hello.",
})) as {
session: { id: string; chrome?: { health?: { speechBlockedReason?: string } } };
spoken: boolean;
};
expect(join.spoken).toBe(false);
expect(join.session.chrome?.health?.speechBlockedReason).toBe("google-login-required");
browserReady = true;
const retry = (await invokeGoogleMeetGatewayMethodForTest(methods, "googlemeet.speak", {
sessionId: join.session.id,
message: "Say exactly: hello again.",
})) as {
found: boolean;
spoken: boolean;
session?: {
chrome?: {
health?: {
inCall?: boolean;
manualActionRequired?: boolean;
speechBlockedReason?: string;
};
};
};
};
expect(retry).toMatchObject({
found: true,
spoken: false,
session: {
chrome: {
health: {
inCall: true,
manualActionRequired: false,
speechBlockedReason: "audio-bridge-unavailable",
},
},
},
});
expect(nodesInvoke).toHaveBeenCalledWith(
expect.objectContaining({
command: "browser.proxy",
params: expect.objectContaining({
path: "/tabs/focus",
body: { targetId: "tab-1" },
}),
}),
);
});
it("explains when chrome-node has no capable paired node", async () => {
const { tools } = setup(
{

View File

@@ -823,7 +823,7 @@ export default definePluginEntry({
return;
}
const rt = await ensureRuntime();
respond(true, rt.speak(sessionId, normalizeOptionalString(params?.message)));
respond(true, await rt.speak(sessionId, normalizeOptionalString(params?.message)));
} catch (err) {
sendError(respond, err);
}

View File

@@ -268,6 +268,11 @@ function writeDoctorStatus(status: ReturnType<GoogleMeetRuntime["status"]>): voi
writeStdoutLine("manual reason: %s", formatOptional(health.manualActionReason));
writeStdoutLine("manual message: %s", formatOptional(health.manualActionMessage));
}
writeStdoutLine("speech ready: %s", formatBoolean(health?.speechReady));
if (health?.speechReady === false) {
writeStdoutLine("speech blocked reason: %s", formatOptional(health.speechBlockedReason));
writeStdoutLine("speech blocked message: %s", formatOptional(health.speechBlockedMessage));
}
writeStdoutLine("provider connected: %s", formatBoolean(health?.providerConnected));
writeStdoutLine("realtime ready: %s", formatBoolean(health?.realtimeReady));
writeStdoutLine("audio input active: %s", formatBoolean(health?.audioInputActive));
@@ -2017,12 +2022,15 @@ export function registerGoogleMeetCli(params: {
.argument("[message]", "Realtime instructions to speak now")
.action(async (sessionId: string, message?: string) => {
const rt = await params.ensureRuntime();
const result = rt.speak(sessionId, message);
const result = await rt.speak(sessionId, message);
if (!result.found) {
throw new Error("session not found");
}
if (!result.spoken) {
throw new Error("session has no active realtime audio bridge");
throw new Error(
result.session?.chrome?.health?.speechBlockedMessage ??
"session has no active realtime audio bridge",
);
}
writeStdoutLine("speaking on %s", sessionId);
});

View File

@@ -66,6 +66,66 @@ function sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
 * True when the session drives a managed Chrome browser (local `chrome` or
 * paired `chrome-node` transport) that has actually been launched.
 */
function isManagedChromeBrowserSession(session: GoogleMeetSession): boolean {
  const usesManagedTransport =
    session.transport === "chrome" || session.transport === "chrome-node";
  // `launched` may be absent on sessions that never started a browser.
  return usesManagedTransport && Boolean(session.chrome?.launched);
}
/**
 * Decide whether a realtime session is currently allowed to emit speech.
 * Returns `ready: true`, or `ready: false` plus a blocked reason/message
 * suitable for surfacing through status, doctor output, and session notes.
 */
function evaluateSpeechReadiness(session: GoogleMeetSession): {
  ready: boolean;
  reason?: NonNullable<GoogleMeetChromeHealth["speechBlockedReason"]>;
  message?: string;
} {
  // Speech gating only applies to realtime sessions with Chrome state attached.
  if (session.mode !== "realtime" || !session.chrome) {
    return { ready: true };
  }
  // Sessions outside a managed, launched Chrome browser only need the bridge.
  if (!isManagedChromeBrowserSession(session)) {
    return session.chrome.audioBridge
      ? { ready: true }
      : {
          ready: false,
          reason: "audio-bridge-unavailable",
          message: "Realtime speech requires an active Chrome audio bridge.",
        };
  }
  const browserHealth = session.chrome.health;
  // A pending manual action (login, admission, permissions) always blocks speech.
  if (browserHealth?.manualActionRequired) {
    return {
      ready: false,
      reason: browserHealth.manualActionReason ?? "browser-unverified",
      message:
        browserHealth.manualActionMessage ??
        "Resolve the Google Meet browser prompt before asking OpenClaw to speak.",
    };
  }
  if (browserHealth?.inCall === true) {
    // Confirmed in-call: speech still needs the realtime audio bridge.
    if (!session.chrome.audioBridge) {
      return {
        ready: false,
        reason: "audio-bridge-unavailable",
        message: "Realtime speech requires an active Chrome audio bridge.",
      };
    }
    return { ready: true };
  }
  if (browserHealth?.inCall === false) {
    return {
      ready: false,
      reason: "not-in-call",
      message: "Google Meet has not reported that the browser participant is in the call.",
    };
  }
  // `inCall` is undefined: the browser state has never been verified.
  return {
    ready: false,
    reason: "browser-unverified",
    message: "Google Meet browser state has not been verified yet.",
  };
}
function collectChromeAudioCommands(config: GoogleMeetConfig): string[] {
const commands = config.chrome.audioBridgeCommand
? [config.chrome.audioBridgeCommand[0]]
@@ -228,6 +288,7 @@ export class GoogleMeetRuntime {
);
const speechInstructions = request.message ?? this.params.config.realtime.introMessage;
if (reusable) {
await this.#refreshBrowserHealthForChromeSession(reusable);
reusable.notes = [
...reusable.notes.filter((note) => note !== "Reused existing active Meet session."),
"Reused existing active Meet session.",
@@ -235,7 +296,7 @@ export class GoogleMeetRuntime {
reusable.updatedAt = nowIso();
const spoken =
mode === "realtime" && speechInstructions
? this.speak(reusable.id, speechInstructions).spoken
? (await this.speak(reusable.id, speechInstructions)).spoken
: false;
return { session: reusable, spoken };
}
@@ -320,6 +381,7 @@ export class GoogleMeetRuntime {
? "Chrome transport joins as the signed-in Google profile and expects BlackHole 2ch audio routing."
: "Chrome transport joins as the signed-in Google profile without starting the realtime audio bridge.",
);
this.#refreshSpeechReadiness(session);
} else {
const dialInNumber = normalizeDialInNumber(
request.dialInNumber ?? this.params.config.twilio.defaultDialInNumber,
@@ -367,7 +429,7 @@ export class GoogleMeetRuntime {
this.#sessions.set(session.id, session);
const spoken =
mode === "realtime" && speechInstructions
? this.speak(session.id, speechInstructions).spoken
? (await this.speak(session.id, speechInstructions)).spoken
: false;
return { session, spoken };
}
@@ -389,18 +451,28 @@ export class GoogleMeetRuntime {
return { found: true, session };
}
speak(
async speak(
sessionId: string,
instructions?: string,
): { found: boolean; spoken: boolean; session?: GoogleMeetSession } {
): Promise<{ found: boolean; spoken: boolean; session?: GoogleMeetSession }> {
const session = this.#sessions.get(sessionId);
if (!session) {
return { found: false, spoken: false };
}
await this.#refreshBrowserHealthForChromeSession(session);
const speak = this.#sessionSpeakers.get(sessionId);
if (!speak || session.state !== "active") {
return { found: true, spoken: false, session };
}
const readiness = this.#refreshSpeechReadiness(session);
if (!readiness.ready) {
const note = readiness.message
? `Realtime speech blocked: ${readiness.message}`
: "Realtime speech blocked until Google Meet is ready.";
session.notes = [...session.notes.filter((item) => item !== note), note];
session.updatedAt = nowIso();
return { found: true, spoken: false, session };
}
speak(instructions || this.params.config.realtime.introMessage);
session.updatedAt = nowIso();
this.#refreshHealth(sessionId);
@@ -416,6 +488,9 @@ export class GoogleMeetRuntime {
spoken: boolean;
speechOutputVerified: boolean;
speechOutputTimedOut: boolean;
speechReady?: boolean;
speechBlockedReason?: GoogleMeetChromeHealth["speechBlockedReason"];
speechBlockedMessage?: string;
audioOutputActive?: boolean;
lastOutputBytes?: number;
session: GoogleMeetSession;
@@ -470,12 +545,60 @@ export class GoogleMeetRuntime {
spoken: result.spoken ?? false,
speechOutputVerified,
speechOutputTimedOut: shouldWaitForOutput && !speechOutputVerified,
speechReady: health?.speechReady,
speechBlockedReason: health?.speechBlockedReason,
speechBlockedMessage: health?.speechBlockedMessage,
audioOutputActive: health?.audioOutputActive,
lastOutputBytes: health?.lastOutputBytes,
session: result.session,
};
}
// Re-probe the managed Chrome tab before deciding speech readiness, so an
// explicit speech retry sees fresh browser health instead of a stale blocker.
// No-op (beyond recomputing readiness flags) when the session is not managed
// Chrome or is already speech-ready.
async #refreshBrowserHealthForChromeSession(session: GoogleMeetSession) {
if (!isManagedChromeBrowserSession(session) || evaluateSpeechReadiness(session).ready) {
this.#refreshSpeechReadiness(session);
return;
}
try {
// Recover the current Meet tab via the matching transport path; both return
// a `found`/`browser` result describing live browser health.
const result =
session.transport === "chrome-node"
? await recoverCurrentMeetTabOnNode({
runtime: this.params.runtime,
config: this.params.config,
url: session.url,
})
: await recoverCurrentMeetTab({
config: this.params.config,
url: session.url,
});
if (result.found && result.browser && session.chrome) {
// Merge fresh browser health over the cached snapshot, keeping fields the
// probe did not report.
session.chrome.health = {
...session.chrome.health,
...result.browser,
};
session.updatedAt = nowIso();
}
} catch (error) {
// Best-effort refresh: a probe failure must not break the speak/status path,
// so log at debug and fall through to readiness recomputation.
this.params.logger.debug?.(
`[google-meet] browser readiness refresh ignored: ${formatErrorMessage(error)}`,
);
}
// Always recompute speechReady/speechBlocked* from whatever health we now have.
this.#refreshSpeechReadiness(session);
}
// Recompute speech readiness and mirror the verdict onto the session's Chrome
// health snapshot (`speechReady` / `speechBlockedReason` / `speechBlockedMessage`).
// Returns the readiness result so callers can branch on it directly.
#refreshSpeechReadiness(session: GoogleMeetSession) {
  const readiness = evaluateSpeechReadiness(session);
  const chrome = session.chrome;
  if (chrome) {
    chrome.health = {
      ...chrome.health,
      speechReady: readiness.ready,
      speechBlockedReason: readiness.reason,
      speechBlockedMessage: readiness.message,
    };
  }
  return readiness;
}
#refreshHealth(sessionId?: string) {
const ids = sessionId ? [sessionId] : [...this.#sessionHealth.keys()];
for (const id of ids) {
@@ -488,6 +611,7 @@ export class GoogleMeetRuntime {
...session.chrome.health,
...getHealth(),
};
this.#refreshSpeechReadiness(session);
}
}
}

View File

@@ -19,12 +19,21 @@ export type GoogleMeetManualActionReason =
| "meet-audio-choice-required"
| "browser-control-unavailable";
/**
 * Why managed Chrome speech is currently blocked: any manual-action reason
 * (login, admission, permissions, ...), or a speech-specific state — the
 * browser participant is not in the call, browser state has not been
 * verified yet, or the realtime audio bridge is unavailable.
 */
export type GoogleMeetSpeechBlockedReason =
| GoogleMeetManualActionReason
| "not-in-call"
| "browser-unverified"
| "audio-bridge-unavailable";
export type GoogleMeetChromeHealth = {
inCall?: boolean;
micMuted?: boolean;
manualActionRequired?: boolean;
manualActionReason?: GoogleMeetManualActionReason;
manualActionMessage?: string;
speechReady?: boolean;
speechBlockedReason?: GoogleMeetSpeechBlockedReason;
speechBlockedMessage?: string;
providerConnected?: boolean;
realtimeReady?: boolean;
audioInputActive?: boolean;