mirror of
https://fastgit.cc/github.com/openclaw/openclaw
synced 2026-04-30 22:12:32 +08:00
fix: refresh Google Meet speech retry readiness
This commit is contained in:
@@ -32,6 +32,7 @@ Docs: https://docs.openclaw.ai
|
||||
### Fixes
|
||||
|
||||
- CLI/status: resolve read-only channel setup runtime fallback from the packaged OpenClaw dist root, so `status --all`, `status --deep`, channel, and doctor paths do not crash when an external channel plugin needs setup metadata. Fixes #74693. Thanks @giangthb.
|
||||
- Google Meet: block managed Chrome intro/test speech until browser health proves the participant is in-call, and expose `speechReady` diagnostics so login, admission, permission, and audio-bridge blockers no longer look like successful speech. Refs #72478. Thanks @DougButdorf.
|
||||
- CLI/update: scope packaged Node compile caches by OpenClaw version and install metadata, so global installs no longer reuse stale compiled chunks after package updates. Thanks @pashpashpash.
|
||||
- Channels/Voice call: keep pre-auth webhook in-flight limiting active when socket remote address metadata is missing, so slow-body requests from stripped-IP proxy paths still share the fallback bucket. (#74453) Thanks @davidangularme.
|
||||
- Plugin SDK/testing: lazy-load TypeScript from the plugin test-contract runtime and add release checks for critical SDK contract entrypoint imports and bundle size, so published packages fail preflight before shipping ESM-incompatible or oversized contract helpers. Thanks @vincentkoc.
|
||||
|
||||
@@ -166,7 +166,10 @@ health such as `inCall`, `manualActionRequired`, `providerConnected`,
|
||||
timestamps, byte counters, and bridge closed state. If a safe Meet page prompt
|
||||
appears, browser automation handles it when it can. Login, host admission, and
|
||||
browser/OS permission prompts are reported as manual action with a reason and
|
||||
message for the agent to relay.
|
||||
message for the agent to relay. Managed Chrome sessions only emit the intro or
|
||||
test phrase after browser health reports `inCall: true`; otherwise status reports
|
||||
`speechReady: false` and the speech attempt is blocked instead of pretending the
|
||||
agent spoke into the meeting.
|
||||
|
||||
Local Chrome joins through the signed-in OpenClaw browser profile. Realtime mode
|
||||
requires `BlackHole 2ch` for the microphone/speaker path used by OpenClaw. For
|
||||
@@ -1006,6 +1009,9 @@ a session ended.
|
||||
- `manualActionRequired` / `manualActionReason` / `manualActionMessage`: the
|
||||
browser profile needs manual login, Meet host admission, permissions, or
|
||||
browser-control repair before speech can work
|
||||
- `speechReady` / `speechBlockedReason` / `speechBlockedMessage`: whether
|
||||
managed Chrome speech is allowed now. `speechReady: false` means OpenClaw did
|
||||
not send the intro/test phrase into the audio bridge.
|
||||
- `providerConnected` / `realtimeReady`: realtime voice bridge state
|
||||
- `lastInputAt` / `lastOutputAt`: last audio seen from or sent to the bridge
|
||||
|
||||
|
||||
@@ -1998,6 +1998,9 @@ describe("google-meet plugin", () => {
|
||||
details: {
|
||||
manualActionRequired?: boolean;
|
||||
manualActionReason?: string;
|
||||
speechReady?: boolean;
|
||||
speechBlockedReason?: string;
|
||||
spoken?: boolean;
|
||||
session?: { chrome?: { health?: { manualActionRequired?: boolean } } };
|
||||
};
|
||||
}>;
|
||||
@@ -2012,17 +2015,157 @@ describe("google-meet plugin", () => {
|
||||
expect(result.details).toMatchObject({
|
||||
manualActionRequired: true,
|
||||
manualActionReason: "google-login-required",
|
||||
spoken: false,
|
||||
speechReady: false,
|
||||
speechBlockedReason: "google-login-required",
|
||||
session: {
|
||||
chrome: {
|
||||
health: {
|
||||
manualActionRequired: true,
|
||||
manualActionReason: "google-login-required",
|
||||
speechReady: false,
|
||||
speechBlockedReason: "google-login-required",
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it("refreshes browser health before blocking an explicit speech retry", async () => {
  const meetUrl = "https://meet.google.com/abc-defg-hij";
  const tabId = "tab-1";

  // Mutable state of the fake browser; the test flips `signedIn` between the
  // initial join and the explicit speak retry.
  const fakeBrowser = { tabOpen: false, signedIn: false };

  // Every browser.proxy reply is wrapped as `{ payload: { result } }`.
  const proxyReply = <T>(result: T) => ({ payload: { result } });

  // Page state reported by `/act` while the Google sign-in page is showing.
  const signInPageState = {
    inCall: false,
    manualActionRequired: true,
    manualActionReason: "google-login-required",
    manualActionMessage:
      "Sign in to Google in the OpenClaw browser profile, then retry the Meet join.",
    title: "Sign in - Google Accounts",
    url: "https://accounts.google.com/signin",
  };

  // Page state reported by `/act` once the participant is in the call.
  const inCallPageState = {
    inCall: true,
    micMuted: false,
    manualActionRequired: false,
    title: "Meet call",
    url: meetUrl,
  };

  const { methods, nodesInvoke } = setup(
    {
      defaultTransport: "chrome-node",
      defaultMode: "realtime",
    },
    {
      nodesInvokeHandler: async ({ command, params }) => {
        if (command === "googlemeet.chrome") {
          return { payload: { launched: true } };
        }
        if (command === "browser.proxy") {
          const request = params as {
            path?: string;
            body?: { url?: string; targetId?: string };
          };
          switch (request.path) {
            case "/tabs":
              return proxyReply({
                running: true,
                tabs: fakeBrowser.tabOpen
                  ? [{ targetId: tabId, title: "Meet", url: meetUrl }]
                  : [],
              });
            case "/tabs/open":
              fakeBrowser.tabOpen = true;
              return proxyReply({
                targetId: tabId,
                title: "Meet",
                url: request.body?.url ?? meetUrl,
              });
            case "/tabs/focus":
            case "/permissions/grant":
              return proxyReply({ ok: true });
            case "/act":
              return proxyReply({
                ok: true,
                targetId: request.body?.targetId ?? tabId,
                result: JSON.stringify(
                  fakeBrowser.signedIn ? inCallPageState : signInPageState,
                ),
              });
            default:
              // Unknown proxy paths fall through to the error below.
              break;
          }
        }
        throw new Error(`unexpected invoke ${command}`);
      },
    },
  );

  type JoinResult = {
    session: { id: string; chrome?: { health?: { speechBlockedReason?: string } } };
    spoken: boolean;
  };
  type SpeakResult = {
    found: boolean;
    spoken: boolean;
    session?: {
      chrome?: {
        health?: {
          inCall?: boolean;
          manualActionRequired?: boolean;
          speechBlockedReason?: string;
        };
      };
    };
  };

  // First attempt: the browser is stuck on the sign-in page, so speech is blocked.
  const join = (await invokeGoogleMeetGatewayMethodForTest(methods, "googlemeet.join", {
    url: meetUrl,
    message: "Say exactly: hello.",
  })) as JoinResult;
  expect(join.spoken).toBe(false);
  expect(join.session.chrome?.health?.speechBlockedReason).toBe("google-login-required");

  // The browser is now in the call; the retry must observe the new state rather
  // than the health captured during the join.
  fakeBrowser.signedIn = true;
  const retry = (await invokeGoogleMeetGatewayMethodForTest(methods, "googlemeet.speak", {
    sessionId: join.session.id,
    message: "Say exactly: hello again.",
  })) as SpeakResult;

  expect(retry).toMatchObject({
    found: true,
    spoken: false,
    session: {
      chrome: {
        health: {
          inCall: true,
          manualActionRequired: false,
          speechBlockedReason: "audio-bridge-unavailable",
        },
      },
    },
  });
  expect(nodesInvoke).toHaveBeenCalledWith(
    expect.objectContaining({
      command: "browser.proxy",
      params: expect.objectContaining({
        path: "/tabs/focus",
        body: { targetId: tabId },
      }),
    }),
  );
});
|
||||
|
||||
it("explains when chrome-node has no capable paired node", async () => {
|
||||
const { tools } = setup(
|
||||
{
|
||||
|
||||
@@ -823,7 +823,7 @@ export default definePluginEntry({
|
||||
return;
|
||||
}
|
||||
const rt = await ensureRuntime();
|
||||
respond(true, rt.speak(sessionId, normalizeOptionalString(params?.message)));
|
||||
respond(true, await rt.speak(sessionId, normalizeOptionalString(params?.message)));
|
||||
} catch (err) {
|
||||
sendError(respond, err);
|
||||
}
|
||||
|
||||
@@ -268,6 +268,11 @@ function writeDoctorStatus(status: ReturnType<GoogleMeetRuntime["status"]>): voi
|
||||
writeStdoutLine("manual reason: %s", formatOptional(health.manualActionReason));
|
||||
writeStdoutLine("manual message: %s", formatOptional(health.manualActionMessage));
|
||||
}
|
||||
writeStdoutLine("speech ready: %s", formatBoolean(health?.speechReady));
|
||||
if (health?.speechReady === false) {
|
||||
writeStdoutLine("speech blocked reason: %s", formatOptional(health.speechBlockedReason));
|
||||
writeStdoutLine("speech blocked message: %s", formatOptional(health.speechBlockedMessage));
|
||||
}
|
||||
writeStdoutLine("provider connected: %s", formatBoolean(health?.providerConnected));
|
||||
writeStdoutLine("realtime ready: %s", formatBoolean(health?.realtimeReady));
|
||||
writeStdoutLine("audio input active: %s", formatBoolean(health?.audioInputActive));
|
||||
@@ -2017,12 +2022,15 @@ export function registerGoogleMeetCli(params: {
|
||||
.argument("[message]", "Realtime instructions to speak now")
|
||||
.action(async (sessionId: string, message?: string) => {
|
||||
const rt = await params.ensureRuntime();
|
||||
const result = rt.speak(sessionId, message);
|
||||
const result = await rt.speak(sessionId, message);
|
||||
if (!result.found) {
|
||||
throw new Error("session not found");
|
||||
}
|
||||
if (!result.spoken) {
|
||||
throw new Error("session has no active realtime audio bridge");
|
||||
throw new Error(
|
||||
result.session?.chrome?.health?.speechBlockedMessage ??
|
||||
"session has no active realtime audio bridge",
|
||||
);
|
||||
}
|
||||
writeStdoutLine("speaking on %s", sessionId);
|
||||
});
|
||||
|
||||
@@ -66,6 +66,66 @@ function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
/**
 * Reports whether a session runs over a Chrome transport (`"chrome"` or
 * `"chrome-node"`) and its Chrome state is marked as launched.
 *
 * @param session - Session to inspect.
 * @returns `true` only when the transport is a Chrome transport and
 *   `session.chrome.launched` is truthy.
 */
function isManagedChromeBrowserSession(session: GoogleMeetSession): boolean {
  const usesChromeTransport =
    session.transport === "chrome" || session.transport === "chrome-node";
  if (!usesChromeTransport) {
    return false;
  }
  // A missing `chrome` record and a non-launched one are both "not managed".
  return Boolean(session.chrome?.launched);
}
|
||||
|
||||
/**
 * Decides whether realtime speech may be sent for a session right now.
 *
 * Decision order:
 * 1. Sessions that are not in `"realtime"` mode, or that carry no Chrome
 *    state, are always reported ready.
 * 2. Managed Chrome sessions (see `isManagedChromeBrowserSession`) are blocked
 *    while browser health reports a required manual action, while it reports
 *    `inCall: false`, or while `inCall` has not been reported at all.
 * 3. Every remaining Chrome session is ready only when an audio bridge is
 *    attached.
 *
 * @param session - Session whose mode, Chrome health and audio bridge are read.
 * @returns `{ ready: true }` when speech is allowed; otherwise `ready: false`
 *   with the blocking `reason` and a human-readable `message`.
 */
function evaluateSpeechReadiness(session: GoogleMeetSession): {
  ready: boolean;
  reason?: NonNullable<GoogleMeetChromeHealth["speechBlockedReason"]>;
  message?: string;
} {
  const chrome = session.chrome;
  if (session.mode !== "realtime" || !chrome) {
    return { ready: true };
  }

  if (isManagedChromeBrowserSession(session)) {
    const health = chrome.health;
    if (health?.manualActionRequired) {
      // Surface the browser's own reason/message when it supplied them.
      return {
        ready: false,
        reason: health.manualActionReason ?? "browser-unverified",
        message:
          health.manualActionMessage ??
          "Resolve the Google Meet browser prompt before asking OpenClaw to speak.",
      };
    }
    if (health?.inCall === false) {
      return {
        ready: false,
        reason: "not-in-call",
        message: "Google Meet has not reported that the browser participant is in the call.",
      };
    }
    if (health?.inCall !== true) {
      // No health yet, or health without an `inCall` verdict.
      return {
        ready: false,
        reason: "browser-unverified",
        message: "Google Meet browser state has not been verified yet.",
      };
    }
  }

  // Unmanaged Chrome sessions and in-call managed sessions share one last gate:
  // there has to be an audio bridge to speak into.
  if (chrome.audioBridge) {
    return { ready: true };
  }
  return {
    ready: false,
    reason: "audio-bridge-unavailable",
    message: "Realtime speech requires an active Chrome audio bridge.",
  };
}
|
||||
|
||||
function collectChromeAudioCommands(config: GoogleMeetConfig): string[] {
|
||||
const commands = config.chrome.audioBridgeCommand
|
||||
? [config.chrome.audioBridgeCommand[0]]
|
||||
@@ -228,6 +288,7 @@ export class GoogleMeetRuntime {
|
||||
);
|
||||
const speechInstructions = request.message ?? this.params.config.realtime.introMessage;
|
||||
if (reusable) {
|
||||
await this.#refreshBrowserHealthForChromeSession(reusable);
|
||||
reusable.notes = [
|
||||
...reusable.notes.filter((note) => note !== "Reused existing active Meet session."),
|
||||
"Reused existing active Meet session.",
|
||||
@@ -235,7 +296,7 @@ export class GoogleMeetRuntime {
|
||||
reusable.updatedAt = nowIso();
|
||||
const spoken =
|
||||
mode === "realtime" && speechInstructions
|
||||
? this.speak(reusable.id, speechInstructions).spoken
|
||||
? (await this.speak(reusable.id, speechInstructions)).spoken
|
||||
: false;
|
||||
return { session: reusable, spoken };
|
||||
}
|
||||
@@ -320,6 +381,7 @@ export class GoogleMeetRuntime {
|
||||
? "Chrome transport joins as the signed-in Google profile and expects BlackHole 2ch audio routing."
|
||||
: "Chrome transport joins as the signed-in Google profile without starting the realtime audio bridge.",
|
||||
);
|
||||
this.#refreshSpeechReadiness(session);
|
||||
} else {
|
||||
const dialInNumber = normalizeDialInNumber(
|
||||
request.dialInNumber ?? this.params.config.twilio.defaultDialInNumber,
|
||||
@@ -367,7 +429,7 @@ export class GoogleMeetRuntime {
|
||||
this.#sessions.set(session.id, session);
|
||||
const spoken =
|
||||
mode === "realtime" && speechInstructions
|
||||
? this.speak(session.id, speechInstructions).spoken
|
||||
? (await this.speak(session.id, speechInstructions)).spoken
|
||||
: false;
|
||||
return { session, spoken };
|
||||
}
|
||||
@@ -389,18 +451,28 @@ export class GoogleMeetRuntime {
|
||||
return { found: true, session };
|
||||
}
|
||||
|
||||
speak(
|
||||
async speak(
|
||||
sessionId: string,
|
||||
instructions?: string,
|
||||
): { found: boolean; spoken: boolean; session?: GoogleMeetSession } {
|
||||
): Promise<{ found: boolean; spoken: boolean; session?: GoogleMeetSession }> {
|
||||
const session = this.#sessions.get(sessionId);
|
||||
if (!session) {
|
||||
return { found: false, spoken: false };
|
||||
}
|
||||
await this.#refreshBrowserHealthForChromeSession(session);
|
||||
const speak = this.#sessionSpeakers.get(sessionId);
|
||||
if (!speak || session.state !== "active") {
|
||||
return { found: true, spoken: false, session };
|
||||
}
|
||||
const readiness = this.#refreshSpeechReadiness(session);
|
||||
if (!readiness.ready) {
|
||||
const note = readiness.message
|
||||
? `Realtime speech blocked: ${readiness.message}`
|
||||
: "Realtime speech blocked until Google Meet is ready.";
|
||||
session.notes = [...session.notes.filter((item) => item !== note), note];
|
||||
session.updatedAt = nowIso();
|
||||
return { found: true, spoken: false, session };
|
||||
}
|
||||
speak(instructions || this.params.config.realtime.introMessage);
|
||||
session.updatedAt = nowIso();
|
||||
this.#refreshHealth(sessionId);
|
||||
@@ -416,6 +488,9 @@ export class GoogleMeetRuntime {
|
||||
spoken: boolean;
|
||||
speechOutputVerified: boolean;
|
||||
speechOutputTimedOut: boolean;
|
||||
speechReady?: boolean;
|
||||
speechBlockedReason?: GoogleMeetChromeHealth["speechBlockedReason"];
|
||||
speechBlockedMessage?: string;
|
||||
audioOutputActive?: boolean;
|
||||
lastOutputBytes?: number;
|
||||
session: GoogleMeetSession;
|
||||
@@ -470,12 +545,60 @@ export class GoogleMeetRuntime {
|
||||
spoken: result.spoken ?? false,
|
||||
speechOutputVerified,
|
||||
speechOutputTimedOut: shouldWaitForOutput && !speechOutputVerified,
|
||||
speechReady: health?.speechReady,
|
||||
speechBlockedReason: health?.speechBlockedReason,
|
||||
speechBlockedMessage: health?.speechBlockedMessage,
|
||||
audioOutputActive: health?.audioOutputActive,
|
||||
lastOutputBytes: health?.lastOutputBytes,
|
||||
session: result.session,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Re-probes the Meet browser tab for a managed Chrome session that is not
 * currently speech-ready, then recomputes the session's speech readiness.
 *
 * Sessions that are not managed Chrome sessions, or that are already ready,
 * skip the probe and only have their readiness fields refreshed. Probe
 * failures are logged at debug level and otherwise ignored, so a failed
 * refresh never turns into a failed speak or join.
 */
async #refreshBrowserHealthForChromeSession(session: GoogleMeetSession) {
  const needsProbe =
    isManagedChromeBrowserSession(session) && !evaluateSpeechReadiness(session).ready;

  if (needsProbe) {
    try {
      const { config } = this.params;
      const { url } = session;
      // chrome-node sessions are probed through the node runtime; local Chrome
      // sessions are probed directly.
      const pendingRecovery =
        session.transport === "chrome-node"
          ? recoverCurrentMeetTabOnNode({ runtime: this.params.runtime, config, url })
          : recoverCurrentMeetTab({ config, url });
      const recovered = await pendingRecovery;
      const chrome = session.chrome;
      if (recovered.found && recovered.browser && chrome) {
        // Fresh browser fields overwrite the previous health snapshot.
        chrome.health = { ...chrome.health, ...recovered.browser };
        session.updatedAt = nowIso();
      }
    } catch (error) {
      this.params.logger.debug?.(
        `[google-meet] browser readiness refresh ignored: ${formatErrorMessage(error)}`,
      );
    }
  }

  this.#refreshSpeechReadiness(session);
}
|
||||
|
||||
/**
 * Recomputes speech readiness for a session and, when the session has Chrome
 * state, records the verdict on `session.chrome.health` as `speechReady`,
 * `speechBlockedReason` and `speechBlockedMessage`.
 *
 * @returns The readiness verdict from `evaluateSpeechReadiness`.
 */
#refreshSpeechReadiness(session: GoogleMeetSession) {
  const verdict = evaluateSpeechReadiness(session);
  const chrome = session.chrome;
  if (!chrome) {
    return verdict;
  }
  const { ready, reason, message } = verdict;
  // Health is replaced with a new object rather than mutated in place.
  chrome.health = {
    ...chrome.health,
    speechReady: ready,
    speechBlockedReason: reason,
    speechBlockedMessage: message,
  };
  return verdict;
}
|
||||
|
||||
#refreshHealth(sessionId?: string) {
|
||||
const ids = sessionId ? [sessionId] : [...this.#sessionHealth.keys()];
|
||||
for (const id of ids) {
|
||||
@@ -488,6 +611,7 @@ export class GoogleMeetRuntime {
|
||||
...session.chrome.health,
|
||||
...getHealth(),
|
||||
};
|
||||
this.#refreshSpeechReadiness(session);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,12 +19,21 @@ export type GoogleMeetManualActionReason =
|
||||
| "meet-audio-choice-required"
|
||||
| "browser-control-unavailable";
|
||||
|
||||
/**
 * Why realtime speech is currently blocked for a Chrome session.
 *
 * Any manual-action reason can block speech; the additional literals cover
 * states that need no manual action.
 */
export type GoogleMeetSpeechBlockedReason =
  | GoogleMeetManualActionReason
  // Browser health reported `inCall: false`.
  | "not-in-call"
  // Browser health has no in-call verdict yet, or a manual action carried no reason.
  | "browser-unverified"
  // The session has no Chrome audio bridge attached.
  | "audio-bridge-unavailable";
|
||||
|
||||
export type GoogleMeetChromeHealth = {
|
||||
inCall?: boolean;
|
||||
micMuted?: boolean;
|
||||
manualActionRequired?: boolean;
|
||||
manualActionReason?: GoogleMeetManualActionReason;
|
||||
manualActionMessage?: string;
|
||||
speechReady?: boolean;
|
||||
speechBlockedReason?: GoogleMeetSpeechBlockedReason;
|
||||
speechBlockedMessage?: string;
|
||||
providerConnected?: boolean;
|
||||
realtimeReady?: boolean;
|
||||
audioInputActive?: boolean;
|
||||
|
||||
Reference in New Issue
Block a user