fix(gateway): enforce localRoots containment on webchat audio embedding path [AI-assisted] (#67298)

* fix: address issue

* fix: address review feedback

* fix: address PR review feedback

* docs: add changelog entry for PR merge
This commit is contained in:
Pavan Kumar Gondhi
2026-04-15 22:54:06 +05:30
committed by GitHub
parent 7c6f2c0a5a
commit 6e58f1f9f5
4 changed files with 105 additions and 26 deletions

View File

@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- fix(gateway): enforce localRoots containment on webchat audio embedding path [AI-assisted]. (#67298) Thanks @pgondhi987.
- fix(matrix): block DM pairing-store entries from authorizing room control commands [AI-assisted]. (#67294) Thanks @pgondhi987.
- Docker/build: verify `@matrix-org/matrix-sdk-crypto-nodejs` native bindings with `find` under `node_modules` instead of a hardcoded `.pnpm/...` path so pnpm v10+ virtual-store layouts no longer fail the image build. (#67143) Thanks @ly85206559.
- Matrix/E2EE: keep startup bootstrap conservative for passwordless token-auth bots, still attempt the guarded repair pass without requiring `channels.matrix.password`, and document the remaining password-UIA limitation. (#66228) Thanks @SARAMALI15792.

View File

@@ -3,6 +3,7 @@ import os from "node:os";
import path from "node:path";
import { pathToFileURL } from "node:url";
import { afterEach, describe, expect, it, vi } from "vitest";
import { getDefaultLocalRoots } from "../../media/local-media-access.js";
import { buildWebchatAudioContentBlocksFromReplyPayloads } from "./chat-webchat-media.js";
describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
@@ -15,12 +16,15 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
tmpDir = undefined;
});
it("embeds a local audio file as a base64 gateway chat block", () => {
it("embeds a local audio file as a base64 gateway chat block when it is under localRoots", async () => {
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-webchat-audio-"));
const audioPath = path.join(tmpDir, "clip.mp3");
fs.writeFileSync(audioPath, Buffer.from([0xff, 0xfb, 0x90, 0x00]));
const blocks = buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: audioPath }]);
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
[{ mediaUrl: audioPath }],
{ localRoots: [tmpDir] },
);
expect(blocks).toHaveLength(1);
const block = blocks[0] as {
@@ -36,48 +40,90 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
);
});
it("skips remote URLs", () => {
const blocks = buildWebchatAudioContentBlocksFromReplyPayloads([
it("skips remote URLs", async () => {
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads([
{ mediaUrl: "https://example.com/a.mp3" },
]);
expect(blocks).toHaveLength(0);
});
it("skips non-audio local files", () => {
it("skips non-audio local files", async () => {
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-webchat-audio-"));
const imagePath = path.join(tmpDir, "clip.png");
fs.writeFileSync(imagePath, Buffer.from([0x89, 0x50, 0x4e, 0x47]));
const blocks = buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: imagePath }]);
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
[{ mediaUrl: imagePath }],
{ localRoots: [tmpDir] },
);
expect(blocks).toHaveLength(0);
});
it("dedupes repeated paths", () => {
it("dedupes repeated paths", async () => {
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-webchat-audio-"));
const audioPath = path.join(tmpDir, "clip.mp3");
fs.writeFileSync(audioPath, Buffer.from([0x00]));
const blocks = buildWebchatAudioContentBlocksFromReplyPayloads([
{ mediaUrl: audioPath },
{ mediaUrl: audioPath },
]);
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
[{ mediaUrl: audioPath }, { mediaUrl: audioPath }],
{ localRoots: [tmpDir] },
);
expect(blocks).toHaveLength(1);
});
it("embeds file:// URLs pointing at a local file", () => {
it("embeds file:// URLs pointing at a local file within localRoots", async () => {
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-webchat-audio-"));
const audioPath = path.join(tmpDir, "clip.mp3");
fs.writeFileSync(audioPath, Buffer.from([0x01]));
const fileUrl = pathToFileURL(audioPath).href;
const blocks = buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: fileUrl }]);
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: fileUrl }], {
localRoots: [tmpDir],
});
expect(blocks).toHaveLength(1);
expect((blocks[0] as { type?: string }).type).toBe("audio");
});
it("does not read file contents when stat reports size over the cap", () => {
it("rejects a local audio file outside configured localRoots", async () => {
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-webchat-audio-"));
const allowedRoot = path.join(tmpDir, "allowed");
const outsideRoot = path.join(tmpDir, "outside");
fs.mkdirSync(allowedRoot, { recursive: true });
fs.mkdirSync(outsideRoot, { recursive: true });
const audioPath = path.join(outsideRoot, "clip.mp3");
fs.writeFileSync(audioPath, Buffer.from([0x03]));
const onLocalAudioAccessDenied = vi.fn();
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
[{ mediaUrl: audioPath }],
{
localRoots: [allowedRoot],
onLocalAudioAccessDenied,
},
);
expect(blocks).toHaveLength(0);
expect(onLocalAudioAccessDenied).toHaveBeenCalledOnce();
});
it("falls back to default localRoots when explicit roots are omitted", async () => {
const [defaultRoot] = getDefaultLocalRoots();
expect(defaultRoot).toBeTruthy();
fs.mkdirSync(defaultRoot, { recursive: true });
tmpDir = fs.mkdtempSync(path.join(defaultRoot, "openclaw-webchat-audio-default-"));
const audioPath = path.join(tmpDir, "clip.mp3");
fs.writeFileSync(audioPath, Buffer.from([0x04]));
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: audioPath }]);
expect(blocks).toHaveLength(1);
expect((blocks[0] as { type?: string }).type).toBe("audio");
});
it("does not read file contents when stat reports size over the cap", async () => {
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-webchat-audio-"));
const audioPath = path.join(tmpDir, "huge.mp3");
fs.writeFileSync(audioPath, Buffer.from([0x02]));
@@ -91,7 +137,10 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => {
});
const readSpy = vi.spyOn(fs, "readFileSync");
const blocks = buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: audioPath }]);
const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads(
[{ mediaUrl: audioPath }],
{ localRoots: [tmpDir] },
);
expect(blocks).toHaveLength(0);
expect(readSpy).not.toHaveBeenCalled();

View File

@@ -2,6 +2,7 @@ import fs from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";
import type { ReplyPayload } from "../../auto-reply/reply-payload.js";
import { assertLocalMediaAllowed, LocalMediaAccessError } from "../../media/local-media-access.js";
import { isAudioFileName } from "../../media/mime.js";
import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js";
import { normalizeLowercaseStringOrEmpty } from "../../shared/string-coerce.js";
@@ -20,6 +21,11 @@ const MIME_BY_EXT: Record<string, string> = {
".webm": "audio/webm",
};
type WebchatAudioEmbeddingOptions = {
localRoots?: readonly string[];
onLocalAudioAccessDenied?: (err: LocalMediaAccessError) => void;
};
/** Map `mediaUrl` strings to an absolute filesystem path for local embedding (plain paths or `file:` URLs). */
function resolveLocalMediaPathForEmbedding(raw: string): string | null {
const trimmed = raw.trim();
@@ -50,7 +56,10 @@ function resolveLocalMediaPathForEmbedding(raw: string): string | null {
}
/** Returns a readable local file path when it is a regular file and within the size cap (single stat before read). */
function resolveLocalAudioFileForEmbedding(raw: string): string | null {
async function resolveLocalAudioFileForEmbedding(
raw: string,
options: WebchatAudioEmbeddingOptions | undefined,
): Promise<string | null> {
const resolved = resolveLocalMediaPathForEmbedding(raw);
if (!resolved) {
return null;
@@ -59,12 +68,16 @@ function resolveLocalAudioFileForEmbedding(raw: string): string | null {
return null;
}
try {
await assertLocalMediaAllowed(resolved, options?.localRoots);
const st = fs.statSync(resolved);
if (!st.isFile() || st.size > MAX_WEBCHAT_AUDIO_BYTES) {
return null;
}
return resolved;
} catch {
} catch (err) {
if (err instanceof LocalMediaAccessError) {
options?.onLocalAudioAccessDenied?.(err);
}
return null;
}
}
@@ -78,9 +91,10 @@ function mimeTypeForPath(filePath: string): string {
* Build Control UI / transcript `content` blocks for local TTS (or other) audio files
* referenced by slash-command / agent replies when the webchat path only had text aggregation.
*/
export function buildWebchatAudioContentBlocksFromReplyPayloads(
export async function buildWebchatAudioContentBlocksFromReplyPayloads(
payloads: ReplyPayload[],
): Array<Record<string, unknown>> {
options?: WebchatAudioEmbeddingOptions,
): Promise<Array<Record<string, unknown>>> {
const seen = new Set<string>();
const blocks: Array<Record<string, unknown>> = [];
for (const payload of payloads) {
@@ -90,7 +104,7 @@ export function buildWebchatAudioContentBlocksFromReplyPayloads(
if (!url) {
continue;
}
const resolved = resolveLocalAudioFileForEmbedding(url);
const resolved = await resolveLocalAudioFileForEmbedding(url, options);
if (!resolved || seen.has(resolved)) {
continue;
}

View File

@@ -13,6 +13,7 @@ import type { MsgContext } from "../../auto-reply/templating.js";
import { extractCanvasFromText } from "../../chat/canvas-render.js";
import { resolveSessionFilePath } from "../../config/sessions.js";
import { jsonUtf8Bytes } from "../../infra/json-utf8-bytes.js";
import { getAgentScopedMediaLocalRoots } from "../../media/local-roots.js";
import { isAudioFileName } from "../../media/mime.js";
import type { PromptImageOrderEntry } from "../../media/prompt-image-order.js";
import { type SavedMedia, saveMediaBuffer } from "../../media/store.js";
@@ -121,10 +122,19 @@ function isMediaBearingPayload(payload: ReplyPayload): boolean {
return false;
}
function buildWebchatAudioOnlyAssistantMessage(
async function buildWebchatAudioOnlyAssistantMessage(
payloads: ReplyPayload[],
): { content: Array<Record<string, unknown>>; transcriptText: string } | null {
const audioBlocks = buildWebchatAudioContentBlocksFromReplyPayloads(payloads);
options?: {
localRoots?: readonly string[];
onLocalAudioAccessDenied?: (message: string) => void;
},
): Promise<{ content: Array<Record<string, unknown>>; transcriptText: string } | null> {
const audioBlocks = await buildWebchatAudioContentBlocksFromReplyPayloads(payloads, {
localRoots: options?.localRoots,
onLocalAudioAccessDenied: (err) => {
options?.onLocalAudioAccessDenied?.(formatForLog(err));
},
});
if (audioBlocks.length === 0) {
return null;
}
@@ -2075,11 +2085,16 @@ export const chatHandlers: GatewayRequestHandlers = {
savedImages: await persistedImagesPromise,
});
};
const appendWebchatAgentAudioTranscriptIfNeeded = (payload: ReplyPayload) => {
const appendWebchatAgentAudioTranscriptIfNeeded = async (payload: ReplyPayload) => {
if (!agentRunStarted || appendedWebchatAgentAudio || !isMediaBearingPayload(payload)) {
return;
}
const audioMessage = buildWebchatAudioOnlyAssistantMessage([payload]);
const audioMessage = await buildWebchatAudioOnlyAssistantMessage([payload], {
localRoots: getAgentScopedMediaLocalRoots(cfg, agentId),
onLocalAudioAccessDenied: (message) => {
context.logGateway.warn(`webchat audio embedding denied local path: ${message}`);
},
});
if (!audioMessage) {
return;
}
@@ -2113,7 +2128,7 @@ export const chatHandlers: GatewayRequestHandlers = {
case "block":
case "final":
deliveredReplies.push({ payload, kind: info.kind });
appendWebchatAgentAudioTranscriptIfNeeded(payload);
await appendWebchatAgentAudioTranscriptIfNeeded(payload);
break;
case "tool":
// Tool results that carry audio (e.g. the TTS tool) must be promoted