From 6e58f1f9f54bca1fea1268ec0ee4c01a2af03dde Mon Sep 17 00:00:00 2001 From: Pavan Kumar Gondhi Date: Wed, 15 Apr 2026 22:54:06 +0530 Subject: [PATCH] fix(gateway): enforce localRoots containment on webchat audio embedding path [AI-assisted] (#67298) * fix: address issue * fix: address review feedback * fix: address PR review feedback * docs: add changelog entry for PR merge --- CHANGELOG.md | 1 + .../server-methods/chat-webchat-media.test.ts | 79 +++++++++++++++---- .../server-methods/chat-webchat-media.ts | 24 ++++-- src/gateway/server-methods/chat.ts | 27 +++++-- 4 files changed, 105 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 045d9cce59d..8dce89f66ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- fix(gateway): enforce localRoots containment on webchat audio embedding path [AI-assisted]. (#67298) Thanks @pgondhi987. - fix(matrix): block DM pairing-store entries from authorizing room control commands [AI-assisted]. (#67294) Thanks @pgondhi987. - Docker/build: verify `@matrix-org/matrix-sdk-crypto-nodejs` native bindings with `find` under `node_modules` instead of a hardcoded `.pnpm/...` path so pnpm v10+ virtual-store layouts no longer fail the image build. (#67143) thanks @ly85206559. - Matrix/E2EE: keep startup bootstrap conservative for passwordless token-auth bots, still attempt the guarded repair pass without requiring `channels.matrix.password`, and document the remaining password-UIA limitation. (#66228) Thanks @SARAMALI15792. diff --git a/src/gateway/server-methods/chat-webchat-media.test.ts b/src/gateway/server-methods/chat-webchat-media.test.ts index 29698942e73..747ba96beab 100644 --- a/src/gateway/server-methods/chat-webchat-media.test.ts +++ b/src/gateway/server-methods/chat-webchat-media.test.ts @@ -3,6 +3,7 @@ import os from "node:os"; import path from "node:path"; import { pathToFileURL } from "node:url"; import { afterEach, describe, expect, it, vi } from "vitest"; +import { getDefaultLocalRoots } from "../../media/local-media-access.js"; import { buildWebchatAudioContentBlocksFromReplyPayloads } from "./chat-webchat-media.js"; describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => { @@ -15,12 +16,15 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => { tmpDir = undefined; }); - it("embeds a local audio file as a base64 gateway chat block", () => { + it("embeds a local audio file as a base64 gateway chat block when it is under localRoots", async () => { tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-webchat-audio-")); const audioPath = path.join(tmpDir, "clip.mp3"); fs.writeFileSync(audioPath, Buffer.from([0xff, 0xfb, 0x90, 0x00])); - const blocks = buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: audioPath }]); + const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads( + [{ mediaUrl: audioPath }], + { localRoots: [tmpDir] }, + ); expect(blocks).toHaveLength(1); const block = blocks[0] as { @@ -36,48 +40,90 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => { ); }); - it("skips remote URLs", () => { - const blocks = buildWebchatAudioContentBlocksFromReplyPayloads([ + it("skips remote URLs", async () => { + const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads([ { mediaUrl: "https://example.com/a.mp3" }, ]); expect(blocks).toHaveLength(0); }); - it("skips non-audio local files", () => { + it("skips non-audio local files", async () => { tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-webchat-audio-")); const imagePath = path.join(tmpDir, "clip.png"); fs.writeFileSync(imagePath, Buffer.from([0x89, 0x50, 0x4e, 0x47])); - const blocks = buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: imagePath }]); + const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads( + [{ mediaUrl: imagePath }], + { localRoots: [tmpDir] }, + ); expect(blocks).toHaveLength(0); }); - it("dedupes repeated paths", () => { + it("dedupes repeated paths", async () => { tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-webchat-audio-")); const audioPath = path.join(tmpDir, "clip.mp3"); fs.writeFileSync(audioPath, Buffer.from([0x00])); - const blocks = buildWebchatAudioContentBlocksFromReplyPayloads([ - { mediaUrl: audioPath }, - { mediaUrl: audioPath }, - ]); + const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads( + [{ mediaUrl: audioPath }, { mediaUrl: audioPath }], + { localRoots: [tmpDir] }, + ); expect(blocks).toHaveLength(1); }); - it("embeds file:// URLs pointing at a local file", () => { + it("embeds file:// URLs pointing at a local file within localRoots", async () => { tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-webchat-audio-")); const audioPath = path.join(tmpDir, "clip.mp3"); fs.writeFileSync(audioPath, Buffer.from([0x01])); const fileUrl = pathToFileURL(audioPath).href; - const blocks = buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: fileUrl }]); + const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: fileUrl }], { + localRoots: [tmpDir], + }); expect(blocks).toHaveLength(1); expect((blocks[0] as { type?: string }).type).toBe("audio"); }); - it("does not read file contents when stat reports size over the cap", () => { + it("rejects a local audio file outside configured localRoots", async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-webchat-audio-")); + const allowedRoot = path.join(tmpDir, "allowed"); + const outsideRoot = path.join(tmpDir, "outside"); + fs.mkdirSync(allowedRoot, { recursive: true }); + fs.mkdirSync(outsideRoot, { recursive: true }); + const audioPath = path.join(outsideRoot, "clip.mp3"); + fs.writeFileSync(audioPath, Buffer.from([0x03])); + + const onLocalAudioAccessDenied = vi.fn(); + const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads( + [{ mediaUrl: audioPath }], + { + localRoots: [allowedRoot], + onLocalAudioAccessDenied, + }, + ); + + expect(blocks).toHaveLength(0); + expect(onLocalAudioAccessDenied).toHaveBeenCalledOnce(); + }); + + it("falls back to default localRoots when explicit roots are omitted", async () => { + const [defaultRoot] = getDefaultLocalRoots(); + expect(defaultRoot).toBeTruthy(); + + fs.mkdirSync(defaultRoot, { recursive: true }); + tmpDir = fs.mkdtempSync(path.join(defaultRoot, "openclaw-webchat-audio-default-")); + const audioPath = path.join(tmpDir, "clip.mp3"); + fs.writeFileSync(audioPath, Buffer.from([0x04])); + + const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: audioPath }]); + + expect(blocks).toHaveLength(1); + expect((blocks[0] as { type?: string }).type).toBe("audio"); + }); + + it("does not read file contents when stat reports size over the cap", async () => { tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-webchat-audio-")); const audioPath = path.join(tmpDir, "huge.mp3"); fs.writeFileSync(audioPath, Buffer.from([0x02])); @@ -91,7 +137,10 @@ describe("buildWebchatAudioContentBlocksFromReplyPayloads", () => { }); const readSpy = vi.spyOn(fs, "readFileSync"); - const blocks = buildWebchatAudioContentBlocksFromReplyPayloads([{ mediaUrl: audioPath }]); + const blocks = await buildWebchatAudioContentBlocksFromReplyPayloads( + [{ mediaUrl: audioPath }], + { localRoots: [tmpDir] }, + ); expect(blocks).toHaveLength(0); expect(readSpy).not.toHaveBeenCalled(); diff --git a/src/gateway/server-methods/chat-webchat-media.ts b/src/gateway/server-methods/chat-webchat-media.ts index c6ee258f4ea..ee52c8c4b80 100644 --- a/src/gateway/server-methods/chat-webchat-media.ts +++ b/src/gateway/server-methods/chat-webchat-media.ts @@ -2,6 +2,7 @@ import fs from "node:fs"; import path from "node:path"; import { fileURLToPath } from "node:url"; import type { ReplyPayload } from "../../auto-reply/reply-payload.js"; +import { assertLocalMediaAllowed, LocalMediaAccessError } from "../../media/local-media-access.js"; import { isAudioFileName } from "../../media/mime.js"; import { resolveSendableOutboundReplyParts } from "../../plugin-sdk/reply-payload.js"; import { normalizeLowercaseStringOrEmpty } from "../../shared/string-coerce.js"; @@ -20,6 +21,11 @@ const MIME_BY_EXT: Record = { ".webm": "audio/webm", }; +type WebchatAudioEmbeddingOptions = { + localRoots?: readonly string[]; + onLocalAudioAccessDenied?: (err: LocalMediaAccessError) => void; +}; + /** Map `mediaUrl` strings to an absolute filesystem path for local embedding (plain paths or `file:` URLs). */ function resolveLocalMediaPathForEmbedding(raw: string): string | null { const trimmed = raw.trim(); @@ -50,7 +56,10 @@ function resolveLocalMediaPathForEmbedding(raw: string): string | null { } /** Returns a readable local file path when it is a regular file and within the size cap (single stat before read). */ -function resolveLocalAudioFileForEmbedding(raw: string): string | null { +async function resolveLocalAudioFileForEmbedding( + raw: string, + options: WebchatAudioEmbeddingOptions | undefined, +): Promise { const resolved = resolveLocalMediaPathForEmbedding(raw); if (!resolved) { return null; @@ -59,12 +68,16 @@ function resolveLocalAudioFileForEmbedding(raw: string): string | null { return null; } try { + await assertLocalMediaAllowed(resolved, options?.localRoots); const st = fs.statSync(resolved); if (!st.isFile() || st.size > MAX_WEBCHAT_AUDIO_BYTES) { return null; } return resolved; - } catch { + } catch (err) { + if (err instanceof LocalMediaAccessError) { + options?.onLocalAudioAccessDenied?.(err); + } return null; } } @@ -78,9 +91,10 @@ function mimeTypeForPath(filePath: string): string { * Build Control UI / transcript `content` blocks for local TTS (or other) audio files * referenced by slash-command / agent replies when the webchat path only had text aggregation. */ -export function buildWebchatAudioContentBlocksFromReplyPayloads( +export async function buildWebchatAudioContentBlocksFromReplyPayloads( payloads: ReplyPayload[], -): Array> { + options?: WebchatAudioEmbeddingOptions, +): Promise>> { const seen = new Set(); const blocks: Array> = []; for (const payload of payloads) { @@ -90,7 +104,7 @@ export function buildWebchatAudioContentBlocksFromReplyPayloads( if (!url) { continue; } - const resolved = resolveLocalAudioFileForEmbedding(url); + const resolved = await resolveLocalAudioFileForEmbedding(url, options); if (!resolved || seen.has(resolved)) { continue; } diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts index 5f7bd493f64..ce7b63cbfde 100644 --- a/src/gateway/server-methods/chat.ts +++ b/src/gateway/server-methods/chat.ts @@ -13,6 +13,7 @@ import type { MsgContext } from "../../auto-reply/templating.js"; import { extractCanvasFromText } from "../../chat/canvas-render.js"; import { resolveSessionFilePath } from "../../config/sessions.js"; import { jsonUtf8Bytes } from "../../infra/json-utf8-bytes.js"; +import { getAgentScopedMediaLocalRoots } from "../../media/local-roots.js"; import { isAudioFileName } from "../../media/mime.js"; import type { PromptImageOrderEntry } from "../../media/prompt-image-order.js"; import { type SavedMedia, saveMediaBuffer } from "../../media/store.js"; @@ -121,10 +122,19 @@ function isMediaBearingPayload(payload: ReplyPayload): boolean { return false; } -function buildWebchatAudioOnlyAssistantMessage( +async function buildWebchatAudioOnlyAssistantMessage( payloads: ReplyPayload[], -): { content: Array>; transcriptText: string } | null { - const audioBlocks = buildWebchatAudioContentBlocksFromReplyPayloads(payloads); + options?: { + localRoots?: readonly string[]; + onLocalAudioAccessDenied?: (message: string) => void; + }, +): Promise<{ content: Array>; transcriptText: string } | null> { + const audioBlocks = await buildWebchatAudioContentBlocksFromReplyPayloads(payloads, { + localRoots: options?.localRoots, + onLocalAudioAccessDenied: (err) => { + options?.onLocalAudioAccessDenied?.(formatForLog(err)); + }, + }); if (audioBlocks.length === 0) { return null; } @@ -2075,11 +2085,16 @@ export const chatHandlers: GatewayRequestHandlers = { savedImages: await persistedImagesPromise, }); }; - const appendWebchatAgentAudioTranscriptIfNeeded = (payload: ReplyPayload) => { + const appendWebchatAgentAudioTranscriptIfNeeded = async (payload: ReplyPayload) => { if (!agentRunStarted || appendedWebchatAgentAudio || !isMediaBearingPayload(payload)) { return; } - const audioMessage = buildWebchatAudioOnlyAssistantMessage([payload]); + const audioMessage = await buildWebchatAudioOnlyAssistantMessage([payload], { + localRoots: getAgentScopedMediaLocalRoots(cfg, agentId), + onLocalAudioAccessDenied: (message) => { + context.logGateway.warn(`webchat audio embedding denied local path: ${message}`); + }, + }); if (!audioMessage) { return; } @@ -2113,7 +2128,7 @@ export const chatHandlers: GatewayRequestHandlers = { case "block": case "final": deliveredReplies.push({ payload, kind: info.kind }); - appendWebchatAgentAudioTranscriptIfNeeded(payload); + await appendWebchatAgentAudioTranscriptIfNeeded(payload); break; case "tool": // Tool results that carry audio (e.g. the TTS tool) must be promoted