mirror of
https://fastgit.cc/github.com/openclaw/openclaw
synced 2026-04-30 14:02:56 +08:00
fix(media): allow host-local CSV and Markdown uploads via Slack (#67047)
Merged via squash.
Prepared head SHA: 5ce11d0bac
Co-authored-by: Unayung <1853105+Unayung@users.noreply.github.com>
Co-authored-by: frankekn <712880+frankekn@users.noreply.github.com>
Reviewed-by: @frankekn
This commit is contained in:
@@ -51,6 +51,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Claude CLI/sessions: classify `No conversation found with session ID` as `session_expired` so expired CLI-backed conversations clear the stale binding and recover on the next turn. (#65028) Thanks @Ivan-Fn.
|
||||
- Context Engine: gracefully fall back to the legacy engine when a third-party context engine plugin fails at resolution time (unregistered id, factory throw, or contract violation), preventing a full gateway outage on every channel. (#66930) Thanks @openperf.
|
||||
- Control UI/chat: keep optimistic user message cards visible during active sends by deferring same-session history reloads until the active run ends, including aborted and errored runs. (#66997) Thanks @scotthuang and @vincentkoc.
|
||||
- Media/Slack: allow host-local CSV and Markdown uploads only when the fallback buffer actually decodes as text, so real plain-text files work without letting opaque non-text blobs renamed to `.csv` or `.md` slip past the host-read guard. (#67047) Thanks @Unayung.
|
||||
|
||||
## 2026.4.14
|
||||
|
||||
|
||||
@@ -185,6 +185,248 @@ describe("loadWebMedia", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("allows host-read CSV files", async () => {
|
||||
const csvFile = path.join(fixtureRoot, "data.csv");
|
||||
await fs.writeFile(csvFile, "name,value\nfoo,1\nbar,2\n", "utf8");
|
||||
const result = await loadWebMedia(csvFile, {
|
||||
maxBytes: 1024 * 1024,
|
||||
localRoots: "any",
|
||||
readFile: async (filePath) => await fs.readFile(filePath),
|
||||
hostReadCapability: true,
|
||||
});
|
||||
expect(result.kind).toBe("document");
|
||||
expect(result.contentType).toBe("text/csv");
|
||||
});
|
||||
|
||||
it("allows host-read Markdown files", async () => {
|
||||
const mdFile = path.join(fixtureRoot, "notes.md");
|
||||
await fs.writeFile(mdFile, "# Title\n\nSome **bold** text.\n", "utf8");
|
||||
const result = await loadWebMedia(mdFile, {
|
||||
maxBytes: 1024 * 1024,
|
||||
localRoots: "any",
|
||||
readFile: async (filePath) => await fs.readFile(filePath),
|
||||
hostReadCapability: true,
|
||||
});
|
||||
expect(result.kind).toBe("document");
|
||||
expect(result.contentType).toBe("text/markdown");
|
||||
});
|
||||
|
||||
it("rejects binary data disguised as a CSV file", async () => {
|
||||
const fakeCsv = path.join(fixtureRoot, "evil.csv");
|
||||
// Write ZIP magic bytes — file-type detects application/zip (not image, not CSV),
|
||||
// so it is rejected by the host-read policy rather than allowed as an image.
|
||||
await fs.writeFile(fakeCsv, Buffer.from([0x50, 0x4b, 0x03, 0x04]));
|
||||
await expect(
|
||||
loadWebMedia(fakeCsv, {
|
||||
maxBytes: 1024 * 1024,
|
||||
localRoots: "any",
|
||||
readFile: async (filePath) => await fs.readFile(filePath),
|
||||
hostReadCapability: true,
|
||||
}),
|
||||
).rejects.toMatchObject({
|
||||
code: "path-not-allowed",
|
||||
});
|
||||
});
|
||||
|
||||
it.each([
|
||||
{ label: "CSV", fileName: "opaque.csv" },
|
||||
{ label: "Markdown", fileName: "opaque.md" },
|
||||
])("rejects opaque non-NUL binary data disguised as %s", async ({ fileName }) => {
|
||||
const fakeTextFile = path.join(fixtureRoot, fileName);
|
||||
const opaqueBinary = Buffer.alloc(9000);
|
||||
for (let i = 0; i < opaqueBinary.length; i += 1) {
|
||||
opaqueBinary[i] = (i % 255) + 1;
|
||||
}
|
||||
await fs.writeFile(fakeTextFile, opaqueBinary);
|
||||
await expect(
|
||||
loadWebMedia(fakeTextFile, {
|
||||
maxBytes: 1024 * 1024,
|
||||
localRoots: "any",
|
||||
readFile: async (filePath) => await fs.readFile(filePath),
|
||||
hostReadCapability: true,
|
||||
}),
|
||||
).rejects.toMatchObject({
|
||||
code: "path-not-allowed",
|
||||
});
|
||||
});
|
||||
|
||||
it.each([
|
||||
{ label: "CSV", fileName: "prefix-tail.csv" },
|
||||
{ label: "Markdown", fileName: "prefix-tail.md" },
|
||||
])(
|
||||
"rejects %s files with a text prefix and binary tail after the old sample window",
|
||||
async ({ fileName }) => {
|
||||
const fakeTextFile = path.join(fixtureRoot, fileName);
|
||||
const textPrefix = Buffer.from(`name,value\n${"row,1\n".repeat(1400)}`, "utf8");
|
||||
expect(textPrefix.length).toBeGreaterThan(8192);
|
||||
const binaryTail = Buffer.from([0x00, 0xff, 0x10, 0x80]);
|
||||
await fs.writeFile(fakeTextFile, Buffer.concat([textPrefix, binaryTail]));
|
||||
await expect(
|
||||
loadWebMedia(fakeTextFile, {
|
||||
maxBytes: 1024 * 1024,
|
||||
localRoots: "any",
|
||||
readFile: async (filePath) => await fs.readFile(filePath),
|
||||
hostReadCapability: true,
|
||||
}),
|
||||
).rejects.toMatchObject({
|
||||
code: "path-not-allowed",
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
it.each([
|
||||
{
|
||||
label: "CSV",
|
||||
fileName: "punctuation.csv",
|
||||
contentType: "text/csv",
|
||||
body: ",,,,,,,,,,\n",
|
||||
},
|
||||
{
|
||||
label: "Markdown",
|
||||
fileName: "punctuation.md",
|
||||
contentType: "text/markdown",
|
||||
body: "---\n***\n> > >\n",
|
||||
},
|
||||
])(
|
||||
"loads valid punctuation-heavy %s files when host-read capability is enabled",
|
||||
async ({ fileName, contentType, body }) => {
|
||||
const textFile = path.join(fixtureRoot, fileName);
|
||||
await fs.writeFile(textFile, Buffer.from(body, "utf8"));
|
||||
const result = await loadWebMedia(textFile, {
|
||||
maxBytes: 1024 * 1024,
|
||||
localRoots: "any",
|
||||
readFile: async (filePath) => await fs.readFile(filePath),
|
||||
hostReadCapability: true,
|
||||
});
|
||||
expect(result.kind).toBe("document");
|
||||
expect(result.contentType).toBe(contentType);
|
||||
},
|
||||
);
|
||||
|
||||
it.each([
|
||||
{
|
||||
label: "CSV",
|
||||
fileName: "legacy.csv",
|
||||
contentType: "text/csv",
|
||||
body: Buffer.from("caf\xe9,ni\xf1o\n", "latin1"),
|
||||
},
|
||||
{
|
||||
label: "Markdown",
|
||||
fileName: "legacy.md",
|
||||
contentType: "text/markdown",
|
||||
body: Buffer.from("R\xe9sum\xe9\nni\xf1o\n", "latin1"),
|
||||
},
|
||||
])(
|
||||
"loads valid single-byte encoded %s files when host-read capability is enabled",
|
||||
async ({ fileName, contentType, body }) => {
|
||||
const textFile = path.join(fixtureRoot, fileName);
|
||||
await fs.writeFile(textFile, body);
|
||||
const result = await loadWebMedia(textFile, {
|
||||
maxBytes: 1024 * 1024,
|
||||
localRoots: "any",
|
||||
readFile: async (filePath) => await fs.readFile(filePath),
|
||||
hostReadCapability: true,
|
||||
});
|
||||
expect(result.kind).toBe("document");
|
||||
expect(result.contentType).toBe(contentType);
|
||||
},
|
||||
);
|
||||
|
||||
it.each([
|
||||
{ label: "CSV", fileName: "nul-padded.csv" },
|
||||
{ label: "Markdown", fileName: "nul-padded.md" },
|
||||
])("rejects NUL-padded binary data disguised as %s", async ({ fileName }) => {
|
||||
const fakeTextFile = path.join(fixtureRoot, fileName);
|
||||
// Alternating 0x00/0xFF — UTF-8 decode fails (0xFF is invalid UTF-8), then
|
||||
// hasSingleByteTextShape rejects because 0x00 bytes are control chars (< 0x20).
|
||||
const nulPadded = Buffer.alloc(9000);
|
||||
for (let i = 0; i < nulPadded.length; i += 1) {
|
||||
nulPadded[i] = i % 2 === 0 ? 0x00 : 0xff;
|
||||
}
|
||||
await fs.writeFile(fakeTextFile, nulPadded);
|
||||
await expect(
|
||||
loadWebMedia(fakeTextFile, {
|
||||
maxBytes: 1024 * 1024,
|
||||
localRoots: "any",
|
||||
readFile: async (filePath) => await fs.readFile(filePath),
|
||||
hostReadCapability: true,
|
||||
}),
|
||||
).rejects.toMatchObject({
|
||||
code: "path-not-allowed",
|
||||
});
|
||||
});
|
||||
|
||||
it.each([
|
||||
{ label: "CSV", fileName: "bom-binary.csv" },
|
||||
{ label: "Markdown", fileName: "bom-binary.md" },
|
||||
])("rejects UTF-16 BOM-prefixed binary data disguised as %s", async ({ fileName }) => {
|
||||
const fakeTextFile = path.join(fixtureRoot, fileName);
|
||||
// UTF-16LE BOM + repeating 0xFF bytes: if UTF-16 decoding were attempted,
|
||||
// every byte pair would produce a printable code point and pass getTextStats.
|
||||
// With UTF-16 decoding removed, falls through to UTF-8 strict decode (throws
|
||||
// on 0xFF), then hasSingleByteTextShape rejects due to high-byte ratio > 30%.
|
||||
const bom = Buffer.from([0xff, 0xfe]);
|
||||
const garbage = Buffer.alloc(9000, 0xff);
|
||||
await fs.writeFile(fakeTextFile, Buffer.concat([bom, garbage]));
|
||||
await expect(
|
||||
loadWebMedia(fakeTextFile, {
|
||||
maxBytes: 1024 * 1024,
|
||||
localRoots: "any",
|
||||
readFile: async (filePath) => await fs.readFile(filePath),
|
||||
hostReadCapability: true,
|
||||
}),
|
||||
).rejects.toMatchObject({
|
||||
code: "path-not-allowed",
|
||||
});
|
||||
});
|
||||
|
||||
it.each([
|
||||
{ label: "CSV", fileName: "alternating-high.csv" },
|
||||
{ label: "Markdown", fileName: "alternating-high.md" },
|
||||
])("rejects alternating ASCII/high-byte data disguised as %s", async ({ fileName }) => {
|
||||
const fakeTextFile = path.join(fixtureRoot, fileName);
|
||||
// Alternating 0x41 ('A') and 0xFF — exactly 50% ASCII, 50% high bytes.
|
||||
// With the old 50% threshold hasSingleByteTextShape would accept this;
|
||||
// the tightened 70%/30% thresholds must reject it.
|
||||
const mixed = Buffer.alloc(9000);
|
||||
for (let i = 0; i < mixed.length; i += 1) {
|
||||
mixed[i] = i % 2 === 0 ? 0x41 : 0xff;
|
||||
}
|
||||
await fs.writeFile(fakeTextFile, mixed);
|
||||
await expect(
|
||||
loadWebMedia(fakeTextFile, {
|
||||
maxBytes: 1024 * 1024,
|
||||
localRoots: "any",
|
||||
readFile: async (filePath) => await fs.readFile(filePath),
|
||||
hostReadCapability: true,
|
||||
}),
|
||||
).rejects.toMatchObject({
|
||||
code: "path-not-allowed",
|
||||
});
|
||||
});
|
||||
|
||||
it.each([
|
||||
{ label: "CSV", fileName: "high-bytes.csv" },
|
||||
{ label: "Markdown", fileName: "high-bytes.md" },
|
||||
])("rejects high-byte opaque data disguised as %s", async ({ fileName }) => {
|
||||
const fakeTextFile = path.join(fixtureRoot, fileName);
|
||||
const opaqueBinary = Buffer.alloc(9000);
|
||||
for (let i = 0; i < opaqueBinary.length; i += 1) {
|
||||
opaqueBinary[i] = 0xa0 + (i % 96);
|
||||
}
|
||||
await fs.writeFile(fakeTextFile, opaqueBinary);
|
||||
await expect(
|
||||
loadWebMedia(fakeTextFile, {
|
||||
maxBytes: 1024 * 1024,
|
||||
localRoots: "any",
|
||||
readFile: async (filePath) => await fs.readFile(filePath),
|
||||
hostReadCapability: true,
|
||||
}),
|
||||
).rejects.toMatchObject({
|
||||
code: "path-not-allowed",
|
||||
});
|
||||
});
|
||||
|
||||
it("rejects traversal-style canvas media paths before filesystem access", async () => {
|
||||
await expect(
|
||||
loadWebMedia(`${CANVAS_HOST_PATH}/documents/../collection.media/tiny.png`),
|
||||
|
||||
@@ -24,6 +24,7 @@ import {
|
||||
extensionForMime,
|
||||
getFileExtension,
|
||||
kindFromMime,
|
||||
mimeTypeFromFilePath,
|
||||
normalizeMimeType,
|
||||
} from "./mime.js";
|
||||
|
||||
@@ -83,9 +84,95 @@ const HOST_READ_ALLOWED_DOCUMENT_MIMES = new Set([
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"text/csv",
|
||||
"text/markdown",
|
||||
]);
|
||||
// file-type returns undefined (no magic bytes) for plain-text formats like CSV and
|
||||
// Markdown, so host-read needs an explicit "this really decodes as text" fallback.
|
||||
const HOST_READ_TEXT_PLAIN_ALIASES = new Set(["text/csv", "text/markdown"]);
|
||||
const MB = 1024 * 1024;
|
||||
|
||||
function getTextStats(text: string): { printableRatio: number } {
|
||||
if (!text) {
|
||||
return { printableRatio: 0 };
|
||||
}
|
||||
let printable = 0;
|
||||
let control = 0;
|
||||
for (const char of text) {
|
||||
const code = char.codePointAt(0) ?? 0;
|
||||
if (code === 9 || code === 10 || code === 13 || code === 32) {
|
||||
printable += 1;
|
||||
continue;
|
||||
}
|
||||
if (code < 32 || (code >= 0x7f && code <= 0x9f)) {
|
||||
control += 1;
|
||||
continue;
|
||||
}
|
||||
printable += 1;
|
||||
}
|
||||
const total = printable + control;
|
||||
if (total === 0) {
|
||||
return { printableRatio: 0 };
|
||||
}
|
||||
return { printableRatio: printable / total };
|
||||
}
|
||||
|
||||
function hasSingleByteTextShape(buffer: Buffer): boolean {
|
||||
if (buffer.length === 0) {
|
||||
return true;
|
||||
}
|
||||
let asciiText = 0;
|
||||
let control = 0;
|
||||
for (const byte of buffer) {
|
||||
if (byte === 9 || byte === 10 || byte === 13 || (byte >= 0x20 && byte <= 0x7e)) {
|
||||
asciiText += 1;
|
||||
continue;
|
||||
}
|
||||
if (byte < 0x20 || byte === 0x7f) {
|
||||
control += 1;
|
||||
}
|
||||
}
|
||||
const total = buffer.length;
|
||||
const highBytes = total - asciiText - control;
|
||||
return control === 0 && asciiText / total >= 0.7 && highBytes / total <= 0.3;
|
||||
}
|
||||
|
||||
function decodeHostReadText(buffer: Buffer): string | undefined {
|
||||
if (buffer.length === 0) {
|
||||
return "";
|
||||
}
|
||||
// UTF-16 decoding is intentionally omitted: TextDecoder("utf-16le/be") never throws on
|
||||
// arbitrary byte pairs, so every byte pair is a valid (if meaningless) Unicode scalar —
|
||||
// an attacker can prepend a BOM and pass getTextStats with printableRatio≈1.0 on pure
|
||||
// binary garbage. The Latin-1 path below already covers the most common non-UTF-8
|
||||
// real-world case (Excel CSV exports with accented chars like é, ñ) while remaining
|
||||
// safe because hasSingleByteTextShape gates on byte shape *before* any decode.
|
||||
try {
|
||||
return new TextDecoder("utf-8", { fatal: true }).decode(buffer);
|
||||
} catch {
|
||||
if (!hasSingleByteTextShape(buffer)) {
|
||||
return undefined;
|
||||
}
|
||||
// WHATWG latin1 decodes common Excel-style single-byte exports via Windows-1252 mapping.
|
||||
return new TextDecoder("latin1").decode(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
function isValidatedHostReadText(buffer?: Buffer): boolean {
|
||||
if (!buffer) {
|
||||
return false;
|
||||
}
|
||||
if (buffer.length === 0) {
|
||||
return true;
|
||||
}
|
||||
const text = decodeHostReadText(buffer);
|
||||
if (text === undefined) {
|
||||
return false;
|
||||
}
|
||||
const { printableRatio } = getTextStats(text);
|
||||
return printableRatio > 0.95;
|
||||
}
|
||||
|
||||
function formatMb(bytes: number, digits = 2): string {
|
||||
return (bytes / MB).toFixed(digits);
|
||||
}
|
||||
@@ -113,7 +200,23 @@ function assertHostReadMediaAllowed(params: {
|
||||
contentType?: string;
|
||||
filePath?: string;
|
||||
kind: MediaKind | undefined;
|
||||
buffer?: Buffer;
|
||||
}): void {
|
||||
const declaredMime = normalizeMimeType(mimeTypeFromFilePath(params.filePath));
|
||||
const normalizedMime = normalizeMimeType(params.contentType);
|
||||
// For extension-declared plain-text aliases such as .csv/.md, trust only the
|
||||
// text validator path. Some opaque blobs can still produce bogus binary MIME
|
||||
// hits (for example BOM-prefixed 0xFF data sniffing as audio/mpeg), and
|
||||
// host-read should reject those instead of returning early on the sniff.
|
||||
if (declaredMime && HOST_READ_TEXT_PLAIN_ALIASES.has(declaredMime)) {
|
||||
if (!params.sniffedContentType && params.buffer && isValidatedHostReadText(params.buffer)) {
|
||||
return;
|
||||
}
|
||||
throw new LocalMediaAccessError(
|
||||
"path-not-allowed",
|
||||
"hostReadCapability permits only validated plain-text CSV/Markdown documents for local reads",
|
||||
);
|
||||
}
|
||||
const sniffedKind = kindFromMime(params.sniffedContentType);
|
||||
if (sniffedKind === "image" || sniffedKind === "audio" || sniffedKind === "video") {
|
||||
return;
|
||||
@@ -132,7 +235,20 @@ function assertHostReadMediaAllowed(params: {
|
||||
) {
|
||||
return;
|
||||
}
|
||||
const normalizedMime = normalizeMimeType(params.contentType);
|
||||
// CSV / Markdown exception: file-type v22 returns undefined (not "text/plain") for
|
||||
// plain-text buffers that have no binary magic bytes. Allow these formats when:
|
||||
// - sniffedMime is undefined (no binary signature detected by file-type)
|
||||
// - The extension-derived MIME is text/csv or text/markdown (operator intent)
|
||||
// - The buffer decodes as actual text instead of opaque binary bytes
|
||||
if (
|
||||
!sniffedMime &&
|
||||
normalizedMime &&
|
||||
HOST_READ_TEXT_PLAIN_ALIASES.has(normalizedMime) &&
|
||||
params.buffer &&
|
||||
isValidatedHostReadText(params.buffer)
|
||||
) {
|
||||
return;
|
||||
}
|
||||
if (
|
||||
params.kind === "document" &&
|
||||
normalizedMime &&
|
||||
@@ -392,6 +508,7 @@ async function loadWebMediaInternal(
|
||||
contentType: mime,
|
||||
filePath: mediaUrl,
|
||||
kind,
|
||||
buffer: data,
|
||||
});
|
||||
}
|
||||
let fileName = path.basename(mediaUrl) || undefined;
|
||||
|
||||
Reference in New Issue
Block a user