From e71d7d48fb2805fadc643bd154a6005b0cd9ad75 Mon Sep 17 00:00:00 2001 From: peter Date: Wed, 29 Apr 2026 02:58:25 -0400 Subject: [PATCH] fix(telegram): probe video dimensions through sdk Fix Telegram portrait video distortion by probing video dimensions through the shared media helper and passing width/height to sendVideo. Validation: - Targeted Telegram/media tests passed locally. - Plugin SDK API baseline check passed locally. - Formatter and git diff whitespace checks passed locally. CI note: current boundary drift observed on prior run came from existing src/plugin-sdk/discord.ts and src/plugin-sdk/telegram-account.ts, not this PR diff. --- CHANGELOG.md | 1 + .../.generated/plugin-sdk-api-baseline.sha256 | 4 +- docs/plugins/sdk-migration.md | 2 +- docs/plugins/sdk-subpaths.md | 2 +- .../telegram/src/bot/delivery.replies.ts | 10 ++- extensions/telegram/src/bot/delivery.test.ts | 70 +++++++++++++++++++ extensions/telegram/src/send.runtime.ts | 1 + extensions/telegram/src/send.test-harness.ts | 9 +++ extensions/telegram/src/send.test.ts | 69 ++++++++++++++++++ extensions/telegram/src/send.ts | 4 ++ src/media/ffmpeg-exec.ts | 23 ++++-- src/media/video-dimensions.test.ts | 61 ++++++++++++++++ src/media/video-dimensions.ts | 43 ++++++++++++ src/plugin-sdk/media-runtime.ts | 1 + 14 files changed, 288 insertions(+), 12 deletions(-) create mode 100644 src/media/video-dimensions.test.ts create mode 100644 src/media/video-dimensions.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index b44712e3620..797944c37f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai - Agents/tools: clamp `process.poll` waits to 30 seconds, advertise that cap in the tool schema, and honor abort signals while waiting, so long command polls cannot pin agent responsiveness after cancellation. Thanks @vincentkoc. - Plugin SDK: add tracked Discord component-message helpers and a Telegram account-resolution compatibility facade, so existing plugins using those subpaths resolve while new plugins stay on generic channel SDK contracts. Thanks @vincentkoc. - Shared labels: preserve Unicode combining marks and NFC-equivalent accented text in group/channel slug normalization so non-Latin labels no longer lose meaningful characters. Fixes #58932; carries forward #58942 and #58995. Thanks @fengqing-git, @Starhappysh, and @koen666. +- Channels/Telegram: include probed video width and height when sending regular Telegram videos, so portrait clips render with the correct orientation instead of being stretched by clients. (#18915) Thanks @storyarcade. - Docs/Hetzner: clarify that SSH tunnel access requires `AllowTcpForwarding local` before running `ssh -L`, so hardened VPS sshd configs do not block loopback Gateway access. Fixes #54557; carries forward #54564; refs #54954. Thanks @satishkc7, @blackstrype, and @Aftabbs. - Gateway/shutdown: report structured shutdown warnings and HTTP close timeout warnings through `ShutdownResult` while preserving lifecycle hook hardening. Carries forward #41296. Thanks @edenfunf. - Plugins/QA: prebuild the private QA channel runtime before plugin gauntlet source runs so wrapper CPU/RSS measurements are not polluted by private QA dist rebuild work. Thanks @vincentkoc. diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256 index 03be48b490b..903fe6c3357 100644 --- a/docs/.generated/plugin-sdk-api-baseline.sha256 +++ b/docs/.generated/plugin-sdk-api-baseline.sha256 @@ -1,2 +1,2 @@ -21c1ddb7b6ab3da24d51971aca47b76044acf62229351dafc10ec1c0fc9ae1ff plugin-sdk-api-baseline.json -b4e011edd075864353ad238b8eeef0f6837a65f1500f21836aad7547c0c4507c plugin-sdk-api-baseline.jsonl +244286f93cd42484f81061b672437d7a769a61864c72eb3795f9e7abc739f60b plugin-sdk-api-baseline.json +5b7e45d83a0a7862f26f59a32647cdb04289609419e61473284568dd3adf9736 plugin-sdk-api-baseline.jsonl diff --git a/docs/plugins/sdk-migration.md b/docs/plugins/sdk-migration.md index e2f13077054..1033cdb7fb3 100644 --- a/docs/plugins/sdk-migration.md +++ b/docs/plugins/sdk-migration.md @@ -489,7 +489,7 @@ releases. | `plugin-sdk/provider-stream` | Provider stream wrapper helpers | `ProviderStreamFamily`, `buildProviderStreamFamilyHooks`, `composeProviderStreamWrappers`, stream wrapper types, and shared Anthropic/Bedrock/DeepSeek V4/Google/Kilocode/Moonshot/OpenAI/OpenRouter/Z.A.I/MiniMax/Copilot wrapper helpers | | `plugin-sdk/provider-transport-runtime` | Provider transport helpers | Native provider transport helpers such as guarded fetch, transport message transforms, and writable transport event streams | | `plugin-sdk/keyed-async-queue` | Ordered async queue | `KeyedAsyncQueue` | - | `plugin-sdk/media-runtime` | Shared media helpers | Media fetch/transform/store helpers plus media payload builders | + | `plugin-sdk/media-runtime` | Shared media helpers | Media fetch/transform/store helpers, ffprobe-backed video dimension probing, and media payload builders | | `plugin-sdk/media-generation-runtime` | Shared media-generation helpers | Shared failover helpers, candidate selection, and missing-model messaging for image/video/music generation | | `plugin-sdk/media-understanding` | Media-understanding helpers | Media understanding provider types plus provider-facing image/audio helper exports | | `plugin-sdk/text-runtime` | Shared text helpers | Assistant-visible-text stripping, markdown render/chunking/table helpers, redaction helpers, directive-tag helpers, safe-text utilities, and related text/logging helpers | diff --git a/docs/plugins/sdk-subpaths.md b/docs/plugins/sdk-subpaths.md index 23fad984a6f..be679985d56 100644 --- a/docs/plugins/sdk-subpaths.md +++ b/docs/plugins/sdk-subpaths.md @@ -258,7 +258,7 @@ For the plugin authoring guide, see [Plugin SDK overview](/plugins/sdk-overview) | Subpath | Key exports | | --- | --- | - | `plugin-sdk/media-runtime` | Shared media fetch/transform/store helpers plus media payload builders | + | `plugin-sdk/media-runtime` | Shared media fetch/transform/store helpers, ffprobe-backed video dimension probing, and media payload builders | | `plugin-sdk/media-store` | Narrow media store helpers such as `saveMediaBuffer` | | `plugin-sdk/media-generation-runtime` | Shared media-generation failover helpers, candidate selection, and missing-model messaging | | `plugin-sdk/media-understanding` | Media understanding provider types plus provider-facing image/audio helper exports | diff --git a/extensions/telegram/src/bot/delivery.replies.ts b/extensions/telegram/src/bot/delivery.replies.ts index 41c872e5735..f5f9ca074ea 100644 --- a/extensions/telegram/src/bot/delivery.replies.ts +++ b/extensions/telegram/src/bot/delivery.replies.ts @@ -10,8 +10,12 @@ import { toPluginMessageSentEvent, } from "openclaw/plugin-sdk/hook-runtime"; import type { ReplyPayloadDelivery } from "openclaw/plugin-sdk/interactive-runtime"; -import { buildOutboundMediaLoadOptions } from "openclaw/plugin-sdk/media-runtime"; -import { isGifMedia, kindFromMime } from "openclaw/plugin-sdk/media-runtime"; +import { + buildOutboundMediaLoadOptions, + isGifMedia, + kindFromMime, + probeVideoDimensions, +} from "openclaw/plugin-sdk/media-runtime"; import { createOutboundPayloadPlan, projectOutboundPayloadPlanForDelivery, @@ -361,10 +365,12 @@ async function deliverMediaReply(params: { progress: params.progress, }); const shouldAttachButtonsToMedia = isFirstMedia && params.replyMarkup && !followUpText; + const videoDimensions = kind === "video" ? await probeVideoDimensions(media.buffer) : undefined; const mediaParams: Record = { caption: htmlCaption, ...(htmlCaption ? { parse_mode: "HTML" } : {}), ...(shouldAttachButtonsToMedia ? { reply_markup: params.replyMarkup } : {}), + ...(videoDimensions ? { width: videoDimensions.width, height: videoDimensions.height } : {}), ...buildTelegramSendParams({ replyToMessageId, replyQuoteMessageId: params.replyQuoteMessageId, diff --git a/extensions/telegram/src/bot/delivery.test.ts b/extensions/telegram/src/bot/delivery.test.ts index cc06e72c839..02c25c6cdd1 100644 --- a/extensions/telegram/src/bot/delivery.test.ts +++ b/extensions/telegram/src/bot/delivery.test.ts @@ -4,6 +4,9 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; const { loadWebMedia } = vi.hoisted(() => ({ loadWebMedia: vi.fn(), })); +const { probeVideoDimensions } = vi.hoisted(() => ({ + probeVideoDimensions: vi.fn(), +})); const triggerInternalHook = vi.hoisted(() => vi.fn(async () => {})); const messageHookRunner = vi.hoisted(() => ({ hasHooks: vi.fn<(name: string) => boolean>(() => false), @@ -28,6 +31,14 @@ vi.mock("openclaw/plugin-sdk/web-media", () => ({ loadWebMedia: (...args: unknown[]) => loadWebMedia(...args), })); +vi.mock("openclaw/plugin-sdk/media-runtime", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + probeVideoDimensions, + }; +}); + vi.mock("openclaw/plugin-sdk/hook-runtime", async (importOriginal) => { const actual = await importOriginal(); return { @@ -135,6 +146,8 @@ function createVoiceFailureHarness(params: { describe("deliverReplies", () => { beforeEach(() => { loadWebMedia.mockClear(); + probeVideoDimensions.mockReset(); + probeVideoDimensions.mockResolvedValue(undefined); triggerInternalHook.mockReset(); messageHookRunner.hasHooks.mockReset(); messageHookRunner.hasHooks.mockReturnValue(false); @@ -489,6 +502,63 @@ describe("deliverReplies", () => { ); }); + it("passes probed dimensions to video reply sends", async () => { + const runtime = createRuntime(); + const sendVideo = vi.fn().mockResolvedValue({ + message_id: 22, + chat: { id: "123" }, + }); + const bot = createBot({ sendVideo }); + probeVideoDimensions.mockResolvedValueOnce({ width: 720, height: 1280 }); + + mockMediaLoad("video.mp4", "video/mp4", "video"); + + await deliverWith({ + replies: [{ mediaUrl: "https://example.com/video.mp4", text: "hi **boss**" }], + runtime, + bot, + }); + + expect(probeVideoDimensions).toHaveBeenCalledWith(Buffer.from("video")); + expect(sendVideo).toHaveBeenCalledWith( + "123", + expect.anything(), + expect.objectContaining({ + caption: "hi boss", + parse_mode: "HTML", + width: 720, + height: 1280, + }), + ); + }); + + it("does not probe GIF reply animations", async () => { + const runtime = createRuntime(); + const sendAnimation = vi.fn().mockResolvedValue({ + message_id: 23, + chat: { id: "123" }, + }); + const bot = createBot({ sendAnimation }); + + mockMediaLoad("fun.gif", "image/gif", "GIF89a"); + + await deliverWith({ + replies: [{ mediaUrl: "https://example.com/fun.gif", text: "gif" }], + runtime, + bot, + }); + + expect(probeVideoDimensions).not.toHaveBeenCalled(); + expect(sendAnimation).toHaveBeenCalledWith( + "123", + expect.anything(), + expect.not.objectContaining({ + width: expect.any(Number), + height: expect.any(Number), + }), + ); + }); + it("passes mediaLocalRoots to media loading", async () => { const runtime = createRuntime(); const sendPhoto = vi.fn().mockResolvedValue({ diff --git a/extensions/telegram/src/send.runtime.ts b/extensions/telegram/src/send.runtime.ts index 760f15e9c2c..b8d7753dfc9 100644 --- a/extensions/telegram/src/send.runtime.ts +++ b/extensions/telegram/src/send.runtime.ts @@ -8,5 +8,6 @@ export { isGifMedia, kindFromMime, normalizePollInput, + probeVideoDimensions, } from "openclaw/plugin-sdk/media-runtime"; export { loadWebMedia } from "openclaw/plugin-sdk/web-media"; diff --git a/extensions/telegram/src/send.test-harness.ts b/extensions/telegram/src/send.test-harness.ts index 1e0a3164309..d1030f05af9 100644 --- a/extensions/telegram/src/send.test-harness.ts +++ b/extensions/telegram/src/send.test-harness.ts @@ -42,6 +42,10 @@ const { imageMetadata } = vi.hoisted(() => ({ }, })); +const { probeVideoDimensions } = vi.hoisted(() => ({ + probeVideoDimensions: vi.fn(), +})); + const { loadConfig, resolveStorePath } = vi.hoisted(() => ({ loadConfig: vi.fn(() => ({})), resolveStorePath: vi.fn( @@ -90,6 +94,7 @@ type TelegramSendTestMocks = { loadWebMedia: MockFn; maybePersistResolvedTelegramTarget: MockFn; imageMetadata: { width: number | undefined; height: number | undefined }; + probeVideoDimensions: MockFn; }; vi.mock("openclaw/plugin-sdk/web-media", () => ({ @@ -153,6 +158,7 @@ vi.mock("./send.runtime.js", () => ({ loadConfig, loadWebMedia, normalizePollInput, + probeVideoDimensions, requireRuntimeConfig: vi.fn((cfg: unknown) => cfg ?? loadConfig()), resolveMarkdownTableMode, resolveStorePath, @@ -171,6 +177,7 @@ export function getTelegramSendTestMocks(): TelegramSendTestMocks { loadWebMedia, maybePersistResolvedTelegramTarget, imageMetadata, + probeVideoDimensions, }; } @@ -179,6 +186,8 @@ export function installTelegramSendTestHooks() { loadConfig.mockReturnValue({}); resolveStorePath.mockReturnValue("/tmp/openclaw-telegram-send-tests.json"); loadWebMedia.mockReset(); + probeVideoDimensions.mockReset(); + probeVideoDimensions.mockResolvedValue(undefined); imageMetadata.width = 1200; imageMetadata.height = 800; maybePersistResolvedTelegramTarget.mockReset(); diff --git a/extensions/telegram/src/send.test.ts b/extensions/telegram/src/send.test.ts index 0de7ac36547..df717339567 100644 --- a/extensions/telegram/src/send.test.ts +++ b/extensions/telegram/src/send.test.ts @@ -23,6 +23,7 @@ const { loadConfig, loadWebMedia, maybePersistResolvedTelegramTarget, + probeVideoDimensions, } = getTelegramSendTestMocks(); const { buildInlineKeyboard, @@ -978,6 +979,73 @@ describe("sendMessageTelegram", () => { } }); + it("passes probed dimensions to regular video sends", async () => { + const chatId = "123"; + const videoBuffer = Buffer.from("fake-video"); + const sendVideo = vi.fn().mockResolvedValue({ + message_id: 201, + chat: { id: chatId }, + }); + const api = { sendVideo } as unknown as { + sendVideo: typeof sendVideo; + }; + probeVideoDimensions.mockResolvedValueOnce({ width: 720, height: 1280 }); + + mockLoadedMedia({ + buffer: videoBuffer, + contentType: "video/mp4", + fileName: "video.mp4", + }); + + await sendMessageTelegram(chatId, "my caption", { + cfg: TELEGRAM_TEST_CFG, + token: "tok", + api, + mediaUrl: "https://example.com/video.mp4", + }); + + expect(probeVideoDimensions).toHaveBeenCalledWith(videoBuffer); + expect(sendVideo).toHaveBeenCalledWith(chatId, expect.anything(), { + caption: "my caption", + parse_mode: "HTML", + width: 720, + height: 1280, + }); + }); + + it("does not probe video dimensions for video notes", async () => { + const chatId = "123"; + const sendVideoNote = vi.fn().mockResolvedValue({ + message_id: 101, + chat: { id: chatId }, + }); + const sendMessage = vi.fn().mockResolvedValue({ + message_id: 102, + chat: { id: chatId }, + }); + const api = { sendVideoNote, sendMessage } as unknown as { + sendVideoNote: typeof sendVideoNote; + sendMessage: typeof sendMessage; + }; + + mockLoadedMedia({ + buffer: Buffer.from("fake-video"), + contentType: "video/mp4", + fileName: "video.mp4", + }); + + await sendMessageTelegram(chatId, "ignored caption context", { + cfg: TELEGRAM_TEST_CFG, + token: "tok", + api, + mediaUrl: "https://example.com/video.mp4", + asVideoNote: true, + }); + + expect(probeVideoDimensions).not.toHaveBeenCalled(); + expect(sendVideoNote).toHaveBeenCalledWith(chatId, expect.anything(), {}); + }); + it("applies reply markup and thread options to split video-note sends", async () => { const chatId = "123"; const cases: Array<{ @@ -1195,6 +1263,7 @@ describe("sendMessageTelegram", () => { caption: "caption", parse_mode: "HTML", }); + expect(probeVideoDimensions).not.toHaveBeenCalled(); expect(res.messageId).toBe("9"); }); diff --git a/extensions/telegram/src/send.ts b/extensions/telegram/src/send.ts index 7e3552ae94b..7b2b7a6015e 100644 --- a/extensions/telegram/src/send.ts +++ b/extensions/telegram/src/send.ts @@ -36,6 +36,7 @@ import { loadWebMedia, type MediaKind, normalizePollInput, + probeVideoDimensions, type OpenClawConfig, type PollInput, requireRuntimeConfig, @@ -821,10 +822,13 @@ export async function sendMessageTelegram( ...(hasThreadParams ? threadParams : {}), ...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}), }; + const videoDimensions = + kind === "video" && !isVideoNote ? await probeVideoDimensions(media.buffer) : undefined; const mediaParams = { ...(htmlCaption ? { caption: htmlCaption, parse_mode: "HTML" as const } : {}), ...baseMediaParams, ...(opts.silent === true ? { disable_notification: true } : {}), + ...(videoDimensions ? { width: videoDimensions.width, height: videoDimensions.height } : {}), }; const sendMedia = async ( label: string, diff --git a/src/media/ffmpeg-exec.ts b/src/media/ffmpeg-exec.ts index f31b79e01eb..072176584c5 100644 --- a/src/media/ffmpeg-exec.ts +++ b/src/media/ffmpeg-exec.ts @@ -13,6 +13,7 @@ const execFileAsync = promisify(execFile); export type MediaExecOptions = { timeoutMs?: number; maxBufferBytes?: number; + input?: Buffer | string; }; function resolveExecOptions( @@ -41,12 +42,22 @@ function requireSystemBin(name: string): string { } export async function runFfprobe(args: string[], options?: MediaExecOptions): Promise { - const { stdout } = await execFileAsync( - requireSystemBin("ffprobe"), - args, - resolveExecOptions(MEDIA_FFPROBE_TIMEOUT_MS, options), - ); - return stdout.toString(); + const execOptions = resolveExecOptions(MEDIA_FFPROBE_TIMEOUT_MS, options); + if (options?.input == null) { + const { stdout } = await execFileAsync(requireSystemBin("ffprobe"), args, execOptions); + return stdout.toString(); + } + + return await new Promise((resolve, reject) => { + const proc = execFile(requireSystemBin("ffprobe"), args, execOptions, (err, stdout) => { + if (err) { + reject(err); + return; + } + resolve(stdout.toString()); + }); + proc.stdin?.end(options.input); + }); } export async function runFfmpeg(args: string[], options?: MediaExecOptions): Promise { diff --git a/src/media/video-dimensions.test.ts b/src/media/video-dimensions.test.ts new file mode 100644 index 00000000000..a7e09a380c9 --- /dev/null +++ b/src/media/video-dimensions.test.ts @@ -0,0 +1,61 @@ +import { describe, expect, it, vi } from "vitest"; + +const { runFfprobe } = vi.hoisted(() => ({ + runFfprobe: vi.fn(), +})); + +vi.mock("./ffmpeg-exec.js", () => ({ + runFfprobe, +})); + +const { parseFfprobeVideoDimensions, probeVideoDimensions } = await import("./video-dimensions.js"); + +describe("parseFfprobeVideoDimensions", () => { + it("returns positive integer dimensions from ffprobe JSON", () => { + expect( + parseFfprobeVideoDimensions(JSON.stringify({ streams: [{ width: 720, height: 1280 }] })), + ).toEqual({ width: 720, height: 1280 }); + }); + + it("ignores missing or invalid dimensions", () => { + expect(parseFfprobeVideoDimensions(JSON.stringify({ streams: [] }))).toBeUndefined(); + expect( + parseFfprobeVideoDimensions(JSON.stringify({ streams: [{ width: 0, height: 1280 }] })), + ).toBeUndefined(); + expect( + parseFfprobeVideoDimensions(JSON.stringify({ streams: [{ width: 720.5, height: 1280 }] })), + ).toBeUndefined(); + }); +}); + +describe("probeVideoDimensions", () => { + it("probes video dimensions through ffprobe stdin", async () => { + const buffer = Buffer.from("video"); + runFfprobe.mockResolvedValueOnce(JSON.stringify({ streams: [{ width: 720, height: 1280 }] })); + + await expect(probeVideoDimensions(buffer)).resolves.toEqual({ width: 720, height: 1280 }); + + expect(runFfprobe).toHaveBeenCalledWith( + [ + "-v", + "error", + "-select_streams", + "v:0", + "-show_entries", + "stream=width,height", + "-of", + "json", + "pipe:0", + ], + { input: buffer }, + ); + }); + + it("falls back when ffprobe fails or returns malformed output", async () => { + runFfprobe.mockRejectedValueOnce(new Error("missing ffprobe")); + await expect(probeVideoDimensions(Buffer.from("video"))).resolves.toBeUndefined(); + + runFfprobe.mockResolvedValueOnce("{"); + await expect(probeVideoDimensions(Buffer.from("video"))).resolves.toBeUndefined(); + }); +}); diff --git a/src/media/video-dimensions.ts b/src/media/video-dimensions.ts new file mode 100644 index 00000000000..fdc797e6d32 --- /dev/null +++ b/src/media/video-dimensions.ts @@ -0,0 +1,43 @@ +import { runFfprobe } from "./ffmpeg-exec.js"; + +export type VideoDimensions = { + width: number; + height: number; +}; + +function parsePositiveDimension(value: unknown): number | undefined { + if (typeof value !== "number" || !Number.isInteger(value) || value <= 0) { + return undefined; + } + return value; +} + +export function parseFfprobeVideoDimensions(stdout: string): VideoDimensions | undefined { + const parsed = JSON.parse(stdout) as { streams?: Array<{ width?: unknown; height?: unknown }> }; + const stream = parsed.streams?.[0]; + const width = parsePositiveDimension(stream?.width); + const height = parsePositiveDimension(stream?.height); + return width && height ? { width, height } : undefined; +} + +export async function probeVideoDimensions(buffer: Buffer): Promise { + try { + const stdout = await runFfprobe( + [ + "-v", + "error", + "-select_streams", + "v:0", + "-show_entries", + "stream=width,height", + "-of", + "json", + "pipe:0", + ], + { input: buffer }, + ); + return parseFfprobeVideoDimensions(stdout); + } catch { + return undefined; + } +} diff --git a/src/plugin-sdk/media-runtime.ts b/src/plugin-sdk/media-runtime.ts index 9276333a607..400031063de 100644 --- a/src/plugin-sdk/media-runtime.ts +++ b/src/plugin-sdk/media-runtime.ts @@ -20,6 +20,7 @@ export * from "../media/qr-terminal.ts"; export * from "../media/read-response-with-limit.js"; export * from "../media/store.js"; export * from "../media/temp-files.js"; +export * from "../media/video-dimensions.js"; export { resolveChannelMediaMaxBytes } from "../channels/plugins/media-limits.js"; export * from "./agent-media-payload.js"; export * from "../media-understanding/audio-preflight.ts";