From b2f51e42e16bb9bddad4aebecab357cd4c996579 Mon Sep 17 00:00:00 2001 From: rianli Date: Fri, 6 Mar 2026 01:08:28 +0800 Subject: [PATCH] feat: enhance media handling with upload cache, video support, and retry - Add upload-cache.ts: skip duplicate uploads via content hash (like Telegram file_id) - Add media-tags.ts: auto-fix common LLM tag misspellings (qqimage->qqimg, etc.) - Add file-utils.ts: async file I/O, size validation (20MB limit) - Add tag support for video messages - Add upload retry with exponential backoff for network resilience - Add file_name param to upload APIs for proper file naming - Improve STT/TTS config with two-level fallback (channel -> framework) - Optimize voice handling: prefer voice_wav_url, skip SILK->WAV when possible - Filter large base64 from API logs; switch to async file I/O in outbound - Update SKILL.md and README docs --- README.md | 61 +++ README.zh.md | 61 +++ skills/qqbot-media/SKILL.md | 100 +++-- src/api.ts | 219 +++++++++- src/gateway.ts | 794 ++++++++++++++++++++++++++++-------- src/outbound.ts | 619 +++++++++++++++++++++++++--- src/types.ts | 30 ++ src/utils/audio-convert.ts | 291 ++++++++++++- src/utils/file-utils.ts | 122 ++++++ src/utils/media-tags.ts | 107 +++++ src/utils/upload-cache.ts | 128 ++++++ 11 files changed, 2256 insertions(+), 276 deletions(-) create mode 100644 src/utils/file-utils.ts create mode 100644 src/utils/media-tags.ts create mode 100644 src/utils/upload-cache.ts diff --git a/README.md b/README.md index 5a354b1..0b3c53c 100644 --- a/README.md +++ b/README.md @@ -164,6 +164,67 @@ Edit ~/.openclaw/openclaw.json: } ``` +## Voice Capabilities (Optional) + +### STT (Speech-to-Text) — Transcribe incoming voice messages + +STT reuses your existing model provider configuration. 
Add an audio model entry in `tools.media.audio.models`: + +``` json +{ + "tools": { + "media": { + "audio": { + "models": [ + { + "provider": "openai", + "model": "whisper-1" + } + ] + } + } + }, + "models": { + "providers": { + "openai": { + "baseUrl": "https://api.openai.com/v1", + "apiKey": "sk-xxx" + } + } + } +} +``` + +- `provider` — references a key in `models.providers` to inherit `baseUrl` and `apiKey` (default: `"openai"`) +- `model` — STT model name (default: `"whisper-1"`) +- You can also set `baseUrl` / `apiKey` directly in the audio model entry to override the provider defaults +- When configured, incoming voice messages are automatically converted (SILK→WAV) and transcribed + +### TTS (Text-to-Speech) — Send voice messages + +Configure TTS under `channels.qqbot.tts`: + +``` json +{ + "channels": { + "qqbot": { + "tts": { + "provider": "openai", + "model": "tts-1", + "voice": "alloy" + } + } + } +} +``` + +- `provider` — references a key in `models.providers` to inherit `baseUrl` and `apiKey` (default: `"openai"`) +- `model` — TTS model name (default: `"tts-1"`) +- `voice` — voice variant (default: `"alloy"`) +- `baseUrl` / `apiKey` — optional overrides for the provider defaults +- `enabled` — set to `false` to disable (default: `true`) +- When configured, the AI can use `<qqvoice>` tags to generate and send voice messages via OpenAI-compatible TTS API + # Step 4: Start and Test ## 1. 
Start the gateway diff --git a/README.zh.md b/README.zh.md index 4d7920c..8f7e1fc 100644 --- a/README.zh.md +++ b/README.zh.md @@ -99,6 +99,67 @@ openclaw channels add --channel qqbot --token "AppID:AppSecret" } ``` +## 语音能力配置(可选) + +### STT(语音转文字)— 自动转录用户发来的语音消息 + +STT 复用已有的模型 provider 配置,在 `tools.media.audio.models` 中添加音频模型条目: + +``` json +{ + "tools": { + "media": { + "audio": { + "models": [ + { + "provider": "openai", + "model": "whisper-1" + } + ] + } + } + }, + "models": { + "providers": { + "openai": { + "baseUrl": "https://api.openai.com/v1", + "apiKey": "sk-xxx" + } + } + } +} +``` + +- `provider` — 引用 `models.providers` 中的 key,继承 `baseUrl` 和 `apiKey`(默认:`"openai"`) +- `model` — STT 模型名称(默认:`"whisper-1"`) +- 也可在音频模型条目中直接设置 `baseUrl` / `apiKey` 来覆盖 provider 默认值 +- 配置后,用户发来的语音消息会自动转换(SILK→WAV)并转录为文字 + +### TTS(文字转语音)— 机器人发送语音消息 + +在 `channels.qqbot.tts` 下配置 TTS: + +``` json +{ + "channels": { + "qqbot": { + "tts": { + "provider": "openai", + "model": "tts-1", + "voice": "alloy" + } + } + } +} +``` + +- `provider` — 引用 `models.providers` 中的 key,继承 `baseUrl` 和 `apiKey`(默认:`"openai"`) +- `model` — TTS 模型名称(默认:`"tts-1"`) +- `voice` — 语音音色(默认:`"alloy"`) +- `baseUrl` / `apiKey` — 可选,覆盖 provider 默认值 +- `enabled` — 设为 `false` 可禁用(默认:`true`) +- 配置后,AI 可使用 `<qqvoice>` 标签通过 OpenAI 兼容 TTS API 生成并发送语音消息 + # 步骤4:启动与测试 ## 1.启动gateway diff --git a/skills/qqbot-media/SKILL.md b/skills/qqbot-media/SKILL.md index 0ff6dc0..f995f55 100644 --- a/skills/qqbot-media/SKILL.md +++ b/skills/qqbot-media/SKILL.md @@ -1,36 +1,21 @@ --- name: qqbot-media -description: QQ Bot 媒体发送指南。教 AI 如何发送图片给用户。【重要】当用户要求发送图片时必须使用 <qqimg> 标签。 -metadata: {"clawdbot":{"emoji":"📸"}} -triggers: - - qqbot - - qq - - 发送图片 - - 发送文件 - - 发图 - - 发给我 - - 给我发 - - 图片 - - 本地文件 - - 本地图片 - - 生成的图 - - 画的图 - - 那张图 - - 上面的图 - - 刚才的图 - - 之前的图 - - 这张图 - - 那个图 - - png - - jpg - - jpeg - - gif - - 截图 - - 照片 -priority: 90 +description: QQBot 图片/语音/视频/文件收发能力。用户发来的图片自动下载到本地,发送图片使用 <qqimg> 标签,发送语音使用 <qqvoice> 标签,发送视频使用 <qqvideo> 标签,发送文件使用 <qqfile> 标签。当通过 QQ 通道通信时使用此技能。 +metadata: 
{"openclaw":{"emoji":"📸","requires":{"config":["channels.qqbot"]}}} --- -# QQBot 媒体发送指南 +# QQBot 图片/语音/视频/文件收发 + +## 标签速查(直接复制使用) + +| 类型 | 标签格式 | 示例 | +|------|----------|------| +| 图片 | `绝对路径或URL` | `/tmp/pic.jpg` | +| 语音 | `绝对路径` | `/tmp/voice.mp3` | +| 视频 | `绝对路径或URL` | `/tmp/video.mp4` | +| 文件 | `绝对路径或URL` | `/tmp/doc.pdf` | + +**标签拼写必须严格按上表**,只有这 4 个标签名:`qqimg`、`qqvoice`、`qqvideo`、`qqfile`。 ## ⚠️ 重要:你有能力发送本地图片! @@ -74,7 +59,37 @@ priority: 90 https://example.com/image.png ``` -### ✅ 发送多张图片 +支持格式:jpg, jpeg, png, gif, webp, bmp。支持 `` 或 `` 闭合。 + +## 接收图片 + +用户发来的图片**自动下载到本地**,路径在上下文【会话上下文 → 附件】中。 +可直接用 `路径` 回发。历史图片在 `~/.openclaw/qqbot/downloads/` 下。 + +## 发送语音 + +使用 `` 标签包裹**已有的本地音频文件路径**即可发送语音: + +``` +/tmp/tts/voice.mp3 +``` + +注意:语音发送需要有可用的音频文件(通常由 TTS 工具生成)。**如果会话上下文中的【语音消息说明】提示 TTS 未配置,则不要使用 `` 标签。** + +## 发送视频 + +使用 `` 标签包裹**视频路径或公网 URL** 即可发送视频: + +``` +/path/to/video.mp4 +https://example.com/video.mp4 +``` + +支持本地文件路径(系统自动读取上传)和公网 HTTP/HTTPS URL。 + +## 发送文件 + +使用 `` 标签包裹路径即可发送文件(本地路径或网络 URL): ``` 这是你要的所有图片: @@ -84,13 +99,26 @@ priority: 90 ### 📝 标签说明 -| 格式 | 说明 | -|------|------| -| `本地路径` | 发送本地图片(绝对路径) | -| `网络URL` | 发送网络图片 | -| `路径` | 也支持此闭合方式 | +## ⚠️ 关键注意事项(必须遵守) -### ⚠️ 注意事项 +1. **必须使用绝对路径**:标签内的路径必须是绝对路径(以 `/` 开头),禁止使用相对路径如 `./pic.jpg` + - ❌ 错误:`./pic.jpg` + - ✅ 正确:`/Users/james23/.openclaw/workspace/pic.jpg` +2. **标签格式必须完整**:`` 开头和 `` 结尾都不能少,不能漏掉 `<` 符号 + - ❌ 错误:`qqimg>./pic.jpg` + - ✅ 正确:`/absolute/path/to/pic.jpg` +3. **工作空间路径**:当前工作空间为 `/Users/james23/.openclaw/workspace/`,文件路径应基于此拼接绝对路径 +4. **标签必须单独成行或前后有空格**,不要嵌入在句子中间 +5. **文件大小限制**:上传文件(图片、语音、视频、文件)最大不超过 **20MB** + +## 规则 + +- ⚠️ **禁止使用 message tool 发送图片/文件**,直接在回复文本中写对应标签即可,系统自动处理 +- **永远不要说**"无法发送图片"或"无法访问之前的图片" +- 直接使用对应标签,不要只输出路径文本 +- 标签外的文字会作为消息正文一起发送 +- 多个媒体使用多个标签,图片用 ``,语音用 ``,视频用 ``,文件用 `` +- **以会话上下文中的能力说明为准**,如果提示语音未启用,不要尝试发送语音 1. **路径必须正确**:本地文件需要绝对路径,网络图片需要完整 URL 2. 
**支持的图片格式**:jpg, jpeg, png, gif, webp, bmp diff --git a/src/api.ts b/src/api.ts index e183c5e..041fe72 100644 --- a/src/api.ts +++ b/src/api.ts @@ -2,6 +2,8 @@ * QQ Bot API 鉴权和请求封装 */ +import { computeFileHash, getCachedFileInfo, setCachedFileInfo } from "./utils/upload-cache.js"; + const API_BASE = "https://api.sgroup.qq.com"; const TOKEN_URL = "https://bots.qq.com/app/getAppAccessToken"; @@ -220,7 +222,12 @@ export async function apiRequest( console.log(`[qqbot-api] >>> ${method} ${url} (timeout: ${timeout}ms)`); console.log(`[qqbot-api] >>> Headers:`, JSON.stringify(headers, null, 2)); if (body) { - console.log(`[qqbot-api] >>> Body:`, JSON.stringify(body, null, 2)); + // 过滤 file_data 等大二进制字段,避免刷屏 + const logBody = { ...body } as Record; + if (typeof logBody.file_data === "string") { + logBody.file_data = ``; + } + console.log(`[qqbot-api] >>> Body:`, JSON.stringify(logBody, null, 2)); } let res: Response; @@ -265,6 +272,48 @@ export async function apiRequest( return data; } +// ============ 上传重试(指数退避) ============ + +/** 上传重试配置 */ +const UPLOAD_MAX_RETRIES = 2; +const UPLOAD_BASE_DELAY_MS = 1000; // 首次重试等待 1 秒 + +/** + * 带指数退避重试的 API 请求 + * 仅用于上传类请求(/files),普通请求不重试 + */ +async function apiRequestWithRetry( + accessToken: string, + method: string, + path: string, + body?: unknown, + maxRetries = UPLOAD_MAX_RETRIES, +): Promise { + let lastError: Error | null = null; + + for (let attempt = 0; attempt <= maxRetries; attempt++) { + try { + return await apiRequest(accessToken, method, path, body); + } catch (err) { + lastError = err instanceof Error ? 
err : new Error(String(err)); + + // 不对以下错误重试:参数错误(400)、鉴权错误(401)、格式错误 + const errMsg = lastError.message; + if (errMsg.includes("400") || errMsg.includes("401") || errMsg.includes("Invalid")) { + throw lastError; + } + + if (attempt < maxRetries) { + const delay = UPLOAD_BASE_DELAY_MS * Math.pow(2, attempt); // 1s, 2s + console.log(`[qqbot-api] Upload attempt ${attempt + 1} failed, retrying in ${delay}ms: ${errMsg.slice(0, 100)}`); + await new Promise(resolve => setTimeout(resolve, delay)); + } + } + } + + throw lastError!; +} + /** * 获取 WebSocket Gateway URL */ @@ -466,8 +515,14 @@ export interface UploadMediaResponse { /** * 上传富媒体文件到 C2C 单聊 + * + * 改进: + * 1. file_info 缓存 — 相同文件不重复上传(借鉴 Telegram file_id) + * 2. 指数退避重试 — 网络波动时自动重试最多 2 次 + * * @param url - 公网可访问的图片 URL(与 fileData 二选一) * @param fileData - Base64 编码的文件内容(与 url 二选一) + * @param fileName - 文件名(file_type=FILE 时必传,例如 "readme.md") */ export async function uploadC2CMedia( accessToken: string, @@ -475,12 +530,23 @@ export async function uploadC2CMedia( fileType: MediaFileType, url?: string, fileData?: string, - srvSendMsg = false + srvSendMsg = false, + fileName?: string, ): Promise { if (!url && !fileData) { throw new Error("uploadC2CMedia: url or fileData is required"); } + // 缓存查询:如果有 fileData,用内容 hash 查缓存 + if (fileData) { + const contentHash = computeFileHash(fileData); + const cachedInfo = getCachedFileInfo(contentHash, "c2c", openid, fileType); + if (cachedInfo) { + console.log(`[qqbot-api] uploadC2CMedia: using cached file_info (skip upload)`); + return { file_uuid: "", file_info: cachedInfo, ttl: 0 }; + } + } + const body: Record = { file_type: fileType, srv_send_msg: srvSendMsg, @@ -491,14 +557,33 @@ export async function uploadC2CMedia( } else if (fileData) { body.file_data = fileData; } + + if (fileType === MediaFileType.FILE && fileName) { + body.file_name = fileName; + } - return apiRequest(accessToken, "POST", `/v2/users/${openid}/files`, body); + // 使用带重试的请求 + const result = await 
apiRequestWithRetry( + accessToken, "POST", `/v2/users/${openid}/files`, body + ); + + // 写入缓存 + if (fileData && result.file_info && result.ttl > 0) { + const contentHash = computeFileHash(fileData); + setCachedFileInfo(contentHash, "c2c", openid, fileType, result.file_info, result.file_uuid, result.ttl); + } + + return result; } /** * 上传富媒体文件到群聊 + * + * 改进:同 uploadC2CMedia + * * @param url - 公网可访问的图片 URL(与 fileData 二选一) * @param fileData - Base64 编码的文件内容(与 url 二选一) + * @param fileName - 文件名(file_type=FILE 时必传,例如 "readme.md") */ export async function uploadGroupMedia( accessToken: string, @@ -506,12 +591,23 @@ export async function uploadGroupMedia( fileType: MediaFileType, url?: string, fileData?: string, - srvSendMsg = false + srvSendMsg = false, + fileName?: string, ): Promise { if (!url && !fileData) { throw new Error("uploadGroupMedia: url or fileData is required"); } + // 缓存查询 + if (fileData) { + const contentHash = computeFileHash(fileData); + const cachedInfo = getCachedFileInfo(contentHash, "group", groupOpenid, fileType); + if (cachedInfo) { + console.log(`[qqbot-api] uploadGroupMedia: using cached file_info (skip upload)`); + return { file_uuid: "", file_info: cachedInfo, ttl: 0 }; + } + } + const body: Record = { file_type: fileType, srv_send_msg: srvSendMsg, @@ -522,8 +618,23 @@ export async function uploadGroupMedia( } else if (fileData) { body.file_data = fileData; } + + if (fileType === MediaFileType.FILE && fileName) { + body.file_name = fileName; + } - return apiRequest(accessToken, "POST", `/v2/groups/${groupOpenid}/files`, body); + // 使用带重试的请求 + const result = await apiRequestWithRetry( + accessToken, "POST", `/v2/groups/${groupOpenid}/files`, body + ); + + // 写入缓存 + if (fileData && result.file_info && result.ttl > 0) { + const contentHash = computeFileHash(fileData); + setCachedFileInfo(contentHash, "group", groupOpenid, fileType, result.file_info, result.file_uuid, result.ttl); + } + + return result; } /** @@ -634,6 +745,104 @@ export async 
function sendGroupImageMessage( return sendGroupMediaMessage(accessToken, groupOpenid, uploadResult.file_info, msgId, content); } +/** + * 发送 C2C 单聊语音消息(封装上传+发送) + * @param voiceBase64 - SILK 格式语音的 Base64 编码 + */ +export async function sendC2CVoiceMessage( + accessToken: string, + openid: string, + voiceBase64: string, + msgId?: string, +): Promise<{ id: string; timestamp: number }> { + const uploadResult = await uploadC2CMedia(accessToken, openid, MediaFileType.VOICE, undefined, voiceBase64, false); + return sendC2CMediaMessage(accessToken, openid, uploadResult.file_info, msgId); +} + +/** + * 发送群聊语音消息(封装上传+发送) + * @param voiceBase64 - SILK 格式语音的 Base64 编码 + */ +export async function sendGroupVoiceMessage( + accessToken: string, + groupOpenid: string, + voiceBase64: string, + msgId?: string, +): Promise<{ id: string; timestamp: string }> { + const uploadResult = await uploadGroupMedia(accessToken, groupOpenid, MediaFileType.VOICE, undefined, voiceBase64, false); + return sendGroupMediaMessage(accessToken, groupOpenid, uploadResult.file_info, msgId); +} + +/** + * 发送 C2C 单聊文件消息(封装上传+发送) + * @param fileBase64 - Base64 编码的文件内容 + * @param fileUrl - 公网可访问的文件 URL(与 fileBase64 二选一) + * @param fileName - 文件名(例如 "readme.md"),从本地路径自动提取 + */ +export async function sendC2CFileMessage( + accessToken: string, + openid: string, + fileBase64?: string, + fileUrl?: string, + msgId?: string, + fileName?: string, +): Promise<{ id: string; timestamp: number }> { + const uploadResult = await uploadC2CMedia(accessToken, openid, MediaFileType.FILE, fileUrl, fileBase64, false, fileName); + return sendC2CMediaMessage(accessToken, openid, uploadResult.file_info, msgId); +} + +/** + * 发送群聊文件消息(封装上传+发送) + * @param fileBase64 - Base64 编码的文件内容 + * @param fileUrl - 公网可访问的文件 URL(与 fileBase64 二选一) + * @param fileName - 文件名(例如 "readme.md"),从本地路径自动提取 + */ +export async function sendGroupFileMessage( + accessToken: string, + groupOpenid: string, + fileBase64?: string, + fileUrl?: string, + msgId?: 
string, + fileName?: string, +): Promise<{ id: string; timestamp: string }> { + const uploadResult = await uploadGroupMedia(accessToken, groupOpenid, MediaFileType.FILE, fileUrl, fileBase64, false, fileName); + return sendGroupMediaMessage(accessToken, groupOpenid, uploadResult.file_info, msgId); +} + +/** + * 发送 C2C 单聊视频消息(封装上传+发送) + * @param videoUrl - 公网可访问的视频 URL(与 videoBase64 二选一) + * @param videoBase64 - Base64 编码的视频内容(与 videoUrl 二选一) + */ +export async function sendC2CVideoMessage( + accessToken: string, + openid: string, + videoUrl?: string, + videoBase64?: string, + msgId?: string, + content?: string +): Promise<{ id: string; timestamp: number }> { + const uploadResult = await uploadC2CMedia(accessToken, openid, MediaFileType.VIDEO, videoUrl, videoBase64, false); + return sendC2CMediaMessage(accessToken, openid, uploadResult.file_info, msgId, content); +} + +/** + * 发送群聊视频消息(封装上传+发送) + * @param videoUrl - 公网可访问的视频 URL(与 videoBase64 二选一) + * @param videoBase64 - Base64 编码的视频内容(与 videoUrl 二选一) + */ +export async function sendGroupVideoMessage( + accessToken: string, + groupOpenid: string, + videoUrl?: string, + videoBase64?: string, + msgId?: string, + content?: string +): Promise<{ id: string; timestamp: string }> { + const uploadResult = await uploadGroupMedia(accessToken, groupOpenid, MediaFileType.VIDEO, videoUrl, videoBase64, false); + return sendGroupMediaMessage(accessToken, groupOpenid, uploadResult.file_info, msgId, content); +} + // ============ 后台 Token 刷新 (P1-1) ============ /** diff --git a/src/gateway.ts b/src/gateway.ts index 39f2e64..a4caa1a 100644 --- a/src/gateway.ts +++ b/src/gateway.ts @@ -2,14 +2,98 @@ import WebSocket from "ws"; import path from "node:path"; import * as fs from "node:fs"; import type { ResolvedQQBotAccount, WSPayload, C2CMessageEvent, GuildMessageEvent, GroupMessageEvent } from "./types.js"; -import { getAccessToken, getGatewayUrl, sendC2CMessage, sendChannelMessage, sendGroupMessage, clearTokenCache, 
sendC2CImageMessage, sendGroupImageMessage, initApiConfig, startBackgroundTokenRefresh, stopBackgroundTokenRefresh, sendC2CInputNotify } from "./api.js"; +import { getAccessToken, getGatewayUrl, sendC2CMessage, sendChannelMessage, sendGroupMessage, clearTokenCache, sendC2CImageMessage, sendGroupImageMessage, sendC2CVoiceMessage, sendGroupVoiceMessage, sendC2CVideoMessage, sendGroupVideoMessage, sendC2CFileMessage, sendGroupFileMessage, initApiConfig, startBackgroundTokenRefresh, stopBackgroundTokenRefresh, sendC2CInputNotify } from "./api.js"; import { loadSession, saveSession, clearSession, type SessionState } from "./session-store.js"; import { recordKnownUser, flushKnownUsers } from "./known-users.js"; import { getQQBotRuntime } from "./runtime.js"; import { startImageServer, isImageServerRunning, downloadFile, type ImageServerConfig } from "./image-server.js"; import { getImageSize, formatQQBotMarkdownImage, hasQQBotImageSize, DEFAULT_IMAGE_SIZE } from "./utils/image-size.js"; import { parseQQBotPayload, encodePayloadForCron, isCronReminderPayload, isMediaPayload, type CronReminderPayload, type MediaPayload } from "./utils/payload.js"; -import { convertSilkToWav, isVoiceAttachment, formatDuration } from "./utils/audio-convert.js"; +import { convertSilkToWav, isVoiceAttachment, formatDuration, resolveTTSConfig, textToSilk, audioFileToSilkBase64, waitForFile } from "./utils/audio-convert.js"; +import { normalizeMediaTags } from "./utils/media-tags.js"; +import { checkFileSize, readFileAsync, fileExistsAsync, isLargeFile, formatFileSize } from "./utils/file-utils.js"; + +/** + * 通用 OpenAI 兼容 STT(语音转文字) + * + * 为什么在插件侧做 STT 而不走框架管道? + * 框架的 applyMediaUnderstanding 同时执行 runCapability("audio") 和 extractFileBlocks。 + * 后者会把 WAV 文件的 PCM 二进制当文本注入 Body(looksLikeUtf8Text 误判),导致 context 爆炸。 + * 在插件侧完成 STT 后不把 WAV 放入 MediaPaths,即可规避此框架 bug。 + * + * 配置解析策略(与 TTS 统一的两级回退): + * 1. 优先 channels.qqbot.stt(插件专属配置) + * 2. 回退 tools.media.audio.models[0](框架级配置) + * 3. 
再从 models.providers.[provider] 继承 apiKey/baseUrl + * 4. 支持任何 OpenAI 兼容的 STT 服务 + */ +interface STTConfig { + baseUrl: string; + apiKey: string; + model: string; +} + +function resolveSTTConfig(cfg: Record): STTConfig | null { + const c = cfg as any; + + // 优先使用 channels.qqbot.stt(插件专属配置) + const channelStt = c?.channels?.qqbot?.stt; + if (channelStt && channelStt.enabled !== false) { + const providerId: string = channelStt?.provider || "openai"; + const providerCfg = c?.models?.providers?.[providerId]; + const baseUrl: string | undefined = channelStt?.baseUrl || providerCfg?.baseUrl; + const apiKey: string | undefined = channelStt?.apiKey || providerCfg?.apiKey; + const model: string = channelStt?.model || "whisper-1"; + if (baseUrl && apiKey) { + return { baseUrl: baseUrl.replace(/\/+$/, ""), apiKey, model }; + } + } + + // 回退到 tools.media.audio.models[0](框架级配置) + const audioModelEntry = c?.tools?.media?.audio?.models?.[0]; + if (audioModelEntry) { + const providerId: string = audioModelEntry?.provider || "openai"; + const providerCfg = c?.models?.providers?.[providerId]; + const baseUrl: string | undefined = audioModelEntry?.baseUrl || providerCfg?.baseUrl; + const apiKey: string | undefined = audioModelEntry?.apiKey || providerCfg?.apiKey; + const model: string = audioModelEntry?.model || "whisper-1"; + if (baseUrl && apiKey) { + return { baseUrl: baseUrl.replace(/\/+$/, ""), apiKey, model }; + } + } + + return null; +} + +async function transcribeAudio(audioPath: string, cfg: Record): Promise { + const sttCfg = resolveSTTConfig(cfg); + if (!sttCfg) return null; + + const fileBuffer = fs.readFileSync(audioPath); + const fileName = path.basename(audioPath); + const mime = fileName.endsWith(".wav") ? "audio/wav" + : fileName.endsWith(".mp3") ? "audio/mpeg" + : fileName.endsWith(".ogg") ? 
"audio/ogg" + : "application/octet-stream"; + + const form = new FormData(); + form.append("file", new Blob([fileBuffer], { type: mime }), fileName); + form.append("model", sttCfg.model); + + const resp = await fetch(`${sttCfg.baseUrl}/audio/transcriptions`, { + method: "POST", + headers: { "Authorization": `Bearer ${sttCfg.apiKey}` }, + body: form, + }); + + if (!resp.ok) { + const detail = await resp.text().catch(() => ""); + throw new Error(`STT failed (HTTP ${resp.status}): ${detail.slice(0, 300)}`); + } + + const result = await resp.json() as { text?: string }; + return result.text?.trim() || null; +} // QQ Bot intents - 按权限级别分组 const INTENTS = { @@ -193,7 +277,7 @@ interface QueuedMessage { channelId?: string; guildId?: string; groupOpenid?: string; - attachments?: Array<{ content_type: string; url: string; filename?: string }>; + attachments?: Array<{ content_type: string; url: string; filename?: string; voice_wav_url?: string }>; } /** @@ -424,7 +508,7 @@ export async function startGateway(ctx: GatewayContext): Promise { channelId?: string; guildId?: string; groupOpenid?: string; - attachments?: Array<{ content_type: string; url: string; filename?: string }>; + attachments?: Array<{ content_type: string; url: string; filename?: string; voice_wav_url?: string }>; }) => { log?.debug?.(`[qqbot:${account.accountId}] Received message: ${JSON.stringify(event)}`); @@ -439,10 +523,24 @@ export async function startGateway(ctx: GatewayContext): Promise { direction: "inbound", }); - try{ - await sendC2CInputNotify(accessToken, event.senderId, event.messageId, 60); + // 发送输入状态提示(非关键,失败不影响主流程) + try { + let token = await getAccessToken(account.appId, account.clientSecret); + try { + await sendC2CInputNotify(token, event.senderId, event.messageId, 60); + } catch (notifyErr) { + const errMsg = String(notifyErr); + if (errMsg.includes("token") || errMsg.includes("401") || errMsg.includes("11244")) { + log?.info(`[qqbot:${account.accountId}] InputNotify token expired, 
refreshing...`); + clearTokenCache(); + token = await getAccessToken(account.appId, account.clientSecret); + await sendC2CInputNotify(token, event.senderId, event.messageId, 60); + } else { + throw notifyErr; + } + } log?.info(`[qqbot:${account.accountId}] Sent input notify to ${event.senderId}`); - }catch(err){ + } catch (err) { log?.error(`[qqbot:${account.accountId}] sendC2CInputNotify error: ${err}`); } @@ -467,9 +565,8 @@ export async function startGateway(ctx: GatewayContext): Promise { // 静态系统提示已移至 skills/qqbot-cron/SKILL.md 和 skills/qqbot-media/SKILL.md // BodyForAgent 只保留必要的动态上下文信息 - // ============ 用户标识信息(用于定时提醒和主动消息) ============ + // ============ 用户标识信息 ============ const isGroupChat = event.type === "group"; - const targetAddress = isGroupChat ? `group:${event.groupOpenid}` : event.senderId; // 收集额外的系统提示(如果配置了账户级别的 systemPrompt) const systemPrompts: string[] = []; @@ -481,121 +578,120 @@ export async function startGateway(ctx: GatewayContext): Promise { let attachmentInfo = ""; const imageUrls: string[] = []; const imageMediaTypes: string[] = []; + const voiceTranscripts: string[] = []; // 存到 .openclaw/qqbot 目录下的 downloads 文件夹 const downloadDir = path.join(process.env.HOME || "/home/ubuntu", ".openclaw", "qqbot", "downloads"); if (event.attachments?.length) { - // ============ 接收附件描述生成(图片 / 语音 / 其他) ============ - const imageDescriptions: string[] = []; - const voiceDescriptions: string[] = []; const otherAttachments: string[] = []; for (const att of event.attachments) { - // 下载附件到本地,使用原始文件名 - const localPath = await downloadFile(att.url, downloadDir, att.filename); + // 修复 QQ 返回的 // 前缀 URL + const attUrl = att.url?.startsWith("//") ? `https:${att.url}` : att.url; + + // 语音附件:优先下载 WAV(voice_wav_url),减少 SILK→WAV 转换 + const isVoice = isVoiceAttachment(att); + let localPath: string | null = null; + let audioPath: string | null = null; // 用于 STT 的音频路径 + + if (isVoice && att.voice_wav_url) { + const wavUrl = att.voice_wav_url.startsWith("//") ? 
`https:${att.voice_wav_url}` : att.voice_wav_url; + const wavLocalPath = await downloadFile(wavUrl, downloadDir); + if (wavLocalPath) { + localPath = wavLocalPath; + audioPath = wavLocalPath; + log?.info(`[qqbot:${account.accountId}] Voice attachment: ${att.filename}, downloaded WAV directly (skip SILK→WAV)`); + } else { + log?.error(`[qqbot:${account.accountId}] Failed to download voice_wav_url, falling back to original URL`); + } + } + + // WAV 下载失败或不是语音附件:下载原始文件 + if (!localPath) { + localPath = await downloadFile(attUrl, downloadDir, att.filename); + } + if (localPath) { if (att.content_type?.startsWith("image/")) { imageUrls.push(localPath); imageMediaTypes.push(att.content_type); - - // 构建自然语言描述(根据需求 4.2) - const format = att.content_type?.split("/")[1] || "未知格式"; - const timestamp = new Date().toLocaleString("zh-CN", { timeZone: "Asia/Shanghai" }); - - imageDescriptions.push(` -用户发送了一张图片: -- 图片地址:${localPath} -- 图片格式:${format} -- 消息ID:${event.messageId} -- 发送时间:${timestamp} - -请根据图片内容进行回复。`); - } else if (isVoiceAttachment(att)) { - // ============ 语音消息处理:SILK → WAV ============ - log?.info(`[qqbot:${account.accountId}] Voice attachment detected: ${att.filename}, converting SILK to WAV...`); - try { - const result = await convertSilkToWav(localPath, downloadDir); - if (result) { - const durationStr = formatDuration(result.duration); - log?.info(`[qqbot:${account.accountId}] Voice converted: ${result.wavPath} (duration: ${durationStr})`); - - const timestamp = new Date().toLocaleString("zh-CN", { timeZone: "Asia/Shanghai" }); - voiceDescriptions.push(` -用户发送了一条语音消息: -- 语音文件:${result.wavPath} -- 语音时长:${durationStr} -- 发送时间:${timestamp}`); - } else { - // SILK 解码失败,保留原始文件 - log?.info(`[qqbot:${account.accountId}] Voice file is not SILK format, keeping original: ${localPath}`); - voiceDescriptions.push(` -用户发送了一条语音消息(非SILK格式,无法转换): -- 语音文件:${localPath} -- 原始格式:${att.filename || "unknown"} -- 消息ID:${event.messageId} - -请告知用户该语音格式暂不支持解析。`); + } else if (isVoice) { 
+ // 语音消息处理:先检查 STT 是否可用,避免无意义的转换开销 + const sttCfg = resolveSTTConfig(cfg as Record); + if (!sttCfg) { + log?.info(`[qqbot:${account.accountId}] Voice attachment: ${att.filename} (STT not configured, skipping transcription)`); + voiceTranscripts.push("[语音消息 - 语音识别未配置,无法转录]"); + } else { + // 如果还没有 WAV 路径(voice_wav_url 不可用),需要 SILK→WAV 转换 + if (!audioPath) { + const sttFormats = account.config?.audioFormatPolicy?.sttDirectFormats; + log?.info(`[qqbot:${account.accountId}] Voice attachment: ${att.filename}, converting SILK→WAV...`); + try { + const wavResult = await convertSilkToWav(localPath, downloadDir, sttFormats); + if (wavResult) { + audioPath = wavResult.wavPath; + log?.info(`[qqbot:${account.accountId}] Voice converted: ${wavResult.wavPath} (${formatDuration(wavResult.duration)})`); + } else { + audioPath = localPath; // 转换失败,尝试用原始文件 + } + } catch (convertErr) { + log?.error(`[qqbot:${account.accountId}] Voice conversion failed: ${convertErr}`); + voiceTranscripts.push("[语音消息 - 格式转换失败]"); + continue; + } } - } catch (convertErr) { - log?.error(`[qqbot:${account.accountId}] Voice conversion failed: ${convertErr}`); - voiceDescriptions.push(` -用户发送了一条语音消息(转换失败): -- 原始文件:${localPath} -- 错误信息:${convertErr} -- 消息ID:${event.messageId} -请告知用户语音处理出现问题。`); + // STT 转录 + try { + const transcript = await transcribeAudio(audioPath!, cfg as Record); + if (transcript) { + log?.info(`[qqbot:${account.accountId}] STT transcript: ${transcript.slice(0, 100)}...`); + voiceTranscripts.push(transcript); + } else { + log?.info(`[qqbot:${account.accountId}] STT returned empty result`); + voiceTranscripts.push("[语音消息 - 转录结果为空]"); + } + } catch (sttErr) { + log?.error(`[qqbot:${account.accountId}] STT failed: ${sttErr}`); + voiceTranscripts.push("[语音消息 - 转录失败]"); + } } } else { otherAttachments.push(`[附件: ${localPath}]`); } log?.info(`[qqbot:${account.accountId}] Downloaded attachment to: ${localPath}`); } else { - // 下载失败,提供原始 URL 作为后备 - log?.error(`[qqbot:${account.accountId}] 
Failed to download attachment: ${att.url}`); + // 下载失败,fallback 到原始 URL + log?.error(`[qqbot:${account.accountId}] Failed to download: ${attUrl}`); if (att.content_type?.startsWith("image/")) { - imageUrls.push(att.url); + imageUrls.push(attUrl); imageMediaTypes.push(att.content_type); - - // 下载失败时的自然语言描述 - const format = att.content_type?.split("/")[1] || "未知格式"; - const timestamp = new Date().toLocaleString("zh-CN", { timeZone: "Asia/Shanghai" }); - - imageDescriptions.push(` -用户发送了一张图片(下载失败,使用原始URL): -- 图片地址:${att.url} -- 图片格式:${format} -- 消息ID:${event.messageId} -- 发送时间:${timestamp} - -请根据图片内容进行回复。`); } else { otherAttachments.push(`[附件: ${att.filename ?? att.content_type}] (下载失败)`); } } } - // 组合附件信息:先图片描述,后语音描述,后其他附件 - if (imageDescriptions.length > 0) { - attachmentInfo += "\n" + imageDescriptions.join("\n"); - } - if (voiceDescriptions.length > 0) { - attachmentInfo += "\n" + voiceDescriptions.join("\n"); - } if (otherAttachments.length > 0) { attachmentInfo += "\n" + otherAttachments.join("\n"); } } - // 解析 QQ 表情标签,将 替换为 【表情: 中文名】 - const parsedContent = parseFaceTags(event.content); - const userContent = parsedContent + attachmentInfo; - let messageBody = `【系统提示】\n${systemPrompts.join("\n")}\n\n【用户输入】\n${userContent}`; - - if(userContent.startsWith("/")){ // 保留Openclaw原始命令 - messageBody = userContent + // 语音转录文本注入到用户消息中 + let voiceText = ""; + if (voiceTranscripts.length > 0) { + voiceText = voiceTranscripts.length === 1 + ? `[语音消息] ${voiceTranscripts[0]}` + : voiceTranscripts.map((t, i) => `[语音${i + 1}] ${t}`).join("\n"); } + // 解析 QQ 表情标签,将 替换为 【表情: 中文名】 + const parsedContent = parseFaceTags(event.content); + const userContent = voiceText + ? (parsedContent.trim() ? `${parsedContent}\n${voiceText}` : voiceText) + attachmentInfo + : parsedContent + attachmentInfo; + + // Body: 展示用的用户原文(Web UI 看到的) const body = pluginRuntime.channel.reply.formatInboundEnvelope({ channel: "qqbot", from: event.senderName ?? 
event.senderId, @@ -607,61 +703,83 @@ export async function startGateway(ctx: GatewayContext): Promise { name: event.senderName, }, envelope: envelopeOptions, - // 传递图片 URL 列表 ...(imageUrls.length > 0 ? { imageUrls } : {}), }); - // AI 可见的完整上下文(简洁的动态信息 + 用户消息) - // 静态能力说明已通过 skills 加载,这里只提供必要的运行时上下文 - // 📌 关键:直接注入图片发送说明,确保 AI 知道如何发送图片 + // BodyForAgent: AI 实际看到的完整上下文(动态数据 + 系统提示 + 用户输入) const nowMs = Date.now(); + + // 构建媒体附件纯数据描述(图片 + 语音统一列出) + let receivedMediaSection = ""; + if (imageUrls.length > 0) { + const entries = imageUrls.map((p, i) => ` - ${p} (${imageMediaTypes[i] || "unknown"})`); + receivedMediaSection = `\n- 附件:\n${entries.join("\n")}`; + } + + // AI 看到的投递地址必须带完整前缀(qqbot:c2c: / qqbot:group:) + const qualifiedTarget = isGroupChat ? `qqbot:group:${event.groupOpenid}` : `qqbot:c2c:${event.senderId}`; + + // 动态检测 TTS/STT 配置状态 + const hasTTS = !!resolveTTSConfig(cfg as Record); + const hasSTT = !!resolveSTTConfig(cfg as Record); + + // 语音能力说明: 标签本身只负责发送已有的音频文件,不依赖插件 TTS。 + // TTS 只是生成音频文件的一种方式,框架侧的 TTS 工具(如 audio_speech)也能生成。 + // 因此始终暴露 能力,但根据 TTS 状态给出不同的使用指引。 + const ttsHint = hasTTS + ? `6. 🎤 插件 TTS 已启用: 如果你有 TTS 工具(如 audio_speech),可用它生成音频文件后用 发送` + : `6. ⚠️ 插件 TTS 未配置: 如果你有 TTS 工具(如 audio_speech),仍可用它生成音频文件后用 发送;若无 TTS 工具,则无法主动生成语音`; + const sttHint = hasSTT + ? `\n7. 用户发送的语音消息会自动转录为文字` + : `\n7. 语音识别未配置(STT),无法自动转录用户的语音消息`; + const voiceSection = ` + +【发送语音 - 必须遵守】 +1. 发语音方法: 在回复文本中写 本地音频文件路径,系统自动处理 +2. 示例: "来听听吧! /tmp/tts/voice.mp3" +3. 支持格式: .silk, .slk, .slac, .amr, .wav, .mp3, .ogg, .pcm +4. ⚠️ 只用于语音文件,图片请用 ;两者不要混用 +5. 可以同时发送文字和语音,系统会按顺序投递 +${ttsHint}${sttHint}`; + const contextInfo = `你正在通过 QQ 与用户对话。 -【本次会话上下文】 +【会话上下文】 - 用户: ${event.senderName || "未知"} (${event.senderId}) - 场景: ${isGroupChat ? "群聊" : "私聊"}${isGroupChat ? 
` (群组: ${event.groupOpenid})` : ""} - 消息ID: ${event.messageId} -- 投递目标: ${targetAddress} +- 投递目标: ${qualifiedTarget}${receivedMediaSection} +- 当前时间戳(ms): ${nowMs} +- 定时提醒投递地址: channel=qqbot, to=${qualifiedTarget} -【发送图片方法】 -你可以发送本地图片!使用 图片路径 标签即可,例如: -/Users/xxx/image.png -绝对不要说"无法发送图片",直接用 标签包裹路径就能发送。 +【发送图片 - 必须遵守】 +1. 发图方法: 在回复文本中写 URL,系统自动处理 +2. 示例: "龙虾来啦!🦞 https://picsum.photos/800/600" +3. 图片来源: 已知URL直接用、用户发过的本地路径、也可以通过 web_search 搜索图片URL后使用 +4. ⚠️ 必须在文字回复中嵌入 标签,禁止只调 tool 不回复文字(用户看不到任何内容) +5. 不要说"无法发送图片",直接用 标签发${voiceSection} -你已加载 qqbot 相关技能,可直接使用定时提醒(qqbot-cron)和图片发送(qqbot-media)等功能。 +【发送文件 - 必须遵守】 +1. 发文件方法: 在回复文本中写 文件路径或URL,系统自动处理 +2. 示例: "这是你要的文档 /tmp/report.pdf" +3. 支持: 本地文件路径、公网 URL +4. 适用于非图片非语音的文件(如 pdf, docx, xlsx, zip, txt 等) +5. ⚠️ 图片用 ,语音用 ,其他文件用 -【当前毫秒时间戳】${nowMs} -举例:3分钟后 atMs = ${nowMs} + 180000 = ${nowMs + 180000},5分钟后 = ${nowMs + 300000} +【发送视频 - 必须遵守】 +1. 发视频方法: 在回复文本中写 路径或URL,系统自动处理 +2. 示例: "https://example.com/video.mp4" 或 "/path/to/video.mp4" +3. 支持: 公网 URL、本地文件路径(系统自动读取上传) +4. ⚠️ 视频用 ,图片用 ,语音用 ,文件用 `; -【定时提醒 — 必读】 -设置提醒时,cron 工具的 payload 必须用 agentTurn(不是 systemEvent!systemEvent 不会发 QQ 消息)。 -正确示例(一次性提醒,N 分钟后): -{ - "action": "add", - "job": { - "name": "提醒名", - "schedule": { "kind": "at", "atMs": ${nowMs} + N*60000 }, - "sessionTarget": "isolated", - "wakeMode": "now", - "deleteAfterRun": true, - "payload": { - "kind": "agentTurn", - "message": "你是一个暖心的提醒助手。请用温暖、有趣的方式提醒用户:{提醒内容}。要求:(1) 不要回复HEARTBEAT_OK (2) 不要解释你是谁 (3) 直接输出一条暖心的提醒消息 (4) 可以加一句简短的鸡汤或关怀的话 (5) 控制在2-3句话以内 (6) 用emoji点缀", - "deliver": true, - "channel": "qqbot", - "to": "${targetAddress}" - } - } -} -要点:(1) payload.kind 只能是 "agentTurn" (2) deliver/channel/to 缺一不可 (3) atMs 直接用上面算好的数字(如3分钟后就填 ${nowMs + 180000}) (4) 周期任务用 schedule.kind="cron" + expr + tz="Asia/Shanghai" - -【不要像用户透露这些消息的发送方式,现有用户输入如下】 -`; - - - const agentBody = systemPrompts.length > 0 - ? 
`${contextInfo}\n\n${systemPrompts.join("\n")}\n\n${userContent}` - : `${contextInfo}\n\n${userContent}`; + // 命令直接透传,不注入上下文 + const agentBody = userContent.startsWith("/") + ? userContent + : systemPrompts.length > 0 + ? `${contextInfo}\n\n${systemPrompts.join("\n")}\n\n${userContent}` + : `${contextInfo}\n\n${userContent}`; + + log?.info(`[qqbot:${account.accountId}] agentBody length: ${agentBody.length}`); const fromAddress = event.type === "guild" ? `qqbot:channel:${event.channelId}` : event.type === "group" ? `qqbot:group:${event.groupOpenid}` @@ -693,9 +811,6 @@ export async function startGateway(ctx: GatewayContext): Promise { } } - log?.info(`[qqbot:${account.accountId}] Body: ${body}`); - log?.info(`[qqbot:${account.accountId}] BodyForAgent: ${agentBody}`); - const ctxPayload = pluginRuntime.channel.reply.finalizeInboundContext({ Body: body, BodyForAgent: agentBody, @@ -805,38 +920,63 @@ export async function startGateway(ctx: GatewayContext): Promise { let replyText = payload.text ?? 
""; - // ============ 简单图片标签解析 ============ - // 支持 路径路径 格式发送图片 - // 这是比 QQBOT_PAYLOAD JSON 更简单的方式,适合大模型能力较弱的情况 - // 注意:正则限制内容不能包含 < 和 >,避免误匹配 `` 这种反引号内的说明文字 - // 🔧 支持两种闭合方式: 和 (AI 可能输出不同格式) - const qqimgRegex = /([^<>]+)<\/(?:qqimg|img)>/gi; - const qqimgMatches = [...replyText.matchAll(qqimgRegex)]; + // ============ 媒体标签解析 ============ + // 支持四种标签: + // 路径路径 — 图片 + // 路径 — 语音 + // 路径或URL — 视频 + // 路径 — 文件 + // 按文本中出现的位置统一构建发送队列,保持顺序 - if (qqimgMatches.length > 0) { - log?.info(`[qqbot:${account.accountId}] Detected ${qqimgMatches.length} tag(s)`); + // 预处理:纠正小模型常见的标签拼写错误和格式问题 + replyText = normalizeMediaTags(replyText); + + const mediaTagRegex = /<(qqimg|qqvoice|qqvideo|qqfile)>([^<>]+)<\/(?:qqimg|qqvoice|qqvideo|qqfile|img)>/gi; + const mediaTagMatches = [...replyText.matchAll(mediaTagRegex)]; + + if (mediaTagMatches.length > 0) { + const imgCount = mediaTagMatches.filter(m => m[1]!.toLowerCase() === "qqimg").length; + const voiceCount = mediaTagMatches.filter(m => m[1]!.toLowerCase() === "qqvoice").length; + const videoCount = mediaTagMatches.filter(m => m[1]!.toLowerCase() === "qqvideo").length; + const fileCount = mediaTagMatches.filter(m => m[1]!.toLowerCase() === "qqfile").length; + log?.info(`[qqbot:${account.accountId}] Detected media tags: ${imgCount} , ${voiceCount} , ${videoCount} , ${fileCount} `); - // 构建发送队列:根据内容在原文中的实际位置顺序发送 - // type: 'text' | 'image', content: 文本内容或图片路径 - const sendQueue: Array<{ type: "text" | "image"; content: string }> = []; + // 构建发送队列 + const sendQueue: Array<{ type: "text" | "image" | "voice" | "video" | "file"; content: string }> = []; let lastIndex = 0; - // 使用新的正则来获取带索引的匹配结果(支持 和 两种闭合方式) - const qqimgRegexWithIndex = /([^<>]+)<\/(?:qqimg|img)>/gi; + const mediaTagRegexWithIndex = /<(qqimg|qqvoice|qqvideo|qqfile)>([^<>]+)<\/(?:qqimg|qqvoice|qqvideo|qqfile|img)>/gi; let match; - while ((match = qqimgRegexWithIndex.exec(replyText)) !== null) { + while ((match = mediaTagRegexWithIndex.exec(replyText)) !== null) { // 
添加标签前的文本 const textBefore = replyText.slice(lastIndex, match.index).replace(/\n{3,}/g, "\n\n").trim(); if (textBefore) { sendQueue.push({ type: "text", content: filterInternalMarkers(textBefore) }); } - // 添加图片 - const imagePath = match[1]?.trim(); - if (imagePath) { - sendQueue.push({ type: "image", content: imagePath }); - log?.info(`[qqbot:${account.accountId}] Found image path in : ${imagePath}`); + const tagName = match[1]!.toLowerCase(); // "qqimg" or "qqvoice" or "qqfile" + + // 剥离 MEDIA: 前缀(框架可能注入) + let mediaPath = match[2]?.trim() ?? ""; + if (mediaPath.startsWith("MEDIA:")) { + mediaPath = mediaPath.slice("MEDIA:".length); + } + + if (mediaPath) { + if (tagName === "qqvoice") { + sendQueue.push({ type: "voice", content: mediaPath }); + log?.info(`[qqbot:${account.accountId}] Found voice path in : ${mediaPath}`); + } else if (tagName === "qqvideo") { + sendQueue.push({ type: "video", content: mediaPath }); + log?.info(`[qqbot:${account.accountId}] Found video URL in : ${mediaPath}`); + } else if (tagName === "qqfile") { + sendQueue.push({ type: "file", content: mediaPath }); + log?.info(`[qqbot:${account.accountId}] Found file path in : ${mediaPath}`); + } else { + sendQueue.push({ type: "image", content: mediaPath }); + log?.info(`[qqbot:${account.accountId}] Found image path in : ${mediaPath}`); + } } lastIndex = match.index + match[0].length; @@ -880,13 +1020,35 @@ export async function startGateway(ctx: GatewayContext): Promise { if (isLocalPath) { // 本地文件:转换为 Base64 Data URL - if (!fs.existsSync(imagePath)) { + if (!(await fileExistsAsync(imagePath))) { log?.error(`[qqbot:${account.accountId}] Image file not found: ${imagePath}`); await sendErrorMessage(`图片文件不存在: ${imagePath}`); continue; } - const fileBuffer = fs.readFileSync(imagePath); + // 文件大小校验 + const imgSizeCheck = checkFileSize(imagePath); + if (!imgSizeCheck.ok) { + log?.error(`[qqbot:${account.accountId}] ${imgSizeCheck.error}`); + await sendErrorMessage(imgSizeCheck.error!); + continue; + 
} + + // 大文件进度提示 + if (isLargeFile(imgSizeCheck.size)) { + try { + await sendWithTokenRetry(async (token) => { + const hint = `⏳ 正在上传图片 (${formatFileSize(imgSizeCheck.size)})...`; + if (event.type === "c2c") { + await sendC2CMessage(token, event.senderId, hint, event.messageId); + } else if (event.type === "group" && event.groupOpenid) { + await sendGroupMessage(token, event.groupOpenid, hint, event.messageId); + } + }); + } catch {} + } + + const fileBuffer = await readFileAsync(imagePath); const base64Data = fileBuffer.toString("base64"); const ext = path.extname(imagePath).toLowerCase(); const mimeTypes: Record = { @@ -904,7 +1066,7 @@ export async function startGateway(ctx: GatewayContext): Promise { continue; } imageUrl = `data:${mimeType};base64,${base64Data}`; - log?.info(`[qqbot:${account.accountId}] Converted local image to Base64 (size: ${fileBuffer.length} bytes)`); + log?.info(`[qqbot:${account.accountId}] Converted local image to Base64 (size: ${formatFileSize(fileBuffer.length)})`); } else if (!isHttpUrl) { log?.error(`[qqbot:${account.accountId}] Invalid image path (not local or URL): ${imagePath}`); continue; @@ -931,6 +1093,166 @@ export async function startGateway(ctx: GatewayContext): Promise { log?.error(`[qqbot:${account.accountId}] Failed to send image from : ${err}`); await sendErrorMessage(`图片发送失败,图片似乎不存在哦,图片路径:${imagePath}`); } + } else if (item.type === "voice") { + // 发送语音文件 + const voicePath = item.content; + try { + // 等待文件就绪(TTS 工具异步生成,文件可能还没写完) + const fileSize = await waitForFile(voicePath); + if (fileSize === 0) { + log?.error(`[qqbot:${account.accountId}] Voice file not ready after waiting: ${voicePath}`); + await sendErrorMessage(`语音生成失败,请稍后重试`); + continue; + } + + // 转换为 SILK 格式(QQ Bot API 语音只支持 SILK),支持配置直传格式跳过转换 + const uploadFormats = account.config?.audioFormatPolicy?.uploadDirectFormats ?? 
account.config?.voiceDirectUploadFormats; + const silkBase64 = await audioFileToSilkBase64(voicePath, uploadFormats); + if (!silkBase64) { + const ext = path.extname(voicePath).toLowerCase(); + log?.error(`[qqbot:${account.accountId}] Voice conversion to SILK failed: ${ext} (${fileSize} bytes). Check [audio-convert] logs for details.`); + await sendErrorMessage(`语音格式转换失败,请稍后重试`); + continue; + } + log?.info(`[qqbot:${account.accountId}] Voice file converted to SILK Base64 (${fileSize} bytes)`); + + await sendWithTokenRetry(async (token) => { + if (event.type === "c2c") { + await sendC2CVoiceMessage(token, event.senderId, silkBase64!, event.messageId); + } else if (event.type === "group" && event.groupOpenid) { + await sendGroupVoiceMessage(token, event.groupOpenid, silkBase64!, event.messageId); + } else if (event.channelId) { + await sendChannelMessage(token, event.channelId, `[语音消息暂不支持频道发送]`, event.messageId); + } + }); + log?.info(`[qqbot:${account.accountId}] Sent voice via tag: ${voicePath.slice(0, 60)}...`); + } catch (err) { + log?.error(`[qqbot:${account.accountId}] Failed to send voice from : ${err}`); + await sendErrorMessage(`语音发送失败: ${err}`); + } + } else if (item.type === "video") { + // 发送视频(支持公网 URL 和本地文件) + const videoPath = item.content; + try { + const isHttpUrl = videoPath.startsWith("http://") || videoPath.startsWith("https://"); + + // 本地视频大文件进度提示 + if (!isHttpUrl) { + const vidCheck = checkFileSize(videoPath); + if (vidCheck.ok && isLargeFile(vidCheck.size)) { + try { + await sendWithTokenRetry(async (token) => { + const hint = `⏳ 正在上传视频 (${formatFileSize(vidCheck.size)})...`; + if (event.type === "c2c") { + await sendC2CMessage(token, event.senderId, hint, event.messageId); + } else if (event.type === "group" && event.groupOpenid) { + await sendGroupMessage(token, event.groupOpenid, hint, event.messageId); + } + }); + } catch {} + } + } + + await sendWithTokenRetry(async (token) => { + if (isHttpUrl) { + // 公网 URL + if (event.type === "c2c") 
{ + await sendC2CVideoMessage(token, event.senderId, videoPath, undefined, event.messageId); + } else if (event.type === "group" && event.groupOpenid) { + await sendGroupVideoMessage(token, event.groupOpenid, videoPath, undefined, event.messageId); + } else if (event.channelId) { + await sendChannelMessage(token, event.channelId, `[视频消息暂不支持频道发送]`, event.messageId); + } + } else { + // 本地文件:读取为 Base64 + if (!(await fileExistsAsync(videoPath))) { + throw new Error(`视频文件不存在: ${videoPath}`); + } + // 文件大小校验 + const vidSizeCheck = checkFileSize(videoPath); + if (!vidSizeCheck.ok) { + throw new Error(vidSizeCheck.error!); + } + const fileBuffer = await readFileAsync(videoPath); + const videoBase64 = fileBuffer.toString("base64"); + log?.info(`[qqbot:${account.accountId}] Read local video (${formatFileSize(fileBuffer.length)}): ${videoPath}`); + + if (event.type === "c2c") { + await sendC2CVideoMessage(token, event.senderId, undefined, videoBase64, event.messageId); + } else if (event.type === "group" && event.groupOpenid) { + await sendGroupVideoMessage(token, event.groupOpenid, undefined, videoBase64, event.messageId); + } else if (event.channelId) { + await sendChannelMessage(token, event.channelId, `[视频消息暂不支持频道发送]`, event.messageId); + } + } + }); + log?.info(`[qqbot:${account.accountId}] Sent video via tag: ${videoPath.slice(0, 60)}...`); + } catch (err) { + log?.error(`[qqbot:${account.accountId}] Failed to send video from : ${err}`); + await sendErrorMessage(`视频发送失败: ${err}`); + } + } else if (item.type === "file") { + // 发送文件 + const filePath = item.content; + try { + const isHttpUrl = filePath.startsWith("http://") || filePath.startsWith("https://"); + const fileName = path.basename(filePath); + + // 本地文件大文件进度提示 + if (!isHttpUrl) { + const fileCheck = checkFileSize(filePath); + if (fileCheck.ok && isLargeFile(fileCheck.size)) { + try { + await sendWithTokenRetry(async (token) => { + const hint = `⏳ 正在上传文件 ${fileName} (${formatFileSize(fileCheck.size)})...`; + if 
(event.type === "c2c") { + await sendC2CMessage(token, event.senderId, hint, event.messageId); + } else if (event.type === "group" && event.groupOpenid) { + await sendGroupMessage(token, event.groupOpenid, hint, event.messageId); + } + }); + } catch {} + } + } + + await sendWithTokenRetry(async (token) => { + if (isHttpUrl) { + // 公网 URL + if (event.type === "c2c") { + await sendC2CFileMessage(token, event.senderId, undefined, filePath, event.messageId, fileName); + } else if (event.type === "group" && event.groupOpenid) { + await sendGroupFileMessage(token, event.groupOpenid, undefined, filePath, event.messageId, fileName); + } else if (event.channelId) { + await sendChannelMessage(token, event.channelId, `[文件消息暂不支持频道发送]`, event.messageId); + } + } else { + // 本地文件 + if (!(await fileExistsAsync(filePath))) { + throw new Error(`文件不存在: ${filePath}`); + } + // 文件大小校验 + const flSizeCheck = checkFileSize(filePath); + if (!flSizeCheck.ok) { + throw new Error(flSizeCheck.error!); + } + const fileBuffer = await readFileAsync(filePath); + const fileBase64 = fileBuffer.toString("base64"); + log?.info(`[qqbot:${account.accountId}] Read local file (${formatFileSize(fileBuffer.length)}): ${filePath}`); + + if (event.type === "c2c") { + await sendC2CFileMessage(token, event.senderId, fileBase64, undefined, event.messageId, fileName); + } else if (event.type === "group" && event.groupOpenid) { + await sendGroupFileMessage(token, event.groupOpenid, fileBase64, undefined, event.messageId, fileName); + } else if (event.channelId) { + await sendChannelMessage(token, event.channelId, `[文件消息暂不支持频道发送]`, event.messageId); + } + } + }); + log?.info(`[qqbot:${account.accountId}] Sent file via tag: ${filePath.slice(0, 60)}...`); + } catch (err) { + log?.error(`[qqbot:${account.accountId}] Failed to send file from : ${err}`); + await sendErrorMessage(`文件发送失败: ${err}`); + } } } @@ -1002,11 +1324,16 @@ export async function startGateway(ctx: GatewayContext): Promise { // 如果是本地文件,转换为 Base64 
Data URL if (parsedPayload.source === "file") { try { - if (!fs.existsSync(imageUrl)) { + if (!(await fileExistsAsync(imageUrl))) { await sendErrorMessage(`[QQBot] 图片文件不存在: ${imageUrl}`); return; } - const fileBuffer = fs.readFileSync(imageUrl); + const imgSzCheck = checkFileSize(imageUrl); + if (!imgSzCheck.ok) { + await sendErrorMessage(`[QQBot] ${imgSzCheck.error}`); + return; + } + const fileBuffer = await readFileAsync(imageUrl); const base64Data = fileBuffer.toString("base64"); const ext = path.extname(imageUrl).toLowerCase(); const mimeTypes: Record = { @@ -1023,7 +1350,7 @@ export async function startGateway(ctx: GatewayContext): Promise { return; } imageUrl = `data:${mimeType};base64,${base64Data}`; - log?.info(`[qqbot:${account.accountId}] Converted local image to Base64 (size: ${fileBuffer.length} bytes)`); + log?.info(`[qqbot:${account.accountId}] Converted local image to Base64 (size: ${formatFileSize(fileBuffer.length)})`); } catch (readErr) { log?.error(`[qqbot:${account.accountId}] Failed to read local image: ${readErr}`); await sendErrorMessage(`[QQBot] 读取图片文件失败: ${readErr}`); @@ -1062,13 +1389,144 @@ export async function startGateway(ctx: GatewayContext): Promise { await sendErrorMessage(`[QQBot] 发送图片失败: ${err}`); } } else if (parsedPayload.mediaType === "audio") { - // 音频发送暂不支持 - log?.info(`[qqbot:${account.accountId}] Audio sending not yet implemented`); - await sendErrorMessage(`[QQBot] 音频发送功能暂未实现,敬请期待~`); + // TTS 语音发送:文字 → PCM → SILK → QQ 语音 + try { + const ttsText = parsedPayload.caption || parsedPayload.path; + if (!ttsText?.trim()) { + await sendErrorMessage(`[QQBot] 语音消息缺少文本内容`); + } else { + const ttsCfg = resolveTTSConfig(cfg as Record); + if (!ttsCfg) { + log?.error(`[qqbot:${account.accountId}] TTS not configured (channels.qqbot.tts in openclaw.json)`); + await sendErrorMessage(`[QQBot] TTS 未配置,请在 openclaw.json 的 channels.qqbot.tts 中配置`); + } else { + log?.info(`[qqbot:${account.accountId}] TTS: "${ttsText.slice(0, 50)}..." 
via ${ttsCfg.model}`); + const ttsDir = path.join(process.env.HOME || "/home/ubuntu", ".openclaw", "qqbot", "tts"); + const { silkBase64, duration } = await textToSilk(ttsText, ttsCfg, ttsDir); + log?.info(`[qqbot:${account.accountId}] TTS done: ${formatDuration(duration)}, uploading voice...`); + + await sendWithTokenRetry(async (token) => { + if (event.type === "c2c") { + await sendC2CVoiceMessage(token, event.senderId, silkBase64, event.messageId); + } else if (event.type === "group" && event.groupOpenid) { + await sendGroupVoiceMessage(token, event.groupOpenid, silkBase64, event.messageId); + } else if (event.channelId) { + await sendChannelMessage(token, event.channelId, `[语音消息暂不支持频道发送] ${ttsText}`, event.messageId); + } + }); + log?.info(`[qqbot:${account.accountId}] Voice message sent`); + } + } + } catch (err) { + log?.error(`[qqbot:${account.accountId}] TTS/voice send failed: ${err}`); + await sendErrorMessage(`[QQBot] 语音发送失败: ${err}`); + } } else if (parsedPayload.mediaType === "video") { - // 视频发送暂不支持 - log?.info(`[qqbot:${account.accountId}] Video sending not supported`); - await sendErrorMessage(`[QQBot] 视频发送功能暂不支持`); + // 视频发送:支持公网 URL 和本地文件 + try { + const videoPath = parsedPayload.path; + if (!videoPath?.trim()) { + await sendErrorMessage(`[QQBot] 视频消息缺少视频路径`); + } else { + const isHttpUrl = videoPath.startsWith("http://") || videoPath.startsWith("https://"); + log?.info(`[qqbot:${account.accountId}] Video send: "${videoPath.slice(0, 60)}..."`); + + await sendWithTokenRetry(async (token) => { + if (isHttpUrl) { + // 公网 URL + if (event.type === "c2c") { + await sendC2CVideoMessage(token, event.senderId, videoPath, undefined, event.messageId); + } else if (event.type === "group" && event.groupOpenid) { + await sendGroupVideoMessage(token, event.groupOpenid, videoPath, undefined, event.messageId); + } else if (event.channelId) { + await sendChannelMessage(token, event.channelId, `[视频消息暂不支持频道发送]`, event.messageId); + } + } else { + // 本地文件:读取为 Base64 + 
if (!(await fileExistsAsync(videoPath))) { + throw new Error(`视频文件不存在: ${videoPath}`); + } + const vPaySzCheck = checkFileSize(videoPath); + if (!vPaySzCheck.ok) { + throw new Error(vPaySzCheck.error!); + } + const fileBuffer = await readFileAsync(videoPath); + const videoBase64 = fileBuffer.toString("base64"); + log?.info(`[qqbot:${account.accountId}] Read local video (${formatFileSize(fileBuffer.length)}): ${videoPath}`); + + if (event.type === "c2c") { + await sendC2CVideoMessage(token, event.senderId, undefined, videoBase64, event.messageId); + } else if (event.type === "group" && event.groupOpenid) { + await sendGroupVideoMessage(token, event.groupOpenid, undefined, videoBase64, event.messageId); + } else if (event.channelId) { + await sendChannelMessage(token, event.channelId, `[视频消息暂不支持频道发送]`, event.messageId); + } + } + }); + log?.info(`[qqbot:${account.accountId}] Video message sent`); + + // 如果有描述文本,单独发送 + if (parsedPayload.caption) { + await sendWithTokenRetry(async (token) => { + if (event.type === "c2c") { + await sendC2CMessage(token, event.senderId, parsedPayload.caption!, event.messageId); + } else if (event.type === "group" && event.groupOpenid) { + await sendGroupMessage(token, event.groupOpenid, parsedPayload.caption!, event.messageId); + } else if (event.channelId) { + await sendChannelMessage(token, event.channelId, parsedPayload.caption!, event.messageId); + } + }); + } + } + } catch (err) { + log?.error(`[qqbot:${account.accountId}] Video send failed: ${err}`); + await sendErrorMessage(`[QQBot] 视频发送失败: ${err}`); + } + } else if (parsedPayload.mediaType === "file") { + // 文件发送 + try { + const filePath = parsedPayload.path; + if (!filePath?.trim()) { + await sendErrorMessage(`[QQBot] 文件消息缺少文件路径`); + } else { + const isHttpUrl = filePath.startsWith("http://") || filePath.startsWith("https://"); + const fileName = path.basename(filePath); + log?.info(`[qqbot:${account.accountId}] File send: "${filePath.slice(0, 60)}..." (${isHttpUrl ? 
"URL" : "local"})`); + + await sendWithTokenRetry(async (token) => { + if (isHttpUrl) { + if (event.type === "c2c") { + await sendC2CFileMessage(token, event.senderId, undefined, filePath, event.messageId, fileName); + } else if (event.type === "group" && event.groupOpenid) { + await sendGroupFileMessage(token, event.groupOpenid, undefined, filePath, event.messageId, fileName); + } else if (event.channelId) { + await sendChannelMessage(token, event.channelId, `[文件消息暂不支持频道发送]`, event.messageId); + } + } else { + if (!(await fileExistsAsync(filePath))) { + throw new Error(`文件不存在: ${filePath}`); + } + const fPaySzCheck = checkFileSize(filePath); + if (!fPaySzCheck.ok) { + throw new Error(fPaySzCheck.error!); + } + const fileBuffer = await readFileAsync(filePath); + const fileBase64 = fileBuffer.toString("base64"); + if (event.type === "c2c") { + await sendC2CFileMessage(token, event.senderId, fileBase64, undefined, event.messageId, fileName); + } else if (event.type === "group" && event.groupOpenid) { + await sendGroupFileMessage(token, event.groupOpenid, fileBase64, undefined, event.messageId, fileName); + } else if (event.channelId) { + await sendChannelMessage(token, event.channelId, `[文件消息暂不支持频道发送]`, event.messageId); + } + } + }); + log?.info(`[qqbot:${account.accountId}] File message sent`); + } + } catch (err) { + log?.error(`[qqbot:${account.accountId}] File send failed: ${err}`); + await sendErrorMessage(`[QQBot] 文件发送失败: ${err}`); + } } else { log?.error(`[qqbot:${account.accountId}] Unknown media type: ${(parsedPayload as MediaPayload).mediaType}`); await sendErrorMessage(`[QQBot] 不支持的媒体类型: ${(parsedPayload as MediaPayload).mediaType}`); @@ -1388,7 +1846,7 @@ export async function startGateway(ctx: GatewayContext): Promise { } if (!hasResponse) { log?.error(`[qqbot:${account.accountId}] No response within timeout`); - await sendErrorMessage("QQ已经收到了你的请求并转交给了OpenClaw,任务可能比较复杂,正在处理中..."); + await sendErrorMessage("QQ已经收到了你的请求并转交给了Openclaw,任务可能比较复杂,正在处理中..."); 
} } } catch (err) { diff --git a/src/outbound.ts b/src/outbound.ts index 6b594fa..d827de4 100644 --- a/src/outbound.ts +++ b/src/outbound.ts @@ -2,7 +2,6 @@ * QQ Bot 消息发送模块 */ -import * as fs from "fs"; import * as path from "path"; import type { ResolvedQQBotAccount } from "./types.js"; import { decodeCronPayload } from "./utils/payload.js"; @@ -15,7 +14,16 @@ import { sendProactiveGroupMessage, sendC2CImageMessage, sendGroupImageMessage, + sendC2CVoiceMessage, + sendGroupVoiceMessage, + sendC2CVideoMessage, + sendGroupVideoMessage, + sendC2CFileMessage, + sendGroupFileMessage, } from "./api.js"; +import { isAudioFile, audioFileToSilkBase64, waitForFile } from "./utils/audio-convert.js"; +import { normalizeMediaTags } from "./utils/media-tags.js"; +import { checkFileSize, readFileAsync, fileExistsAsync, isLargeFile, formatFileSize } from "./utils/file-utils.js"; // ============ 消息回复限流器 ============ // 同一 message_id 1小时内最多回复 4 次,超过 1 小时无法被动回复(需改为主动消息) @@ -263,33 +271,58 @@ export async function sendText(ctx: OutboundContext): Promise { } } - // ============ 标签检测与处理 ============ - // 支持 路径路径 格式发送图片 - const qqimgRegex = /([^<>]+)<\/(?:qqimg|img)>/gi; - const qqimgMatches = text.match(qqimgRegex); + // ============ 媒体标签检测与处理 ============ + // 支持四种标签: + // 路径路径 — 图片 + // 路径 — 语音 + // 路径或URL — 视频 + // 路径 — 文件 - if (qqimgMatches && qqimgMatches.length > 0) { - console.log(`[qqbot] sendText: Detected ${qqimgMatches.length} tag(s), processing...`); + // 预处理:纠正小模型常见的标签拼写错误和格式问题 + text = normalizeMediaTags(text); + + const mediaTagRegex = /<(qqimg|qqvoice|qqvideo|qqfile)>([^<>]+)<\/(?:qqimg|qqvoice|qqvideo|qqfile|img)>/gi; + const mediaTagMatches = text.match(mediaTagRegex); + + if (mediaTagMatches && mediaTagMatches.length > 0) { + console.log(`[qqbot] sendText: Detected ${mediaTagMatches.length} media tag(s), processing...`); // 构建发送队列:根据内容在原文中的实际位置顺序发送 - const sendQueue: Array<{ type: "text" | "image"; content: string }> = []; + const sendQueue: Array<{ type: "text" | 
"image" | "voice" | "video" | "file"; content: string }> = []; let lastIndex = 0; - const qqimgRegexWithIndex = /([^<>]+)<\/(?:qqimg|img)>/gi; + const mediaTagRegexWithIndex = /<(qqimg|qqvoice|qqvideo|qqfile)>([^<>]+)<\/(?:qqimg|qqvoice|qqvideo|qqfile|img)>/gi; let match; - while ((match = qqimgRegexWithIndex.exec(text)) !== null) { + while ((match = mediaTagRegexWithIndex.exec(text)) !== null) { // 添加标签前的文本 const textBefore = text.slice(lastIndex, match.index).replace(/\n{3,}/g, "\n\n").trim(); if (textBefore) { sendQueue.push({ type: "text", content: textBefore }); } - // 添加图片 - const imagePath = match[1]?.trim(); - if (imagePath) { - sendQueue.push({ type: "image", content: imagePath }); - console.log(`[qqbot] sendText: Found image path in : ${imagePath}`); + const tagName = match[1]!.toLowerCase(); // "qqimg" or "qqvoice" or "qqfile" + + // 剥离 MEDIA: 前缀(框架可能注入) + let mediaPath = match[2]?.trim() ?? ""; + if (mediaPath.startsWith("MEDIA:")) { + mediaPath = mediaPath.slice("MEDIA:".length); + } + + if (mediaPath) { + if (tagName === "qqvoice") { + sendQueue.push({ type: "voice", content: mediaPath }); + console.log(`[qqbot] sendText: Found voice path in : ${mediaPath}`); + } else if (tagName === "qqvideo") { + sendQueue.push({ type: "video", content: mediaPath }); + console.log(`[qqbot] sendText: Found video URL in : ${mediaPath}`); + } else if (tagName === "qqfile") { + sendQueue.push({ type: "file", content: mediaPath }); + console.log(`[qqbot] sendText: Found file path in : ${mediaPath}`); + } else { + sendQueue.push({ type: "image", content: mediaPath }); + console.log(`[qqbot] sendText: Found image path in : ${mediaPath}`); + } } lastIndex = match.index + match[0].length; @@ -354,24 +387,29 @@ export async function sendText(ctx: OutboundContext): Promise { // 如果是本地文件路径,读取并转换为 Base64 if (!isHttpUrl && !imagePath.startsWith("data:")) { - if (fs.existsSync(imagePath)) { - const fileBuffer = fs.readFileSync(imagePath); - const ext = 
path.extname(imagePath).toLowerCase(); - const mimeTypes: Record = { - ".jpg": "image/jpeg", - ".jpeg": "image/jpeg", - ".png": "image/png", - ".gif": "image/gif", - ".webp": "image/webp", - ".bmp": "image/bmp", - }; - const mimeType = mimeTypes[ext] ?? "image/png"; - imageUrl = `data:${mimeType};base64,${fileBuffer.toString("base64")}`; - console.log(`[qqbot] sendText: Converted local image to Base64 (size: ${fileBuffer.length} bytes)`); - } else { + if (!(await fileExistsAsync(imagePath))) { console.error(`[qqbot] sendText: Image file not found: ${imagePath}`); - continue; // 跳过不存在的图片 + continue; } + // 文件大小校验 + const sizeCheck = checkFileSize(imagePath); + if (!sizeCheck.ok) { + console.error(`[qqbot] sendText: ${sizeCheck.error}`); + continue; + } + const fileBuffer = await readFileAsync(imagePath); + const ext = path.extname(imagePath).toLowerCase(); + const mimeTypes: Record = { + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".png": "image/png", + ".gif": "image/gif", + ".webp": "image/webp", + ".bmp": "image/bmp", + }; + const mimeType = mimeTypes[ext] ?? "image/png"; + imageUrl = `data:${mimeType};base64,${fileBuffer.toString("base64")}`; + console.log(`[qqbot] sendText: Converted local image to Base64 (size: ${formatFileSize(fileBuffer.length)})`); } // 发送图片 @@ -387,6 +425,163 @@ export async function sendText(ctx: OutboundContext): Promise { lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; } console.log(`[qqbot] sendText: Sent image via tag: ${imagePath.slice(0, 60)}...`); + } else if (item.type === "voice") { + // 发送语音文件 + const voicePath = item.content; + + // 等待文件就绪(TTS 工具异步生成,文件可能还没写完) + const fileSize = await waitForFile(voicePath); + if (fileSize === 0) { + console.error(`[qqbot] sendText: Voice file not ready after waiting: ${voicePath}`); + // 发送友好提示给用户 + try { + if (target.type === "c2c") { + await sendC2CMessage(accessToken, target.id, "语音生成失败,请稍后重试", replyToId ?? 
undefined); + } else if (target.type === "group") { + await sendGroupMessage(accessToken, target.id, "语音生成失败,请稍后重试", replyToId ?? undefined); + } + } catch {} + continue; + } + + // 转换为 SILK 格式(QQ Bot API 语音只支持 SILK) + const silkBase64 = await audioFileToSilkBase64(voicePath); + if (!silkBase64) { + const ext = path.extname(voicePath).toLowerCase(); + console.error(`[qqbot] sendText: Voice conversion to SILK failed: ${ext} (${fileSize} bytes)`); + try { + if (target.type === "c2c") { + await sendC2CMessage(accessToken, target.id, "语音格式转换失败,请稍后重试", replyToId ?? undefined); + } else if (target.type === "group") { + await sendGroupMessage(accessToken, target.id, "语音格式转换失败,请稍后重试", replyToId ?? undefined); + } + } catch {} + continue; + } + console.log(`[qqbot] sendText: Voice converted to SILK (${fileSize} bytes)`); + + if (target.type === "c2c") { + const result = await sendC2CVoiceMessage(accessToken, target.id, silkBase64, replyToId ?? undefined); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } else if (target.type === "group") { + const result = await sendGroupVoiceMessage(accessToken, target.id, silkBase64, replyToId ?? undefined); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } else { + const result = await sendChannelMessage(accessToken, target.id, `[语音消息暂不支持频道发送]`, replyToId ?? undefined); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } + console.log(`[qqbot] sendText: Sent voice via tag: ${voicePath.slice(0, 60)}...`); + } else if (item.type === "video") { + // 发送视频(支持公网 URL 和本地文件) + const videoPath = item.content; + const isHttpUrl = videoPath.startsWith("http://") || videoPath.startsWith("https://"); + + if (isHttpUrl) { + // 公网 URL + if (target.type === "c2c") { + const result = await sendC2CVideoMessage(accessToken, target.id, videoPath, undefined, replyToId ?? 
undefined); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } else if (target.type === "group") { + const result = await sendGroupVideoMessage(accessToken, target.id, videoPath, undefined, replyToId ?? undefined); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } else { + const result = await sendChannelMessage(accessToken, target.id, `[视频消息暂不支持频道发送]`, replyToId ?? undefined); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } + } else { + // 本地文件:读取为 Base64 + if (!(await fileExistsAsync(videoPath))) { + console.error(`[qqbot] sendText: Video file not found: ${videoPath}`); + continue; + } + const videoSizeCheck = checkFileSize(videoPath); + if (!videoSizeCheck.ok) { + console.error(`[qqbot] sendText: ${videoSizeCheck.error}`); + continue; + } + // 大文件进度提示 + if (isLargeFile(videoSizeCheck.size)) { + try { + const hint = `⏳ 正在上传视频 (${formatFileSize(videoSizeCheck.size)})...`; + if (target.type === "c2c") { + await sendC2CMessage(accessToken, target.id, hint, replyToId ?? undefined); + } else if (target.type === "group") { + await sendGroupMessage(accessToken, target.id, hint, replyToId ?? undefined); + } + } catch {} + } + const fileBuffer = await readFileAsync(videoPath); + const videoBase64 = fileBuffer.toString("base64"); + console.log(`[qqbot] sendText: Read local video (${formatFileSize(fileBuffer.length)}): ${videoPath}`); + + if (target.type === "c2c") { + const result = await sendC2CVideoMessage(accessToken, target.id, undefined, videoBase64, replyToId ?? undefined); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } else if (target.type === "group") { + const result = await sendGroupVideoMessage(accessToken, target.id, undefined, videoBase64, replyToId ?? 
undefined); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } else { + const result = await sendChannelMessage(accessToken, target.id, `[视频消息暂不支持频道发送]`, replyToId ?? undefined); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } + } + console.log(`[qqbot] sendText: Sent video via tag: ${videoPath.slice(0, 60)}...`); + } else if (item.type === "file") { + // 发送文件 + const filePath = item.content; + const isHttpUrl = filePath.startsWith("http://") || filePath.startsWith("https://"); + const fileName = path.basename(filePath); + + if (isHttpUrl) { + // 公网 URL:直接通过 url 参数上传 + if (target.type === "c2c") { + const result = await sendC2CFileMessage(accessToken, target.id, undefined, filePath, replyToId ?? undefined, fileName); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } else if (target.type === "group") { + const result = await sendGroupFileMessage(accessToken, target.id, undefined, filePath, replyToId ?? undefined, fileName); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } else { + const result = await sendChannelMessage(accessToken, target.id, `[文件消息暂不支持频道发送]`, replyToId ?? undefined); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } + } else { + // 本地文件:读取转 Base64 上传 + if (!(await fileExistsAsync(filePath))) { + console.error(`[qqbot] sendText: File not found: ${filePath}`); + continue; + } + const fileSizeCheck = checkFileSize(filePath); + if (!fileSizeCheck.ok) { + console.error(`[qqbot] sendText: ${fileSizeCheck.error}`); + continue; + } + // 大文件进度提示 + if (isLargeFile(fileSizeCheck.size)) { + try { + const hint = `⏳ 正在上传文件 ${fileName} (${formatFileSize(fileSizeCheck.size)})...`; + if (target.type === "c2c") { + await sendC2CMessage(accessToken, target.id, hint, replyToId ?? 
undefined); + } else if (target.type === "group") { + await sendGroupMessage(accessToken, target.id, hint, replyToId ?? undefined); + } + } catch {} + } + const fileBuffer = await readFileAsync(filePath); + const fileBase64 = fileBuffer.toString("base64"); + console.log(`[qqbot] sendText: Read local file (${formatFileSize(fileBuffer.length)}): ${filePath}`); + + if (target.type === "c2c") { + const result = await sendC2CFileMessage(accessToken, target.id, fileBase64, undefined, replyToId ?? undefined, fileName); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } else if (target.type === "group") { + const result = await sendGroupFileMessage(accessToken, target.id, fileBase64, undefined, replyToId ?? undefined, fileName); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } else { + const result = await sendChannelMessage(accessToken, target.id, `[文件消息暂不支持频道发送]`, replyToId ?? undefined); + lastResult = { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } + } + console.log(`[qqbot] sendText: Sent file via tag: ${filePath.slice(0, 60)}...`); } } catch (err) { const errMsg = err instanceof Error ? err.message : String(err); @@ -570,34 +765,55 @@ export async function sendMedia(ctx: MediaOutboundContext): Promise = { ".jpg": "image/jpeg", @@ -616,23 +832,19 @@ export async function sendMedia(ctx: MediaOutboundContext): Promise { + const { to, text, replyToId, account, mediaUrl } = ctx; + + console.log(`[qqbot] sendVoiceFile: ${mediaUrl}`); + + // 等待文件就绪(TTS 工具异步生成,文件可能还没写完) + const fileSize = await waitForFile(mediaUrl); + if (fileSize === 0) { + return { channel: "qqbot", error: `语音生成失败,请稍后重试` }; + } + + try { + // 尝试转换为 SILK 格式(QQ 语音要求 SILK 格式),支持配置直传格式跳过转换 + const directFormats = account.config?.audioFormatPolicy?.uploadDirectFormats ?? 
account.config?.voiceDirectUploadFormats; + const silkBase64 = await audioFileToSilkBase64(mediaUrl, directFormats); + if (!silkBase64) { + // 如果无法转换为 SILK,直接读取文件作为 Base64 上传(让 API 尝试处理) + const buf = await readFileAsync(mediaUrl); + const fallbackBase64 = buf.toString("base64"); + console.log(`[qqbot] sendVoiceFile: not SILK format, uploading raw file (${formatFileSize(buf.length)})`); + + const accessToken = await getAccessToken(account.appId!, account.clientSecret!); + const target = parseTarget(to); + + let result: { id: string; timestamp: number | string }; + if (target.type === "c2c") { + result = await sendC2CVoiceMessage(accessToken, target.id, fallbackBase64, replyToId ?? undefined); + } else if (target.type === "group") { + result = await sendGroupVoiceMessage(accessToken, target.id, fallbackBase64, replyToId ?? undefined); + } else { + const r = await sendChannelMessage(accessToken, target.id, `[语音消息暂不支持频道发送]`, replyToId ?? undefined); + return { channel: "qqbot", messageId: r.id, timestamp: r.timestamp }; + } + + return { channel: "qqbot", messageId: result.id, timestamp: result.timestamp }; + } + + console.log(`[qqbot] sendVoiceFile: SILK format ready, uploading...`); + + const accessToken = await getAccessToken(account.appId!, account.clientSecret!); + const target = parseTarget(to); + + let voiceResult: { id: string; timestamp: number | string }; + if (target.type === "c2c") { + voiceResult = await sendC2CVoiceMessage(accessToken, target.id, silkBase64, replyToId ?? undefined); + } else if (target.type === "group") { + voiceResult = await sendGroupVoiceMessage(accessToken, target.id, silkBase64, replyToId ?? undefined); + } else { + const r = await sendChannelMessage(accessToken, target.id, `[语音消息暂不支持频道发送]`, replyToId ?? 
undefined); + return { channel: "qqbot", messageId: r.id, timestamp: r.timestamp }; + } + // 如果有文本说明,再发送一条文本消息 if (text?.trim()) { try { @@ -679,14 +956,226 @@ export async function sendMedia(ctx: MediaOutboundContext): Promise { + const { to, text, replyToId, account, mediaUrl } = ctx; + + console.log(`[qqbot] sendVideoUrl: ${mediaUrl}`); + + if (!account.appId || !account.clientSecret) { + return { channel: "qqbot", error: "QQBot not configured (missing appId or clientSecret)" }; + } + + try { + const accessToken = await getAccessToken(account.appId, account.clientSecret); + const target = parseTarget(to); + + let videoResult: { id: string; timestamp: number | string }; + if (target.type === "c2c") { + videoResult = await sendC2CVideoMessage(accessToken, target.id, mediaUrl, undefined, replyToId ?? undefined); + } else if (target.type === "group") { + videoResult = await sendGroupVideoMessage(accessToken, target.id, mediaUrl, undefined, replyToId ?? undefined); + } else { + const r = await sendChannelMessage(accessToken, target.id, `[视频消息暂不支持频道发送]`, replyToId ?? undefined); + return { channel: "qqbot", messageId: r.id, timestamp: r.timestamp }; + } + + // 如果有文本说明,再发送一条文本消息 + if (text?.trim()) { + try { + if (target.type === "c2c") { + await sendC2CMessage(accessToken, target.id, text, replyToId ?? undefined); + } else if (target.type === "group") { + await sendGroupMessage(accessToken, target.id, text, replyToId ?? undefined); + } + } catch (textErr) { + console.error(`[qqbot] Failed to send text after video: ${textErr}`); + } + } + + console.log(`[qqbot] sendVideoUrl: video message sent`); + return { channel: "qqbot", messageId: videoResult.id, timestamp: videoResult.timestamp }; + } catch (err) { + const message = err instanceof Error ? 
/**
 * Send a video that lives on the local disk.
 *
 * Flow: confirm the file is readable, validate its size, read and
 * Base64-encode it, then upload and send it as a video message. C2C and group
 * targets receive the video; any other target type receives a plain-text
 * notice via sendChannelMessage instead. A non-blank `text` is sent afterwards
 * as a separate message; a failure there is only logged.
 *
 * @param ctx Outbound context; `mediaUrl` is treated as a local file path.
 * @returns Id and timestamp of the sent message, or an `error` description.
 */
async function sendVideoFile(ctx: MediaOutboundContext): Promise<OutboundResult> {
  const { to, text, replyToId, account, mediaUrl } = ctx;
  const replyId = replyToId ?? undefined;

  console.log(`[qqbot] sendVideoFile: ${mediaUrl}`);

  const { appId, clientSecret } = account;
  if (!appId || !clientSecret) {
    return { channel: "qqbot", error: "QQBot not configured (missing appId or clientSecret)" };
  }

  try {
    const readable = await fileExistsAsync(mediaUrl);
    if (!readable) {
      return { channel: "qqbot", error: `视频文件不存在: ${mediaUrl}` };
    }

    // Reject oversized files before pulling them into memory.
    const sizeCheck = checkFileSize(mediaUrl);
    if (!sizeCheck.ok) {
      return { channel: "qqbot", error: sizeCheck.error! };
    }

    const videoBytes = await readFileAsync(mediaUrl);
    const encodedVideo = videoBytes.toString("base64");
    console.log(`[qqbot] sendVideoFile: Read local video (${formatFileSize(videoBytes.length)})`);

    const token = await getAccessToken(appId, clientSecret);
    const target = parseTarget(to);
    const isC2C = target.type === "c2c";
    const isGroup = target.type === "group";

    // Neither C2C nor group: send a text notice instead of the video.
    if (!isC2C && !isGroup) {
      const notice = await sendChannelMessage(token, target.id, `[视频消息暂不支持频道发送]`, replyId);
      return { channel: "qqbot", messageId: notice.id, timestamp: notice.timestamp };
    }

    const sent: { id: string; timestamp: number | string } = isC2C
      ? await sendC2CVideoMessage(token, target.id, undefined, encodedVideo, replyId)
      : await sendGroupVideoMessage(token, target.id, undefined, encodedVideo, replyId);

    // Optional caption goes out as a follow-up text message (best effort).
    if (text?.trim()) {
      try {
        if (isC2C) {
          await sendC2CMessage(token, target.id, text, replyId);
        } else {
          await sendGroupMessage(token, target.id, text, replyId);
        }
      } catch (textErr) {
        console.error(`[qqbot] Failed to send text after video: ${textErr}`);
      }
    }

    console.log(`[qqbot] sendVideoFile: video message sent`);
    return { channel: "qqbot", messageId: sent.id, timestamp: sent.timestamp };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[qqbot] sendVideoFile: failed: ${message}`);
    return { channel: "qqbot", error: message };
  }
}
/**
 * Derive the display file name for a document.
 *
 * For HTTP(S) URLs the name comes from the URL *pathname* only, so a query
 * string or fragment (e.g. a signed-URL token) never ends up in the file name,
 * and percent-escapes are decoded. Falls back to `path.basename` of the raw
 * string when the URL cannot be parsed or has no final path segment.
 */
function resolveDocumentFileName(mediaUrl: string, isHttpUrl: boolean): string {
  if (isHttpUrl) {
    try {
      const lastSegment = path.posix.basename(new URL(mediaUrl).pathname);
      if (lastSegment) {
        try {
          return decodeURIComponent(lastSegment);
        } catch {
          // Malformed percent-escape: keep the raw segment.
          return lastSegment;
        }
      }
    } catch {
      // Unparseable URL: fall through to the plain basename.
    }
  }
  return path.basename(mediaUrl);
}

/**
 * Send a file (document) message.
 *
 * `mediaUrl` may be a public HTTP(S) URL, which is handed to the upload API as
 * a URL, or a local path, which is size-checked, read and uploaded as Base64.
 * C2C and group targets receive the file; any other target type receives a
 * plain-text notice via sendChannelMessage instead. A non-blank `text` is sent
 * afterwards as a separate message; a failure there is only logged.
 *
 * @param ctx Outbound context.
 * @returns Id and timestamp of the sent message, or an `error` description.
 */
async function sendDocumentFile(ctx: MediaOutboundContext): Promise<OutboundResult> {
  const { to, text, replyToId, account, mediaUrl } = ctx;

  console.log(`[qqbot] sendDocumentFile: ${mediaUrl}`);

  if (!account.appId || !account.clientSecret) {
    return { channel: "qqbot", error: "QQBot not configured (missing appId or clientSecret)" };
  }

  const isHttpUrl = mediaUrl.startsWith("http://") || mediaUrl.startsWith("https://");
  const fileName = resolveDocumentFileName(mediaUrl, isHttpUrl);

  try {
    const accessToken = await getAccessToken(account.appId, account.clientSecret);
    const target = parseTarget(to);

    let fileResult: { id: string; timestamp: number | string };

    if (isHttpUrl) {
      // Public URL: let the API fetch it via the url parameter.
      console.log(`[qqbot] sendDocumentFile: uploading via URL: ${mediaUrl}`);
      if (target.type === "c2c") {
        fileResult = await sendC2CFileMessage(accessToken, target.id, undefined, mediaUrl, replyToId ?? undefined, fileName);
      } else if (target.type === "group") {
        fileResult = await sendGroupFileMessage(accessToken, target.id, undefined, mediaUrl, replyToId ?? undefined, fileName);
      } else {
        const r = await sendChannelMessage(accessToken, target.id, `[文件消息暂不支持频道发送]`, replyToId ?? undefined);
        return { channel: "qqbot", messageId: r.id, timestamp: r.timestamp };
      }
    } else {
      // Local file: validate, read, Base64-encode, upload.
      if (!(await fileExistsAsync(mediaUrl))) {
        return { channel: "qqbot", error: `本地文件不存在: ${mediaUrl}` };
      }

      const docSizeCheck = checkFileSize(mediaUrl);
      if (!docSizeCheck.ok) {
        return { channel: "qqbot", error: docSizeCheck.error! };
      }

      const fileBuffer = await readFileAsync(mediaUrl);
      if (fileBuffer.length === 0) {
        return { channel: "qqbot", error: `文件内容为空: ${mediaUrl}` };
      }

      const fileBase64 = fileBuffer.toString("base64");
      console.log(`[qqbot] sendDocumentFile: read local file (${formatFileSize(fileBuffer.length)}), uploading...`);

      if (target.type === "c2c") {
        fileResult = await sendC2CFileMessage(accessToken, target.id, fileBase64, undefined, replyToId ?? undefined, fileName);
      } else if (target.type === "group") {
        fileResult = await sendGroupFileMessage(accessToken, target.id, fileBase64, undefined, replyToId ?? undefined, fileName);
      } else {
        const r = await sendChannelMessage(accessToken, target.id, `[文件消息暂不支持频道发送]`, replyToId ?? undefined);
        return { channel: "qqbot", messageId: r.id, timestamp: r.timestamp };
      }
    }

    // Optional caption goes out as a follow-up text message (best effort).
    if (text?.trim()) {
      try {
        if (target.type === "c2c") {
          await sendC2CMessage(accessToken, target.id, text, replyToId ?? undefined);
        } else if (target.type === "group") {
          await sendGroupMessage(accessToken, target.id, text, replyToId ?? undefined);
        }
      } catch (textErr) {
        console.error(`[qqbot] Failed to send text after file: ${textErr}`);
      }
    }

    console.log(`[qqbot] sendDocumentFile: file message sent`);
    return { channel: "qqbot", messageId: fileResult.id, timestamp: fileResult.timestamp };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[qqbot] sendDocumentFile: failed: ${message}`);
    return { channel: "qqbot", error: message };
  }
}
/**
 * Audio format policy: controls which formats may skip conversion.
 */
export interface AudioFormatPolicy {
  /**
   * Audio formats the STT model accepts directly (inbound: skip the SILK→WAV conversion).
   * If the STT service can process certain formats as-is (e.g. silk/amr), list them here.
   * Example: [".silk", ".amr", ".wav", ".mp3", ".ogg"]
   * Defaults to empty (every voice message is converted to WAV before STT).
   */
  sttDirectFormats?: string[];
  /**
   * Audio formats the QQ platform accepts for direct upload (outbound: skip the →SILK conversion).
   * Defaults to [".wav", ".mp3", ".silk"] (the three formats the QQ Bot API supports natively).
   * Configure this only when the default needs to be overridden.
   */
  uploadDirectFormats?: string[];
}
/** Extensions (lower-case, leading dot) that isAudioFile recognises. */
const AUDIO_FILE_EXTENSIONS: readonly string[] = [
  ".silk", ".slk", ".amr", ".wav", ".mp3", ".ogg",
  ".opus", ".aac", ".flac", ".m4a", ".wma", ".pcm",
];

/**
 * Tell whether a path looks like an audio file, judged only by its extension
 * (case-insensitive). The file content is not inspected.
 */
export function isAudioFile(filePath: string): boolean {
  return AUDIO_FILE_EXTENSIONS.includes(path.extname(filePath).toLowerCase());
}

// ============ TTS (text-to-speech) ============

/** Fully resolved settings needed to call the speech endpoint. */
export interface TTSConfig {
  /** Endpoint base URL without trailing slashes. */
  baseUrl: string;
  apiKey: string;
  model: string;
  voice: string;
}

/** Narrow an unknown config node to an indexable object, or undefined. */
function asConfigRecord(value: unknown): Record<string, unknown> | undefined {
  return typeof value === "object" && value !== null
    ? (value as Record<string, unknown>)
    : undefined;
}

/** Return the value when it is a non-empty string, otherwise undefined. */
function nonEmptyString(value: unknown): string | undefined {
  return typeof value === "string" && value !== "" ? value : undefined;
}

/**
 * Combine one TTS settings block with its provider defaults.
 * Block values win over provider values; returns null unless both a base URL
 * and an API key can be resolved.
 */
function buildTTSConfig(
  settings: Record<string, unknown> | undefined,
  providerCfg: Record<string, unknown> | undefined,
): TTSConfig | null {
  const baseUrl = nonEmptyString(settings?.baseUrl) ?? nonEmptyString(providerCfg?.baseUrl);
  const apiKey = nonEmptyString(settings?.apiKey) ?? nonEmptyString(providerCfg?.apiKey);
  if (!baseUrl || !apiKey) return null;
  return {
    baseUrl: baseUrl.replace(/\/+$/, ""),
    apiKey,
    model: nonEmptyString(settings?.model) ?? "tts-1",
    voice: nonEmptyString(settings?.voice) ?? "alloy",
  };
}

/**
 * Resolve the TTS configuration with a two-level fallback.
 *
 * 1. `channels.qqbot.tts` (plugin-specific), unless its `enabled` is `false`.
 * 2. `messages.tts` (framework-level), unless its `auto` is `"disabled"`;
 *    per-provider settings are read from `messages.tts[<provider>]`.
 *
 * At either level `provider` (default `"openai"`) selects an entry of
 * `models.providers` that supplies `baseUrl` / `apiKey` when the level itself
 * does not. A level that cannot produce both values is skipped.
 *
 * NOTE(review): a channel block with `enabled: false` still falls through to
 * `messages.tts` — confirm that is the intended meaning of "disabled".
 *
 * @param cfg Root configuration object.
 * @returns The resolved configuration, or null when no level is usable.
 */
export function resolveTTSConfig(cfg: Record<string, unknown>): TTSConfig | null {
  const root = asConfigRecord(cfg);
  const providers = asConfigRecord(asConfigRecord(root?.models)?.providers);

  // Level 1: plugin-specific channel configuration.
  const channelTts = asConfigRecord(asConfigRecord(asConfigRecord(root?.channels)?.qqbot)?.tts);
  if (channelTts && channelTts.enabled !== false) {
    const providerId = nonEmptyString(channelTts.provider) ?? "openai";
    const resolved = buildTTSConfig(channelTts, asConfigRecord(providers?.[providerId]));
    if (resolved) return resolved;
  }

  // Level 2: framework-level configuration.
  const msgTts = asConfigRecord(asConfigRecord(root?.messages)?.tts);
  if (msgTts && msgTts.auto !== "disabled") {
    const providerId = nonEmptyString(msgTts.provider) ?? "openai";
    const resolved = buildTTSConfig(
      asConfigRecord(msgTts[providerId]),
      asConfigRecord(providers?.[providerId]),
    );
    if (resolved) return resolved;
  }

  return null;
}
/**
 * Request speech audio for `text` from `${baseUrl}/audio/speech`.
 *
 * The request asks for raw PCM at 24000 Hz, non-streamed.
 *
 * @returns The response bytes and the sample rate that was requested.
 * @throws Error when the HTTP status is not OK (message includes up to 300
 *         characters of the response body) or when the body is empty.
 */
export async function textToSpeechPCM(
  text: string,
  ttsCfg: TTSConfig,
): Promise<{ pcmBuffer: Buffer; sampleRate: number }> {
  const sampleRate = 24000;

  const resp = await fetch(`${ttsCfg.baseUrl}/audio/speech`, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${ttsCfg.apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: ttsCfg.model,
      input: text,
      voice: ttsCfg.voice,
      response_format: "pcm",
      sample_rate: sampleRate,
      stream: false,
    }),
  });

  if (!resp.ok) {
    const detail = await resp.text().catch(() => "");
    throw new Error(`TTS failed (HTTP ${resp.status}): ${detail.slice(0, 300)}`);
  }

  const pcmBuffer = Buffer.from(await resp.arrayBuffer());
  if (pcmBuffer.length === 0) {
    // Fail here with a clear message instead of handing empty input to the encoder.
    throw new Error("TTS failed: empty audio response");
  }
  return { pcmBuffer, sampleRate };
}

/**
 * Encode PCM audio to SILK with silk-wasm's `encode`.
 *
 * @param pcmBuffer  PCM bytes (presumably s16le mono, as produced elsewhere in this file — TODO confirm).
 * @param sampleRate Sample rate passed through to the encoder.
 * @returns The SILK bytes and the duration reported by the encoder.
 */
export async function pcmToSilk(
  pcmBuffer: Buffer,
  sampleRate: number,
): Promise<{ silkBuffer: Buffer; duration: number }> {
  // Zero-copy views in both directions; only the wrapper type changes.
  const pcmData = new Uint8Array(pcmBuffer.buffer, pcmBuffer.byteOffset, pcmBuffer.byteLength);
  const result = await encode(pcmData, sampleRate);
  return {
    silkBuffer: Buffer.from(result.data.buffer, result.data.byteOffset, result.data.byteLength),
    duration: result.duration,
  };
}

/**
 * Synthesize `text`, encode it as SILK and persist it under `outputDir`.
 *
 * The directory is created if missing. The file name is `tts-<unique>.silk`,
 * where the unique part combines the timestamp, the process id and a random
 * suffix so that concurrent calls never overwrite each other's output.
 *
 * @returns Path of the written file, its Base64 content and the duration.
 */
export async function textToSilk(
  text: string,
  ttsCfg: TTSConfig,
  outputDir: string,
): Promise<{ silkPath: string; silkBase64: string; duration: number }> {
  const { pcmBuffer, sampleRate } = await textToSpeechPCM(text, ttsCfg);
  const { silkBuffer, duration } = await pcmToSilk(pcmBuffer, sampleRate);

  // recursive mkdir is a no-op when the directory already exists.
  await fs.promises.mkdir(outputDir, { recursive: true });
  const uniqueSuffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2, 10)}`;
  const silkPath = path.join(outputDir, `tts-${uniqueSuffix}.silk`);
  await fs.promises.writeFile(silkPath, silkBuffer);

  return { silkPath, silkBase64: silkBuffer.toString("base64"), duration };
}
// ============ Core: any audio file → uploadable Base64 ============

/** Audio formats uploaded as-is by default (no conversion to SILK). */
const QQ_NATIVE_UPLOAD_FORMATS = [".wav", ".mp3", ".silk"];

/**
 * Convert a local audio file into Base64 suitable for a voice upload.
 *
 * Strategy, in order:
 *  0. Extension is in the direct-upload list → return the file bytes as-is.
 *  1. Content starts with a SILK header (checked on the raw bytes and after
 *     stripAmrHeader, regardless of extension) → return the file bytes as-is.
 *  2. ffmpeg is available → decode to PCM s16le 24000 Hz, then encode to SILK.
 *  3. Fallback without ffmpeg (or after an ffmpeg failure): `.pcm` is taken as
 *     s16le 24000 Hz mono; WAV is parsed by parseWavFallback; MP3 goes through
 *     wasmDecodeMp3ToPCM. Because `.mp3` and `.wav` are direct-upload formats
 *     by default, those fallbacks are reached only with a custom format list
 *     (or, for WAV, a RIFF file under another extension).
 *
 * @param filePath            Path of the audio file.
 * @param directUploadFormats Optional override of the direct-upload list;
 *                            passed through normalizeFormats. `undefined`
 *                            selects QQ_NATIVE_UPLOAD_FORMATS.
 * @returns Base64 payload, or null when the file is missing, empty, or cannot
 *          be converted. Read errors other than "not found" are rethrown.
 */
export async function audioFileToSilkBase64(filePath: string, directUploadFormats?: string[]): Promise<string | null> {
  // Single async read: no separate existence probe (avoids a check-then-read
  // race) and no blocking of the event loop.
  let buf: Buffer;
  try {
    buf = await fs.promises.readFile(filePath);
  } catch (err) {
    const code = (err as { code?: unknown } | null)?.code;
    if (code === "ENOENT" || code === "ENOTDIR") return null;
    throw err;
  }

  if (buf.length === 0) {
    console.error(`[audio-convert] file is empty: ${filePath}`);
    return null;
  }

  const ext = path.extname(filePath).toLowerCase();

  // 0. Direct upload by extension (configurable).
  const uploadFormats = directUploadFormats ? normalizeFormats(directUploadFormats) : QQ_NATIVE_UPLOAD_FORMATS;
  if (uploadFormats.includes(ext)) {
    console.log(`[audio-convert] direct upload (QQ native format): ${ext} (${buf.length} bytes)`);
    return buf.toString("base64");
  }

  // 1. SILK detection by header, independent of the extension. This also
  //    covers .slk/.slac files, so no extension-specific branch is needed.
  const rawView = new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength);
  const stripped = stripAmrHeader(buf);
  const strippedView = new Uint8Array(stripped.buffer, stripped.byteOffset, stripped.byteLength);
  if (isSilk(rawView) || isSilk(strippedView)) {
    console.log(`[audio-convert] SILK detected by header: ${filePath} (${buf.length} bytes)`);
    return buf.toString("base64");
  }

  const targetRate = 24000;

  // 2. Preferred path: ffmpeg decodes anything it supports to PCM.
  const hasFfmpeg = await checkFfmpeg();
  if (hasFfmpeg) {
    try {
      console.log(`[audio-convert] ffmpeg: converting ${ext} (${buf.length} bytes) → PCM s16le ${targetRate}Hz`);
      const pcmBuf = await ffmpegToPCM(filePath, targetRate);
      if (pcmBuf.length === 0) {
        console.error(`[audio-convert] ffmpeg produced empty PCM output`);
        return null;
      }
      const { silkBuffer } = await pcmToSilk(pcmBuf, targetRate);
      console.log(`[audio-convert] ffmpeg: ${ext} → SILK done (${silkBuffer.length} bytes)`);
      return silkBuffer.toString("base64");
    } catch (err) {
      console.error(`[audio-convert] ffmpeg conversion failed: ${err instanceof Error ? err.message : String(err)}`);
      // Deliberately no return: fall through to the decoders below.
    }
  }

  // 3. Fallback decoders.
  console.log(`[audio-convert] fallback: trying WASM decoders for ${ext}`);

  // 3a. Raw PCM: taken as s16le, 24000 Hz, mono.
  if (ext === ".pcm") {
    const { silkBuffer } = await pcmToSilk(buf, targetRate);
    return silkBuffer.toString("base64");
  }

  // 3b. WAV: manual parse (standard PCM WAV only).
  if (ext === ".wav" || (buf.length >= 4 && buf.toString("ascii", 0, 4) === "RIFF")) {
    const wavPcm = parseWavFallback(buf);
    if (wavPcm) {
      const { silkBuffer } = await pcmToSilk(wavPcm, targetRate);
      return silkBuffer.toString("base64");
    }
  }

  // 3c. MP3: WASM decode.
  if (ext === ".mp3" || ext === ".mpeg") {
    const pcmBuf = await wasmDecodeMp3ToPCM(buf, targetRate);
    if (pcmBuf) {
      const { silkBuffer } = await pcmToSilk(pcmBuf, targetRate);
      console.log(`[audio-convert] WASM: MP3 → SILK done (${silkBuffer.length} bytes)`);
      return silkBuffer.toString("base64");
    }
  }

  console.error(`[audio-convert] unsupported format: ${ext} (no ffmpeg available). Install ffmpeg for full format support.`);
  return null;
}
/** Clamp a number to the signed 16-bit sample range. */
function clampInt16(value: number): number {
  return Math.max(-32768, Math.min(32767, value));
}

/** Average interleaved s16le channels into one mono channel (whole frames only). */
function downmixToMono16(pcm: Buffer, channels: number): Buffer {
  const frames = Math.floor(pcm.length / (2 * channels));
  const mono = Buffer.alloc(frames * 2);
  for (let i = 0; i < frames; i++) {
    let sum = 0;
    for (let ch = 0; ch < channels; ch++) sum += pcm.readInt16LE((i * channels + ch) * 2);
    mono.writeInt16LE(clampInt16(Math.round(sum / channels)), i * 2);
  }
  return mono;
}

/** Resample mono s16le PCM with simple linear interpolation. */
function resampleLinear16(pcm: Buffer, fromRate: number, toRate: number): Buffer {
  const inSamples = Math.floor(pcm.length / 2);
  const outSamples = Math.round(inSamples * toRate / fromRate);
  const out = Buffer.alloc(outSamples * 2);
  for (let i = 0; i < outSamples; i++) {
    const src = i * fromRate / toRate;
    // Clamp both indices so rounding at the tail never reads past the input.
    const i0 = Math.min(Math.floor(src), inSamples - 1);
    const i1 = Math.min(i0 + 1, inSamples - 1);
    const s0 = pcm.readInt16LE(i0 * 2);
    const s1 = pcm.readInt16LE(i1 * 2);
    out.writeInt16LE(clampInt16(Math.round(s0 + (s1 - s0) * (src - i0))), i * 2);
  }
  return out;
}

/**
 * WAV fallback parser (used when ffmpeg is unavailable).
 *
 * Supports only standard PCM WAV (format tag 1, 16-bit). Chunks are walked in
 * file order, honouring the RIFF pad byte after odd-sized chunks, so files
 * that carry other chunks before or after `fmt ` are handled. Multi-channel
 * audio is averaged down to mono and the result is linearly resampled to
 * 24000 Hz. A trailing partial frame in truncated data is ignored.
 *
 * @param buf Entire WAV file content.
 * @returns Mono s16le PCM at 24000 Hz, or null when the buffer is not a
 *          supported WAV or holds no complete audio frame.
 */
function parseWavFallback(buf: Buffer): Buffer | null {
  const targetRate = 24000;

  if (buf.length < 44) return null;
  if (buf.toString("ascii", 0, 4) !== "RIFF") return null;
  if (buf.toString("ascii", 8, 12) !== "WAVE") return null;

  let channels = 0;
  let sampleRate = 0;
  let haveFormat = false;

  let offset = 12;
  while (offset + 8 <= buf.length) {
    const chunkId = buf.toString("ascii", offset, offset + 4);
    const chunkSize = buf.readUInt32LE(offset + 4);
    const bodyStart = offset + 8;

    if (chunkId === "fmt ") {
      if (chunkSize < 16 || bodyStart + 16 > buf.length) return null;
      if (buf.readUInt16LE(bodyStart) !== 1) return null; // not plain PCM
      channels = buf.readUInt16LE(bodyStart + 2);
      sampleRate = buf.readUInt32LE(bodyStart + 4);
      const bitsPerSample = buf.readUInt16LE(bodyStart + 14);
      // Zero channels / rate would turn the size arithmetic below into NaN or Infinity.
      if (bitsPerSample !== 16 || channels === 0 || sampleRate === 0) return null;
      haveFormat = true;
    } else if (chunkId === "data") {
      if (!haveFormat) return null; // sample layout unknown
      const bodyEnd = Math.min(bodyStart + chunkSize, buf.length);
      const frameBytes = 2 * channels;
      const frames = Math.floor((bodyEnd - bodyStart) / frameBytes);
      if (frames === 0) return null;

      let pcm: Buffer = buf.subarray(bodyStart, bodyStart + frames * frameBytes);
      if (channels > 1) pcm = downmixToMono16(pcm, channels);
      if (sampleRate !== targetRate) pcm = resampleLinear16(pcm, sampleRate, targetRate);
      return pcm;
    }

    // RIFF chunks are word-aligned: an odd-sized body is followed by one pad byte.
    offset = bodyStart + chunkSize + (chunkSize % 2);
  }

  return null;
}
/** Maximum upload size accepted by the QQ Bot API: 20 MB. */
export const MAX_UPLOAD_SIZE = 20 * 1024 * 1024;

/** Files at or above this size (5 MB) get a progress hint before upload. */
export const LARGE_FILE_THRESHOLD = 5 * 1024 * 1024;

/**
 * Outcome of a file size validation.
 */
export interface FileSizeCheckResult {
  /** True when the file exists, is a regular file and is within the limit. */
  ok: boolean;
  /** File size in bytes; 0 when the file could not be inspected. */
  size: number;
  /** Human-readable reason, present when `ok` is false. */
  error?: string;
}

/** Extension (lower-case, leading dot) → MIME type. Built once at module load. */
const MIME_TYPES_BY_EXTENSION: Readonly<Record<string, string>> = {
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".png": "image/png",
  ".gif": "image/gif",
  ".webp": "image/webp",
  ".bmp": "image/bmp",
  ".mp4": "video/mp4",
  ".mov": "video/quicktime",
  ".avi": "video/x-msvideo",
  ".mkv": "video/x-matroska",
  ".webm": "video/webm",
  ".pdf": "application/pdf",
  ".doc": "application/msword",
  ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  ".xls": "application/vnd.ms-excel",
  ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  ".zip": "application/zip",
  ".tar": "application/x-tar",
  ".gz": "application/gzip",
  ".txt": "text/plain",
};

/**
 * Check that a path refers to a regular file within the upload limit.
 *
 * Never throws: any stat failure is reported through the result.
 *
 * @param filePath Path of the file to inspect.
 * @param maxSize  Maximum allowed size in bytes (default 20 MB).
 */
export function checkFileSize(filePath: string, maxSize = MAX_UPLOAD_SIZE): FileSizeCheckResult {
  try {
    const stat = fs.statSync(filePath);
    if (!stat.isFile()) {
      // Directories and other non-regular entries would only fail later at read time.
      return { ok: false, size: 0, error: `无法读取文件信息: 路径不是文件: ${filePath}` };
    }
    if (stat.size > maxSize) {
      const sizeMB = (stat.size / (1024 * 1024)).toFixed(1);
      const limitMB = (maxSize / (1024 * 1024)).toFixed(0);
      return {
        ok: false,
        size: stat.size,
        error: `文件过大 (${sizeMB}MB),QQ Bot API 上传限制为 ${limitMB}MB`,
      };
    }
    return { ok: true, size: stat.size };
  } catch (err) {
    return {
      ok: false,
      size: 0,
      error: `无法读取文件信息: ${err instanceof Error ? err.message : String(err)}`,
    };
  }
}

/**
 * Read a whole file without blocking the event loop.
 */
export async function readFileAsync(filePath: string): Promise<Buffer> {
  return fs.promises.readFile(filePath);
}

/**
 * Resolve to true when the path is accessible for reading, false otherwise.
 */
export async function fileExistsAsync(filePath: string): Promise<boolean> {
  try {
    await fs.promises.access(filePath, fs.constants.R_OK);
    return true;
  } catch {
    return false;
  }
}

/**
 * Resolve to the file size in bytes; rejects when the path cannot be stat'ed.
 */
export async function getFileSizeAsync(filePath: string): Promise<number> {
  const stat = await fs.promises.stat(filePath);
  return stat.size;
}

/**
 * Tell whether a size counts as "large", i.e. at or above LARGE_FILE_THRESHOLD.
 */
export function isLargeFile(sizeBytes: number): boolean {
  return sizeBytes >= LARGE_FILE_THRESHOLD;
}

/**
 * Format a byte count as `NB`, `N.NKB` or `N.NMB` (1024-based).
 */
export function formatFileSize(bytes: number): string {
  if (bytes < 1024) return `${bytes}B`;
  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)}KB`;
  return `${(bytes / (1024 * 1024)).toFixed(1)}MB`;
}

/**
 * Look up the MIME type for a path by its extension (case-insensitive).
 * Unknown extensions map to `application/octet-stream`.
 */
export function getMimeType(filePath: string): string {
  const ext = path.extname(filePath).toLowerCase();
  return MIME_TYPES_BY_EXTENSION[ext] ?? "application/octet-stream";
}
/**
 * Rich-media tag preprocessing and repair.
 *
 * Small models frequently misspell or malform media tags; this module rewrites
 * the common variants into the canonical form before the real tag matching runs.
 */

// Canonical tag names.
const VALID_TAGS = ["qqimg", "qqvoice", "qqvideo", "qqfile"] as const;

type ValidTag = (typeof VALID_TAGS)[number];

// Alias → canonical name (all keys lower-case).
const TAG_ALIASES: Readonly<Record<string, ValidTag>> = {
  // ---- qqimg variants ----
  "qq_img": "qqimg",
  "qqimage": "qqimg",
  "qq_image": "qqimg",
  "qqpic": "qqimg",
  "qq_pic": "qqimg",
  "qqpicture": "qqimg",
  "qq_picture": "qqimg",
  "qqphoto": "qqimg",
  "qq_photo": "qqimg",
  "img": "qqimg",
  "image": "qqimg",
  "pic": "qqimg",
  "picture": "qqimg",
  "photo": "qqimg",
  // ---- qqvoice variants ----
  "qq_voice": "qqvoice",
  "qqaudio": "qqvoice",
  "qq_audio": "qqvoice",
  "voice": "qqvoice",
  "audio": "qqvoice",
  // ---- qqvideo variants ----
  "qq_video": "qqvideo",
  "video": "qqvideo",
  // ---- qqfile variants ----
  "qq_file": "qqfile",
  "qqdoc": "qqfile",
  "qq_doc": "qqfile",
  "file": "qqfile",
  "doc": "qqfile",
  "document": "qqfile",
};

// Every recognised name (canonical + aliases), longest first so that the regex
// alternation prefers e.g. "qqimg" over its substring "img".
const ALL_TAG_NAMES: string[] = [...VALID_TAGS, ...Object.keys(TAG_ALIASES)].sort(
  (a, b) => b.length - a.length,
);

const TAG_NAME_PATTERN = ALL_TAG_NAMES.join("|");

// ASCII or full-width (U+FF1C / U+FF1E) angle brackets, written as escapes so
// the characters survive editors and transports that normalise them.
const OPEN_BRACKET = "[<\\uFF1C]";
const CLOSE_BRACKET = "[>\\uFF1E]";

/**
 * Tolerant matcher for malformed media tags. It accepts:
 *  1. misspelled / aliased tag names, e.g. `<qq_image>`, `<image>`, `<pic>`;
 *  2. extra spaces inside the tag, e.g. `< qqimg >`;
 *  3. a closing tag whose name differs from the opening one, e.g. `<qqimg>url</img>`;
 *  4. a closing tag without the slash, e.g. `<qqimg>url<qqimg>`;
 *  5. full-width angle brackets in place of `<` / `>`;
 *  6. a single or double quote directly around the content;
 *  7. one Markdown backtick on either side of the whole tag.
 *
 * Capture 1 is the raw opening tag name, capture 2 the content.
 */
const FUZZY_MEDIA_TAG_REGEX = new RegExp(
  // optional leading Markdown backtick
  "`?" +
    // opening tag
    OPEN_BRACKET + "\\s*(" + TAG_NAME_PATTERN + ")\\s*" + CLOSE_BRACKET +
    // content: lazy, optionally quoted
    "[\"']?\\s*" +
    "([^<\\uFF1C>\\uFF1E\"'`]+?)" +
    "\\s*[\"']?" +
    // closing tag: slash optional, any recognised name
    OPEN_BRACKET + "\\s*/?\\s*(?:" + TAG_NAME_PATTERN + ")\\s*" + CLOSE_BRACKET +
    // optional trailing Markdown backtick
    "`?",
  "gi",
);

/**
 * Map a raw tag name (any case) to its canonical name.
 * Names that are neither canonical nor a known alias map to "qqimg".
 */
function resolveTagName(raw: string): ValidTag {
  const lower = raw.toLowerCase();
  if ((VALID_TAGS as readonly string[]).includes(lower)) {
    return lower as ValidTag;
  }
  return TAG_ALIASES[lower] ?? "qqimg";
}

/**
 * Rewrite malformed rich-media tags in LLM output into the canonical form
 * `<tag>content</tag>`, e.g. `<qq_image>"/a.png"<img>` → `<qqimg>/a.png</qqimg>`.
 *
 * @param text Raw LLM output.
 * @returns The text with every recognised tag normalised; text without any
 *          recognisable tag is returned unchanged.
 */
export function normalizeMediaTags(text: string): string {
  return text.replace(FUZZY_MEDIA_TAG_REGEX, (match, rawTag: string, content: string) => {
    const trimmed = content.trim();
    if (!trimmed) return match; // whitespace-only content: leave untouched
    const tag = resolveTagName(rawTag);
    // Always emit a well-formed pair, including the closing tag.
    return `<${tag}>${trimmed}</${tag}>`;
  });
}
interface CacheEntry {
  fileInfo: string;
  fileUuid: string;
  /** Expiry timestamp (ms since epoch); earlier than the API-reported expiry. */
  expiresAt: number;
}

// In-memory cache; key format: `${contentHash}:${scope}:${targetId}:${fileType}`.
const cache = new Map<string, CacheEntry>();

// Upper bound on the number of entries, so the cache cannot grow without limit.
const MAX_CACHE_SIZE = 500;

// Entries expire this many seconds before the API-reported TTL runs out.
const SAFETY_MARGIN_SECONDS = 60;

// Lower bound for the shortened TTL (never applied beyond the real TTL).
const MIN_EFFECTIVE_TTL_SECONDS = 10;

/**
 * Compute the MD5 hex digest used as the content part of the cache key.
 *
 * The argument is hashed as given: a string (e.g. Base64 data) is hashed as
 * text, a Buffer as raw bytes. A file path is NOT read — pass the content.
 */
export function computeFileHash(data: string | Buffer): string {
  return crypto.createHash("md5").update(data).digest("hex");
}

/**
 * Build the cache key.
 * @param contentHash - content hash of the file
 * @param scope - "c2c" | "group"
 * @param targetId - user openid or group openid
 * @param fileType - 1=IMAGE, 2=VIDEO, 3=VOICE, 4=FILE
 */
function buildCacheKey(contentHash: string, scope: string, targetId: string, fileType: number): string {
  return `${contentHash}:${scope}:${targetId}:${fileType}`;
}

/**
 * Look up a cached file_info.
 * @returns The file_info string, or null on a miss or when the entry has
 *          expired (expired entries are removed on access).
 */
export function getCachedFileInfo(
  contentHash: string,
  scope: "c2c" | "group",
  targetId: string,
  fileType: number,
): string | null {
  const key = buildCacheKey(contentHash, scope, targetId, fileType);
  const entry = cache.get(key);

  if (!entry) return null;

  if (Date.now() >= entry.expiresAt) {
    cache.delete(key);
    return null;
  }

  console.log(`[upload-cache] Cache HIT: key=${key.slice(0, 40)}..., fileUuid=${entry.fileUuid}`);
  return entry.fileInfo;
}

/**
 * Store an upload result.
 *
 * The entry lives for `ttl - 60` seconds, but at least 10 seconds and never
 * longer than `ttl` itself, so a cached file_info is not served after the API
 * has expired it. A non-positive or non-finite `ttl` is not cached at all.
 *
 * When the cache is full, expired entries are purged first; if it is still
 * full, the oldest-inserted half is dropped.
 *
 * @param ttl - TTL in seconds as returned by the upload API
 */
export function setCachedFileInfo(
  contentHash: string,
  scope: "c2c" | "group",
  targetId: string,
  fileType: number,
  fileInfo: string,
  fileUuid: string,
  ttl: number,
): void {
  if (!Number.isFinite(ttl) || ttl <= 0) return;

  if (cache.size >= MAX_CACHE_SIZE) {
    // Lazy purge of expired entries.
    const now = Date.now();
    for (const [k, v] of cache) {
      if (now >= v.expiresAt) cache.delete(k);
    }
    // Still full: drop the oldest half (Map iterates in insertion order).
    if (cache.size >= MAX_CACHE_SIZE) {
      let remaining = Math.ceil(cache.size / 2);
      for (const k of cache.keys()) {
        if (remaining-- <= 0) break;
        cache.delete(k);
      }
    }
  }

  const key = buildCacheKey(contentHash, scope, targetId, fileType);
  const effectiveTtl = Math.min(
    ttl,
    Math.max(ttl - SAFETY_MARGIN_SECONDS, MIN_EFFECTIVE_TTL_SECONDS),
  );

  cache.set(key, {
    fileInfo,
    fileUuid,
    expiresAt: Date.now() + effectiveTtl * 1000,
  });

  console.log(`[upload-cache] Cache SET: key=${key.slice(0, 40)}..., ttl=${effectiveTtl}s, uuid=${fileUuid}`);
}

/**
 * Report the current entry count and the configured maximum.
 */
export function getUploadCacheStats(): { size: number; maxSize: number } {
  return { size: cache.size, maxSize: MAX_CACHE_SIZE };
}

/**
 * Remove every cached entry.
 */
export function clearUploadCache(): void {
  cache.clear();
  console.log(`[upload-cache] Cache cleared`);
}