From 4da25d0125efbb4a617df06088847c4a0dbfca3d Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 25 Apr 2026 03:17:10 +0100 Subject: [PATCH] fix: keep session store live during rotation --- CHANGELOG.md | 1 + src/config/sessions/store-maintenance.ts | 15 ++++--- src/config/sessions/store.pruning.test.ts | 48 ++++++++++++++++++++++- 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab788646d01..dbf2f67ca36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,7 @@ Docs: https://docs.openclaw.ai - Providers/OpenAI: separate API-key and Codex sign-in onboarding groups, and avoid replaying stale OpenAI Responses reasoning blocks after a model route switch. - Browser/config: expand `~` in `browser.executablePath` before Chromium launch, so home-relative custom browser paths no longer fail with `ENOENT`. Fixes #67264. Thanks @Quratulain-bilal. - Telegram/streaming: hide tool-progress status updates by default while keeping explicit `streaming.preview.toolProgress` opt-in support for edited preview messages. Fixes #71320. Thanks @neeravmakwana. +- Gateway/sessions: copy the oversized `sessions.json` to a rotation backup before the atomic rewrite instead of renaming the live store away, so a crash during rotation keeps the existing session-to-transcript mapping authoritative. Fixes #68229. Thanks @jjjojoj. - Discord/subagents: preserve thread-bound completion delivery by keeping the requester-agent announce path primary and falling back to direct thread sends only when the announce produces no visible output. (#71064) Thanks @DolencLuka. - Browser/tool: give Chrome MCP existing-session manage calls a longer default timeout, pass explicit tool timeouts through tab management, and recover stale selected-page MCP sessions instead of forcing a manual reset. Thanks @steipete. - Browser/sandbox: clean up idle tracked tabs opened by primary-agent browser sessions, while preserving active tab reuse and lifecycle cleanup for subagents, cron, and ACP sessions. Fixes #71165. Thanks @dwbutler. diff --git a/src/config/sessions/store-maintenance.ts b/src/config/sessions/store-maintenance.ts index 2d9c316859b..8b64bce67e9 100644 --- a/src/config/sessions/store-maintenance.ts +++ b/src/config/sessions/store-maintenance.ts @@ -317,7 +317,7 @@ async function getSessionFileSize(storePath: string): Promise { /** * Rotate the sessions file if it exceeds the configured size threshold. - * Renames the current file to `sessions.json.bak.{timestamp}` and cleans up + * Copies the current file to `sessions.json.bak.{timestamp}` and cleans up * old rotation backups, keeping only the 3 most recent `.bak.*` files. */ export async function rotateSessionFile( @@ -336,16 +336,19 @@ export async function rotateSessionFile( return false; } - // Rotate: rename current file to .bak.{timestamp} + // Keep the live store authoritative until the caller's later atomic write succeeds. + // A rename would remove sessions.json and create a crash window where startup sees + // an empty store; a copy gives us a backup without changing the live file. const backupPath = `${storePath}.bak.${Date.now()}`; try { - await fs.promises.rename(storePath, backupPath); - log.info("rotated session store file", { + await fs.promises.copyFile(storePath, backupPath); + log.info("backed up session store file before rotation", { backupPath: path.basename(backupPath), sizeBytes: fileSize, }); - } catch { - // If rename fails (e.g. file disappeared), skip rotation. + } catch (err) { + // If backup creation fails (e.g. file disappeared), skip rotation backup only. + log.warn("session store rotation backup failed", { err }); return false; } diff --git a/src/config/sessions/store.pruning.test.ts b/src/config/sessions/store.pruning.test.ts index 5ddf943e671..575fa905308 100644 --- a/src/config/sessions/store.pruning.test.ts +++ b/src/config/sessions/store.pruning.test.ts @@ -7,6 +7,7 @@ import { resolveMaintenanceConfigFromInput } from "./store-maintenance.js"; import { capEntryCount, getActiveSessionMaintenanceWarning, + loadSessionStore, pruneStaleEntries, rotateSessionFile, } from "./store.js"; @@ -135,14 +136,14 @@ describe("rotateSessionFile", () => { storePath = path.join(testDir, "sessions.json"); }); - it("file over maxBytes: renamed to .bak.{timestamp}, returns true", async () => { + it("file over maxBytes: copies to .bak.{timestamp}, returns true", async () => { const bigContent = "x".repeat(200); await fs.writeFile(storePath, bigContent, "utf-8"); const rotated = await rotateSessionFile(storePath, 100); expect(rotated).toBe(true); - await expect(fs.stat(storePath)).rejects.toThrow(); + await expect(fs.readFile(storePath, "utf-8")).resolves.toBe(bigContent); const files = await fs.readdir(testDir); const bakFiles = files.filter((f) => f.startsWith("sessions.json.bak.")); expect(bakFiles).toHaveLength(1); @@ -150,6 +151,49 @@ describe("rotateSessionFile", () => { expect(bakContent).toBe(bigContent); }); + it("keeps live sessions readable if rotation is interrupted before the final save", async () => { + const store = makeStore([["group:telegram:1", makeEntry(Date.now())]]); + await fs.writeFile(storePath, JSON.stringify(store, null, 2), "utf-8"); + + const rotated = await rotateSessionFile(storePath, 10); + const loaded = loadSessionStore(storePath, { + skipCache: true, + maintenanceConfig: { + mode: "enforce", + pruneAfterMs: DAY_MS, + maxEntries: 100, + rotateBytes: 1024 * 1024, + resetArchiveRetentionMs: null, + maxDiskBytes: null, + highWaterBytes: null, + }, + }); + + expect(rotated).toBe(true); + expect(loaded["group:telegram:1"]?.sessionId).toBe(store["group:telegram:1"].sessionId); + }); + + it("keeps an empty live store authoritative when stale backups exist", async () => { + const staleStore = makeStore([["stale", makeEntry(Date.now())]]); + await fs.writeFile(`${storePath}.bak.${Date.now()}`, JSON.stringify(staleStore), "utf-8"); + await fs.writeFile(storePath, "{}", "utf-8"); + + const loaded = loadSessionStore(storePath, { + skipCache: true, + maintenanceConfig: { + mode: "enforce", + pruneAfterMs: DAY_MS, + maxEntries: 100, + rotateBytes: 1024 * 1024, + resetArchiveRetentionMs: null, + maxDiskBytes: null, + highWaterBytes: null, + }, + }); + + expect(loaded).toEqual({}); + }); + it("multiple rotations: only keeps 3 most recent .bak files", async () => { let now = Date.now(); const nowSpy = vi.spyOn(Date, "now").mockImplementation(() => (now += 5));