fix: allow memory flush model override

This commit is contained in:
Peter Steinberger
2026-04-28 05:25:42 +01:00
parent dc3df62e67
commit 540cbe24be
18 changed files with 186 additions and 3 deletions

View File

@@ -46,6 +46,7 @@ Docs: https://docs.openclaw.ai
- Export/session: keep inline export HTML scripts and vendor libraries injected after template formatting so generated session exports open with the app code, markdown renderer, and syntax highlighter present. Fixes #41862 and #49957; carries forward #41861 and #68947. Thanks @briannewman, @martenzi, and @armanddp.
- Agents/ACPX: stage the patched Claude ACP adapter as an ACPX runtime dependency and route known Codex/Claude ACP commands through local wrappers, so Gateway runtime no longer depends on live `npx` adapter resolution. Fixes #73202. Thanks @joerod26.
- Memory/compaction: let pre-compaction memory flush use an exact `agents.defaults.compaction.memoryFlush.model` override such as `ollama/qwen3:8b` without inheriting the active session fallback chain, so local housekeeping can avoid paid conversation models. Fixes #53772. Thanks @limen96.
- Gateway/hooks: route non-delivered hook completion and error summaries to the target agent's main session instead of the default agent session, preserving multi-agent hook isolation. Fixes #24693; carries forward #68667. Thanks @abersonFAC and @bluesky6868.
- Control UI/models: request the configured Gateway model-list view so dashboards with only `models.providers.*.models` show those configured models first instead of flooding the picker with the full built-in catalog. Fixes #65405. Thanks @wbyanclaw.
- CLI/models: keep default-model and allowlist pickers on explicit `models.providers.*.models` entries when `models.mode` is `replace` instead of loading the full built-in catalog. Fixes #64950. Thanks @mrozentsvayg.

View File

@@ -132,7 +132,23 @@ By default, compaction runs silently. Set `notifyUser` to show brief status mess
### Memory flush
Before compaction, OpenClaw can run a **silent memory flush** turn to store durable notes to disk. See [Memory](/concepts/memory) for details and config.
Before compaction, OpenClaw can run a **silent memory flush** turn to store durable notes to disk. Set `agents.defaults.compaction.memoryFlush.model` when this housekeeping turn should use a local model instead of the active conversation model:
```json
{
"agents": {
"defaults": {
"compaction": {
"memoryFlush": {
"model": "ollama/qwen3:8b"
}
}
}
}
}
```
The memory-flush model override is exact and does not inherit the active session fallback chain. See [Memory](/concepts/memory) for details and config.
## Pluggable compaction providers

View File

@@ -110,6 +110,26 @@ Before [compaction](/concepts/compaction) summarizes your conversation, OpenClaw
runs a silent turn that reminds the agent to save important context to memory
files. This is on by default — you do not need to configure anything.
To keep that housekeeping turn on a local model, set an exact memory-flush model
override:
```json
{
"agents": {
"defaults": {
"compaction": {
"memoryFlush": {
"model": "ollama/qwen3:8b"
}
}
}
}
}
```
The override applies only to the memory-flush turn and does not inherit the
active session fallback chain.
<Tip>
The memory flush prevents context loss during compaction. If your agent has
important facts in the conversation that are not yet written to a file, they

View File

@@ -559,6 +559,7 @@ Periodic heartbeat runs.
notifyUser: true, // send brief notices when compaction starts and completes (default: false)
memoryFlush: {
enabled: true,
model: "ollama/qwen3:8b", // optional memory-flush-only model override
softThresholdTokens: 6000,
systemPrompt: "Session nearing compaction. Store durable memories now.",
prompt: "Write any lasting notes to memory/YYYY-MM-DD.md; reply with the exact silent token NO_REPLY if nothing to store.",
@@ -580,7 +581,7 @@ Periodic heartbeat runs.
- `model`: optional `provider/model-id` override for compaction summarization only. Use this when the main session should keep one model but compaction summaries should run on another; when unset, compaction uses the session's primary model.
- `maxActiveTranscriptBytes`: optional byte threshold (`number` or strings like `"20mb"`) that triggers normal local compaction before a run when the active JSONL grows past the threshold. Requires `truncateAfterCompaction` so successful compaction can rotate to a smaller successor transcript. Disabled when unset or `0`.
- `notifyUser`: when `true`, sends brief notices to the user when compaction starts and when it completes (for example, "Compacting context..." and "Compaction complete"). Disabled by default to keep compaction silent.
- `memoryFlush`: silent agentic turn before auto-compaction to store durable memories. Skipped when workspace is read-only.
- `memoryFlush`: silent agentic turn before auto-compaction to store durable memories. Set `model` to an exact provider/model such as `ollama/qwen3:8b` when this housekeeping turn should stay on a local model; the override does not inherit the active session fallback chain. Skipped when workspace is read-only.
### `agents.defaults.contextPruning`

View File

@@ -273,6 +273,9 @@ AI CLI backend such as `codex-cli`.
memory plugin's private layout.
- `registerMemoryPromptSection`, `registerMemoryFlushPlan`, and
`registerMemoryRuntime` are legacy-compatible exclusive memory-plugin APIs.
- `MemoryFlushPlan.model` can pin the flush turn to an exact `provider/model`
reference, such as `ollama/qwen3:8b`, without inheriting the active fallback
chain.
- `registerMemoryEmbeddingProvider` lets the active memory plugin register one
or more embedding adapter ids (for example `openai`, `gemini`, or a custom
plugin-defined id).

View File

@@ -381,6 +381,7 @@ OpenClaw uses the **pre-threshold flush** approach:
Config (`agents.defaults.compaction.memoryFlush`):
- `enabled` (default: `true`)
- `model` (optional exact provider/model override for the flush turn, for example `ollama/qwen3:8b`)
- `softThresholdTokens` (default: `4000`)
- `prompt` (user message for the flush turn)
- `systemPrompt` (extra system prompt appended for the flush turn)
@@ -389,6 +390,9 @@ Notes:
- The default prompt/system prompt include a `NO_REPLY` hint to suppress
delivery.
- When `model` is set, the flush turn uses that model without inheriting the
active session fallback chain, so local-only housekeeping does not silently
fall back to a paid conversation model.
- The flush runs once per compaction cycle (tracked in `sessions.json`).
- The flush runs only for embedded Pi sessions (CLI backends skip it).
- The flush is skipped when the session workspace is read-only (`workspaceAccess: "ro"` or `"none"`).

View File

@@ -134,6 +134,24 @@ describe("buildMemoryFlushPlan", () => {
).toBeNull();
});
it("carries configured memory flush model override", () => {
  // The exact maintenance-model override configured under
  // agents.defaults.compaction.memoryFlush must surface on the plan as-is.
  const overrideModel = "ollama/qwen3:8b";
  const cfg = {
    agents: {
      defaults: {
        compaction: {
          memoryFlush: { model: overrideModel },
        },
      },
    },
  };
  const flushPlan = buildMemoryFlushPlan({ cfg });
  expect(flushPlan?.model).toBe(overrideModel);
});
it("falls back to defaults when numeric values are invalid", () => {
const plan = buildMemoryFlushPlan({
cfg: {

View File

@@ -132,6 +132,7 @@ export function buildMemoryFlushPlan(
softThresholdTokens,
forceFlushTranscriptBytes,
reserveTokensFloor,
model: defaults?.model?.trim() || undefined,
prompt: appendCurrentTimeLine(promptBase.replaceAll("YYYY-MM-DD", dateStamp), timeLine),
systemPrompt: systemPrompt.replaceAll("YYYY-MM-DD", dateStamp),
relativePath,

View File

@@ -176,6 +176,68 @@ describe("runMemoryFlushIfNeeded", () => {
expect(persisted.main.memoryFlushAt).toBe(1_700_000_000_000);
});
// The flush turn must run on the exact configured maintenance model and must
// not inherit the session's primary model or its fallback chain.
it("runs memory flush on the configured maintenance model without active fallbacks", async () => {
  // Stub the plan resolver so the flush plan pins a local model.
  registerMemoryFlushPlanResolver(() => ({
    softThresholdTokens: 4_000,
    forceFlushTranscriptBytes: 1_000_000_000,
    reserveTokensFloor: 20_000,
    model: "ollama/qwen3:8b",
    prompt: "Pre-compaction memory flush.\nNO_REPLY",
    systemPrompt: "Write memory to memory/YYYY-MM-DD.md.",
    relativePath: "memory/2023-11-14.md",
  }));
  // Session already past the soft threshold so the flush is eligible to run.
  const sessionEntry: SessionEntry = {
    sessionId: "session",
    updatedAt: Date.now(),
    totalTokens: 80_000,
    compactionCount: 1,
  };
  await runMemoryFlushIfNeeded({
    // Config deliberately carries a paid primary model plus a fallback so the
    // assertions below prove neither leaks into the flush turn.
    cfg: {
      agents: {
        defaults: {
          model: {
            primary: "anthropic/claude",
            fallbacks: ["openai/gpt-5.4"],
          },
          compaction: {
            memoryFlush: {
              model: "ollama/qwen3:8b",
            },
          },
        },
      },
    },
    followupRun: createTestFollowupRun({ provider: "anthropic", model: "claude" }),
    sessionCtx: { Provider: "whatsapp" } as unknown as TemplateContext,
    defaultModel: "anthropic/claude",
    agentCfgContextTokens: 100_000,
    resolvedVerboseLevel: "off",
    sessionEntry,
    sessionStore: { main: sessionEntry },
    sessionKey: "main",
    isHeartbeat: false,
    replyOperation: createReplyOperation(),
  });
  // Fallback resolution must pin the override and clear the fallback chain.
  expect(runWithModelFallbackMock).toHaveBeenCalledWith(
    expect.objectContaining({
      provider: "ollama",
      model: "qwen3:8b",
      fallbacksOverride: [],
    }),
  );
  // The embedded agent run inherits the pinned model with no auth profile.
  expect(runEmbeddedPiAgentMock).toHaveBeenCalledWith(
    expect.objectContaining({
      provider: "ollama",
      model: "qwen3:8b",
      authProfileId: undefined,
      authProfileIdSource: undefined,
    }),
  );
});
it("skips memory flush for CLI providers", async () => {
const sessionEntry: SessionEntry = {
sessionId: "session",

View File

@@ -123,6 +123,38 @@ export function resolveEffectivePromptTokens(
return base + output + estimate;
}
/**
 * Resolve model-fallback options for the pre-compaction memory-flush turn.
 *
 * When `model` is set it is treated as an exact pin: the returned options
 * carry an empty `fallbacksOverride` so a failing local maintenance model
 * never silently falls through to the (possibly paid) active conversation
 * fallback chain. Without an override, the regular session options apply.
 *
 * @param run            Follow-up run whose base fallback options to resolve.
 * @param model          Optional exact `provider/model` reference (e.g.
 *                       `ollama/qwen3:8b`).
 * @param configOverride Config used for resolution; defaults to `run.config`.
 */
export function resolveMemoryFlushModelFallbackOptions(
  run: FollowupRun["run"],
  model?: string,
  configOverride: FollowupRun["run"]["config"] = run.config,
) {
  const baseOptions = resolveModelFallbackOptions(run, configOverride);
  const exactOverride = normalizeOptionalString(model);
  if (!exactOverride) {
    return baseOptions;
  }
  // Split on the FIRST slash only, so model ids that themselves contain
  // slashes (e.g. "openrouter/meta/llama") keep their full model part.
  const separator = exactOverride.indexOf("/");
  if (separator > 0) {
    const pinnedProvider = exactOverride.slice(0, separator).trim();
    const pinnedModel = exactOverride.slice(separator + 1).trim();
    if (pinnedProvider && pinnedModel) {
      return {
        ...baseOptions,
        provider: pinnedProvider,
        model: pinnedModel,
        fallbacksOverride: [],
      };
    }
  }
  // No usable provider prefix: pin the raw reference as the model id and
  // still suppress the session fallback chain.
  return {
    ...baseOptions,
    model: exactOverride,
    fallbacksOverride: [],
  };
}
export type SessionTranscriptUsageSnapshot = {
promptTokens?: number;
outputTokens?: number;
@@ -796,7 +828,11 @@ export async function runMemoryFlushIfNeeded(params: {
let postCompactionSessionFile: string | undefined;
try {
await memoryDeps.runWithModelFallback({
...resolveModelFallbackOptions(params.followupRun.run),
...resolveMemoryFlushModelFallbackOptions(
params.followupRun.run,
activeMemoryFlushPlan.model,
params.cfg,
),
runId: flushRunId,
run: async (provider, model, runOptions) => {
const { embeddedContext, senderContext, runBaseParams } = buildEmbeddedRunExecutionParams({

View File

@@ -28,6 +28,7 @@ describe("config compaction settings", () => {
},
memoryFlush: {
enabled: false,
model: "ollama/qwen3:8b",
softThresholdTokens: 1234,
prompt: "Write notes.",
systemPrompt: "Flush memory now.",
@@ -44,6 +45,7 @@ describe("config compaction settings", () => {
expect(compaction?.qualityGuard?.enabled).toBe(true);
expect(compaction?.qualityGuard?.maxRetries).toBe(2);
expect(compaction?.memoryFlush?.enabled).toBe(false);
expect(compaction?.memoryFlush?.model).toBe("ollama/qwen3:8b");
expect(compaction?.memoryFlush?.softThresholdTokens).toBe(1234);
expect(compaction?.memoryFlush?.prompt).toBe("Write notes.");
expect(compaction?.memoryFlush?.systemPrompt).toBe("Flush memory now.");

View File

@@ -5010,6 +5010,12 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
description:
"Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.",
},
model: {
type: "string",
title: "Compaction Memory Flush Model Override",
description:
"Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. The override is exact and does not inherit the active model fallback chain.",
},
softThresholdTokens: {
type: "integer",
minimum: 0,
@@ -27030,6 +27036,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
help: "Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.",
tags: ["advanced"],
},
"agents.defaults.compaction.memoryFlush.model": {
label: "Compaction Memory Flush Model Override",
help: "Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. The override is exact and does not inherit the active model fallback chain.",
tags: ["models"],
},
"agents.defaults.compaction.memoryFlush.softThresholdTokens": {
label: "Compaction Memory Flush Soft Threshold",
help: "Threshold distance to compaction (in tokens) that triggers pre-compaction memory flush execution. Use earlier thresholds for safer persistence, or tighter thresholds for lower flush frequency.",

View File

@@ -399,6 +399,7 @@ const TARGET_KEYS = [
"agents.defaults.compaction.maxActiveTranscriptBytes",
"agents.defaults.compaction.memoryFlush",
"agents.defaults.compaction.memoryFlush.enabled",
"agents.defaults.compaction.memoryFlush.model",
"agents.defaults.compaction.memoryFlush.softThresholdTokens",
"agents.defaults.compaction.memoryFlush.prompt",
"agents.defaults.compaction.memoryFlush.systemPrompt",

View File

@@ -1291,6 +1291,8 @@ export const FIELD_HELP: Record<string, string> = {
"Pre-compaction memory flush settings that run an agentic memory write before heavy compaction. Keep enabled for long sessions so salient context is persisted before aggressive trimming.",
"agents.defaults.compaction.memoryFlush.enabled":
"Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.",
"agents.defaults.compaction.memoryFlush.model":
"Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. The override is exact and does not inherit the active model fallback chain.",
"agents.defaults.compaction.memoryFlush.softThresholdTokens":
"Threshold distance to compaction (in tokens) that triggers pre-compaction memory flush execution. Use earlier thresholds for safer persistence, or tighter thresholds for lower flush frequency.",
"agents.defaults.compaction.memoryFlush.forceFlushTranscriptBytes":

View File

@@ -609,6 +609,7 @@ export const FIELD_LABELS: Record<string, string> = {
"agents.defaults.compaction.notifyUser": "Compaction Notify User",
"agents.defaults.compaction.memoryFlush": "Compaction Memory Flush",
"agents.defaults.compaction.memoryFlush.enabled": "Compaction Memory Flush Enabled",
"agents.defaults.compaction.memoryFlush.model": "Compaction Memory Flush Model Override",
"agents.defaults.compaction.memoryFlush.softThresholdTokens":
"Compaction Memory Flush Soft Threshold",
"agents.defaults.compaction.memoryFlush.forceFlushTranscriptBytes":

View File

@@ -493,6 +493,8 @@ export type AgentCompactionConfig = {
export type AgentCompactionMemoryFlushConfig = {
/** Enable the pre-compaction memory flush (default: true). */
enabled?: boolean;
/** Optional provider/model override used only for pre-compaction memory flush turns. */
model?: string;
/** Run the memory flush when context is within this many tokens of the compaction threshold. */
softThresholdTokens?: number;
/**

View File

@@ -189,6 +189,7 @@ export const AgentDefaultsSchema = z
memoryFlush: z
.object({
enabled: z.boolean().optional(),
model: z.string().optional(),
softThresholdTokens: z.number().int().nonnegative().optional(),
forceFlushTranscriptBytes: NonNegativeByteSizeSchema.optional(),
prompt: z.string().optional(),

View File

@@ -68,6 +68,7 @@ export type MemoryFlushPlan = {
softThresholdTokens: number;
forceFlushTranscriptBytes: number;
reserveTokensFloor: number;
model?: string;
prompt: string;
systemPrompt: string;
relativePath: string;