fix: allow memory flush model override

This commit is contained in:
Peter Steinberger
2026-04-28 05:25:42 +01:00
parent dc3df62e67
commit 540cbe24be
18 changed files with 186 additions and 3 deletions

View File

@@ -46,6 +46,7 @@ Docs: https://docs.openclaw.ai
- Export/session: keep inline export HTML scripts and vendor libraries injected after template formatting so generated session exports open with the app code, markdown renderer, and syntax highlighter present. Fixes #41862 and #49957; carries forward #41861 and #68947. Thanks @briannewman, @martenzi, and @armanddp.
- Agents/ACPX: stage the patched Claude ACP adapter as an ACPX runtime dependency and route known Codex/Claude ACP commands through local wrappers, so Gateway runtime no longer depends on live `npx` adapter resolution. Fixes #73202. Thanks @joerod26.
- Memory/compaction: let pre-compaction memory flush use an exact `agents.defaults.compaction.memoryFlush.model` override such as `ollama/qwen3:8b` without inheriting the active session fallback chain, so local housekeeping can avoid paid conversation models. Fixes #53772. Thanks @limen96.
- Gateway/hooks: route non-delivered hook completion and error summaries to the target agent's main session instead of the default agent session, preserving multi-agent hook isolation. Fixes #24693; carries forward #68667. Thanks @abersonFAC and @bluesky6868.
- Control UI/models: request the configured Gateway model-list view so dashboards with only `models.providers.*.models` show those configured models first instead of flooding the picker with the full built-in catalog. Fixes #65405. Thanks @wbyanclaw.
- CLI/models: keep default-model and allowlist pickers on explicit `models.providers.*.models` entries when `models.mode` is `replace` instead of loading the full built-in catalog. Fixes #64950. Thanks @mrozentsvayg.

View File

@@ -132,7 +132,23 @@ By default, compaction runs silently. Set `notifyUser` to show brief status mess
### Memory flush
Before compaction, OpenClaw can run a **silent memory flush** turn to store durable notes to disk. See [Memory](/concepts/memory) for details and config.
Before compaction, OpenClaw can run a **silent memory flush** turn to store durable notes to disk. Set `agents.defaults.compaction.memoryFlush.model` when this housekeeping turn should use a local model instead of the active conversation model:
```json
{
"agents": {
"defaults": {
"compaction": {
"memoryFlush": {
"model": "ollama/qwen3:8b"
}
}
}
}
}
```
The memory-flush model override is exact and does not inherit the active session fallback chain. See [Memory](/concepts/memory) for details and config.
## Pluggable compaction providers

View File

@@ -110,6 +110,26 @@ Before [compaction](/concepts/compaction) summarizes your conversation, OpenClaw
runs a silent turn that reminds the agent to save important context to memory
files. This is on by default — you do not need to configure anything.
To keep that housekeeping turn on a local model, set an exact memory-flush model
override:
```json
{
"agents": {
"defaults": {
"compaction": {
"memoryFlush": {
"model": "ollama/qwen3:8b"
}
}
}
}
}
```
The override applies only to the memory-flush turn and does not inherit the
active session fallback chain.
<Tip>
The memory flush prevents context loss during compaction. If your agent has
important facts in the conversation that are not yet written to a file, they

View File

@@ -559,6 +559,7 @@ Periodic heartbeat runs.
notifyUser: true, // send brief notices when compaction starts and completes (default: false)
memoryFlush: {
enabled: true,
model: "ollama/qwen3:8b", // optional memory-flush-only model override
softThresholdTokens: 6000,
systemPrompt: "Session nearing compaction. Store durable memories now.",
prompt: "Write any lasting notes to memory/YYYY-MM-DD.md; reply with the exact silent token NO_REPLY if nothing to store.",
@@ -580,7 +581,7 @@ Periodic heartbeat runs.
- `model`: optional `provider/model-id` override for compaction summarization only. Use this when the main session should keep one model but compaction summaries should run on another; when unset, compaction uses the session's primary model.
- `maxActiveTranscriptBytes`: optional byte threshold (`number` or strings like `"20mb"`) that triggers normal local compaction before a run when the active JSONL grows past the threshold. Requires `truncateAfterCompaction` so successful compaction can rotate to a smaller successor transcript. Disabled when unset or `0`.
- `notifyUser`: when `true`, sends brief notices to the user when compaction starts and when it completes (for example, "Compacting context..." and "Compaction complete"). Disabled by default to keep compaction silent.
- `memoryFlush`: silent agentic turn before auto-compaction to store durable memories. Skipped when workspace is read-only.
- `memoryFlush`: silent agentic turn before auto-compaction to store durable memories. Set `model` to an exact provider/model such as `ollama/qwen3:8b` when this housekeeping turn should stay on a local model; the override does not inherit the active session fallback chain. Skipped when workspace is read-only.
### `agents.defaults.contextPruning`

View File

@@ -273,6 +273,9 @@ AI CLI backend such as `codex-cli`.
memory plugin's private layout.
- `registerMemoryPromptSection`, `registerMemoryFlushPlan`, and
`registerMemoryRuntime` are legacy-compatible exclusive memory-plugin APIs.
- `MemoryFlushPlan.model` can pin the flush turn to an exact `provider/model`
reference, such as `ollama/qwen3:8b`, without inheriting the active fallback
chain.
- `registerMemoryEmbeddingProvider` lets the active memory plugin register one
or more embedding adapter ids (for example `openai`, `gemini`, or a custom
plugin-defined id).

View File

@@ -381,6 +381,7 @@ OpenClaw uses the **pre-threshold flush** approach:
Config (`agents.defaults.compaction.memoryFlush`):
- `enabled` (default: `true`)
- `model` (optional exact provider/model override for the flush turn, for example `ollama/qwen3:8b`)
- `softThresholdTokens` (default: `4000`)
- `prompt` (user message for the flush turn)
- `systemPrompt` (extra system prompt appended for the flush turn)
@@ -389,6 +390,9 @@ Notes:
- The default prompt/system prompt include a `NO_REPLY` hint to suppress
delivery.
- When `model` is set, the flush turn uses that model without inheriting the
active session fallback chain, so local-only housekeeping does not silently
fall back to a paid conversation model.
- The flush runs once per compaction cycle (tracked in `sessions.json`).
- The flush runs only for embedded Pi sessions (CLI backends skip it).
- The flush is skipped when the session workspace is read-only (`workspaceAccess: "ro"` or `"none"`).

View File

@@ -134,6 +134,24 @@ describe("buildMemoryFlushPlan", () => {
).toBeNull();
});
it("carries configured memory flush model override", () => {
  // The exact maintenance-model override configured under
  // agents.defaults.compaction.memoryFlush must surface on the plan as-is.
  const overrideModel = "ollama/qwen3:8b";
  const cfg = {
    agents: {
      defaults: {
        compaction: {
          memoryFlush: { model: overrideModel },
        },
      },
    },
  };
  const flushPlan = buildMemoryFlushPlan({ cfg });
  expect(flushPlan?.model).toBe(overrideModel);
});
it("falls back to defaults when numeric values are invalid", () => {
const plan = buildMemoryFlushPlan({
cfg: {

View File

@@ -132,6 +132,7 @@ export function buildMemoryFlushPlan(
softThresholdTokens,
forceFlushTranscriptBytes,
reserveTokensFloor,
model: defaults?.model?.trim() || undefined,
prompt: appendCurrentTimeLine(promptBase.replaceAll("YYYY-MM-DD", dateStamp), timeLine),
systemPrompt: systemPrompt.replaceAll("YYYY-MM-DD", dateStamp),
relativePath,

View File

@@ -176,6 +176,68 @@ describe("runMemoryFlushIfNeeded", () => {
expect(persisted.main.memoryFlushAt).toBe(1_700_000_000_000);
});
// The flush turn must run on the exact configured maintenance model and must
// not inherit the session's primary model or its fallback chain.
it("runs memory flush on the configured maintenance model without active fallbacks", async () => {
  // Stub the plan resolver so the flush plan pins a local model.
  registerMemoryFlushPlanResolver(() => ({
    softThresholdTokens: 4_000,
    forceFlushTranscriptBytes: 1_000_000_000,
    reserveTokensFloor: 20_000,
    model: "ollama/qwen3:8b",
    prompt: "Pre-compaction memory flush.\nNO_REPLY",
    systemPrompt: "Write memory to memory/YYYY-MM-DD.md.",
    relativePath: "memory/2023-11-14.md",
  }));
  // Session already past the soft threshold so the flush is eligible to run.
  const sessionEntry: SessionEntry = {
    sessionId: "session",
    updatedAt: Date.now(),
    totalTokens: 80_000,
    compactionCount: 1,
  };
  await runMemoryFlushIfNeeded({
    // Config deliberately carries a paid primary model plus a fallback so the
    // assertions below prove neither leaks into the flush turn.
    cfg: {
      agents: {
        defaults: {
          model: {
            primary: "anthropic/claude",
            fallbacks: ["openai/gpt-5.4"],
          },
          compaction: {
            memoryFlush: {
              model: "ollama/qwen3:8b",
            },
          },
        },
      },
    },
    followupRun: createTestFollowupRun({ provider: "anthropic", model: "claude" }),
    sessionCtx: { Provider: "whatsapp" } as unknown as TemplateContext,
    defaultModel: "anthropic/claude",
    agentCfgContextTokens: 100_000,
    resolvedVerboseLevel: "off",
    sessionEntry,
    sessionStore: { main: sessionEntry },
    sessionKey: "main",
    isHeartbeat: false,
    replyOperation: createReplyOperation(),
  });
  // Fallback resolution must pin the override and clear the fallback chain.
  expect(runWithModelFallbackMock).toHaveBeenCalledWith(
    expect.objectContaining({
      provider: "ollama",
      model: "qwen3:8b",
      fallbacksOverride: [],
    }),
  );
  // The embedded agent run inherits the pinned model with no auth profile.
  expect(runEmbeddedPiAgentMock).toHaveBeenCalledWith(
    expect.objectContaining({
      provider: "ollama",
      model: "qwen3:8b",
      authProfileId: undefined,
      authProfileIdSource: undefined,
    }),
  );
});
it("skips memory flush for CLI providers", async () => {
const sessionEntry: SessionEntry = {
sessionId: "session",

View File

@@ -123,6 +123,38 @@ export function resolveEffectivePromptTokens(
return base + output + estimate;
}
/**
 * Resolve model-fallback options for the pre-compaction memory-flush turn.
 *
 * When `model` is set it is treated as an exact pin: the returned options
 * carry an empty `fallbacksOverride` so a failing local maintenance model
 * never silently falls through to the (possibly paid) active conversation
 * fallback chain. Without an override, the regular session options apply.
 *
 * @param run            Follow-up run whose base fallback options to resolve.
 * @param model          Optional exact `provider/model` reference (e.g.
 *                       `ollama/qwen3:8b`).
 * @param configOverride Config used for resolution; defaults to `run.config`.
 */
export function resolveMemoryFlushModelFallbackOptions(
  run: FollowupRun["run"],
  model?: string,
  configOverride: FollowupRun["run"]["config"] = run.config,
) {
  const baseOptions = resolveModelFallbackOptions(run, configOverride);
  const exactOverride = normalizeOptionalString(model);
  if (!exactOverride) {
    return baseOptions;
  }
  // Split on the FIRST slash only, so model ids that themselves contain
  // slashes (e.g. "openrouter/meta/llama") keep their full model part.
  const separator = exactOverride.indexOf("/");
  if (separator > 0) {
    const pinnedProvider = exactOverride.slice(0, separator).trim();
    const pinnedModel = exactOverride.slice(separator + 1).trim();
    if (pinnedProvider && pinnedModel) {
      return {
        ...baseOptions,
        provider: pinnedProvider,
        model: pinnedModel,
        fallbacksOverride: [],
      };
    }
  }
  // No usable provider prefix: pin the raw reference as the model id and
  // still suppress the session fallback chain.
  return {
    ...baseOptions,
    model: exactOverride,
    fallbacksOverride: [],
  };
}
export type SessionTranscriptUsageSnapshot = {
promptTokens?: number;
outputTokens?: number;
@@ -796,7 +828,11 @@ export async function runMemoryFlushIfNeeded(params: {
let postCompactionSessionFile: string | undefined;
try {
await memoryDeps.runWithModelFallback({
...resolveModelFallbackOptions(params.followupRun.run),
...resolveMemoryFlushModelFallbackOptions(
params.followupRun.run,
activeMemoryFlushPlan.model,
params.cfg,
),
runId: flushRunId,
run: async (provider, model, runOptions) => {
const { embeddedContext, senderContext, runBaseParams } = buildEmbeddedRunExecutionParams({

View File

@@ -28,6 +28,7 @@ describe("config compaction settings", () => {
},
memoryFlush: {
enabled: false,
model: "ollama/qwen3:8b",
softThresholdTokens: 1234,
prompt: "Write notes.",
systemPrompt: "Flush memory now.",
@@ -44,6 +45,7 @@ describe("config compaction settings", () => {
expect(compaction?.qualityGuard?.enabled).toBe(true);
expect(compaction?.qualityGuard?.maxRetries).toBe(2);
expect(compaction?.memoryFlush?.enabled).toBe(false);
expect(compaction?.memoryFlush?.model).toBe("ollama/qwen3:8b");
expect(compaction?.memoryFlush?.softThresholdTokens).toBe(1234);
expect(compaction?.memoryFlush?.prompt).toBe("Write notes.");
expect(compaction?.memoryFlush?.systemPrompt).toBe("Flush memory now.");

View File

@@ -5010,6 +5010,12 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
description:
"Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.",
},
model: {
type: "string",
title: "Compaction Memory Flush Model Override",
description:
"Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. The override is exact and does not inherit the active model fallback chain.",
},
softThresholdTokens: {
type: "integer",
minimum: 0,
@@ -27030,6 +27036,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
help: "Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.",
tags: ["advanced"],
},
"agents.defaults.compaction.memoryFlush.model": {
label: "Compaction Memory Flush Model Override",
help: "Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. The override is exact and does not inherit the active model fallback chain.",
tags: ["models"],
},
"agents.defaults.compaction.memoryFlush.softThresholdTokens": {
label: "Compaction Memory Flush Soft Threshold",
help: "Threshold distance to compaction (in tokens) that triggers pre-compaction memory flush execution. Use earlier thresholds for safer persistence, or tighter thresholds for lower flush frequency.",

View File

@@ -399,6 +399,7 @@ const TARGET_KEYS = [
"agents.defaults.compaction.maxActiveTranscriptBytes",
"agents.defaults.compaction.memoryFlush",
"agents.defaults.compaction.memoryFlush.enabled",
"agents.defaults.compaction.memoryFlush.model",
"agents.defaults.compaction.memoryFlush.softThresholdTokens",
"agents.defaults.compaction.memoryFlush.prompt",
"agents.defaults.compaction.memoryFlush.systemPrompt",

View File

@@ -1291,6 +1291,8 @@ export const FIELD_HELP: Record<string, string> = {
"Pre-compaction memory flush settings that run an agentic memory write before heavy compaction. Keep enabled for long sessions so salient context is persisted before aggressive trimming.",
"agents.defaults.compaction.memoryFlush.enabled":
"Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.",
"agents.defaults.compaction.memoryFlush.model":
"Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. The override is exact and does not inherit the active model fallback chain.",
"agents.defaults.compaction.memoryFlush.softThresholdTokens":
"Threshold distance to compaction (in tokens) that triggers pre-compaction memory flush execution. Use earlier thresholds for safer persistence, or tighter thresholds for lower flush frequency.",
"agents.defaults.compaction.memoryFlush.forceFlushTranscriptBytes":

View File

@@ -609,6 +609,7 @@ export const FIELD_LABELS: Record<string, string> = {
"agents.defaults.compaction.notifyUser": "Compaction Notify User",
"agents.defaults.compaction.memoryFlush": "Compaction Memory Flush",
"agents.defaults.compaction.memoryFlush.enabled": "Compaction Memory Flush Enabled",
"agents.defaults.compaction.memoryFlush.model": "Compaction Memory Flush Model Override",
"agents.defaults.compaction.memoryFlush.softThresholdTokens":
"Compaction Memory Flush Soft Threshold",
"agents.defaults.compaction.memoryFlush.forceFlushTranscriptBytes":

View File

@@ -493,6 +493,8 @@ export type AgentCompactionConfig = {
export type AgentCompactionMemoryFlushConfig = {
/** Enable the pre-compaction memory flush (default: true). */
enabled?: boolean;
/** Optional provider/model override used only for pre-compaction memory flush turns. */
model?: string;
/** Run the memory flush when context is within this many tokens of the compaction threshold. */
softThresholdTokens?: number;
/**

View File

@@ -189,6 +189,7 @@ export const AgentDefaultsSchema = z
memoryFlush: z
.object({
enabled: z.boolean().optional(),
model: z.string().optional(),
softThresholdTokens: z.number().int().nonnegative().optional(),
forceFlushTranscriptBytes: NonNegativeByteSizeSchema.optional(),
prompt: z.string().optional(),

View File

@@ -68,6 +68,7 @@ export type MemoryFlushPlan = {
softThresholdTokens: number;
forceFlushTranscriptBytes: number;
reserveTokensFloor: number;
model?: string;
prompt: string;
systemPrompt: string;
relativePath: string;