mirror of
https://fastgit.cc/github.com/openclaw/openclaw
synced 2026-04-30 14:02:56 +08:00
fix: allow memory flush model override
This commit is contained in:
@@ -46,6 +46,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Export/session: keep inline export HTML scripts and vendor libraries injected after template formatting so generated session exports open with the app code, markdown renderer, and syntax highlighter present. Fixes #41862 and #49957; carries forward #41861 and #68947. Thanks @briannewman, @martenzi, and @armanddp.
|
||||
- Agents/ACPX: stage the patched Claude ACP adapter as an ACPX runtime dependency and route known Codex/Claude ACP commands through local wrappers, so Gateway runtime no longer depends on live `npx` adapter resolution. Fixes #73202. Thanks @joerod26.
|
||||
- Memory/compaction: let pre-compaction memory flush use an exact `agents.defaults.compaction.memoryFlush.model` override such as `ollama/qwen3:8b` without inheriting the active session fallback chain, so local housekeeping can avoid paid conversation models. Fixes #53772. Thanks @limen96.
|
||||
- Gateway/hooks: route non-delivered hook completion and error summaries to the target agent's main session instead of the default agent session, preserving multi-agent hook isolation. Fixes #24693; carries forward #68667. Thanks @abersonFAC and @bluesky6868.
|
||||
- Control UI/models: request the configured Gateway model-list view so dashboards with only `models.providers.*.models` show those configured models first instead of flooding the picker with the full built-in catalog. Fixes #65405. Thanks @wbyanclaw.
|
||||
- CLI/models: keep default-model and allowlist pickers on explicit `models.providers.*.models` entries when `models.mode` is `replace` instead of loading the full built-in catalog. Fixes #64950. Thanks @mrozentsvayg.
|
||||
|
||||
@@ -132,7 +132,23 @@ By default, compaction runs silently. Set `notifyUser` to show brief status mess
|
||||
|
||||
### Memory flush
|
||||
|
||||
Before compaction, OpenClaw can run a **silent memory flush** turn to store durable notes to disk. See [Memory](/concepts/memory) for details and config.
|
||||
Before compaction, OpenClaw can run a **silent memory flush** turn to store durable notes to disk. Set `agents.defaults.compaction.memoryFlush.model` when this housekeeping turn should use a local model instead of the active conversation model:
|
||||
|
||||
```json
|
||||
{
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"compaction": {
|
||||
"memoryFlush": {
|
||||
"model": "ollama/qwen3:8b"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The memory-flush model override is exact and does not inherit the active session fallback chain. See [Memory](/concepts/memory) for details and config.
|
||||
|
||||
## Pluggable compaction providers
|
||||
|
||||
|
||||
@@ -110,6 +110,26 @@ Before [compaction](/concepts/compaction) summarizes your conversation, OpenClaw
|
||||
runs a silent turn that reminds the agent to save important context to memory
|
||||
files. This is on by default — you do not need to configure anything.
|
||||
|
||||
To keep that housekeeping turn on a local model, set an exact memory-flush model
|
||||
override:
|
||||
|
||||
```json
|
||||
{
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"compaction": {
|
||||
"memoryFlush": {
|
||||
"model": "ollama/qwen3:8b"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The override applies only to the memory-flush turn and does not inherit the
|
||||
active session fallback chain.
|
||||
|
||||
<Tip>
|
||||
The memory flush prevents context loss during compaction. If your agent has
|
||||
important facts in the conversation that are not yet written to a file, they
|
||||
|
||||
@@ -559,6 +559,7 @@ Periodic heartbeat runs.
|
||||
notifyUser: true, // send brief notices when compaction starts and completes (default: false)
|
||||
memoryFlush: {
|
||||
enabled: true,
|
||||
model: "ollama/qwen3:8b", // optional memory-flush-only model override
|
||||
softThresholdTokens: 6000,
|
||||
systemPrompt: "Session nearing compaction. Store durable memories now.",
|
||||
prompt: "Write any lasting notes to memory/YYYY-MM-DD.md; reply with the exact silent token NO_REPLY if nothing to store.",
|
||||
@@ -580,7 +581,7 @@ Periodic heartbeat runs.
|
||||
- `model`: optional `provider/model-id` override for compaction summarization only. Use this when the main session should keep one model but compaction summaries should run on another; when unset, compaction uses the session's primary model.
|
||||
- `maxActiveTranscriptBytes`: optional byte threshold (`number` or strings like `"20mb"`) that triggers normal local compaction before a run when the active JSONL grows past the threshold. Requires `truncateAfterCompaction` so successful compaction can rotate to a smaller successor transcript. Disabled when unset or `0`.
|
||||
- `notifyUser`: when `true`, sends brief notices to the user when compaction starts and when it completes (for example, "Compacting context..." and "Compaction complete"). Disabled by default to keep compaction silent.
|
||||
- `memoryFlush`: silent agentic turn before auto-compaction to store durable memories. Skipped when workspace is read-only.
|
||||
- `memoryFlush`: silent agentic turn before auto-compaction to store durable memories. Set `model` to an exact provider/model such as `ollama/qwen3:8b` when this housekeeping turn should stay on a local model; the override does not inherit the active session fallback chain. Skipped when workspace is read-only.
|
||||
|
||||
### `agents.defaults.contextPruning`
|
||||
|
||||
|
||||
@@ -273,6 +273,9 @@ AI CLI backend such as `codex-cli`.
|
||||
memory plugin's private layout.
|
||||
- `registerMemoryPromptSection`, `registerMemoryFlushPlan`, and
|
||||
`registerMemoryRuntime` are legacy-compatible exclusive memory-plugin APIs.
|
||||
- `MemoryFlushPlan.model` can pin the flush turn to an exact `provider/model`
|
||||
reference, such as `ollama/qwen3:8b`, without inheriting the active fallback
|
||||
chain.
|
||||
- `registerMemoryEmbeddingProvider` lets the active memory plugin register one
|
||||
or more embedding adapter ids (for example `openai`, `gemini`, or a custom
|
||||
plugin-defined id).
|
||||
|
||||
@@ -381,6 +381,7 @@ OpenClaw uses the **pre-threshold flush** approach:
|
||||
Config (`agents.defaults.compaction.memoryFlush`):
|
||||
|
||||
- `enabled` (default: `true`)
|
||||
- `model` (optional exact provider/model override for the flush turn, for example `ollama/qwen3:8b`)
|
||||
- `softThresholdTokens` (default: `4000`)
|
||||
- `prompt` (user message for the flush turn)
|
||||
- `systemPrompt` (extra system prompt appended for the flush turn)
|
||||
@@ -389,6 +390,9 @@ Notes:
|
||||
|
||||
- The default prompt/system prompt include a `NO_REPLY` hint to suppress
|
||||
delivery.
|
||||
- When `model` is set, the flush turn uses that model without inheriting the
|
||||
active session fallback chain, so local-only housekeeping does not silently
|
||||
fall back to a paid conversation model.
|
||||
- The flush runs once per compaction cycle (tracked in `sessions.json`).
|
||||
- The flush runs only for embedded Pi sessions (CLI backends skip it).
|
||||
- The flush is skipped when the session workspace is read-only (`workspaceAccess: "ro"` or `"none"`).
|
||||
|
||||
@@ -134,6 +134,24 @@ describe("buildMemoryFlushPlan", () => {
|
||||
).toBeNull();
|
||||
});
|
||||
|
||||
it("carries configured memory flush model override", () => {
  // The override configured under agents.defaults.compaction.memoryFlush.model
  // is expected to appear on the resulting plan as `plan.model`.
  const flushModel = "ollama/qwen3:8b";

  const plan = buildMemoryFlushPlan({
    cfg: {
      agents: {
        defaults: {
          compaction: {
            memoryFlush: { model: flushModel },
          },
        },
      },
    },
  });

  expect(plan?.model).toBe(flushModel);
});
|
||||
|
||||
it("falls back to defaults when numeric values are invalid", () => {
|
||||
const plan = buildMemoryFlushPlan({
|
||||
cfg: {
|
||||
|
||||
@@ -132,6 +132,7 @@ export function buildMemoryFlushPlan(
|
||||
softThresholdTokens,
|
||||
forceFlushTranscriptBytes,
|
||||
reserveTokensFloor,
|
||||
model: defaults?.model?.trim() || undefined,
|
||||
prompt: appendCurrentTimeLine(promptBase.replaceAll("YYYY-MM-DD", dateStamp), timeLine),
|
||||
systemPrompt: systemPrompt.replaceAll("YYYY-MM-DD", dateStamp),
|
||||
relativePath,
|
||||
|
||||
@@ -176,6 +176,68 @@ describe("runMemoryFlushIfNeeded", () => {
|
||||
expect(persisted.main.memoryFlushAt).toBe(1_700_000_000_000);
|
||||
});
|
||||
|
||||
it("runs memory flush on the configured maintenance model without active fallbacks", async () => {
  // Register a flush plan that pins the flush turn to a provider/model reference.
  registerMemoryFlushPlanResolver(() => ({
    softThresholdTokens: 4_000,
    forceFlushTranscriptBytes: 1_000_000_000,
    reserveTokensFloor: 20_000,
    model: "ollama/qwen3:8b",
    prompt: "Pre-compaction memory flush.\nNO_REPLY",
    systemPrompt: "Write memory to memory/YYYY-MM-DD.md.",
    relativePath: "memory/2023-11-14.md",
  }));

  const entry: SessionEntry = {
    sessionId: "session",
    updatedAt: Date.now(),
    totalTokens: 80_000,
    compactionCount: 1,
  };

  // The active conversation runs on anthropic/claude with an openai fallback;
  // the assertions below check that neither is used for the flush turn.
  await runMemoryFlushIfNeeded({
    cfg: {
      agents: {
        defaults: {
          model: {
            primary: "anthropic/claude",
            fallbacks: ["openai/gpt-5.4"],
          },
          compaction: {
            memoryFlush: {
              model: "ollama/qwen3:8b",
            },
          },
        },
      },
    },
    followupRun: createTestFollowupRun({ provider: "anthropic", model: "claude" }),
    sessionCtx: { Provider: "whatsapp" } as unknown as TemplateContext,
    defaultModel: "anthropic/claude",
    agentCfgContextTokens: 100_000,
    resolvedVerboseLevel: "off",
    sessionEntry: entry,
    sessionStore: { main: entry },
    sessionKey: "main",
    isHeartbeat: false,
    replyOperation: createReplyOperation(),
  });

  // Provider/model pair both mocks are expected to receive.
  const pinnedTarget = { provider: "ollama", model: "qwen3:8b" };

  // The fallback runner must get the pinned target and an empty fallback list.
  expect(runWithModelFallbackMock).toHaveBeenCalledWith(
    expect.objectContaining({
      ...pinnedTarget,
      fallbacksOverride: [],
    }),
  );
  // The embedded agent run must get the pinned target with no auth profile set.
  expect(runEmbeddedPiAgentMock).toHaveBeenCalledWith(
    expect.objectContaining({
      ...pinnedTarget,
      authProfileId: undefined,
      authProfileIdSource: undefined,
    }),
  );
});
|
||||
|
||||
it("skips memory flush for CLI providers", async () => {
|
||||
const sessionEntry: SessionEntry = {
|
||||
sessionId: "session",
|
||||
|
||||
@@ -123,6 +123,38 @@ export function resolveEffectivePromptTokens(
|
||||
return base + output + estimate;
|
||||
}
|
||||
|
||||
/**
 * Resolves the model-fallback options used for the pre-compaction memory-flush turn.
 *
 * Starts from `resolveModelFallbackOptions(run, configOverride)`. When `model`
 * normalizes to an empty value, those base options are returned unchanged.
 * Otherwise the override is applied and `fallbacksOverride` is set to `[]`:
 * - `provider/model-id` (non-empty text on both sides of the first `/`)
 *   replaces both `provider` and `model`;
 * - any other non-empty value replaces only `model`, leaving the remaining
 *   base options untouched.
 *
 * @param run - Follow-up run whose fallback options form the baseline.
 * @param model - Optional memory-flush model override, e.g. `ollama/qwen3:8b`.
 * @param configOverride - Config passed to `resolveModelFallbackOptions`; defaults to `run.config`.
 * @returns The base options, or a copy with the override applied and an empty fallback list.
 */
export function resolveMemoryFlushModelFallbackOptions(
  run: FollowupRun["run"],
  model?: string,
  configOverride: FollowupRun["run"]["config"] = run.config,
) {
  const baseOptions = resolveModelFallbackOptions(run, configOverride);
  const pinned = normalizeOptionalString(model);
  if (!pinned) {
    return baseOptions;
  }

  // A memory-flush maintenance model is an exact override: clearing the fallback
  // list keeps a failed local flush from silently falling through to the paid
  // active conversation fallback.
  const separatorAt = pinned.indexOf("/");
  const hasProviderPrefix = separatorAt > 0;
  const providerPart = hasProviderPrefix ? pinned.slice(0, separatorAt).trim() : "";
  const modelPart = hasProviderPrefix ? pinned.slice(separatorAt + 1).trim() : "";

  if (providerPart && modelPart) {
    return {
      ...baseOptions,
      provider: providerPart,
      model: modelPart,
      fallbacksOverride: [],
    };
  }

  // No usable `provider/` prefix: pass the normalized override through as the model id.
  return {
    ...baseOptions,
    model: pinned,
    fallbacksOverride: [],
  };
}
|
||||
|
||||
export type SessionTranscriptUsageSnapshot = {
|
||||
promptTokens?: number;
|
||||
outputTokens?: number;
|
||||
@@ -796,7 +828,11 @@ export async function runMemoryFlushIfNeeded(params: {
|
||||
let postCompactionSessionFile: string | undefined;
|
||||
try {
|
||||
await memoryDeps.runWithModelFallback({
|
||||
...resolveModelFallbackOptions(params.followupRun.run),
|
||||
...resolveMemoryFlushModelFallbackOptions(
|
||||
params.followupRun.run,
|
||||
activeMemoryFlushPlan.model,
|
||||
params.cfg,
|
||||
),
|
||||
runId: flushRunId,
|
||||
run: async (provider, model, runOptions) => {
|
||||
const { embeddedContext, senderContext, runBaseParams } = buildEmbeddedRunExecutionParams({
|
||||
|
||||
@@ -28,6 +28,7 @@ describe("config compaction settings", () => {
|
||||
},
|
||||
memoryFlush: {
|
||||
enabled: false,
|
||||
model: "ollama/qwen3:8b",
|
||||
softThresholdTokens: 1234,
|
||||
prompt: "Write notes.",
|
||||
systemPrompt: "Flush memory now.",
|
||||
@@ -44,6 +45,7 @@ describe("config compaction settings", () => {
|
||||
expect(compaction?.qualityGuard?.enabled).toBe(true);
|
||||
expect(compaction?.qualityGuard?.maxRetries).toBe(2);
|
||||
expect(compaction?.memoryFlush?.enabled).toBe(false);
|
||||
expect(compaction?.memoryFlush?.model).toBe("ollama/qwen3:8b");
|
||||
expect(compaction?.memoryFlush?.softThresholdTokens).toBe(1234);
|
||||
expect(compaction?.memoryFlush?.prompt).toBe("Write notes.");
|
||||
expect(compaction?.memoryFlush?.systemPrompt).toBe("Flush memory now.");
|
||||
|
||||
@@ -5010,6 +5010,12 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
description:
|
||||
"Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.",
|
||||
},
|
||||
model: {
|
||||
type: "string",
|
||||
title: "Compaction Memory Flush Model Override",
|
||||
description:
|
||||
"Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. The override is exact and does not inherit the active model fallback chain.",
|
||||
},
|
||||
softThresholdTokens: {
|
||||
type: "integer",
|
||||
minimum: 0,
|
||||
@@ -27030,6 +27036,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
help: "Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.",
|
||||
tags: ["advanced"],
|
||||
},
|
||||
"agents.defaults.compaction.memoryFlush.model": {
|
||||
label: "Compaction Memory Flush Model Override",
|
||||
help: "Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. The override is exact and does not inherit the active model fallback chain.",
|
||||
tags: ["models"],
|
||||
},
|
||||
"agents.defaults.compaction.memoryFlush.softThresholdTokens": {
|
||||
label: "Compaction Memory Flush Soft Threshold",
|
||||
help: "Threshold distance to compaction (in tokens) that triggers pre-compaction memory flush execution. Use earlier thresholds for safer persistence, or tighter thresholds for lower flush frequency.",
|
||||
|
||||
@@ -399,6 +399,7 @@ const TARGET_KEYS = [
|
||||
"agents.defaults.compaction.maxActiveTranscriptBytes",
|
||||
"agents.defaults.compaction.memoryFlush",
|
||||
"agents.defaults.compaction.memoryFlush.enabled",
|
||||
"agents.defaults.compaction.memoryFlush.model",
|
||||
"agents.defaults.compaction.memoryFlush.softThresholdTokens",
|
||||
"agents.defaults.compaction.memoryFlush.prompt",
|
||||
"agents.defaults.compaction.memoryFlush.systemPrompt",
|
||||
|
||||
@@ -1291,6 +1291,8 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"Pre-compaction memory flush settings that run an agentic memory write before heavy compaction. Keep enabled for long sessions so salient context is persisted before aggressive trimming.",
|
||||
"agents.defaults.compaction.memoryFlush.enabled":
|
||||
"Enables pre-compaction memory flush before the runtime performs stronger history reduction near token limits. Keep enabled unless you intentionally disable memory side effects in constrained environments.",
|
||||
"agents.defaults.compaction.memoryFlush.model":
|
||||
"Optional provider/model override used only for pre-compaction memory flush turns. Set this to a local model such as ollama/qwen3:8b when durable memory extraction should avoid the active session's paid model. The override is exact and does not inherit the active model fallback chain.",
|
||||
"agents.defaults.compaction.memoryFlush.softThresholdTokens":
|
||||
"Threshold distance to compaction (in tokens) that triggers pre-compaction memory flush execution. Use earlier thresholds for safer persistence, or tighter thresholds for lower flush frequency.",
|
||||
"agents.defaults.compaction.memoryFlush.forceFlushTranscriptBytes":
|
||||
|
||||
@@ -609,6 +609,7 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"agents.defaults.compaction.notifyUser": "Compaction Notify User",
|
||||
"agents.defaults.compaction.memoryFlush": "Compaction Memory Flush",
|
||||
"agents.defaults.compaction.memoryFlush.enabled": "Compaction Memory Flush Enabled",
|
||||
"agents.defaults.compaction.memoryFlush.model": "Compaction Memory Flush Model Override",
|
||||
"agents.defaults.compaction.memoryFlush.softThresholdTokens":
|
||||
"Compaction Memory Flush Soft Threshold",
|
||||
"agents.defaults.compaction.memoryFlush.forceFlushTranscriptBytes":
|
||||
|
||||
@@ -493,6 +493,8 @@ export type AgentCompactionConfig = {
|
||||
export type AgentCompactionMemoryFlushConfig = {
|
||||
/** Enable the pre-compaction memory flush (default: true). */
|
||||
enabled?: boolean;
|
||||
/** Optional provider/model override used only for pre-compaction memory flush turns. */
|
||||
model?: string;
|
||||
/** Run the memory flush when context is within this many tokens of the compaction threshold. */
|
||||
softThresholdTokens?: number;
|
||||
/**
|
||||
|
||||
@@ -189,6 +189,7 @@ export const AgentDefaultsSchema = z
|
||||
memoryFlush: z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
model: z.string().optional(),
|
||||
softThresholdTokens: z.number().int().nonnegative().optional(),
|
||||
forceFlushTranscriptBytes: NonNegativeByteSizeSchema.optional(),
|
||||
prompt: z.string().optional(),
|
||||
|
||||
@@ -68,6 +68,7 @@ export type MemoryFlushPlan = {
|
||||
softThresholdTokens: number;
|
||||
forceFlushTranscriptBytes: number;
|
||||
reserveTokensFloor: number;
|
||||
model?: string;
|
||||
prompt: string;
|
||||
systemPrompt: string;
|
||||
relativePath: string;
|
||||
|
||||
Reference in New Issue
Block a user