diff --git a/CHANGELOG.md b/CHANGELOG.md index 9387f5a94ce..ace9d907b27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -71,6 +71,7 @@ Docs: https://docs.openclaw.ai - Agents/Claude: treat zero-token empty `stop` turns as failed provider output, retry once, repair replay, and allow configured model fallback instead of preserving them as successful silent replies. Fixes #71880. Thanks @MagnaAI. +- Tasks: normalize task lifecycle timestamps at create, update, and restore time, and report retained lost tasks as audit warnings until their cleanup window expires. (#71871) Thanks @likewen-tech. - Diagnostics/OTEL: treat normal early model stream cleanup as a completed model call instead of exporting a misleading `StreamAbandoned` error span. Thanks @vincentkoc. - Gateway/pairing: stop corrupt or unreadable device/node pairing stores from being treated as empty state, preserving `paired.json` for repair instead of overwriting approved pairings. Fixes #71873. Thanks @iret77. - ACP: keep `/acp` management commands, plus local `/status` and `/unfocus`, on the Gateway path inside ACP-bound threads so they are not consumed as ACP prompt text. Fixes #66298. Thanks @kindomLee. diff --git a/docs/automation/tasks.md b/docs/automation/tasks.md index 73c567ace40..686cfc057ab 100644 --- a/docs/automation/tasks.md +++ b/docs/automation/tasks.md @@ -194,14 +194,14 @@ openclaw tasks audit [--json] Surfaces operational issues. Findings also appear in `openclaw status` when issues are detected. -| Finding | Severity | Trigger | -| ------------------------- | -------- | ----------------------------------------------------- | -| `stale_queued` | warn | Queued for more than 10 minutes | -| `stale_running` | error | Running for more than 30 minutes | -| `lost` | error | Runtime-backed task ownership disappeared | -| `delivery_failed` | warn | Delivery failed and notify policy is not `silent` | -| `missing_cleanup` | warn | Terminal task with no cleanup timestamp | -| `inconsistent_timestamps` | warn | Timeline violation (for example ended before started) | +| Finding | Severity | Trigger | +| ------------------------- | ---------- | ------------------------------------------------------------------------------------------------------------ | +| `stale_queued` | warn | Queued for more than 10 minutes | +| `stale_running` | error | Running for more than 30 minutes | +| `lost` | warn/error | Runtime-backed task ownership disappeared; retained lost tasks warn until `cleanupAfter`, then become errors | +| `delivery_failed` | warn | Delivery failed and notify policy is not `silent` | +| `missing_cleanup` | warn | Terminal task with no cleanup timestamp | +| `inconsistent_timestamps` | warn | Timeline violation (for example ended before started) | ### `tasks maintenance` @@ -284,7 +284,7 @@ The registry loads into memory at gateway start and syncs writes to SQLite for d A sweeper runs every **60 seconds** and handles three things: 1. **Reconciliation** — checks whether active tasks still have authoritative runtime backing. ACP/subagent tasks use child-session state, cron tasks use active-job ownership, and chat-backed CLI tasks use the owning run context. If that backing state is gone for more than 5 minutes, the task is marked `lost`. -2. **Cleanup stamping** — sets a `cleanupAfter` timestamp on terminal tasks (endedAt + 7 days). +2. **Cleanup stamping** — sets a `cleanupAfter` timestamp on terminal tasks (endedAt + 7 days). During retention, lost tasks still appear in audit as warnings; after `cleanupAfter` expires or when cleanup metadata is missing, they are errors. 3. **Pruning** — deletes records past their `cleanupAfter` date. **Retention**: terminal task records are kept for **7 days**, then automatically pruned. No configuration needed. diff --git a/docs/cli/tasks.md b/docs/cli/tasks.md index fdc76d1bc3e..58ea07f9730 100644 --- a/docs/cli/tasks.md +++ b/docs/cli/tasks.md @@ -75,7 +75,7 @@ Cancels a running background task. openclaw tasks audit [--severity ] [--code ] [--limit ] [--json] ``` -Surfaces stale, lost, delivery-failed, or otherwise inconsistent task and Task Flow records. +Surfaces stale, lost, delivery-failed, or otherwise inconsistent task and Task Flow records. Lost tasks retained until `cleanupAfter` are warnings; expired or unstamped lost tasks are errors. ### `maintenance` diff --git a/src/tasks/task-registry.test.ts b/src/tasks/task-registry.test.ts index 38aa881cb1b..51af5da44a9 100644 --- a/src/tasks/task-registry.test.ts +++ b/src/tasks/task-registry.test.ts @@ -1342,6 +1342,13 @@ describe("task-registry", () => { error: "backing session missing", }); expect(getTaskById(task.taskId)?.cleanupAfter).toBeGreaterThan(now); + expect(getInspectableTaskAuditSummary()).toMatchObject({ + errors: 0, + warnings: 1, + byCode: expect.objectContaining({ + lost: 1, + }), + }); }); }); diff --git a/src/tasks/task-registry.ts b/src/tasks/task-registry.ts index ad4d7101d30..fcab081ed73 100644 --- a/src/tasks/task-registry.ts +++ b/src/tasks/task-registry.ts @@ -181,6 +181,8 @@ function cloneTaskRecord(record: TaskRecord): TaskRecord { } function normalizeTaskTimestamps(task: TaskRecord): TaskRecord { + // Detached runtimes can report lifecycle times captured before the registry + // inserted or restored the row; keep createdAt as the visible lifecycle floor. let createdAt = task.createdAt; for (const candidate of [task.startedAt, task.lastEventAt, task.endedAt]) { if (typeof candidate === "number" && candidate < createdAt) {