mirror of
https://fastgit.cc/github.com/openclaw/openclaw
synced 2026-04-30 22:12:32 +08:00
fix(cron): preserve structured denial failures
This commit is contained in:
@@ -10,7 +10,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Cron: classify isolated runs as errors when final output narrates known execution-denial markers such as `SYSTEM_RUN_DENIED`, `INVALID_REQUEST`, or approval-binding refusal phrases, so blocked commands no longer appear green in cron history. Fixes #67172; carries forward #67186. Thanks @oc-gh-dr, @hclsys, and @1yihui.
|
||||
- Cron: classify isolated runs as errors from structured embedded-run execution-denial metadata, with final-output marker fallback for `SYSTEM_RUN_DENIED`, `INVALID_REQUEST`, and approval-binding refusals, so blocked commands no longer appear green in cron history. Fixes #67172; carries forward #67186. Thanks @oc-gh-dr, @hclsys, and @1yihui.
|
||||
- Gateway/install: add a validated `--wrapper`/`OPENCLAW_WRAPPER` service install path that persists executable LaunchAgent/systemd wrappers across forced reinstalls, updates, and doctor repairs instead of falling back to raw node/bun `ProgramArguments`. Fixes #69400. (#72445) Thanks @willtmc.
|
||||
- macOS Gateway: write launchd services with a state-dir `WorkingDirectory`, use a durable state-dir temp path instead of freezing macOS session `TMPDIR`, create that temp directory before bootstrap, and label abort-shaped launchd exits as `SIGABRT/abort` in status output. Fixes #53679 and #70223; refs #71848. Thanks @dlturock, @stammi922, and @palladius.
|
||||
- Exec approvals: accept runtime-owned `source: "allow-always"` and `commandText` allowlist metadata in gateway and node approval-set payloads so Control UI round-trips no longer fail with `unexpected property 'source'`. Fixes #60000; carries forward #60064. Thanks @sd1471123, @sharkqwy, and @luoyanglang.
|
||||
|
||||
@@ -47,7 +47,7 @@ Cron is the Gateway's built-in scheduler. It persists jobs, wakes the agent at t
|
||||
- One-shot jobs (`--at`) auto-delete after success by default.
|
||||
- Isolated cron runs best-effort close tracked browser tabs/processes for their `cron:<jobId>` session when the run completes, so detached browser automation does not leave orphaned processes behind.
|
||||
- Isolated cron runs also guard against stale acknowledgement replies. If the first result is just an interim status update (`on it`, `pulling everything together`, and similar hints) and no descendant subagent run is still responsible for the final answer, OpenClaw re-prompts once for the actual result before delivery.
|
||||
- Isolated cron runs classify known execution-denial markers in the final summary/output as failures, including host markers such as `SYSTEM_RUN_DENIED` and `INVALID_REQUEST`, so a blocked command is not reported as a green run.
|
||||
- Isolated cron runs prefer structured execution-denial metadata from the embedded run, then fall back to known final summary/output markers such as `SYSTEM_RUN_DENIED` and `INVALID_REQUEST`, so a blocked command is not reported as a green run.
|
||||
|
||||
<a id="maintenance"></a>
|
||||
|
||||
|
||||
@@ -57,10 +57,11 @@ Note: if an isolated cron run returns only the silent token (`NO_REPLY` /
|
||||
`no_reply`), cron suppresses direct outbound delivery and the fallback queued
|
||||
summary path as well, so nothing is posted back to chat.
|
||||
|
||||
Note: isolated cron runs treat known denial markers in final output, such as
|
||||
`SYSTEM_RUN_DENIED`, `INVALID_REQUEST`, and approval-binding refusal phrases, as
|
||||
errors. `cron list` and run history then surface the matched token in the error
|
||||
reason instead of reporting a blocked command as `ok`.
|
||||
Note: isolated cron runs prefer structured execution-denial metadata from the
|
||||
embedded run, then fall back to known denial markers in final output, such as
|
||||
`SYSTEM_RUN_DENIED`, `INVALID_REQUEST`, and approval-binding refusal phrases.
|
||||
`cron list` and run history surface the denial reason instead of reporting a
|
||||
blocked command as `ok`.
|
||||
|
||||
Note: `cron add|edit --model ...` uses that selected allowed model for the job.
|
||||
If the model is not allowed, cron warns and falls back to the job's agent/default
|
||||
|
||||
95
src/agents/pi-embedded-runner/failure-signal.test.ts
Normal file
95
src/agents/pi-embedded-runner/failure-signal.test.ts
Normal file
@@ -0,0 +1,95 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { resolveEmbeddedRunFailureSignal } from "./failure-signal.js";
|
||||
|
||||
describe("resolveEmbeddedRunFailureSignal", () => {
|
||||
it("classifies cron exec denials from tool error metadata", () => {
|
||||
expect(
|
||||
resolveEmbeddedRunFailureSignal({
|
||||
trigger: "cron",
|
||||
lastToolError: {
|
||||
toolName: "exec",
|
||||
error: "SYSTEM_RUN_DENIED: approval required",
|
||||
},
|
||||
}),
|
||||
).toEqual({
|
||||
kind: "execution_denied",
|
||||
source: "tool",
|
||||
toolName: "exec",
|
||||
code: "SYSTEM_RUN_DENIED",
|
||||
message: "SYSTEM_RUN_DENIED: approval required",
|
||||
fatalForCron: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("classifies invalid request denials from tool error metadata", () => {
|
||||
expect(
|
||||
resolveEmbeddedRunFailureSignal({
|
||||
trigger: "cron",
|
||||
lastToolError: {
|
||||
toolName: "bash",
|
||||
error: "INVALID_REQUEST: approval denied",
|
||||
},
|
||||
})?.code,
|
||||
).toBe("INVALID_REQUEST");
|
||||
});
|
||||
|
||||
it("does not mark non-cron runs", () => {
|
||||
expect(
|
||||
resolveEmbeddedRunFailureSignal({
|
||||
trigger: "user",
|
||||
lastToolError: {
|
||||
toolName: "exec",
|
||||
error: "SYSTEM_RUN_DENIED: approval required",
|
||||
},
|
||||
}),
|
||||
).toBeUndefined();
|
||||
});
|
||||
|
||||
it("does not mark ordinary tool failures as cron-denial failures", () => {
|
||||
expect(
|
||||
resolveEmbeddedRunFailureSignal({
|
||||
trigger: "cron",
|
||||
lastToolError: {
|
||||
toolName: "exec",
|
||||
error: "/bin/bash: line 1: python: command not found",
|
||||
},
|
||||
}),
|
||||
).toBeUndefined();
|
||||
});
|
||||
|
||||
it("does not mark non-exec validation errors as execution denials", () => {
|
||||
expect(
|
||||
resolveEmbeddedRunFailureSignal({
|
||||
trigger: "cron",
|
||||
lastToolError: {
|
||||
toolName: "browser",
|
||||
error: "INVALID_REQUEST: url required",
|
||||
},
|
||||
}),
|
||||
).toBeUndefined();
|
||||
});
|
||||
|
||||
it("does not mark non-exec tool output that merely mentions host denial tokens", () => {
|
||||
expect(
|
||||
resolveEmbeddedRunFailureSignal({
|
||||
trigger: "cron",
|
||||
lastToolError: {
|
||||
toolName: "web_fetch",
|
||||
error: "The fetched page says SYSTEM_RUN_DENIED in its troubleshooting section.",
|
||||
},
|
||||
}),
|
||||
).toBeUndefined();
|
||||
});
|
||||
|
||||
it("infers approval-binding denials even when the host code is omitted", () => {
|
||||
expect(
|
||||
resolveEmbeddedRunFailureSignal({
|
||||
trigger: "cron",
|
||||
lastToolError: {
|
||||
toolName: "exec",
|
||||
error: "Approval cannot safely bind this interpreter/runtime command",
|
||||
},
|
||||
})?.code,
|
||||
).toBe("SYSTEM_RUN_DENIED");
|
||||
});
|
||||
});
|
||||
46
src/agents/pi-embedded-runner/failure-signal.ts
Normal file
46
src/agents/pi-embedded-runner/failure-signal.ts
Normal file
@@ -0,0 +1,46 @@
|
||||
import { normalizeOptionalString } from "../../shared/string-coerce.js";
|
||||
import { isExecLikeToolName, type ToolErrorSummary } from "../tool-error-summary.js";
|
||||
import type { EmbeddedRunFailureSignal } from "./types.js";
|
||||
|
||||
const FAILURE_SIGNAL_CODES = ["SYSTEM_RUN_DENIED", "INVALID_REQUEST"] as const;
|
||||
|
||||
function resolveFailureSignalCode(message: string): EmbeddedRunFailureSignal["code"] | undefined {
|
||||
for (const code of FAILURE_SIGNAL_CODES) {
|
||||
if (message.includes(code)) {
|
||||
return code;
|
||||
}
|
||||
}
|
||||
if (message.toLowerCase().includes("approval cannot safely bind")) {
|
||||
return "SYSTEM_RUN_DENIED";
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
export function resolveEmbeddedRunFailureSignal(params: {
|
||||
trigger?: string | undefined;
|
||||
lastToolError?: ToolErrorSummary | undefined;
|
||||
}): EmbeddedRunFailureSignal | undefined {
|
||||
if (params.trigger !== "cron") {
|
||||
return undefined;
|
||||
}
|
||||
const lastToolError = params.lastToolError;
|
||||
if (!lastToolError || !isExecLikeToolName(lastToolError.toolName)) {
|
||||
return undefined;
|
||||
}
|
||||
const message = normalizeOptionalString(lastToolError.error);
|
||||
if (!message) {
|
||||
return undefined;
|
||||
}
|
||||
const code = resolveFailureSignalCode(message);
|
||||
if (!code) {
|
||||
return undefined;
|
||||
}
|
||||
return {
|
||||
kind: "execution_denied",
|
||||
source: "tool",
|
||||
...(lastToolError.toolName ? { toolName: lastToolError.toolName } : {}),
|
||||
code,
|
||||
message,
|
||||
fatalForCron: true,
|
||||
};
|
||||
}
|
||||
@@ -84,6 +84,7 @@ import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js
|
||||
import { runPostCompactionSideEffects } from "./compaction-hooks.js";
|
||||
import { buildEmbeddedCompactionRuntimeContext } from "./compaction-runtime-context.js";
|
||||
import { runContextEngineMaintenance } from "./context-engine-maintenance.js";
|
||||
import { resolveEmbeddedRunFailureSignal } from "./failure-signal.js";
|
||||
import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
|
||||
import { log } from "./logger.js";
|
||||
import { resolveModelAsync } from "./model.js";
|
||||
@@ -1853,6 +1854,10 @@ export async function runEmbeddedPiAgent(
|
||||
toolMetas: attempt.toolMetas,
|
||||
hadFailure: Boolean(attempt.lastToolError),
|
||||
});
|
||||
const failureSignal = resolveEmbeddedRunFailureSignal({
|
||||
trigger: params.trigger,
|
||||
lastToolError: attempt.lastToolError,
|
||||
});
|
||||
|
||||
// Timeout aborts can leave the run without any assistant payloads.
|
||||
// Emit an explicit timeout error instead of silently completing, so
|
||||
@@ -1893,6 +1898,7 @@ export async function runEmbeddedPiAgent(
|
||||
replayInvalid,
|
||||
livenessState,
|
||||
toolSummary: attemptToolSummary,
|
||||
...(failureSignal ? { failureSignal } : {}),
|
||||
agentHarnessResultClassification: attempt.agentHarnessResultClassification,
|
||||
},
|
||||
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
|
||||
@@ -2070,6 +2076,7 @@ export async function runEmbeddedPiAgent(
|
||||
replayInvalid,
|
||||
livenessState,
|
||||
toolSummary: attemptToolSummary,
|
||||
...(failureSignal ? { failureSignal } : {}),
|
||||
agentHarnessResultClassification: attempt.agentHarnessResultClassification,
|
||||
},
|
||||
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
|
||||
@@ -2119,6 +2126,7 @@ export async function runEmbeddedPiAgent(
|
||||
replayInvalid,
|
||||
livenessState,
|
||||
toolSummary: attemptToolSummary,
|
||||
...(failureSignal ? { failureSignal } : {}),
|
||||
agentHarnessResultClassification: attempt.agentHarnessResultClassification,
|
||||
},
|
||||
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
|
||||
@@ -2227,6 +2235,7 @@ export async function runEmbeddedPiAgent(
|
||||
replayInvalid,
|
||||
livenessState,
|
||||
toolSummary: attemptToolSummary,
|
||||
...(failureSignal ? { failureSignal } : {}),
|
||||
agentHarnessResultClassification: attempt.agentHarnessResultClassification,
|
||||
},
|
||||
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
|
||||
@@ -2334,6 +2343,7 @@ export async function runEmbeddedPiAgent(
|
||||
...(params.blockReplyBreak ? { blockStreaming: params.blockReplyBreak } : {}),
|
||||
},
|
||||
toolSummary: attemptToolSummary,
|
||||
...(failureSignal ? { failureSignal } : {}),
|
||||
completion: {
|
||||
...(stopReason ? { stopReason } : {}),
|
||||
...(stopReason ? { finishReason: stopReason } : {}),
|
||||
|
||||
@@ -103,6 +103,15 @@ export type ContextManagementTrace = {
|
||||
|
||||
export type EmbeddedRunLivenessState = "working" | "paused" | "blocked" | "abandoned";
|
||||
|
||||
export type EmbeddedRunFailureSignal = {
|
||||
kind: "execution_denied";
|
||||
source: "tool";
|
||||
toolName?: string;
|
||||
code: "SYSTEM_RUN_DENIED" | "INVALID_REQUEST";
|
||||
message: string;
|
||||
fatalForCron: true;
|
||||
};
|
||||
|
||||
export type EmbeddedPiRunMeta = {
|
||||
durationMs: number;
|
||||
agentMeta?: EmbeddedPiAgentMeta;
|
||||
@@ -124,6 +133,7 @@ export type EmbeddedPiRunMeta = {
|
||||
| "retry_limit";
|
||||
message: string;
|
||||
};
|
||||
failureSignal?: EmbeddedRunFailureSignal;
|
||||
/** Stop reason for the agent run (e.g., "completed", "tool_calls"). */
|
||||
stopReason?: string;
|
||||
/** Pending tool calls when stopReason is "tool_calls". */
|
||||
|
||||
@@ -12,4 +12,25 @@ describe("extractToolErrorMessage", () => {
|
||||
expect(extractToolErrorMessage({ details: { status: "failed" } })).toBe("failed");
|
||||
expect(extractToolErrorMessage({ details: { status: "timeout" } })).toBe("timeout");
|
||||
});
|
||||
|
||||
it("prefers node-host aggregated denial text over generic failed status", () => {
|
||||
expect(
|
||||
extractToolErrorMessage({
|
||||
content: [{ type: "text", text: "SYSTEM_RUN_DENIED: approval required" }],
|
||||
details: {
|
||||
status: "failed",
|
||||
aggregated: "SYSTEM_RUN_DENIED: approval required",
|
||||
},
|
||||
}),
|
||||
).toBe("SYSTEM_RUN_DENIED: approval required");
|
||||
});
|
||||
|
||||
it("uses result text before generic failed status when details omit aggregated output", () => {
|
||||
expect(
|
||||
extractToolErrorMessage({
|
||||
content: [{ type: "text", text: "SYSTEM_RUN_DENIED: approval required" }],
|
||||
details: { status: "failed" },
|
||||
}),
|
||||
).toBe("SYSTEM_RUN_DENIED: approval required");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -75,10 +75,7 @@ function extractErrorField(value: unknown): string | undefined {
|
||||
return undefined;
|
||||
}
|
||||
const record = value as Record<string, unknown>;
|
||||
const direct =
|
||||
readErrorCandidate(record.error) ??
|
||||
readErrorCandidate(record.message) ??
|
||||
readErrorCandidate(record.reason);
|
||||
const direct = extractDirectErrorField(record);
|
||||
if (direct) {
|
||||
return direct;
|
||||
}
|
||||
@@ -89,6 +86,34 @@ function extractErrorField(value: unknown): string | undefined {
|
||||
return normalizeToolErrorText(status);
|
||||
}
|
||||
|
||||
function extractDirectErrorField(value: unknown): string | undefined {
|
||||
if (!value || typeof value !== "object") {
|
||||
return undefined;
|
||||
}
|
||||
const record = value as Record<string, unknown>;
|
||||
return (
|
||||
readErrorCandidate(record.error) ??
|
||||
readErrorCandidate(record.message) ??
|
||||
readErrorCandidate(record.reason)
|
||||
);
|
||||
}
|
||||
|
||||
function extractAggregatedErrorField(value: unknown): string | undefined {
|
||||
if (!value || typeof value !== "object") {
|
||||
return undefined;
|
||||
}
|
||||
const record = value as Record<string, unknown>;
|
||||
return readErrorCandidate(record.aggregated);
|
||||
}
|
||||
|
||||
function isHostDenialToolText(text: string): boolean {
|
||||
const normalized = text.trim();
|
||||
if (normalized.includes("SYSTEM_RUN_DENIED") || normalized.includes("INVALID_REQUEST")) {
|
||||
return true;
|
||||
}
|
||||
return normalized.toLowerCase().includes("approval cannot safely bind");
|
||||
}
|
||||
|
||||
export function sanitizeToolResult(result: unknown): unknown {
|
||||
if (!result || typeof result !== "object") {
|
||||
return result;
|
||||
@@ -388,28 +413,42 @@ export function extractToolErrorMessage(result: unknown): string | undefined {
|
||||
return undefined;
|
||||
}
|
||||
const record = result as Record<string, unknown>;
|
||||
const fromDetails = extractErrorField(record.details);
|
||||
const fromDetails = extractDirectErrorField(record.details);
|
||||
if (fromDetails) {
|
||||
return fromDetails;
|
||||
}
|
||||
const fromRoot = extractErrorField(record);
|
||||
const fromDetailsAggregated = extractAggregatedErrorField(record.details);
|
||||
if (fromDetailsAggregated) {
|
||||
return fromDetailsAggregated;
|
||||
}
|
||||
const fromRoot = extractDirectErrorField(record);
|
||||
if (fromRoot) {
|
||||
return fromRoot;
|
||||
}
|
||||
const text = extractToolResultText(result);
|
||||
if (!text) {
|
||||
return undefined;
|
||||
}
|
||||
try {
|
||||
const parsed = JSON.parse(text) as unknown;
|
||||
const fromJson = extractErrorField(parsed);
|
||||
if (fromJson) {
|
||||
return fromJson;
|
||||
if (text) {
|
||||
try {
|
||||
const parsed = JSON.parse(text) as unknown;
|
||||
const fromJson = extractErrorField(parsed);
|
||||
if (fromJson) {
|
||||
return fromJson;
|
||||
}
|
||||
} catch {
|
||||
// Fall through to status/text fallback.
|
||||
}
|
||||
if (isHostDenialToolText(text)) {
|
||||
return normalizeToolErrorText(text);
|
||||
}
|
||||
} catch {
|
||||
// Fall through to first-line text fallback.
|
||||
}
|
||||
return normalizeToolErrorText(text);
|
||||
const fromDetailsStatus = extractErrorField(record.details);
|
||||
if (fromDetailsStatus) {
|
||||
return fromDetailsStatus;
|
||||
}
|
||||
const fromRootStatus = extractErrorField(record);
|
||||
if (fromRootStatus) {
|
||||
return fromRootStatus;
|
||||
}
|
||||
return text ? normalizeToolErrorText(text) : undefined;
|
||||
}
|
||||
|
||||
function resolveMessageToolTarget(args: Record<string, unknown>): string | undefined {
|
||||
|
||||
@@ -190,6 +190,51 @@ describe("resolveCronPayloadOutcome", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("prefers typed failure signals over denial-token fallback", () => {
|
||||
const result = resolveCronPayloadOutcome({
|
||||
payloads: [{ text: "On it, retrying now." }],
|
||||
failureSignal: {
|
||||
kind: "execution_denied",
|
||||
source: "tool",
|
||||
toolName: "exec",
|
||||
code: "SYSTEM_RUN_DENIED",
|
||||
message: "SYSTEM_RUN_DENIED: approval required",
|
||||
fatalForCron: true,
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.hasFatalErrorPayload).toBe(true);
|
||||
expect(result.embeddedRunError).toBe(
|
||||
"cron classifier: execution_denied failure from exec (SYSTEM_RUN_DENIED): SYSTEM_RUN_DENIED: approval required",
|
||||
);
|
||||
expect(result.summary).toBe("SYSTEM_RUN_DENIED: approval required");
|
||||
expect(result.outputText).toBe("SYSTEM_RUN_DENIED: approval required");
|
||||
expect(result.synthesizedText).toBe("SYSTEM_RUN_DENIED: approval required");
|
||||
expect(result.deliveryPayload).toEqual({
|
||||
text: "SYSTEM_RUN_DENIED: approval required",
|
||||
isError: true,
|
||||
});
|
||||
expect(result.deliveryPayloads).toEqual([
|
||||
{ text: "SYSTEM_RUN_DENIED: approval required", isError: true },
|
||||
]);
|
||||
expect(result.deliveryPayloadHasStructuredContent).toBe(false);
|
||||
});
|
||||
|
||||
it("ignores non-fatal failure signal metadata", () => {
|
||||
const result = resolveCronPayloadOutcome({
|
||||
payloads: [{ text: "ordinary success" }],
|
||||
failureSignal: {
|
||||
kind: "execution_denied",
|
||||
source: "tool",
|
||||
message: "SYSTEM_RUN_DENIED: approval required",
|
||||
fatalForCron: false,
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.hasFatalErrorPayload).toBe(false);
|
||||
expect(result.embeddedRunError).toBeUndefined();
|
||||
});
|
||||
|
||||
it("keeps structured error payload reasons ahead of denial-token reasons", () => {
|
||||
const result = resolveCronPayloadOutcome({
|
||||
payloads: [
|
||||
|
||||
@@ -26,6 +26,20 @@ type CronDenialSignal = {
|
||||
field: string;
|
||||
};
|
||||
|
||||
type CronFailureSignal = {
|
||||
kind?: string;
|
||||
source?: string;
|
||||
toolName?: string;
|
||||
code?: string;
|
||||
message?: string;
|
||||
fatalForCron?: boolean;
|
||||
};
|
||||
|
||||
type NormalizedCronFailureSignal = CronFailureSignal & {
|
||||
message: string;
|
||||
fatalForCron: true;
|
||||
};
|
||||
|
||||
const CRON_DENIAL_EXACT_TOKENS = ["SYSTEM_RUN_DENIED", "INVALID_REQUEST"] as const;
|
||||
const CRON_DENIAL_CASE_INSENSITIVE_TOKENS = [
|
||||
"approval cannot safely bind",
|
||||
@@ -75,6 +89,25 @@ function formatCronDenialSignal(signal: CronDenialSignal): string {
|
||||
return `cron classifier: denial token "${signal.token}" detected in ${signal.field}`;
|
||||
}
|
||||
|
||||
function normalizeCronFailureSignal(
|
||||
signal: CronFailureSignal | undefined,
|
||||
): NormalizedCronFailureSignal | undefined {
|
||||
const message = normalizeOptionalString(signal?.message);
|
||||
if (signal?.fatalForCron !== true || !message) {
|
||||
return undefined;
|
||||
}
|
||||
return { ...signal, message, fatalForCron: true };
|
||||
}
|
||||
|
||||
function formatCronFailureSignal(signal: NormalizedCronFailureSignal): string {
|
||||
const kind = normalizeOptionalString(signal.kind) ?? "run";
|
||||
const code = normalizeOptionalString(signal.code);
|
||||
const source = normalizeOptionalString(signal.toolName) ?? normalizeOptionalString(signal.source);
|
||||
return `cron classifier: ${kind} failure${source ? ` from ${source}` : ""}${
|
||||
code ? ` (${code})` : ""
|
||||
}: ${signal.message}`;
|
||||
}
|
||||
|
||||
export function pickSummaryFromOutput(text: string | undefined) {
|
||||
const clean = (text ?? "").trim();
|
||||
if (!clean) {
|
||||
@@ -191,7 +224,8 @@ export function resolveHeartbeatAckMaxChars(agentCfg?: { heartbeat?: { ackMaxCha
|
||||
export function resolveCronPayloadOutcome(params: {
|
||||
payloads: DeliveryPayload[];
|
||||
runLevelError?: unknown;
|
||||
finalAssistantVisibleText?: string;
|
||||
failureSignal?: CronFailureSignal | undefined;
|
||||
finalAssistantVisibleText?: string | undefined;
|
||||
preferFinalAssistantVisibleText?: boolean;
|
||||
}): CronPayloadOutcome {
|
||||
const firstText = params.payloads[0]?.text ?? "";
|
||||
@@ -254,19 +288,34 @@ export function resolveCronPayloadOutcome(params: {
|
||||
text: payload?.text,
|
||||
})),
|
||||
]);
|
||||
const hasFatalErrorPayload = hasFatalStructuredErrorPayload || denialSignal !== undefined;
|
||||
const failureSignal = normalizeCronFailureSignal(params.failureSignal);
|
||||
const hasFatalErrorPayload =
|
||||
hasFatalStructuredErrorPayload || failureSignal !== undefined || denialSignal !== undefined;
|
||||
const shouldUseFailureSignalPayload =
|
||||
failureSignal !== undefined && !hasFatalStructuredErrorPayload;
|
||||
const failureSignalDeliveryPayload = shouldUseFailureSignalPayload
|
||||
? ({ text: failureSignal.message, isError: true } satisfies DeliveryPayload)
|
||||
: undefined;
|
||||
return {
|
||||
summary,
|
||||
outputText,
|
||||
synthesizedText,
|
||||
deliveryPayload,
|
||||
deliveryPayloads: resolvedDeliveryPayloads,
|
||||
deliveryPayloadHasStructuredContent,
|
||||
summary: shouldUseFailureSignalPayload
|
||||
? (pickSummaryFromOutput(failureSignal.message) ?? summary)
|
||||
: summary,
|
||||
outputText: shouldUseFailureSignalPayload ? failureSignal.message : outputText,
|
||||
synthesizedText: shouldUseFailureSignalPayload ? failureSignal.message : synthesizedText,
|
||||
deliveryPayload: failureSignalDeliveryPayload ?? deliveryPayload,
|
||||
deliveryPayloads: failureSignalDeliveryPayload
|
||||
? [failureSignalDeliveryPayload]
|
||||
: resolvedDeliveryPayloads,
|
||||
deliveryPayloadHasStructuredContent: failureSignalDeliveryPayload
|
||||
? false
|
||||
: deliveryPayloadHasStructuredContent,
|
||||
hasFatalErrorPayload,
|
||||
embeddedRunError: hasFatalStructuredErrorPayload
|
||||
? (lastErrorPayloadText ?? "cron isolated run returned an error payload")
|
||||
: denialSignal
|
||||
? formatCronDenialSignal(denialSignal)
|
||||
: undefined,
|
||||
: failureSignal
|
||||
? formatCronFailureSignal(failureSignal)
|
||||
: denialSignal
|
||||
? formatCronDenialSignal(denialSignal)
|
||||
: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -359,10 +359,12 @@ export async function executeCronRun(params: {
|
||||
const interimPayloads = runResult.payloads ?? [];
|
||||
const {
|
||||
deliveryPayloadHasStructuredContent: interimPayloadHasStructuredContent,
|
||||
hasFatalErrorPayload: interimHasFatalErrorPayload,
|
||||
outputText: interimOutputText,
|
||||
} = resolveCronPayloadOutcome({
|
||||
payloads: interimPayloads,
|
||||
runLevelError: runResult.meta?.error,
|
||||
failureSignal: runResult.meta?.failureSignal,
|
||||
finalAssistantVisibleText: runResult.meta?.finalAssistantVisibleText,
|
||||
preferFinalAssistantVisibleText: (
|
||||
await resolveCronChannelOutputPolicy(params.resolvedDelivery.channel)
|
||||
@@ -371,6 +373,7 @@ export async function executeCronRun(params: {
|
||||
const interimText = interimOutputText?.trim() ?? "";
|
||||
const shouldRetryInterimAck =
|
||||
!runResult.meta?.error &&
|
||||
!interimHasFatalErrorPayload &&
|
||||
!runResult.didSendViaMessagingTool &&
|
||||
!interimPayloadHasStructuredContent &&
|
||||
!interimPayloads.some((payload) => payload?.isError === true) &&
|
||||
|
||||
@@ -5,10 +5,13 @@ import {
|
||||
} from "./run.suite-helpers.js";
|
||||
import {
|
||||
countActiveDescendantRunsMock,
|
||||
dispatchCronDeliveryMock,
|
||||
isHeartbeatOnlyResponseMock,
|
||||
listDescendantRunsForRequesterMock,
|
||||
loadRunCronIsolatedAgentTurn,
|
||||
mockRunCronFallbackPassthrough,
|
||||
pickLastNonEmptyTextFromPayloadsMock,
|
||||
resolveCronDeliveryPlanMock,
|
||||
runEmbeddedPiAgentMock,
|
||||
runWithModelFallbackMock,
|
||||
} from "./run.test-harness.js";
|
||||
@@ -74,6 +77,69 @@ describe("runCronIsolatedAgentTurn — interim ack retry", () => {
|
||||
await runTurnAndExpectOk(1, 1);
|
||||
});
|
||||
|
||||
it("does not retry over a fatal structured failure signal", async () => {
|
||||
usePayloadTextExtraction();
|
||||
runEmbeddedPiAgentMock.mockResolvedValueOnce({
|
||||
payloads: [{ text: "On it, retrying now." }],
|
||||
meta: {
|
||||
agentMeta: { usage: { input: 10, output: 20 } },
|
||||
failureSignal: {
|
||||
kind: "execution_denied",
|
||||
source: "tool",
|
||||
toolName: "exec",
|
||||
code: "SYSTEM_RUN_DENIED",
|
||||
message: "SYSTEM_RUN_DENIED: approval required",
|
||||
fatalForCron: true,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
mockRunCronFallbackPassthrough();
|
||||
const result = await runCronIsolatedAgentTurn(makeIsolatedAgentTurnParams());
|
||||
|
||||
expect(result.status).toBe("error");
|
||||
expect(result.error).toBe("SYSTEM_RUN_DENIED: approval required");
|
||||
expect(runWithModelFallbackMock).toHaveBeenCalledTimes(1);
|
||||
expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("delivers synthesized fatal failure signals even when the original payloads are empty", async () => {
|
||||
usePayloadTextExtraction();
|
||||
resolveCronDeliveryPlanMock.mockReturnValue({
|
||||
requested: true,
|
||||
mode: "announce",
|
||||
channel: "messagechat",
|
||||
to: "123",
|
||||
});
|
||||
isHeartbeatOnlyResponseMock.mockReturnValue(true);
|
||||
runEmbeddedPiAgentMock.mockResolvedValueOnce({
|
||||
payloads: [],
|
||||
meta: {
|
||||
agentMeta: { usage: { input: 10, output: 20 } },
|
||||
failureSignal: {
|
||||
kind: "execution_denied",
|
||||
source: "tool",
|
||||
toolName: "exec",
|
||||
code: "SYSTEM_RUN_DENIED",
|
||||
message: "SYSTEM_RUN_DENIED: approval required",
|
||||
fatalForCron: true,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
mockRunCronFallbackPassthrough();
|
||||
const result = await runCronIsolatedAgentTurn(makeIsolatedAgentTurnParams());
|
||||
|
||||
expect(result.status).toBe("error");
|
||||
expect(result.error).toBe("SYSTEM_RUN_DENIED: approval required");
|
||||
expect(dispatchCronDeliveryMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
skipHeartbeatDelivery: false,
|
||||
deliveryPayloads: [{ text: "SYSTEM_RUN_DENIED: approval required", isError: true }],
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("does not retry when descendants were spawned in this run even if they already settled", async () => {
|
||||
usePayloadTextExtraction();
|
||||
runEmbeddedPiAgentMock.mockResolvedValueOnce({
|
||||
|
||||
@@ -362,21 +362,40 @@ function resetRunOutcomeMocks(): void {
|
||||
pickLastNonEmptyTextFromPayloadsMock.mockReturnValue("test output");
|
||||
resolveCronPayloadOutcomeMock.mockReset();
|
||||
resolveCronPayloadOutcomeMock.mockImplementation(
|
||||
({ payloads }: { payloads: Array<{ isError?: boolean }> }) => {
|
||||
const outputText = pickLastNonEmptyTextFromPayloadsMock(payloads);
|
||||
({
|
||||
payloads,
|
||||
failureSignal,
|
||||
}: {
|
||||
payloads: Array<{ isError?: boolean }>;
|
||||
failureSignal?: { fatalForCron?: boolean; message?: string };
|
||||
}) => {
|
||||
const failureMessage =
|
||||
failureSignal?.fatalForCron === true
|
||||
? (failureSignal.message ?? "cron isolated run returned a fatal failure signal")
|
||||
: undefined;
|
||||
const outputText = failureMessage ?? pickLastNonEmptyTextFromPayloadsMock(payloads);
|
||||
const synthesizedText = outputText?.trim() || "summary";
|
||||
const hasFatalErrorPayload = payloads.some((payload) => payload?.isError === true);
|
||||
const hasFatalErrorPayload =
|
||||
payloads.some((payload) => payload?.isError === true) || failureMessage !== undefined;
|
||||
const deliveryPayload = failureMessage ? { text: failureMessage, isError: true } : undefined;
|
||||
return {
|
||||
summary: "summary",
|
||||
summary: failureMessage ?? "summary",
|
||||
outputText,
|
||||
synthesizedText,
|
||||
deliveryPayload: undefined,
|
||||
deliveryPayloads: synthesizedText ? [{ text: synthesizedText }] : [],
|
||||
deliveryPayload,
|
||||
deliveryPayloads: deliveryPayload
|
||||
? [deliveryPayload]
|
||||
: synthesizedText
|
||||
? [{ text: synthesizedText }]
|
||||
: [],
|
||||
deliveryPayloadHasStructuredContent: false,
|
||||
hasFatalErrorPayload,
|
||||
embeddedRunError: hasFatalErrorPayload
|
||||
? "cron isolated run returned an error payload"
|
||||
: undefined,
|
||||
embeddedRunError:
|
||||
failureMessage !== undefined
|
||||
? failureMessage
|
||||
: hasFatalErrorPayload
|
||||
? "cron isolated run returned an error payload"
|
||||
: undefined,
|
||||
};
|
||||
},
|
||||
);
|
||||
|
||||
@@ -839,6 +839,7 @@ async function finalizeCronRun(params: {
|
||||
} = resolveCronPayloadOutcome({
|
||||
payloads,
|
||||
runLevelError: finalRunResult.meta?.error,
|
||||
failureSignal: finalRunResult.meta?.failureSignal,
|
||||
finalAssistantVisibleText: finalRunResult.meta?.finalAssistantVisibleText,
|
||||
preferFinalAssistantVisibleText: (
|
||||
await resolveCronChannelOutputPolicy(prepared.resolvedDelivery.channel)
|
||||
@@ -864,7 +865,8 @@ async function finalizeCronRun(params: {
|
||||
|
||||
const skipHeartbeatDelivery =
|
||||
prepared.deliveryRequested &&
|
||||
isHeartbeatOnlyResponse(payloads, resolveHeartbeatAckMaxChars(prepared.agentCfg));
|
||||
!hasFatalErrorPayload &&
|
||||
isHeartbeatOnlyResponse(deliveryPayloads, resolveHeartbeatAckMaxChars(prepared.agentCfg));
|
||||
const {
|
||||
dispatchCronDelivery,
|
||||
matchesMessagingToolDeliveryTarget,
|
||||
|
||||
Reference in New Issue
Block a user