mirror of
https://fastgit.cc/github.com/openclaw/openclaw
synced 2026-05-01 06:36:23 +08:00
fix(gateway): reuse paired auth for probes
This commit is contained in:
@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Providers/Bedrock: omit deprecated `temperature` for Claude Opus 4.7 Bedrock model ids, named and application inference profiles, including dotted `opus-4.7` refs, and classify the nested validation response for failover. Fixes #73663. Thanks @bstanbury.
|
||||
- Gateway: raise the preauth/connect-challenge timeout to 15s so cold CLI starts on slower hosts have more time to process the WebSocket challenge before the Gateway closes the connection. Fixes #51469; refs #73592 and #62060. Thanks @GothicFox and @jackychen-png.
|
||||
- CLI/status: fall back to a bounded local `status` RPC when loopback detail probes time out or report unknown capability, so reachable local gateways are no longer marked unreachable by slow read diagnostics. Fixes #73535; refs #48360, #62762, #51357, and #42019. Thanks @RacecarGuy, @justinschille, @DJBlackhawk, @tianyaqpzm, and @0xrsydn.
|
||||
- CLI/gateway: reuse cached paired-device auth during `gateway probe` and report post-connect diagnostic failures as degraded reachability, so healthy local gateways are no longer marked unreachable after loopback auth or read timeouts. Fixes #48360. Thanks @RacecarGuy.
|
||||
- Channels/Discord: give Discord Gateway WebSocket handshakes a 30s timeout so stalled TLS/network transitions emit an error and Carbon can continue its reconnect loop instead of leaving the bot silent until restart. Refs #50046. Thanks @codexGW.
|
||||
- NVIDIA/NIM: persist the `NVIDIA_API_KEY` provider marker and mark bundled NVIDIA Chat Completions models as string-content compatible, so NIM models load from `models.json` and OpenAI-compatible subagent calls send plain text content. Fixes #73013 and #50107; refs #73014. Thanks @bautrey, @iot2edge, @ifearghal, and @futhgar.
|
||||
- Channels/Discord: let text-only configs drop the `GuildVoiceStates` gateway intent and expose a bounded `/gateway/bot` metadata timeout with rate-limited fallback logs, reducing idle CPU and warning floods. Fixes #73709 and #73585. Thanks @sanchezm86 and @trac3r00.
|
||||
|
||||
@@ -323,6 +323,7 @@ openclaw gateway probe --json
|
||||
- `Capability: read-only|write-capable|admin-capable|pairing-pending|connect-only` reports what the probe could prove about auth. It is separate from reachability.
|
||||
- `Read probe: ok` means read-scope detail RPC calls (`health`/`status`/`system-presence`/`config.get`) also succeeded.
|
||||
- `Read probe: limited - missing scope: operator.read` means connect succeeded but read-scope RPC is limited. This is reported as **degraded** reachability, not full failure.
|
||||
- `Read probe: failed` after `Connect: ok` means the Gateway accepted the WebSocket connection, but follow-up read diagnostics timed out or failed. This is also **degraded** reachability, not an unreachable Gateway.
|
||||
- Like `gateway status`, `gateway probe` reuses existing cached device auth but does not create a first-time device identity or pairing state.
|
||||
- Exit code is non-zero only when no probed target is reachable.
|
||||
|
||||
@@ -331,7 +332,7 @@ openclaw gateway probe --json
|
||||
Top level:
|
||||
|
||||
- `ok`: at least one target is reachable.
|
||||
- `degraded`: at least one target had scope-limited detail RPC.
|
||||
- `degraded`: at least one target accepted a connection but did not complete full detail RPC diagnostics.
|
||||
- `capability`: best capability seen across reachable targets (`read_only`, `write_capable`, `admin_capable`, `pairing_pending`, `connected_no_operator_scope`, or `unknown`).
|
||||
- `primaryTargetId`: best target to treat as the active winner in this order: explicit URL, SSH tunnel, configured remote, then local loopback.
|
||||
- `warnings[]`: best-effort warning records with `code`, `message`, and optional `targetIds`.
|
||||
|
||||
@@ -380,6 +380,7 @@ Common signatures:
|
||||
- `SSH tunnel failed to start; falling back to direct probes.` → SSH setup failed, but the command still tried direct configured/loopback targets.
|
||||
- `multiple reachable gateways detected` → more than one target answered. Usually this means an intentional multi-gateway setup or stale/duplicate listeners.
|
||||
- `Read-probe diagnostics are limited by gateway scopes (missing operator.read)` → connect worked, but detail RPC is scope-limited; pair this device identity or use credentials that grant the `operator.read` scope.
|
||||
- `Gateway accepted the WebSocket connection, but follow-up read diagnostics failed` → connect worked, but the full diagnostic RPC set timed out or failed. Treat this as a reachable Gateway with degraded diagnostics; compare `connect.ok` and `connect.rpcOk` in `--json` output.
|
||||
- `Capability: pairing-pending` or `gateway closed (1008): pairing required` → the gateway answered, but this client still needs pairing/approval before normal operator access.
|
||||
- unresolved `gateway.auth.*` / `gateway.remote.*` SecretRef warning text → auth material was unavailable in this command path for the failed target.
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ import {
|
||||
buildNetworkHints,
|
||||
extractConfigSummary,
|
||||
isProbeReachable,
|
||||
isPostConnectProbeFailure,
|
||||
isScopeLimitedProbeFailure,
|
||||
renderProbeSummaryLine,
|
||||
resolveAuthForTarget,
|
||||
@@ -250,7 +251,7 @@ describe("probe reachability classification", () => {
|
||||
expect(renderProbeSummaryLine(probe, false)).toContain("Read probe: limited");
|
||||
});
|
||||
|
||||
it("keeps non-scope RPC failures as unreachable", () => {
|
||||
it("treats post-connect read failures as reachable with failed diagnostics", () => {
|
||||
const probe = {
|
||||
ok: false,
|
||||
url: "ws://127.0.0.1:18789",
|
||||
@@ -269,10 +270,33 @@ describe("probe reachability classification", () => {
|
||||
};
|
||||
|
||||
expect(isScopeLimitedProbeFailure(probe)).toBe(false);
|
||||
expect(isProbeReachable(probe)).toBe(false);
|
||||
expect(isPostConnectProbeFailure(probe)).toBe(true);
|
||||
expect(isProbeReachable(probe)).toBe(true);
|
||||
expect(renderProbeSummaryLine(probe, false)).toContain("Capability: connect-only");
|
||||
expect(renderProbeSummaryLine(probe, false)).toContain("Read probe: failed");
|
||||
});
|
||||
|
||||
it("keeps failed-before-connect probes unreachable", () => {
|
||||
const probe = {
|
||||
ok: false,
|
||||
url: "ws://127.0.0.1:18789",
|
||||
connectLatencyMs: null,
|
||||
error: "timeout",
|
||||
close: null,
|
||||
auth: {
|
||||
role: null,
|
||||
scopes: [],
|
||||
capability: "unknown" as const,
|
||||
},
|
||||
health: null,
|
||||
status: null,
|
||||
presence: null,
|
||||
configSnapshot: null,
|
||||
};
|
||||
|
||||
expect(isPostConnectProbeFailure(probe)).toBe(false);
|
||||
expect(isProbeReachable(probe)).toBe(false);
|
||||
});
|
||||
});
|
||||
describe("gateway-status local target scheme", () => {
|
||||
it("uses wss for local loopback targets and network hints when gateway TLS is enabled", () => {
|
||||
|
||||
@@ -276,8 +276,12 @@ export function isScopeLimitedProbeFailure(probe: GatewayProbeResult): boolean {
|
||||
return MISSING_SCOPE_PATTERN.test(probe.error ?? "");
|
||||
}
|
||||
|
||||
export function isPostConnectProbeFailure(probe: GatewayProbeResult): boolean {
|
||||
return !probe.ok && probe.connectLatencyMs != null;
|
||||
}
|
||||
|
||||
export function isProbeReachable(probe: GatewayProbeResult): boolean {
|
||||
return probe.ok || isScopeLimitedProbeFailure(probe);
|
||||
return probe.ok || probe.connectLatencyMs != null;
|
||||
}
|
||||
|
||||
function getGatewayProbeCapability(probe: GatewayProbeResult): GatewayProbeCapability {
|
||||
|
||||
@@ -95,11 +95,11 @@ describe("gateway status output", () => {
|
||||
discovery: [],
|
||||
probed: [
|
||||
createTarget(
|
||||
"unreachable-admin",
|
||||
"unreachable-before-connect",
|
||||
createProbe("admin_capable", {
|
||||
ok: false,
|
||||
connectLatencyMs: 40,
|
||||
error: "unknown method: status",
|
||||
connectLatencyMs: null,
|
||||
error: "timeout",
|
||||
}),
|
||||
),
|
||||
createTarget(
|
||||
@@ -132,11 +132,11 @@ describe("gateway status output", () => {
|
||||
discovery: [],
|
||||
probed: [
|
||||
createTarget(
|
||||
"unreachable-admin",
|
||||
"unreachable-before-connect",
|
||||
createProbe("admin_capable", {
|
||||
ok: false,
|
||||
connectLatencyMs: 40,
|
||||
error: "unknown method: status",
|
||||
connectLatencyMs: null,
|
||||
error: "timeout",
|
||||
}),
|
||||
),
|
||||
createTarget(
|
||||
@@ -153,4 +153,57 @@ describe("gateway status output", () => {
|
||||
|
||||
expect(runtime.log).toHaveBeenCalledWith("Capability: read-only");
|
||||
});
|
||||
|
||||
it("reports post-connect detail failures as reachable but degraded in json output", () => {
|
||||
const runtime = createRuntimeCapture();
|
||||
writeGatewayStatusJson({
|
||||
runtime,
|
||||
startedAt: Date.now() - 50,
|
||||
overallTimeoutMs: 5_000,
|
||||
discoveryTimeoutMs: 500,
|
||||
network: {
|
||||
localLoopbackUrl: "ws://127.0.0.1:18789",
|
||||
localTailnetUrl: null,
|
||||
tailnetIPv4: null,
|
||||
},
|
||||
discovery: [],
|
||||
probed: [
|
||||
createTarget(
|
||||
"detail-timeout",
|
||||
createProbe("read_only", {
|
||||
ok: false,
|
||||
connectLatencyMs: 40,
|
||||
error: "timeout",
|
||||
}),
|
||||
),
|
||||
],
|
||||
warnings: [
|
||||
{
|
||||
code: "probe_detail_failed",
|
||||
message:
|
||||
"Gateway accepted the WebSocket connection, but follow-up read diagnostics failed: timeout",
|
||||
targetIds: ["detail-timeout"],
|
||||
},
|
||||
],
|
||||
primaryTargetId: "detail-timeout",
|
||||
});
|
||||
|
||||
expect(writeRuntimeJson).toHaveBeenCalledWith(
|
||||
runtime,
|
||||
expect.objectContaining({
|
||||
ok: true,
|
||||
degraded: true,
|
||||
primaryTargetId: "detail-timeout",
|
||||
targets: [
|
||||
expect.objectContaining({
|
||||
connect: expect.objectContaining({
|
||||
ok: true,
|
||||
rpcOk: false,
|
||||
error: "timeout",
|
||||
}),
|
||||
}),
|
||||
],
|
||||
}),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -4,6 +4,7 @@ import { colorize, theme } from "../../terminal/theme.js";
|
||||
import { serializeGatewayDiscoveryBeacon } from "./discovery.js";
|
||||
import {
|
||||
isProbeReachable,
|
||||
isPostConnectProbeFailure,
|
||||
isScopeLimitedProbeFailure,
|
||||
summarizeGatewayProbeCapability,
|
||||
renderProbeSummaryLine,
|
||||
@@ -39,6 +40,9 @@ export function buildGatewayStatusWarnings(params: {
|
||||
const degradedScopeLimited = params.probed.filter((entry) =>
|
||||
isScopeLimitedProbeFailure(entry.probe),
|
||||
);
|
||||
const degradedDetailFailed = params.probed.filter(
|
||||
(entry) => isPostConnectProbeFailure(entry.probe) && !isScopeLimitedProbeFailure(entry.probe),
|
||||
);
|
||||
const warnings: GatewayStatusWarning[] = [];
|
||||
if (params.sshTarget && !params.sshTunnelStarted) {
|
||||
warnings.push({
|
||||
@@ -83,6 +87,14 @@ export function buildGatewayStatusWarnings(params: {
|
||||
targetIds: [result.target.id],
|
||||
});
|
||||
}
|
||||
for (const result of degradedDetailFailed) {
|
||||
const detail = result.probe.error ? `: ${result.probe.error}` : ".";
|
||||
warnings.push({
|
||||
code: "probe_detail_failed",
|
||||
message: `Gateway accepted the WebSocket connection, but follow-up read diagnostics failed${detail}`,
|
||||
targetIds: [result.target.id],
|
||||
});
|
||||
}
|
||||
return warnings;
|
||||
}
|
||||
|
||||
@@ -98,7 +110,7 @@ export function writeGatewayStatusJson(params: {
|
||||
primaryTargetId: string | null;
|
||||
}) {
|
||||
const reachable = params.probed.filter((entry) => isProbeReachable(entry.probe));
|
||||
const degraded = params.probed.some((entry) => isScopeLimitedProbeFailure(entry.probe));
|
||||
const degraded = params.probed.some((entry) => isPostConnectProbeFailure(entry.probe));
|
||||
const capability = summarizeGatewayProbeCapability(reachable.map((entry) => entry.probe));
|
||||
writeRuntimeJson(params.runtime, {
|
||||
ok: reachable.length > 0,
|
||||
|
||||
@@ -203,7 +203,18 @@ describe("probeGateway", () => {
|
||||
expect(gatewayClientState.options?.scopes).toEqual(["operator.read"]);
|
||||
});
|
||||
|
||||
it("keeps device identity disabled for unauthenticated loopback probes", async () => {
|
||||
it("reuses cached device identity for unauthenticated loopback probes", async () => {
|
||||
await probeGateway({
|
||||
url: "ws://127.0.0.1:18789",
|
||||
timeoutMs: 1_000,
|
||||
});
|
||||
|
||||
expect(gatewayClientState.options?.deviceIdentity).toEqual(deviceIdentityState.value);
|
||||
});
|
||||
|
||||
it("keeps device identity disabled for first-time unauthenticated loopback probes", async () => {
|
||||
deviceIdentityState.cachedToken = null;
|
||||
|
||||
await probeGateway({
|
||||
url: "ws://127.0.0.1:18789",
|
||||
timeoutMs: 1_000,
|
||||
@@ -220,7 +231,7 @@ describe("probeGateway", () => {
|
||||
});
|
||||
|
||||
expect(result.ok).toBe(true);
|
||||
expect(gatewayClientState.options?.deviceIdentity).toBeNull();
|
||||
expect(gatewayClientState.options?.deviceIdentity).toEqual(deviceIdentityState.value);
|
||||
expect(gatewayClientState.requests).toEqual([]);
|
||||
});
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ import type { SystemPresence } from "../infra/system-presence.js";
|
||||
import { MAX_SAFE_TIMEOUT_DELAY_MS, resolveSafeTimeoutDelayMs } from "../utils/timer-delay.js";
|
||||
import { GatewayClient, GatewayClientRequestError } from "./client.js";
|
||||
import { READ_SCOPE } from "./method-scopes.js";
|
||||
import { isLoopbackHost } from "./net.js";
|
||||
import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "./protocol/client-info.js";
|
||||
|
||||
export type GatewayProbeAuth = {
|
||||
@@ -162,24 +161,18 @@ export async function probeGateway(opts: {
|
||||
const detailLevel = opts.includeDetails === false ? "none" : (opts.detailLevel ?? "full");
|
||||
|
||||
const deviceIdentity = await (async () => {
|
||||
let hostname: string;
|
||||
try {
|
||||
hostname = new URL(opts.url).hostname;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
// Keep probes non-mutating: only attach a device identity when this CLI
|
||||
// already has a cached operator device token. Fresh diagnostics should not
|
||||
// create a read-only pairing baseline that later blocks admin commands.
|
||||
if (isLoopbackHost(hostname) && !(opts.auth?.token || opts.auth?.password)) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
if (!URL.canParse(opts.url)) {
|
||||
return null;
|
||||
}
|
||||
const { loadDeviceIdentityIfPresent } = await import("../infra/device-identity.js");
|
||||
const identity = loadDeviceIdentityIfPresent();
|
||||
if (!identity) {
|
||||
return null;
|
||||
}
|
||||
// Keep probes non-mutating: only attach a device identity when this CLI
|
||||
// already has a cached operator device token. Fresh diagnostics should not
|
||||
// create a read-only pairing baseline that later blocks admin commands.
|
||||
const cachedOperatorToken = loadDeviceAuthToken({
|
||||
deviceId: identity.deviceId,
|
||||
role: "operator",
|
||||
|
||||
Reference in New Issue
Block a user