fix(bedrock): omit Opus temperature for profiles

This commit is contained in:
Peter Steinberger
2026-04-28 20:36:05 +01:00
parent 1f26e32f5f
commit 771846c5fa
2 changed files with 252 additions and 24 deletions

View File

@@ -296,6 +296,105 @@ describe("amazon-bedrock provider plugin", () => {
}); });
}); });
it("omits temperature for Bedrock Opus 4.7 model ids", async () => {
  // Bare regional model id (no ARN wrapper, hyphenated "4-7" version).
  const modelId = "us.anthropic.claude-opus-4-7";
  const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
  const wrapped = provider.wrapStreamFn?.({
    provider: "amazon-bedrock",
    modelId,
    streamFn: spyStreamFn,
  } as never);

  const streamModel = {
    api: "bedrock-converse-stream",
    provider: "amazon-bedrock",
    id: modelId,
  } as never;
  // NOTE(review): the assertion compares the wrapper's return value with the
  // forwarded options, so spyStreamFn presumably echoes its options argument —
  // confirm against its definition.
  const forwarded = wrapped?.(streamModel, { messages: [] } as never, {
    temperature: 0.2,
    maxTokens: 10,
  });

  // `temperature` must be gone; every other option must survive untouched.
  expect(forwarded).toEqual({ maxTokens: 10 });
});
it("omits temperature for dotted Bedrock Opus 4.7 model ids", async () => {
  // Same scenario as the hyphenated id, but with the dotted "4.7" spelling
  // and a "-v1:0" version suffix.
  const modelId = "us.anthropic.claude-opus-4.7-v1:0";
  const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
  const wrapped = provider.wrapStreamFn?.({
    provider: "amazon-bedrock",
    modelId,
    streamFn: spyStreamFn,
  } as never);

  const streamModel = {
    api: "bedrock-converse-stream",
    provider: "amazon-bedrock",
    id: modelId,
  } as never;
  const forwarded = wrapped?.(streamModel, { messages: [] } as never, {
    temperature: 0.2,
    maxTokens: 10,
  });

  // Only `temperature` is stripped from the options.
  expect(forwarded).toEqual({ maxTokens: 10 });
});
it("omits temperature for named Bedrock Opus 4.7 inference profile ARNs", async () => {
  // Inference-profile ARN whose resource segment still spells out the model name.
  const modelId =
    "arn:aws:bedrock:us-west-2:123456789012:inference-profile/us.anthropic.claude-opus-4-7";
  const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
  const wrapped = provider.wrapStreamFn?.({
    provider: "amazon-bedrock",
    modelId,
    streamFn: spyStreamFn,
  } as never);

  const streamModel = {
    api: "bedrock-converse-stream",
    provider: "amazon-bedrock",
    id: modelId,
  } as never;
  // `temperature: 0` is falsy, so this also guards against a truthiness-based
  // check that would let a zero temperature slip through.
  const forwarded = wrapped?.(
    streamModel,
    { messages: [] } as never,
    { temperature: 0, region: "us-west-2" } as never,
  );

  expect(forwarded).toEqual({ region: "us-west-2" });
});
it("omits temperature for non-US Bedrock Opus 4.7 regional profiles", async () => {
  // Regional prefix other than "us." — the stripping must not be US-only.
  const modelId = "eu.anthropic.claude-opus-4-7";
  const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
  const wrapped = provider.wrapStreamFn?.({
    provider: "amazon-bedrock",
    modelId,
    streamFn: spyStreamFn,
  } as never);

  const streamModel = {
    api: "bedrock-converse-stream",
    provider: "amazon-bedrock",
    id: modelId,
  } as never;
  const forwarded = wrapped?.(streamModel, { messages: [] } as never, {
    temperature: 0.4,
    maxTokens: 12,
  });

  // Only `temperature` is stripped from the options.
  expect(forwarded).toEqual({ maxTokens: 12 });
});
it("classifies nested Bedrock deprecated-temperature validation as format failover", async () => {
  // Error text as seen here: a ValidationException wrapping a JSON error body
  // whose message says `temperature` is deprecated for the model.
  const errorMessage =
    'ValidationException: The model returned the following errors: {"type":"error","error":{"type":"invalid_request_error","message":"`temperature` is deprecated for this model."}}';
  const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);

  const reason = provider.classifyFailoverReason?.({
    provider: "amazon-bedrock",
    modelId: "us.anthropic.claude-opus-4-7",
    errorMessage,
  } as never);

  expect(reason).toBe("format");
});
describe("guardrail config schema", () => { describe("guardrail config schema", () => {
it("defines discovery and guardrail objects with the expected shape", () => { it("defines discovery and guardrail objects with the expected shape", () => {
const pluginJson = JSON.parse( const pluginJson = JSON.parse(
@@ -747,6 +846,66 @@ describe("amazon-bedrock provider plugin", () => {
expect(bedrockClientConfigs).toEqual([{ region: "us-east-1" }]); expect(bedrockClientConfigs).toEqual([{ region: "us-east-1" }]);
}); });
it("omits temperature for opaque application inference profile ARNs that resolve to Opus 4.7", async () => {
  // The profile ARN carries no model name; the Opus 4.7 target only shows up
  // in the profile lookup result queued below.
  const modelId =
    "arn:aws:bedrock:us-west-2:123456789012:application-inference-profile/z27qyso459dd";
  const resolvedModelArn =
    "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-opus-4.7-v1:0";
  // NOTE(review): presumably consumed by the mocked GetInferenceProfile call —
  // confirm against the test harness.
  inferenceProfileGetResults.push({ models: [{ modelArn: resolvedModelArn }] });

  const provider = await registerWithConfig(undefined);
  const payload: Record<string, unknown> = {
    inferenceConfig: { temperature: 0.3, maxTokens: 10 },
    system: [{ text: "You are helpful." }],
    messages: [{ role: "user", content: [{ text: "Hello" }] }],
  };

  await callWrappedStreamWithPayload(
    provider,
    modelId,
    makeAppInferenceProfileDescriptor(modelId),
    { temperature: 0.3, maxTokens: 10, cacheRetention: "none" },
    payload,
  );

  // The payload is patched in place: temperature removed, maxTokens kept.
  expect(payload.inferenceConfig).toEqual({ maxTokens: 10 });
  // Exactly one control-plane command, issued against the ARN's own region.
  expect(sendBedrockCommand).toHaveBeenCalledTimes(1);
  expect(bedrockClientConfigs).toEqual([{ region: "us-west-2" }]);
});
it("omits temperature for Claude-named application inference profile ARNs that resolve to Opus 4.7", async () => {
  // Foundation model the profile lookup reports for the shared test profile.
  const resolvedModelArn =
    "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-opus-4-7-v1:0";
  inferenceProfileGetResults.push({ models: [{ modelArn: resolvedModelArn }] });

  const provider = await registerWithConfig(undefined);
  const payload: Record<string, unknown> = {
    inferenceConfig: { temperature: 0.3, maxTokens: 10 },
    system: [{ text: "You are helpful." }],
    messages: [{ role: "user", content: [{ text: "Hello" }] }],
  };

  await callWrappedStreamWithPayload(
    provider,
    APP_INFERENCE_PROFILE_ARN,
    APP_INFERENCE_PROFILE_DESCRIPTOR,
    { temperature: 0.3, maxTokens: 10, cacheRetention: "short" },
    payload,
  );

  const systemBlocks = payload.system as Array<Record<string, unknown>>;
  // Temperature is stripped from the payload...
  expect(payload.inferenceConfig).toEqual({ maxTokens: 10 });
  // ...and cache-point injection still happens on the same request.
  expect(systemBlocks[1]).toEqual({ cachePoint: { type: "default" } });
  // A single profile lookup, made in the region the test expects (us-east-1).
  expect(sendBedrockCommand).toHaveBeenCalledTimes(1);
  expect(bedrockClientConfigs).toEqual([{ region: "us-east-1" }]);
});
it("does not inject cache points when any resolved profile target is not cacheable", async () => { it("does not inject cache points when any resolved profile target is not cacheable", async () => {
const modelId = const modelId =
"arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459db"; "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459db";

View File

@@ -144,15 +144,27 @@ function resolvedModelSupportsCaching(modelArn: string): boolean {
return matchesPiAiPromptCachingModelId(modelArn); return matchesPiAiPromptCachingModelId(modelArn);
} }
/**
 * Tells whether a Bedrock model reference names Anthropic Claude Opus 4.7.
 *
 * Accepts bare model ids, regional ids and ARNs: the `anthropic.claude-opus-4`
 * token may sit at the start of the string or after a `/`, `.` or `:`, may be
 * preceded by a geo prefix (`us.`, `eu.`, `ap.`, `apac.`, `au.`, `jp.`,
 * `global.`), and may spell the minor version as `4-7` or `4.7`. The version
 * must be followed by the end of the string or one of `-`, `.`, `:`, `/`, so
 * ids such as `...opus-4-70` do not match. Matching is case-insensitive.
 *
 * @param modelRef Model id or ARN to inspect.
 * @returns `true` when the reference matches the Opus 4.7 pattern.
 */
function isOpus47BedrockModelRef(modelRef: string): boolean {
  const opus47Pattern =
    /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]7(?:$|[-.:/])/i;
  return opus47Pattern.test(modelRef);
}
/** /**
* Resolve the underlying foundation model for an application inference profile * Resolve the underlying foundation model for an application inference profile
* via GetInferenceProfile. Results are cached so we only call the API once per * via GetInferenceProfile. Results are cached so we only call the API once per
* profile ARN. Returns true if the underlying model supports prompt caching. * profile ARN. Returns traits needed for request shaping when the model id is
* otherwise opaque.
* *
* Region is extracted from the profile ARN itself to avoid mismatches when * Region is extracted from the profile ARN itself to avoid mismatches when
* the OpenClaw config region differs from the profile's home region. * the OpenClaw config region differs from the profile's home region.
*/ */
const appProfileCacheEligibleCache = new Map<string, boolean>(); type BedrockAppProfileTraits = {
cacheEligible: boolean;
omitTemperature: boolean;
};
const appProfileTraitsCache = new Map<string, BedrockAppProfileTraits>();
type BedrockGetInferenceProfileResponse = { type BedrockGetInferenceProfileResponse = {
models?: Array<{ modelArn?: string }>; models?: Array<{ modelArn?: string }>;
@@ -169,7 +181,7 @@ type BedrockControlPlaneFactory = (region: string | undefined) => BedrockControl
let bedrockControlPlaneOverride: BedrockControlPlaneFactory | undefined; let bedrockControlPlaneOverride: BedrockControlPlaneFactory | undefined;
export function resetBedrockAppProfileCacheEligibilityForTest(): void { export function resetBedrockAppProfileCacheEligibilityForTest(): void {
appProfileCacheEligibleCache.clear(); appProfileTraitsCache.clear();
} }
export function setBedrockAppProfileControlPlaneForTest( export function setBedrockAppProfileControlPlaneForTest(
@@ -190,27 +202,34 @@ async function createBedrockControlPlane(region: string | undefined): Promise<Be
}; };
} }
async function resolveAppProfileCacheEligible( async function resolveAppProfileTraits(
modelId: string, modelId: string,
fallbackRegion: string | undefined, fallbackRegion: string | undefined,
): Promise<boolean> { ): Promise<BedrockAppProfileTraits> {
if (appProfileCacheEligibleCache.has(modelId)) { const cached = appProfileTraitsCache.get(modelId);
return appProfileCacheEligibleCache.get(modelId)!; if (cached) {
return cached;
} }
try { try {
const region = extractRegionFromArn(modelId) ?? fallbackRegion; const region = extractRegionFromArn(modelId) ?? fallbackRegion;
const controlPlane = await createBedrockControlPlane(region); const controlPlane = await createBedrockControlPlane(region);
const resp = await controlPlane.getInferenceProfile({ inferenceProfileIdentifier: modelId }); const resp = await controlPlane.getInferenceProfile({ inferenceProfileIdentifier: modelId });
const models = resp.models ?? []; const models = resp.models ?? [];
const eligible = const modelArns = models.map((m: { modelArn?: string }) => m.modelArn ?? "");
models.length > 0 && const traits = {
models.every((m: { modelArn?: string }) => resolvedModelSupportsCaching(m.modelArn ?? "")); cacheEligible:
appProfileCacheEligibleCache.set(modelId, eligible); models.length > 0 && modelArns.every((modelArn) => resolvedModelSupportsCaching(modelArn)),
return eligible; omitTemperature: modelArns.some(isOpus47BedrockModelRef),
};
appProfileTraitsCache.set(modelId, traits);
return traits;
} catch { } catch {
// Transient failures (throttling, network, IAM) should not be cached — // Transient failures (throttling, network, IAM) should not be cached —
// return the heuristic fallback but allow retry on the next request. // return the heuristic fallback but allow retry on the next request.
return isAnthropicBedrockModel(modelId); return {
cacheEligible: isAnthropicBedrockModel(modelId),
omitTemperature: isOpus47BedrockModelRef(modelId),
};
} }
} }
@@ -279,6 +298,8 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
/ValidationException.*(?:exceeds? the (?:maximum|max) (?:number of )?(?:input )?tokens)/i, /ValidationException.*(?:exceeds? the (?:maximum|max) (?:number of )?(?:input )?tokens)/i,
/ModelStreamErrorException.*(?:Input is too long|too many input tokens)/i, /ModelStreamErrorException.*(?:Input is too long|too many input tokens)/i,
] as const; ] as const;
// Matches a Bedrock error message that contains "ValidationException" followed
// by both "temperature" and "deprecated", in either order. `[\s\S]*` lets the
// match span newlines and nested JSON error bodies; matching is case-insensitive.
// A message that matches is classified as a "format" failover reason.
const deprecatedTemperatureValidationRe =
/ValidationException[\s\S]*(?:invalid_request_error[\s\S]*)?temperature[\s\S]*deprecated|ValidationException[\s\S]*deprecated[\s\S]*temperature/i;
const anthropicByModelReplayHooks = ANTHROPIC_BY_MODEL_REPLAY_HOOKS; const anthropicByModelReplayHooks = ANTHROPIC_BY_MODEL_REPLAY_HOOKS;
const startupPluginConfig = (api.pluginConfig ?? {}) as AmazonBedrockPluginConfig; const startupPluginConfig = (api.pluginConfig ?? {}) as AmazonBedrockPluginConfig;
@@ -306,6 +327,26 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
return createBedrockNoCacheWrapper(streamFn); return createBedrockNoCacheWrapper(streamFn);
}; };
/**
 * Drops the `temperature` option for Opus 4.7 model references.
 *
 * The input object is never mutated: when stripping applies, a shallow copy
 * without `temperature` is returned; otherwise the very same `options`
 * object is handed back.
 *
 * @param modelId Model id or ARN the request targets.
 * @param options Stream options that may carry a `temperature` key.
 * @returns `options` itself, or a shallow copy lacking `temperature`.
 */
function omitDeprecatedOpus47Temperature<TOptions extends object>(
  modelId: string,
  options: TOptions,
): TOptions {
  // Not an Opus 4.7 reference: nothing to strip.
  if (!isOpus47BedrockModelRef(modelId)) {
    return options;
  }
  // Key absent: skip the copy. Presence is checked with `in`, so an explicit
  // `temperature: 0` or `temperature: undefined` still counts as present.
  if (!("temperature" in options)) {
    return options;
  }
  const withoutTemperature = { ...options } as TOptions & { temperature?: unknown };
  delete withoutTemperature.temperature;
  return withoutTemperature;
}
/**
 * Removes `temperature` from `payload.inferenceConfig`, mutating it in place.
 *
 * Does nothing when `inferenceConfig` is missing, falsy, or not an object.
 * This helper does not inspect the model itself; callers decide whether the
 * target model requires the removal.
 *
 * @param payload Request payload whose `inferenceConfig` may hold `temperature`.
 */
function omitDeprecatedOpus47PayloadTemperature(payload: Record<string, unknown>): void {
  const { inferenceConfig: config } = payload;
  if (config && typeof config === "object") {
    delete (config as Record<string, unknown>).temperature;
  }
}
/** Extract the AWS region from a bedrock-runtime baseUrl. */ /** Extract the AWS region from a bedrock-runtime baseUrl. */
function extractRegionFromBaseUrl(baseUrl: string | undefined): string | undefined { function extractRegionFromBaseUrl(baseUrl: string | undefined): string | undefined {
if (!baseUrl) { if (!baseUrl) {
@@ -386,12 +427,13 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
const region = resolveBedrockRegion(config) ?? extractRegionFromBaseUrl(model?.baseUrl); const region = resolveBedrockRegion(config) ?? extractRegionFromBaseUrl(model?.baseUrl);
const mayNeedCacheInjection = const mayNeedCacheInjection =
isBedrockAppInferenceProfile(modelId) && !piAiWouldInjectCachePoints(modelId); isBedrockAppInferenceProfile(modelId) && !piAiWouldInjectCachePoints(modelId);
const shouldOmitTemperature = isOpus47BedrockModelRef(modelId);
// For known Anthropic models (heuristic match), enable injection immediately. // For known Anthropic models (heuristic match), enable injection immediately.
// For opaque profile IDs, we'll resolve via GetInferenceProfile on first call. // For opaque profile IDs, we'll resolve via GetInferenceProfile on first call.
const heuristicMatch = needsCachePointInjection(modelId); const heuristicMatch = needsCachePointInjection(modelId);
if (!region && !mayNeedCacheInjection) { if (!region && !mayNeedCacheInjection && !shouldOmitTemperature) {
return wrapped; return wrapped;
} }
@@ -400,7 +442,10 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
return wrapped; return wrapped;
} }
return (streamModel, context, options) => { return (streamModel, context, options) => {
const merged = Object.assign({}, options, region ? { region } : {}); const merged = omitDeprecatedOpus47Temperature(
modelId,
Object.assign({}, options, region ? { region } : {}),
);
if (!mayNeedCacheInjection) { if (!mayNeedCacheInjection) {
return underlying(streamModel, context, merged); return underlying(streamModel, context, merged);
@@ -416,25 +461,46 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
// want caching enabled, so defaulting to "short" is the safer behavior. // want caching enabled, so defaulting to "short" is the safer behavior.
const cacheRetention = const cacheRetention =
typeof merged.cacheRetention === "string" ? merged.cacheRetention : "short"; typeof merged.cacheRetention === "string" ? merged.cacheRetention : "short";
const originalOnPayload = merged.onPayload as
| ((payload: unknown, model: unknown) => unknown)
| undefined;
if (heuristicMatch) { if (heuristicMatch) {
// Fast path: ARN heuristic already identified this as Claude. // Fast path: ARN heuristic already identified this as Claude, but the
return streamWithPayloadPatch(underlying, streamModel, context, merged, (payload) => { // concrete target may still need profile traits for Opus 4.7 payloads.
injectBedrockCachePoints(payload, cacheRetention); const mayNeedTemperatureTrait = "temperature" in merged;
return underlying(streamModel, context, {
...merged,
onPayload: async (payload: unknown, payloadModel: unknown) => {
if (payload && typeof payload === "object") {
const payloadRecord = payload as Record<string, unknown>;
injectBedrockCachePoints(payloadRecord, cacheRetention);
if (mayNeedTemperatureTrait) {
const traits = await resolveAppProfileTraits(modelId, region);
if (traits.omitTemperature) {
omitDeprecatedOpus47PayloadTemperature(payloadRecord);
}
}
}
return originalOnPayload?.(payload, payloadModel);
},
}); });
} }
// Slow path: opaque profile ID — resolve underlying model via API (cached). // Slow path: opaque profile ID — resolve underlying model via API (cached).
// pi-ai's onPayload supports async, so we await the resolution inline. // pi-ai's onPayload supports async, so we await the resolution inline.
const originalOnPayload = merged.onPayload as
| ((payload: unknown, model: unknown) => unknown)
| undefined;
return underlying(streamModel, context, { return underlying(streamModel, context, {
...merged, ...merged,
onPayload: async (payload: unknown, payloadModel: unknown) => { onPayload: async (payload: unknown, payloadModel: unknown) => {
const eligible = await resolveAppProfileCacheEligible(modelId, region); const traits = await resolveAppProfileTraits(modelId, region);
if (eligible && payload && typeof payload === "object") { if (payload && typeof payload === "object") {
injectBedrockCachePoints(payload as Record<string, unknown>, cacheRetention); const payloadRecord = payload as Record<string, unknown>;
if (traits.cacheEligible) {
injectBedrockCachePoints(payloadRecord, cacheRetention);
}
if (traits.omitTemperature) {
omitDeprecatedOpus47PayloadTemperature(payloadRecord);
}
} }
return originalOnPayload?.(payload, payloadModel); return originalOnPayload?.(payload, payloadModel);
}, },
@@ -450,6 +516,9 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
if (/ModelNotReadyException/i.test(errorMessage)) { if (/ModelNotReadyException/i.test(errorMessage)) {
return "overloaded"; return "overloaded";
} }
if (deprecatedTemperatureValidationRe.test(errorMessage)) {
return "format";
}
return undefined; return undefined;
}, },
resolveThinkingProfile: ({ modelId }) => ({ resolveThinkingProfile: ({ modelId }) => ({