fix(bedrock): omit Opus temperature for profiles

This commit is contained in:
Peter Steinberger
2026-04-28 20:36:05 +01:00
parent 1f26e32f5f
commit 771846c5fa
2 changed files with 252 additions and 24 deletions

View File

@@ -296,6 +296,105 @@ describe("amazon-bedrock provider plugin", () => {
});
});
it("omits temperature for Bedrock Opus 4.7 model ids", async () => {
  // Opus 4.7 rejects `temperature` as deprecated; the stream wrapper must
  // strip it from the call options while leaving everything else intact.
  const plugin = await registerSingleProviderPlugin(amazonBedrockPlugin);
  const opusModelId = "us.anthropic.claude-opus-4-7";
  const stream = plugin.wrapStreamFn?.({
    provider: "amazon-bedrock",
    modelId: opusModelId,
    streamFn: spyStreamFn,
  } as never);
  const result = stream?.(
    {
      api: "bedrock-converse-stream",
      provider: "amazon-bedrock",
      id: opusModelId,
    } as never,
    { messages: [] } as never,
    { temperature: 0.2, maxTokens: 10 },
  );
  expect(result).toEqual({ maxTokens: 10 });
});
it("omits temperature for dotted Bedrock Opus 4.7 model ids", async () => {
  // Same stripping behavior for the dotted "4.7" version spelling with a
  // versioned suffix.
  const plugin = await registerSingleProviderPlugin(amazonBedrockPlugin);
  const dottedModelId = "us.anthropic.claude-opus-4.7-v1:0";
  const stream = plugin.wrapStreamFn?.({
    provider: "amazon-bedrock",
    modelId: dottedModelId,
    streamFn: spyStreamFn,
  } as never);
  const result = stream?.(
    {
      api: "bedrock-converse-stream",
      provider: "amazon-bedrock",
      id: dottedModelId,
    } as never,
    { messages: [] } as never,
    { temperature: 0.2, maxTokens: 10 },
  );
  expect(result).toEqual({ maxTokens: 10 });
});
it("omits temperature for named Bedrock Opus 4.7 inference profile ARNs", async () => {
  // The Opus 4.7 match must also fire on full inference-profile ARNs whose
  // resource segment names the model. temperature: 0 is falsy but still
  // present, so it must be removed too.
  const plugin = await registerSingleProviderPlugin(amazonBedrockPlugin);
  const profileArn =
    "arn:aws:bedrock:us-west-2:123456789012:inference-profile/us.anthropic.claude-opus-4-7";
  const stream = plugin.wrapStreamFn?.({
    provider: "amazon-bedrock",
    modelId: profileArn,
    streamFn: spyStreamFn,
  } as never);
  const result = stream?.(
    {
      api: "bedrock-converse-stream",
      provider: "amazon-bedrock",
      id: profileArn,
    } as never,
    { messages: [] } as never,
    { temperature: 0, region: "us-west-2" } as never,
  );
  expect(result).toEqual({ region: "us-west-2" });
});
it("omits temperature for non-US Bedrock Opus 4.7 regional profiles", async () => {
  // Regional prefixes other than "us." (here "eu.") must also be recognized.
  const plugin = await registerSingleProviderPlugin(amazonBedrockPlugin);
  const euModelId = "eu.anthropic.claude-opus-4-7";
  const stream = plugin.wrapStreamFn?.({
    provider: "amazon-bedrock",
    modelId: euModelId,
    streamFn: spyStreamFn,
  } as never);
  const result = stream?.(
    {
      api: "bedrock-converse-stream",
      provider: "amazon-bedrock",
      id: euModelId,
    } as never,
    { messages: [] } as never,
    { temperature: 0.4, maxTokens: 12 },
  );
  expect(result).toEqual({ maxTokens: 12 });
});
it("classifies nested Bedrock deprecated-temperature validation as format failover", async () => {
  // The deprecation notice arrives as JSON nested inside the
  // ValidationException text; the classifier must still map it to "format".
  const plugin = await registerSingleProviderPlugin(amazonBedrockPlugin);
  const nestedValidationError =
    'ValidationException: The model returned the following errors: {"type":"error","error":{"type":"invalid_request_error","message":"`temperature` is deprecated for this model."}}';
  const reason = plugin.classifyFailoverReason?.({
    provider: "amazon-bedrock",
    modelId: "us.anthropic.claude-opus-4-7",
    errorMessage: nestedValidationError,
  } as never);
  expect(reason).toBe("format");
});
describe("guardrail config schema", () => {
it("defines discovery and guardrail objects with the expected shape", () => {
const pluginJson = JSON.parse(
@@ -747,6 +846,66 @@ describe("amazon-bedrock provider plugin", () => {
expect(bedrockClientConfigs).toEqual([{ region: "us-east-1" }]);
});
it("omits temperature for opaque application inference profile ARNs that resolve to Opus 4.7", async () => {
  // The profile id itself is opaque ("z27qyso459dd"); the plugin must resolve
  // the underlying foundation model via GetInferenceProfile and then strip
  // temperature from the Converse payload.
  const profileArn =
    "arn:aws:bedrock:us-west-2:123456789012:application-inference-profile/z27qyso459dd";
  inferenceProfileGetResults.push({
    models: [
      {
        modelArn:
          "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-opus-4.7-v1:0",
      },
    ],
  });
  const provider = await registerWithConfig(undefined);
  const requestPayload: Record<string, unknown> = {
    inferenceConfig: { temperature: 0.3, maxTokens: 10 },
    system: [{ text: "You are helpful." }],
    messages: [{ role: "user", content: [{ text: "Hello" }] }],
  };
  await callWrappedStreamWithPayload(
    provider,
    profileArn,
    makeAppInferenceProfileDescriptor(profileArn),
    { temperature: 0.3, maxTokens: 10, cacheRetention: "none" },
    requestPayload,
  );
  expect(requestPayload.inferenceConfig).toEqual({ maxTokens: 10 });
  expect(sendBedrockCommand).toHaveBeenCalledTimes(1);
  expect(bedrockClientConfigs).toEqual([{ region: "us-west-2" }]);
});
it("omits temperature for Claude-named application inference profile ARNs that resolve to Opus 4.7", async () => {
  // Even when the heuristic already identifies the profile as Claude, the
  // resolved Opus 4.7 target must strip temperature AND keep cache-point
  // injection working.
  inferenceProfileGetResults.push({
    models: [
      {
        modelArn:
          "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-opus-4-7-v1:0",
      },
    ],
  });
  const provider = await registerWithConfig(undefined);
  const requestPayload: Record<string, unknown> = {
    inferenceConfig: { temperature: 0.3, maxTokens: 10 },
    system: [{ text: "You are helpful." }],
    messages: [{ role: "user", content: [{ text: "Hello" }] }],
  };
  await callWrappedStreamWithPayload(
    provider,
    APP_INFERENCE_PROFILE_ARN,
    APP_INFERENCE_PROFILE_DESCRIPTOR,
    { temperature: 0.3, maxTokens: 10, cacheRetention: "short" },
    requestPayload,
  );
  const systemBlocks = requestPayload.system as Array<Record<string, unknown>>;
  expect(requestPayload.inferenceConfig).toEqual({ maxTokens: 10 });
  expect(systemBlocks[1]).toEqual({ cachePoint: { type: "default" } });
  expect(sendBedrockCommand).toHaveBeenCalledTimes(1);
  expect(bedrockClientConfigs).toEqual([{ region: "us-east-1" }]);
});
it("does not inject cache points when any resolved profile target is not cacheable", async () => {
const modelId =
"arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/z27qyso459db";

View File

@@ -144,15 +144,27 @@ function resolvedModelSupportsCaching(modelArn: string): boolean {
return matchesPiAiPromptCachingModelId(modelArn);
}
/**
 * Returns true when a Bedrock model reference names Anthropic Claude
 * Opus 4.7 — a bare model id, a regional profile id (us/eu/ap/apac/au/jp/
 * global prefix), or a segment inside an ARN. Accepts both the "4-7" and
 * "4.7" version spellings; the trailing boundary keeps e.g. "4-70" from
 * matching.
 */
function isOpus47BedrockModelRef(modelRef: string): boolean {
  const opus47Pattern =
    /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]7(?:$|[-.:/])/i;
  return opus47Pattern.test(modelRef);
}
/**
* Resolve the underlying foundation model for an application inference profile
* via GetInferenceProfile. Results are cached so we only call the API once per
* profile ARN. Returns true if the underlying model supports prompt caching.
* profile ARN. Returns traits needed for request shaping when the model id is
* otherwise opaque.
*
* Region is extracted from the profile ARN itself to avoid mismatches when
* the OpenClaw config region differs from the profile's home region.
*/
const appProfileCacheEligibleCache = new Map<string, boolean>();
type BedrockAppProfileTraits = {
cacheEligible: boolean;
omitTemperature: boolean;
};
const appProfileTraitsCache = new Map<string, BedrockAppProfileTraits>();
type BedrockGetInferenceProfileResponse = {
models?: Array<{ modelArn?: string }>;
@@ -169,7 +181,7 @@ type BedrockControlPlaneFactory = (region: string | undefined) => BedrockControl
let bedrockControlPlaneOverride: BedrockControlPlaneFactory | undefined;
export function resetBedrockAppProfileCacheEligibilityForTest(): void {
appProfileCacheEligibleCache.clear();
appProfileTraitsCache.clear();
}
export function setBedrockAppProfileControlPlaneForTest(
@@ -190,27 +202,34 @@ async function createBedrockControlPlane(region: string | undefined): Promise<Be
};
}
async function resolveAppProfileCacheEligible(
async function resolveAppProfileTraits(
modelId: string,
fallbackRegion: string | undefined,
): Promise<boolean> {
if (appProfileCacheEligibleCache.has(modelId)) {
return appProfileCacheEligibleCache.get(modelId)!;
): Promise<BedrockAppProfileTraits> {
const cached = appProfileTraitsCache.get(modelId);
if (cached) {
return cached;
}
try {
const region = extractRegionFromArn(modelId) ?? fallbackRegion;
const controlPlane = await createBedrockControlPlane(region);
const resp = await controlPlane.getInferenceProfile({ inferenceProfileIdentifier: modelId });
const models = resp.models ?? [];
const eligible =
models.length > 0 &&
models.every((m: { modelArn?: string }) => resolvedModelSupportsCaching(m.modelArn ?? ""));
appProfileCacheEligibleCache.set(modelId, eligible);
return eligible;
const modelArns = models.map((m: { modelArn?: string }) => m.modelArn ?? "");
const traits = {
cacheEligible:
models.length > 0 && modelArns.every((modelArn) => resolvedModelSupportsCaching(modelArn)),
omitTemperature: modelArns.some(isOpus47BedrockModelRef),
};
appProfileTraitsCache.set(modelId, traits);
return traits;
} catch {
// Transient failures (throttling, network, IAM) should not be cached —
// return the heuristic fallback but allow retry on the next request.
return isAnthropicBedrockModel(modelId);
return {
cacheEligible: isAnthropicBedrockModel(modelId),
omitTemperature: isOpus47BedrockModelRef(modelId),
};
}
}
@@ -279,6 +298,8 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
/ValidationException.*(?:exceeds? the (?:maximum|max) (?:number of )?(?:input )?tokens)/i,
/ModelStreamErrorException.*(?:Input is too long|too many input tokens)/i,
] as const;
// Matches Bedrock ValidationException messages that report `temperature` as
// deprecated, in either word order ("temperature ... deprecated" or
// "deprecated ... temperature"), including the case where the error arrives
// as JSON (invalid_request_error) nested inside the exception text.
const deprecatedTemperatureValidationRe =
/ValidationException[\s\S]*(?:invalid_request_error[\s\S]*)?temperature[\s\S]*deprecated|ValidationException[\s\S]*deprecated[\s\S]*temperature/i;
const anthropicByModelReplayHooks = ANTHROPIC_BY_MODEL_REPLAY_HOOKS;
const startupPluginConfig = (api.pluginConfig ?? {}) as AmazonBedrockPluginConfig;
@@ -306,6 +327,26 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
return createBedrockNoCacheWrapper(streamFn);
};
/**
 * Drop the `temperature` option when the target is an Opus 4.7 model, which
 * rejects it as deprecated. Returns the original options object untouched
 * for every other model (or when no temperature is set); otherwise returns a
 * shallow copy with the key removed.
 */
function omitDeprecatedOpus47Temperature<TOptions extends object>(
  modelId: string,
  options: TOptions,
): TOptions {
  const hasTemperature = "temperature" in options;
  if (!hasTemperature || !isOpus47BedrockModelRef(modelId)) {
    return options;
  }
  const stripped = { ...options } as TOptions & { temperature?: unknown };
  delete stripped.temperature;
  return stripped;
}
/**
 * Remove `temperature` from an already-built Converse payload's
 * `inferenceConfig`, mutating it in place. A payload without an object-valued
 * `inferenceConfig` is left untouched.
 */
function omitDeprecatedOpus47PayloadTemperature(payload: Record<string, unknown>): void {
  const config = payload.inferenceConfig;
  if (typeof config !== "object" || config === null) {
    return;
  }
  delete (config as Record<string, unknown>).temperature;
}
/** Extract the AWS region from a bedrock-runtime baseUrl. */
function extractRegionFromBaseUrl(baseUrl: string | undefined): string | undefined {
if (!baseUrl) {
@@ -386,12 +427,13 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
const region = resolveBedrockRegion(config) ?? extractRegionFromBaseUrl(model?.baseUrl);
const mayNeedCacheInjection =
isBedrockAppInferenceProfile(modelId) && !piAiWouldInjectCachePoints(modelId);
const shouldOmitTemperature = isOpus47BedrockModelRef(modelId);
// For known Anthropic models (heuristic match), enable injection immediately.
// For opaque profile IDs, we'll resolve via GetInferenceProfile on first call.
const heuristicMatch = needsCachePointInjection(modelId);
if (!region && !mayNeedCacheInjection) {
if (!region && !mayNeedCacheInjection && !shouldOmitTemperature) {
return wrapped;
}
@@ -400,7 +442,10 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
return wrapped;
}
return (streamModel, context, options) => {
const merged = Object.assign({}, options, region ? { region } : {});
const merged = omitDeprecatedOpus47Temperature(
modelId,
Object.assign({}, options, region ? { region } : {}),
);
if (!mayNeedCacheInjection) {
return underlying(streamModel, context, merged);
@@ -416,25 +461,46 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
// want caching enabled, so defaulting to "short" is the safer behavior.
const cacheRetention =
typeof merged.cacheRetention === "string" ? merged.cacheRetention : "short";
const originalOnPayload = merged.onPayload as
| ((payload: unknown, model: unknown) => unknown)
| undefined;
if (heuristicMatch) {
// Fast path: ARN heuristic already identified this as Claude.
return streamWithPayloadPatch(underlying, streamModel, context, merged, (payload) => {
injectBedrockCachePoints(payload, cacheRetention);
// Fast path: ARN heuristic already identified this as Claude, but the
// concrete target may still need profile traits for Opus 4.7 payloads.
const mayNeedTemperatureTrait = "temperature" in merged;
return underlying(streamModel, context, {
...merged,
onPayload: async (payload: unknown, payloadModel: unknown) => {
if (payload && typeof payload === "object") {
const payloadRecord = payload as Record<string, unknown>;
injectBedrockCachePoints(payloadRecord, cacheRetention);
if (mayNeedTemperatureTrait) {
const traits = await resolveAppProfileTraits(modelId, region);
if (traits.omitTemperature) {
omitDeprecatedOpus47PayloadTemperature(payloadRecord);
}
}
}
return originalOnPayload?.(payload, payloadModel);
},
});
}
// Slow path: opaque profile ID — resolve underlying model via API (cached).
// pi-ai's onPayload supports async, so we await the resolution inline.
const originalOnPayload = merged.onPayload as
| ((payload: unknown, model: unknown) => unknown)
| undefined;
return underlying(streamModel, context, {
...merged,
onPayload: async (payload: unknown, payloadModel: unknown) => {
const eligible = await resolveAppProfileCacheEligible(modelId, region);
if (eligible && payload && typeof payload === "object") {
injectBedrockCachePoints(payload as Record<string, unknown>, cacheRetention);
const traits = await resolveAppProfileTraits(modelId, region);
if (payload && typeof payload === "object") {
const payloadRecord = payload as Record<string, unknown>;
if (traits.cacheEligible) {
injectBedrockCachePoints(payloadRecord, cacheRetention);
}
if (traits.omitTemperature) {
omitDeprecatedOpus47PayloadTemperature(payloadRecord);
}
}
return originalOnPayload?.(payload, payloadModel);
},
@@ -450,6 +516,9 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
if (/ModelNotReadyException/i.test(errorMessage)) {
return "overloaded";
}
if (deprecatedTemperatureValidationRe.test(errorMessage)) {
return "format";
}
return undefined;
},
resolveThinkingProfile: ({ modelId }) => ({