Mirror of https://fastgit.cc/https://github.com/anomalyco/opencode, synced 2026-05-01 22:44:21 +08:00
Compare commits: beta...llm-core-p (97 commits)
| SHA1 |
|---|
| e9d84c6db7 |
| 116a5c2e74 |
| 8f338ef6dc |
| 75f467bae3 |
| f4de3e801e |
| a0165b2ae8 |
| 9363c70acd |
| 5ec2673af2 |
| 8b414cdb5a |
| 49913ff041 |
| bb7f52b24d |
| e7ff19bb5f |
| bb859e2e2c |
| 61a18bdbd0 |
| f86a6790a2 |
| d8b9672234 |
| 042bf6c822 |
| 5d08e28cd9 |
| 4f294852a6 |
| a676b12b7b |
| 31740c1d36 |
| 9928917899 |
| 98cb886faf |
| bdd01cad33 |
| 6ed160ae02 |
| 7505da95d3 |
| 6099b3dfe9 |
| 20bab34b01 |
| cd7487a73b |
| a921eb88e6 |
| f2f7a338de |
| 7141036ec4 |
| b0be03facd |
| 1cd53b27ec |
| 59f39a922f |
| 7fba0efbd9 |
| 0e558e13c7 |
| afa57acfda |
| 189161ed62 |
| fa8f7a1dca |
| afba37d330 |
| fc3a1bfd34 |
| 0ba8ca63b6 |
| 38af0dc6f8 |
| 8bbbceef92 |
| d00db17902 |
| f59996362e |
| b653261772 |
| 5f08d6cbd6 |
| 3cd13c87c4 |
| 653a830cf6 |
| 33ef3b01f8 |
| a26f2c905f |
| 03a97a64a3 |
| 096c305a55 |
| ecd73f26fc |
| 1a839c6233 |
| c69f2bb15e |
| 339db0e885 |
| fa2a5d1fdb |
| 3a94622e76 |
| 778b1762b0 |
| bab2fbc7f6 |
| 0da7d8a2a1 |
| 769d6123d5 |
| 6c887b0faa |
| 4e3f678b24 |
| e1c6bf92fb |
| b5ca62d1ea |
| ca8d700a14 |
| ca198f739e |
| 6a7735e14c |
| 3a2cb7f8ac |
| b4a7cf638f |
| 0cc992fc7c |
| ca29f8a6ef |
| afe3990f27 |
| 8a4699e8e7 |
| 74b2e5781c |
| 6573673875 |
| 3561938e41 |
| e476b63a28 |
| 850eeae24c |
| 8d97b38983 |
| 9a05675200 |
| 0f4e54d6e8 |
| aec6c5983d |
| 18d618d051 |
| 412a1bec44 |
| 04468304e7 |
| ca9e0cfa3c |
| f02652353e |
| 1e0f6ee242 |
| 36ab9fa584 |
| d96bf0d566 |
| 79683710c0 |
| edd176c490 |
bun.lock (41 lines changed)
@@ -352,6 +352,36 @@
      "typescript": "catalog:",
    },
  },
  "packages/http-recorder": {
    "name": "@opencode-ai/http-recorder",
    "version": "0.0.0",
    "dependencies": {
      "effect": "catalog:",
    },
    "devDependencies": {
      "@effect/platform-node": "catalog:",
      "@tsconfig/bun": "catalog:",
      "@types/bun": "catalog:",
      "@typescript/native-preview": "catalog:",
    },
  },
  "packages/llm": {
    "name": "@opencode-ai/llm",
    "version": "1.14.25",
    "dependencies": {
      "@smithy/eventstream-codec": "4.2.14",
      "@smithy/util-utf8": "4.2.2",
      "aws4fetch": "1.0.20",
      "effect": "catalog:",
    },
    "devDependencies": {
      "@effect/platform-node": "catalog:",
      "@opencode-ai/http-recorder": "workspace:*",
      "@tsconfig/bun": "catalog:",
      "@types/bun": "catalog:",
      "@typescript/native-preview": "catalog:",
    },
  },
  "packages/opencode": {
    "name": "opencode",
    "version": "1.14.31",

@@ -396,6 +426,7 @@
      "@octokit/graphql": "9.0.2",
      "@octokit/rest": "catalog:",
      "@openauthjs/openauth": "catalog:",
      "@opencode-ai/llm": "workspace:*",
      "@opencode-ai/plugin": "workspace:*",
      "@opencode-ai/script": "workspace:*",
      "@opencode-ai/sdk": "workspace:*",

@@ -1576,6 +1607,10 @@
    "@opencode-ai/function": ["@opencode-ai/function@workspace:packages/function"],

    "@opencode-ai/http-recorder": ["@opencode-ai/http-recorder@workspace:packages/http-recorder"],

    "@opencode-ai/llm": ["@opencode-ai/llm@workspace:packages/llm"],

    "@opencode-ai/plugin": ["@opencode-ai/plugin@workspace:packages/plugin"],

    "@opencode-ai/script": ["@opencode-ai/script@workspace:packages/script"],

@@ -5636,6 +5671,10 @@
    "@opencode-ai/desktop-electron/typescript": ["typescript@5.6.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw=="],

    "@opencode-ai/llm/@smithy/eventstream-codec": ["@smithy/eventstream-codec@4.2.14", "", { "dependencies": { "@aws-crypto/crc32": "5.2.0", "@smithy/types": "^4.14.1", "@smithy/util-hex-encoding": "^4.2.2", "tslib": "^2.6.2" } }, "sha512-erZq0nOIpzfeZdCyzZjdJb4nVSKLUmSkaQUVkRGQTXs30gyUGeKnrYEg+Xe1W5gE3aReS7IgsvANwVPxSzY6Pw=="],

    "@opencode-ai/llm/@smithy/util-utf8": ["@smithy/util-utf8@4.2.2", "", { "dependencies": { "@smithy/util-buffer-from": "^4.2.2", "tslib": "^2.6.2" } }, "sha512-75MeYpjdWRe8M5E3AW0O4Cx3UadweS+cwdXjwYGBW5h/gxxnbeZ877sLPX/ZJA9GVTlL/qG0dXP29JWFCD1Ayw=="],

    "@opencode-ai/ui/@solid-primitives/resize-observer": ["@solid-primitives/resize-observer@2.1.3", "", { "dependencies": { "@solid-primitives/event-listener": "^2.4.3", "@solid-primitives/rootless": "^1.5.2", "@solid-primitives/static-store": "^0.1.2", "@solid-primitives/utils": "^6.3.2" }, "peerDependencies": { "solid-js": "^1.6.12" } }, "sha512-zBLje5E06TgOg93S7rGPldmhDnouNGhvfZVKOp+oG2XU8snA+GoCSSCz1M+jpNAg5Ek2EakU5UVQqL152WmdXQ=="],

    "@opencode-ai/web/@shikijs/transformers": ["@shikijs/transformers@3.20.0", "", { "dependencies": { "@shikijs/core": "3.20.0", "@shikijs/types": "3.20.0" } }, "sha512-PrHHMRr3Q5W1qB/42kJW6laqFyWdhrPF2hNR9qjOm1xcSiAO3hAHo7HaVyHE6pMyevmy3i51O8kuGGXC78uK3g=="],

@@ -6620,6 +6659,8 @@
    "@opencode-ai/desktop/@actions/artifact/@actions/http-client": ["@actions/http-client@2.2.3", "", { "dependencies": { "tunnel": "^0.0.6", "undici": "^5.25.4" } }, "sha512-mx8hyJi/hjFvbPokCg4uRd4ZX78t+YyRPtnKWwIl+RzNaVuFpQHfmlGVfsKEJN8LwTCvL+DfVgAM04XaHkm6bA=="],

    "@opencode-ai/llm/@smithy/eventstream-codec/@smithy/types": ["@smithy/types@4.14.1", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-59b5HtSVrVR/eYNei3BUj3DCPKD/G7EtDDe7OEJE7i7FtQFugYo6MxbotS8mVJkLNVf8gYaAlEBwwtJ9HzhWSg=="],

    "@opencode-ai/web/@shikijs/transformers/@shikijs/core": ["@shikijs/core@3.20.0", "", { "dependencies": { "@shikijs/types": "3.20.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4", "hast-util-to-html": "^9.0.5" } }, "sha512-f2ED7HYV4JEk827mtMDwe/yQ25pRiXZmtHjWF8uzZKuKiEsJR7Ce1nuQ+HhV9FzDcbIo4ObBCD9GPTzNuy9S1g=="],

    "@opencode-ai/web/@shikijs/transformers/@shikijs/types": ["@shikijs/types@3.20.0", "", { "dependencies": { "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } }, "sha512-lhYAATn10nkZcBQ0BlzSbJA3wcmL5MXUUF8d2Zzon6saZDlToKaiRX60n2+ZaHJCmXEcZRWNzn+k9vplr8Jhsw=="],
(next file; its path header was lost in the mirror, but the hunk context shows it edits the Flag module)

@@ -67,6 +67,13 @@ export const Flag = {
  OPENCODE_ENABLE_EXA: truthy("OPENCODE_ENABLE_EXA") || OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_EXA"),
  OPENCODE_EXPERIMENTAL_BASH_DEFAULT_TIMEOUT_MS: number("OPENCODE_EXPERIMENTAL_BASH_DEFAULT_TIMEOUT_MS"),
  OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX: number("OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX"),
  // Opt-in to the LLM-native stream path in `session/llm.ts`. Today this
  // routes a narrow slice of sessions (text-only, Anthropic, with explicit
  // `nativeMessages` populated by the caller) through the
  // `@opencode-ai/llm` core stack instead of `streamText` from the AI SDK.
  // Everything else falls through to the existing path. The flag will go
  // away once parity is proven across all six protocols.
  OPENCODE_EXPERIMENTAL_LLM_NATIVE: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_LLM_NATIVE"),
  OPENCODE_EXPERIMENTAL_OXFMT: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_OXFMT"),
  OPENCODE_EXPERIMENTAL_LSP_TY: truthy("OPENCODE_EXPERIMENTAL_LSP_TY"),
  OPENCODE_EXPERIMENTAL_LSP_TOOL: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_LSP_TOOL"),
packages/http-recorder/package.json (new file, 25 lines)
@@ -0,0 +1,25 @@
{
  "$schema": "https://json.schemastore.org/package.json",
  "version": "0.0.0",
  "name": "@opencode-ai/http-recorder",
  "type": "module",
  "license": "MIT",
  "private": true,
  "scripts": {
    "test": "bun test --timeout 30000",
    "typecheck": "tsgo --noEmit"
  },
  "exports": {
    ".": "./src/index.ts",
    "./*": "./src/*.ts"
  },
  "devDependencies": {
    "@effect/platform-node": "catalog:",
    "@tsconfig/bun": "catalog:",
    "@types/bun": "catalog:",
    "@typescript/native-preview": "catalog:"
  },
  "dependencies": {
    "effect": "catalog:"
  }
}
packages/http-recorder/src/diff.ts (new file, 90 lines)
@@ -0,0 +1,90 @@
import { HttpClientRequest } from "effect/unstable/http"
import { decodeJson } from "./matching"
import { REDACTED, redactUrl, secretFindings } from "./redaction"
import type { Cassette, RequestSnapshot } from "./schema"
import { Option } from "effect"

const safeText = (value: unknown) => {
  if (value === undefined) return "undefined"
  if (secretFindings(value).length > 0) return JSON.stringify(REDACTED)
  const text = JSON.stringify(value)
  if (!text) return String(value)
  return text.length > 300 ? `${text.slice(0, 300)}...` : text
}

const jsonBody = (body: string) => Option.getOrUndefined(decodeJson(body))

const valueDiffs = (expected: unknown, received: unknown, base = "$", limit = 8): ReadonlyArray<string> => {
  if (Object.is(expected, received)) return []
  if (
    expected &&
    received &&
    typeof expected === "object" &&
    typeof received === "object" &&
    !Array.isArray(expected) &&
    !Array.isArray(received)
  ) {
    return [...new Set([...Object.keys(expected), ...Object.keys(received)])]
      .toSorted()
      .flatMap((key) =>
        valueDiffs(
          (expected as Record<string, unknown>)[key],
          (received as Record<string, unknown>)[key],
          `${base}.${key}`,
          limit,
        ),
      )
      .slice(0, limit)
  }
  if (Array.isArray(expected) && Array.isArray(received)) {
    return Array.from({ length: Math.max(expected.length, received.length) }, (_, index) => index)
      .flatMap((index) => valueDiffs(expected[index], received[index], `${base}[${index}]`, limit))
      .slice(0, limit)
  }
  return [`${base} expected ${safeText(expected)}, received ${safeText(received)}`]
}

const headerDiffs = (expected: Record<string, string>, received: Record<string, string>) =>
  [...new Set([...Object.keys(expected), ...Object.keys(received)])].toSorted().flatMap((key) => {
    if (expected[key] === received[key]) return []
    if (expected[key] === undefined) return [` ${key} unexpected ${safeText(received[key])}`]
    if (received[key] === undefined) return [` ${key} missing expected ${safeText(expected[key])}`]
    return [` ${key} expected ${safeText(expected[key])}, received ${safeText(received[key])}`]
  })

export const requestDiff = (expected: RequestSnapshot, received: RequestSnapshot) => {
  const lines = []
  if (expected.method !== received.method) {
    lines.push("method:", ` expected ${expected.method}, received ${received.method}`)
  }
  if (expected.url !== received.url) {
    lines.push("url:", ` expected ${expected.url}`, ` received ${received.url}`)
  }
  const headers = headerDiffs(expected.headers, received.headers)
  if (headers.length > 0) lines.push("headers:", ...headers.slice(0, 8))
  const expectedBody = jsonBody(expected.body)
  const receivedBody = jsonBody(received.body)
  const body =
    expectedBody !== undefined && receivedBody !== undefined
      ? valueDiffs(expectedBody, receivedBody).map((line) => ` ${line}`)
      : expected.body === received.body
        ? []
        : [` expected ${safeText(expected.body)}, received ${safeText(received.body)}`]
  if (body.length > 0) lines.push("body:", ...body)
  return lines
}

export const mismatchDetail = (cassette: Cassette, incoming: RequestSnapshot) => {
  if (cassette.interactions.length === 0) return "cassette has no recorded interactions"
  const ranked = cassette.interactions
    .map((interaction, index) => ({ index, lines: requestDiff(interaction.request, incoming) }))
    .toSorted((a, b) => a.lines.length - b.lines.length || a.index - b.index)
  const best = ranked[0]
  return ["no recorded interaction matched", `closest interaction: #${best.index + 1}`, ...best.lines].join("\n")
}

export const redactedErrorRequest = (request: HttpClientRequest.HttpClientRequest) =>
  HttpClientRequest.modify(request, { url: redactUrl(request.url) })
packages/http-recorder/src/effect.ts (new file, 177 lines)
@@ -0,0 +1,177 @@
import { NodeFileSystem } from "@effect/platform-node"
import { Effect, FileSystem, Layer, Option, Ref } from "effect"
import {
  FetchHttpClient,
  HttpClient,
  HttpClientError,
  HttpClientRequest,
  HttpClientResponse,
} from "effect/unstable/http"
import * as path from "node:path"
import { redactedErrorRequest, mismatchDetail } from "./diff"
import { defaultMatcher, decodeJson, type RequestMatcher } from "./matching"
import { cassetteSecretFindings, redactHeaders, redactUrl, type SecretFinding } from "./redaction"
import type { Cassette, CassetteMetadata, Interaction, ResponseSnapshot } from "./schema"
import { cassetteFor, cassettePath, formatCassette, parseCassette } from "./storage"

const isRecordMode = process.env.RECORD === "true"

export const DEFAULT_REQUEST_HEADERS: ReadonlyArray<string> = ["content-type", "accept", "openai-beta"]
const DEFAULT_RESPONSE_HEADERS: ReadonlyArray<string> = ["content-type"]

export interface RecordReplayOptions {
  readonly directory?: string
  readonly metadata?: CassetteMetadata
  readonly redact?: {
    readonly headers?: ReadonlyArray<string>
    readonly query?: ReadonlyArray<string>
  }
  readonly requestHeaders?: ReadonlyArray<string>
  readonly responseHeaders?: ReadonlyArray<string>
  readonly redactBody?: (body: unknown) => unknown
  readonly dispatch?: "match" | "sequential"
  readonly match?: RequestMatcher
}

const responseHeaders = (
  response: HttpClientResponse.HttpClientResponse,
  allow: ReadonlyArray<string>,
  redact: ReadonlyArray<string> | undefined,
) => {
  const merged = redactHeaders(response.headers as Record<string, string>, allow, redact)
  if (!merged["content-type"]) merged["content-type"] = "text/event-stream"
  return merged
}

const BINARY_CONTENT_TYPES: ReadonlyArray<string> = ["vnd.amazon.eventstream", "octet-stream"]

const isBinaryContentType = (contentType: string | undefined) => {
  if (!contentType) return false
  const lower = contentType.toLowerCase()
  return BINARY_CONTENT_TYPES.some((token) => lower.includes(token))
}

const captureResponseBody = (response: HttpClientResponse.HttpClientResponse, contentType: string | undefined) =>
  isBinaryContentType(contentType)
    ? response.arrayBuffer.pipe(
        Effect.map((bytes) => ({ body: Buffer.from(bytes).toString("base64"), bodyEncoding: "base64" as const })),
      )
    : response.text.pipe(Effect.map((body) => ({ body })))

const decodeResponseBody = (snapshot: ResponseSnapshot) =>
  snapshot.bodyEncoding === "base64" ? Buffer.from(snapshot.body, "base64") : snapshot.body

const fixtureMissing = (request: HttpClientRequest.HttpClientRequest, name: string) =>
  new HttpClientError.HttpClientError({
    reason: new HttpClientError.TransportError({
      request,
      description: `Fixture "${name}" not found. Run with RECORD=true to create it.`,
    }),
  })

const fixtureMismatch = (request: HttpClientRequest.HttpClientRequest, name: string, detail: string) =>
  new HttpClientError.HttpClientError({
    reason: new HttpClientError.TransportError({
      request: redactedErrorRequest(request),
      description: `Fixture "${name}" does not match the current request: ${detail}. Run with RECORD=true to update it.`,
    }),
  })

const unsafeCassette = (
  request: HttpClientRequest.HttpClientRequest,
  name: string,
  findings: ReadonlyArray<SecretFinding>,
) =>
  new HttpClientError.HttpClientError({
    reason: new HttpClientError.TransportError({
      request,
      description: `Refusing to write cassette "${name}" because it contains possible secrets: ${findings
        .map((item) => `${item.path} (${item.reason})`)
        .join(", ")}`,
    }),
  })

export const cassetteLayer = (name: string, options: RecordReplayOptions = {}): Layer.Layer<HttpClient.HttpClient> =>
  Layer.effect(
    HttpClient.HttpClient,
    Effect.gen(function* () {
      const upstream = yield* HttpClient.HttpClient
      const fileSystem = yield* FileSystem.FileSystem
      const file = cassettePath(name, options.directory)
      const dir = path.dirname(file)
      const requestHeadersAllow = options.requestHeaders ?? DEFAULT_REQUEST_HEADERS
      const responseHeadersAllow = options.responseHeaders ?? DEFAULT_RESPONSE_HEADERS
      const match = options.match ?? defaultMatcher
      const sequential = options.dispatch === "sequential"
      const recorded = yield* Ref.make<ReadonlyArray<Interaction>>([])
      const cursor = yield* Ref.make(0)

      const snapshotRequest = (request: HttpClientRequest.HttpClientRequest) =>
        Effect.gen(function* () {
          const web = yield* HttpClientRequest.toWeb(request).pipe(Effect.orDie)
          const raw = yield* Effect.promise(() => web.text())
          const body = options.redactBody
            ? Option.match(decodeJson(raw), {
                onNone: () => raw,
                onSome: (parsed) => JSON.stringify(options.redactBody?.(parsed)),
              })
            : raw
          return {
            method: web.method,
            url: redactUrl(web.url, options.redact?.query),
            headers: redactHeaders(Object.fromEntries(web.headers.entries()), requestHeadersAllow, options.redact?.headers),
            body,
          }
        })

      const selectInteraction = (cassette: Cassette, incoming: Interaction["request"]) =>
        Effect.gen(function* () {
          if (sequential) {
            const index = yield* Ref.getAndUpdate(cursor, (n) => n + 1)
            const interaction = cassette.interactions[index]
            return { interaction, detail: `interaction ${index + 1} of ${cassette.interactions.length} not recorded` }
          }
          const interaction = cassette.interactions.find((candidate) => match(incoming, candidate.request))
          return { interaction, detail: interaction ? "" : mismatchDetail(cassette, incoming) }
        })

      return HttpClient.make((request) => {
        if (isRecordMode) {
          return Effect.gen(function* () {
            const currentRequest = yield* snapshotRequest(request)
            const response = yield* upstream.execute(request)
            const headers = responseHeaders(response, responseHeadersAllow, options.redact?.headers)
            const captured = yield* captureResponseBody(response, headers["content-type"])
            const interaction: Interaction = {
              request: currentRequest,
              response: { status: response.status, headers, ...captured },
            }
            const interactions = yield* Ref.updateAndGet(recorded, (prev) => [...prev, interaction])
            const cassette = cassetteFor(name, interactions, options.metadata)
            const findings = cassetteSecretFindings(cassette)
            if (findings.length > 0) return yield* unsafeCassette(request, name, findings)
            yield* fileSystem.makeDirectory(dir, { recursive: true }).pipe(Effect.orDie)
            yield* fileSystem.writeFileString(file, formatCassette(cassette)).pipe(Effect.orDie)
            return HttpClientResponse.fromWeb(request, new Response(decodeResponseBody(interaction.response), interaction.response))
          })
        }

        return Effect.gen(function* () {
          const cassette = parseCassette(
            yield* fileSystem.readFileString(file).pipe(Effect.mapError(() => fixtureMissing(request, name))),
          )
          const incoming = yield* snapshotRequest(request)
          const { interaction, detail } = yield* selectInteraction(cassette, incoming)
          if (!interaction) return yield* fixtureMismatch(request, name, detail)

          return HttpClientResponse.fromWeb(request, new Response(decodeResponseBody(interaction.response), interaction.response))
        })
      })
    }),
  ).pipe(Layer.provide(FetchHttpClient.layer), Layer.provide(NodeFileSystem.layer))
packages/http-recorder/src/index.ts (new file, 8 lines)
@@ -0,0 +1,8 @@
export * from "./schema"
export * from "./redaction"
export * from "./matching"
export * from "./diff"
export * from "./storage"
export * from "./effect"

export * as HttpRecorder from "."
packages/http-recorder/src/matching.ts (new file, 33 lines)
@@ -0,0 +1,33 @@
import { Option, Schema } from "effect"
import type { RequestSnapshot } from "./schema"

const JsonValue = Schema.fromJsonString(Schema.Unknown)
export const decodeJson = Schema.decodeUnknownOption(JsonValue)

const canonicalize = (value: unknown): unknown => {
  if (Array.isArray(value)) return value.map(canonicalize)
  if (value !== null && typeof value === "object") {
    return Object.fromEntries(
      Object.keys(value as Record<string, unknown>)
        .toSorted()
        .map((key) => [key, canonicalize((value as Record<string, unknown>)[key])]),
    )
  }
  return value
}

export type RequestMatcher = (incoming: RequestSnapshot, recorded: RequestSnapshot) => boolean

export const canonicalSnapshot = (snapshot: RequestSnapshot): string =>
  JSON.stringify({
    method: snapshot.method,
    url: snapshot.url,
    headers: canonicalize(snapshot.headers),
    body: Option.match(decodeJson(snapshot.body), {
      onNone: () => snapshot.body,
      onSome: canonicalize,
    }),
  })

export const defaultMatcher: RequestMatcher = (incoming, recorded) =>
  canonicalSnapshot(incoming) === canonicalSnapshot(recorded)
packages/http-recorder/src/redaction.ts (new file, 108 lines)
@@ -0,0 +1,108 @@
import type { Cassette } from "./schema"

export const REDACTED = "[REDACTED]"

const DEFAULT_REDACT_HEADERS = [
  "authorization",
  "cookie",
  "proxy-authorization",
  "set-cookie",
  "x-api-key",
  "x-amz-security-token",
  "x-goog-api-key",
]

const DEFAULT_REDACT_QUERY = [
  "access_token",
  "api-key",
  "api_key",
  "apikey",
  "code",
  "key",
  "signature",
  "sig",
  "token",
  "x-amz-credential",
  "x-amz-security-token",
  "x-amz-signature",
]

const SECRET_PATTERNS: ReadonlyArray<{ readonly label: string; readonly pattern: RegExp }> = [
  { label: "bearer token", pattern: /\bBearer\s+[A-Za-z0-9._~+/=-]{16,}\b/i },
  { label: "API key", pattern: /\bsk-[A-Za-z0-9][A-Za-z0-9_-]{20,}\b/ },
  { label: "Anthropic API key", pattern: /\bsk-ant-[A-Za-z0-9_-]{20,}\b/ },
  { label: "Google API key", pattern: /\bAIza[0-9A-Za-z_-]{20,}\b/ },
  { label: "AWS access key", pattern: /\b(?:AKIA|ASIA)[0-9A-Z]{16}\b/ },
  { label: "GitHub token", pattern: /\bgh[pousr]_[A-Za-z0-9_]{20,}\b/ },
  { label: "private key", pattern: /-----BEGIN [A-Z ]*PRIVATE KEY-----/ },
]

const ENV_SECRET_NAMES = /(?:API|AUTH|BEARER|CREDENTIAL|KEY|PASSWORD|SECRET|TOKEN)/i
const SAFE_ENV_VALUES = new Set(["fixture", "test", "test-key"])

const envSecrets = () =>
  Object.entries(process.env).flatMap(([name, value]) => {
    if (!value) return []
    if (!ENV_SECRET_NAMES.test(name)) return []
    if (value.length < 12) return []
    if (SAFE_ENV_VALUES.has(value.toLowerCase())) return []
    return [{ name, value }]
  })

const pathFor = (base: string, key: string) => (base ? `${base}.${key}` : key)

const stringEntries = (value: unknown, base = ""): ReadonlyArray<{ readonly path: string; readonly value: string }> => {
  if (typeof value === "string") return [{ path: base, value }]
  if (Array.isArray(value)) return value.flatMap((item, index) => stringEntries(item, `${base}[${index}]`))
  if (value && typeof value === "object") {
    return Object.entries(value).flatMap(([key, child]) => stringEntries(child, pathFor(base, key)))
  }
  return []
}

const redactionSet = (values: ReadonlyArray<string> | undefined, defaults: ReadonlyArray<string>) =>
  new Set([...defaults, ...(values ?? [])].map((value) => value.toLowerCase()))

export const redactUrl = (raw: string, query: ReadonlyArray<string> = DEFAULT_REDACT_QUERY) => {
  if (!URL.canParse(raw)) return raw
  const url = new URL(raw)
  const redacted = redactionSet(query, DEFAULT_REDACT_QUERY)
  for (const key of [...url.searchParams.keys()]) {
    if (redacted.has(key.toLowerCase())) url.searchParams.set(key, REDACTED)
  }
  return url.toString()
}

export const redactHeaders = (
  headers: Record<string, string>,
  allow: ReadonlyArray<string>,
  redact: ReadonlyArray<string> = DEFAULT_REDACT_HEADERS,
) => {
  const allowed = new Set(allow.map((name) => name.toLowerCase()))
  const redacted = redactionSet(redact, DEFAULT_REDACT_HEADERS)
  return Object.fromEntries(
    Object.entries(headers)
      .map(([name, value]) => [name.toLowerCase(), value] as const)
      .filter(([name]) => allowed.has(name))
      .map(([name, value]) => [name, redacted.has(name) ? REDACTED : value] as const)
      .toSorted(([a], [b]) => a.localeCompare(b)),
  )
}

export type SecretFinding = {
  readonly path: string
  readonly reason: string
}

export const secretFindings = (value: unknown): ReadonlyArray<SecretFinding> =>
  stringEntries(value).flatMap((entry) => [
    ...SECRET_PATTERNS.filter((item) => item.pattern.test(entry.value)).map((item) => ({
      path: entry.path,
      reason: item.label,
    })),
    ...envSecrets()
      .filter((item) => entry.value.includes(item.value))
      .map((item) => ({ path: entry.path, reason: `environment secret ${item.name}` })),
  ])

export const cassetteSecretFindings = (cassette: Cassette) => secretFindings(cassette)
packages/http-recorder/src/schema.ts (new file, 36 lines)
@@ -0,0 +1,36 @@
import { Schema } from "effect"

export const RequestSnapshotSchema = Schema.Struct({
  method: Schema.String,
  url: Schema.String,
  headers: Schema.Record(Schema.String, Schema.String),
  body: Schema.String,
})
export type RequestSnapshot = Schema.Schema.Type<typeof RequestSnapshotSchema>

export const ResponseSnapshotSchema = Schema.Struct({
  status: Schema.Number,
  headers: Schema.Record(Schema.String, Schema.String),
  body: Schema.String,
  bodyEncoding: Schema.optional(Schema.Literals(["text", "base64"])),
})
export type ResponseSnapshot = Schema.Schema.Type<typeof ResponseSnapshotSchema>

export const InteractionSchema = Schema.Struct({
  request: RequestSnapshotSchema,
  response: ResponseSnapshotSchema,
})
export type Interaction = Schema.Schema.Type<typeof InteractionSchema>

export const CassetteMetadataSchema = Schema.Record(Schema.String, Schema.Unknown)
export type CassetteMetadata = Schema.Schema.Type<typeof CassetteMetadataSchema>

export const CassetteSchema = Schema.Struct({
  version: Schema.Literal(1),
  metadata: Schema.optional(CassetteMetadataSchema),
  interactions: Schema.Array(InteractionSchema),
})
export type Cassette = Schema.Schema.Type<typeof CassetteSchema>

export const decodeCassette = Schema.decodeUnknownSync(CassetteSchema)
export const encodeCassette = Schema.encodeSync(CassetteSchema)
packages/http-recorder/src/storage.ts (new file, 34 lines)
@@ -0,0 +1,34 @@
import { Option } from "effect"
import * as fs from "node:fs"
import * as path from "node:path"
import { encodeCassette, decodeCassette, type Cassette, type CassetteMetadata, type Interaction } from "./schema"

export const DEFAULT_RECORDINGS_DIR = path.resolve(process.cwd(), "test", "fixtures", "recordings")

export const cassettePath = (name: string, directory = DEFAULT_RECORDINGS_DIR) => path.join(directory, `${name}.json`)

const metadataFor = (name: string, metadata: CassetteMetadata | undefined): CassetteMetadata => ({
  name,
  recordedAt: new Date().toISOString(),
  ...(metadata ?? {}),
})

export const cassetteFor = (
  name: string,
  interactions: ReadonlyArray<Interaction>,
  metadata: CassetteMetadata | undefined,
): Cassette => ({
  version: 1,
  metadata: metadataFor(name, metadata),
  interactions,
})

export const formatCassette = (cassette: Cassette) => `${JSON.stringify(encodeCassette(cassette), null, 2)}\n`

export const parseCassette = (raw: string) => decodeCassette(JSON.parse(raw))

export const hasCassetteSync = (name: string, options: { readonly directory?: string } = {}) => {
  const file = cassettePath(name, options.directory)
  if (!fs.existsSync(file)) return false
  return Option.isSome(Option.liftThrowable(parseCassette)(fs.readFileSync(file, "utf8")))
}
packages/http-recorder/test/fixtures/recordings/record-replay/multi-step.json (new vendored file, 39 lines)
@@ -0,0 +1,39 @@
{
  "version": 1,
  "interactions": [
    {
      "request": {
        "method": "POST",
        "url": "https://example.test/echo",
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"step\":1}"
      },
      "response": {
        "status": 200,
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"reply\":\"first\"}"
      }
    },
    {
      "request": {
        "method": "POST",
        "url": "https://example.test/echo",
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"step\":2}"
      },
      "response": {
        "status": 200,
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"reply\":\"second\"}"
      }
    }
  ]
}
packages/http-recorder/test/fixtures/recordings/record-replay/retry.json (new vendored file, 39 lines)
@@ -0,0 +1,39 @@
{
  "version": 1,
  "interactions": [
    {
      "request": {
        "method": "POST",
        "url": "https://example.test/poll",
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"id\":\"job_1\"}"
      },
      "response": {
        "status": 200,
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"status\":\"pending\"}"
      }
    },
    {
      "request": {
        "method": "POST",
        "url": "https://example.test/poll",
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"id\":\"job_1\"}"
      },
      "response": {
        "status": 200,
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"status\":\"complete\"}"
      }
    }
  ]
}
packages/http-recorder/test/record-replay.test.ts (new file, 159 lines)
@@ -0,0 +1,159 @@
import { describe, expect, test } from "bun:test"
import { Cause, Effect, Exit } from "effect"
import { HttpBody, HttpClient, HttpClientRequest } from "effect/unstable/http"
import { HttpRecorder } from "../src"

const post = (url: string, body: object) =>
  Effect.gen(function* () {
    const http = yield* HttpClient.HttpClient
    const request = HttpClientRequest.post(url, {
      headers: { "content-type": "application/json" },
      body: HttpBody.text(JSON.stringify(body), "application/json"),
    })
    const response = yield* http.execute(request)
    return yield* response.text
  })

const run = <A, E>(effect: Effect.Effect<A, E, HttpClient.HttpClient>) =>
  Effect.runPromise(effect.pipe(Effect.provide(HttpRecorder.cassetteLayer("record-replay/multi-step"))))

const runWith = <A, E>(
  name: string,
  options: HttpRecorder.RecordReplayOptions,
  effect: Effect.Effect<A, E, HttpClient.HttpClient>,
) => Effect.runPromise(effect.pipe(Effect.provide(HttpRecorder.cassetteLayer(name, options))))

const failureText = (exit: Exit.Exit<unknown, unknown>) => {
  if (Exit.isSuccess(exit)) return ""
  return Cause.prettyErrors(exit.cause).join("\n")
}

describe("http-recorder", () => {
  test("redacts sensitive URL query parameters", () => {
    expect(
      HttpRecorder.redactUrl(
        "https://example.test/path?key=secret-google-key&api_key=secret-openai-key&safe=value&X-Amz-Signature=secret-signature",
      ),
    ).toBe(
      "https://example.test/path?key=%5BREDACTED%5D&api_key=%5BREDACTED%5D&safe=value&X-Amz-Signature=%5BREDACTED%5D",
    )
  })

  test("redacts sensitive headers when allow-listed", () => {
    expect(
      HttpRecorder.redactHeaders(
        {
          authorization: "Bearer secret-token",
          "content-type": "application/json",
          "x-custom-token": "custom-secret",
          "x-api-key": "secret-key",
          "x-goog-api-key": "secret-google-key",
        },
        ["authorization", "content-type", "x-api-key", "x-goog-api-key", "x-custom-token"],
        ["x-custom-token"],
      ),
    ).toEqual({
      authorization: "[REDACTED]",
      "content-type": "application/json",
      "x-api-key": "[REDACTED]",
      "x-custom-token": "[REDACTED]",
      "x-goog-api-key": "[REDACTED]",
    })
  })

  test("detects secret-looking values without returning the secret", () => {
    expect(
      HttpRecorder.cassetteSecretFindings({
        version: 1,
        interactions: [
          {
            request: {
              method: "POST",
              url: "https://example.test/path?key=sk-123456789012345678901234",
              headers: {},
              body: JSON.stringify({ nested: "AIzaSyDHibiBRvJZLsFnPYPoiTwxY4ztQ55yqCE" }),
            },
            response: {
              status: 200,
              headers: {},
              body: "Bearer abcdefghijklmnopqrstuvwxyz",
            },
          },
        ],
      }),
    ).toEqual([
      { path: "interactions[0].request.url", reason: "API key" },
      { path: "interactions[0].request.body", reason: "Google API key" },
      { path: "interactions[0].response.body", reason: "bearer token" },
    ])
  })

  test("detects secret-looking values inside metadata", () => {
    expect(
      HttpRecorder.cassetteSecretFindings({
        version: 1,
        metadata: { token: "sk-123456789012345678901234" },
        interactions: [],
      }),
    ).toEqual([{ path: "metadata.token", reason: "API key" }])
  })

  test("default matcher dispatches multi-interaction cassettes by request shape", async () => {
    await run(
      Effect.gen(function* () {
        expect(yield* post("https://example.test/echo", { step: 2 })).toBe('{"reply":"second"}')
        expect(yield* post("https://example.test/echo", { step: 1 })).toBe('{"reply":"first"}')
      }),
    )
  })

  test("sequential dispatch returns recorded responses in order for identical requests", async () => {
    await runWith(
      "record-replay/retry",
      { dispatch: "sequential" },
      Effect.gen(function* () {
        expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
        expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"complete"}')
      }),
    )
  })

  test("default matcher returns the first match for identical requests", async () => {
    await runWith(
      "record-replay/retry",
      {},
      Effect.gen(function* () {
        expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
        expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
      }),
    )
  })

  test("sequential dispatch reports cursor exhaustion when more requests are made than recorded", async () => {
    await runWith(
      "record-replay/multi-step",
      { dispatch: "sequential" },
      Effect.gen(function* () {
        yield* post("https://example.test/echo", { step: 1 })
        yield* post("https://example.test/echo", { step: 2 })
        const exit = yield* Effect.exit(post("https://example.test/echo", { step: 3 }))
        expect(Exit.isFailure(exit)).toBe(true)
      }),
    )
  })

  test("mismatch diagnostics show closest redacted request differences", async () => {
    await run(
      Effect.gen(function* () {
        const exit = yield* Effect.exit(
          post("https://example.test/echo?api_key=secret-value", { step: 3, token: "sk-123456789012345678901234" }),
        )
        const message = failureText(exit)
        expect(message).toContain("closest interaction: #1")
        expect(message).toContain("url:")
        expect(message).toContain("https://example.test/echo?api_key=%5BREDACTED%5D")
        expect(message).toContain("body:")
        expect(message).toContain('$.step expected 1, received 3')
        expect(message).toContain('$.token expected undefined, received "[REDACTED]"')
        expect(message).not.toContain("sk-123456789012345678901234")
      }),
    )
  })
})
packages/http-recorder/tsconfig.json (new file, 14 lines)
@@ -0,0 +1,14 @@
{
  "$schema": "https://json.schemastore.org/tsconfig",
  "extends": "@tsconfig/bun/tsconfig.json",
  "compilerOptions": {
    "noUncheckedIndexedAccess": false,
    "plugins": [
      {
        "name": "@effect/language-service",
        "transform": "@effect/language-service/transform",
        "namespaceImportPackages": ["effect", "@effect/*"]
      }
    ]
  }
}
packages/llm/AGENTS.md (new file, 323 lines)
@@ -0,0 +1,323 @@
# LLM Package Guide

## Effect

- Prefer `HttpClient.HttpClient` / `HttpClientResponse.HttpClientResponse` over web `fetch` / `Response` at package boundaries.
- Use `Stream.Stream` for streaming transformations. Avoid ad hoc async generators or manual web reader loops unless an Effect `Stream` API cannot model the behavior.
- Use Effect Schema codecs for JSON encode/decode (`Schema.fromJsonString(...)`) instead of direct `JSON.parse` / `JSON.stringify` in implementation code.
- In `Effect.gen`, yield yieldable errors directly (`return yield* new MyError(...)`) instead of `Effect.fail(new MyError(...))`.
- Use `Effect.void` instead of `Effect.succeed(undefined)` when the successful value is intentionally void.
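A compact sketch of these conventions together (the `ParseFailed` class and `Payload` schema are illustrative names, not part of this package, and the error base class may differ in this codebase):

```ts
import { Data, Effect, Option, Schema } from "effect"

// Yieldable error, so `return yield* new ParseFailed(...)` works inside Effect.gen.
class ParseFailed extends Data.TaggedError("ParseFailed")<{ message: string }> {}

// Schema codec for JSON decode instead of a bare JSON.parse.
const Payload = Schema.fromJsonString(Schema.Struct({ id: Schema.String }))
const decodePayload = Schema.decodeUnknownOption(Payload)

const handle = (raw: string) =>
  Effect.gen(function* () {
    const payload = Option.getOrUndefined(decodePayload(raw))
    if (!payload) return yield* new ParseFailed({ message: "payload is not valid JSON" })
    yield* Effect.void // the success value is intentionally void
  })
```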
## Tests

- Use `testEffect(...)` from `test/lib/effect.ts` for tests requiring Effect layers.
- Keep provider tests fixture-first. Live provider calls must stay behind `RECORD=true` and required API-key checks.

## Architecture

This package is an Effect Schema-first LLM core. The Schema classes in `src/schema.ts` are the canonical runtime data model. Convenience functions in `src/llm.ts` are thin constructors that return those same Schema class instances; they should improve callsites without creating a second model.

### Request Flow

The intended callsite is:

```ts
const request = LLM.request({
  model: OpenAIChat.model({ id: "gpt-4o-mini", apiKey }),
  system: "You are concise.",
  prompt: "Say hello.",
})

const response = yield* LLMClient.make({ adapters: [OpenAIChat.adapter] }).generate(request)
```

`LLM.request(...)` builds an `LLMRequest`. `LLMClient.make(...)` selects an adapter by `request.model.protocol`, applies patches, prepares a typed provider target, asks the adapter for a real `HttpClientRequest.HttpClientRequest`, sends it through `RequestExecutor.Service`, parses the provider stream into common `LLMEvent`s, and finally returns an `LLMResponse`.

Use `LLMClient.make(...).stream(request)` when callers want incremental `LLMEvent`s. Use `LLMClient.make(...).generate(request)` when callers want those same events collected into an `LLMResponse`. Use `LLMClient.make(...).prepare<Target>(request)` to compile a request through the adapter pipeline without sending it — the optional `Target` type argument narrows `.target` to the adapter's native shape (e.g. `prepare<OpenAIChatTarget>(...)` returns a `PreparedRequestOf<OpenAIChatTarget>`). The runtime payload is identical; the generic is a type-level assertion.

Filter or narrow `LLMEvent` streams with `LLMEvent.is.*` (camelCase guards, e.g. `events.filter(LLMEvent.is.toolCall)`). The kebab-case `LLMEvent.guards["tool-call"]` form also works but prefer `is.*` in new code.
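As a concrete example of the incremental path, a sketch that collects only tool-call events (assuming the surrounding `Effect.gen` context and the `request` built as above, and that `stream(...)` returns a `Stream.Stream` of `LLMEvent` as described):

```ts
const toolCalls = yield* LLMClient.make({ adapters: [OpenAIChat.adapter] })
  .stream(request)
  .pipe(
    Stream.filter(LLMEvent.is.toolCall), // narrow the event stream with the camelCase guards
    Stream.runCollect,
  )
```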
### Adapters

An adapter is the registered, runnable composition of four orthogonal pieces:

- **`Protocol`** (`src/protocol.ts`) — semantic API contract. Owns request lowering, target validation, body encoding, and the streaming chunk-to-event state machine. Examples: `OpenAIChat.protocol`, `OpenAIResponses.protocol`, `AnthropicMessages.protocol`, `Gemini.protocol`, `BedrockConverse.protocol`.
- **`Endpoint`** (`src/endpoint.ts`) — URL construction. Receives the request and the validated target so it can read `model.id`, `model.baseURL`, `model.queryParams`, and any target field that influences the URL (e.g. Bedrock's `modelId` segment). Reach for `Endpoint.baseURL({ default, path })` before hand-rolling a URL.
- **`Auth`** (`src/auth.ts`) — per-request transport authentication. Adapters read `model.apiKey` at request time via `Auth.bearer` (the `Adapter.fromProtocol` default; sets `Authorization: Bearer <apiKey>`) or `Auth.apiKeyHeader(name)` for providers that use a custom header (Anthropic `x-api-key`, Gemini `x-goog-api-key`). Adapters that need per-request signing (Bedrock SigV4, future Vertex IAM, Azure AAD) implement `Auth` as a function that signs the body and merges signed headers into the result.
- **`Framing`** (`src/framing.ts`) — bytes → frames. SSE (`Framing.sse`) is shared; Bedrock keeps its AWS event-stream framing as a typed `Framing<object>` value alongside its protocol.

Compose them via `Adapter.fromProtocol(...)`:

```ts
export const adapter = Adapter.fromProtocol({
  id: "openai-chat",
  protocol: OpenAIChat.protocol,
  endpoint: Endpoint.baseURL({ default: "https://api.openai.com/v1", path: "/chat/completions" }),
  framing: Framing.sse,
})
```

The four-axis decomposition is the reason DeepSeek, TogetherAI, Cerebras, Baseten, Fireworks, and DeepInfra all reuse `OpenAIChat.protocol` verbatim — each provider deployment is a 5-15 line `Adapter.fromProtocol(...)` call instead of a 300-400 line adapter clone. Bug fixes in one protocol propagate to every consumer of that protocol in a single commit.

Reach for the lower-level `Adapter.unsafe(...)` only when an adapter genuinely cannot fit the four-axis model. The name signals that you're escaping the safe abstraction; new adapters should always start with `Adapter.fromProtocol(...)` and prove they need otherwise.

When a provider ships a non-HTTP transport (OpenAI's WebSocket-based Codex backend, hypothetical bidirectional streaming APIs), the seam is `Framing` plus a parallel `Endpoint` / `Auth` interpretation — not a fork of the adapter contract.
### Folder layout

```
packages/llm/src/
  schema.ts    // LLMRequest, LLMEvent, errors — canonical Schema model
  llm.ts       // request constructors and convenience helpers
  adapter.ts   // Adapter.fromProtocol + LLMClient.make
  executor.ts  // RequestExecutor service + transport error mapping
  patch.ts     // Patch system (request/prompt/tool-schema/target/stream)

  protocol.ts  // Protocol type + Protocol.define
  endpoint.ts  // Endpoint type + Endpoint.baseURL
  auth.ts      // Auth type + Auth.bearer / Auth.apiKeyHeader / Auth.passthrough
  framing.ts   // Framing type + Framing.sse

  provider/
    shared.ts        // ProviderShared toolkit used inside protocol impls
    patch.ts         // ProviderPatch helpers (defaults, capability gates)
    openai-chat.ts   // protocol + adapter (compose OpenAIChat.protocol)
    openai-responses.ts
    anthropic-messages.ts
    gemini.ts
    bedrock-converse.ts
    openai-compatible-chat.ts    // adapter that reuses OpenAIChat.protocol
    openai-compatible-family.ts  // family lookups (deepseek, togetherai, ...)
    azure.ts / amazon-bedrock.ts / google.ts / ...  // ProviderResolver entries

  provider-resolver.ts  // OpenCode-bridge resolver layer
  tool.ts               // typed tool() helper
  tool-runtime.ts       // ToolRuntime.run with full tool-loop type safety
```

The dependency arrow points down: `provider/*.ts` files import `protocol`, `endpoint`, `auth`, `framing` and never the other direction. Lower-level modules know nothing about specific providers.
### Shared adapter helpers

`ProviderShared` exports a small toolkit used inside protocol implementations to keep them focused on provider-native shapes:

- `framed({ adapter, response, readError, framing, decodeChunk, initial, process, onHalt? })` — the canonical streaming pipeline used by `Adapter.fromProtocol(...)`. You rarely call this directly anymore.
- `sseFraming` — the SSE-specific framing step. Already wired through `Framing.sse`; reach for it directly only when wrapping or composing.
- `joinText(parts)` — joins an array of `TextPart` (or anything with a `.text`) with newlines. Use this anywhere a protocol flattens text content into a single string for a provider field.
- `parseToolInput(adapter, name, raw)` — Schema-decodes a tool-call argument string with the canonical "Invalid JSON input for `<adapter>` tool call `<name>`" error message. Treats empty input as `{}`. Use this in `finishToolCall` / `finalizeToolCalls`; do not roll a fresh `parseJson` callsite.
- `parseJson(adapter, raw, message)` — generic JSON-via-Schema decode for non-tool payloads.
- `chunkError(adapter, message, ...)` — typed `ProviderChunkError` constructor for stream-time failures.
- `validateWith(decoder)` — lifts a Schema decode effect into the protocol's `validate` shape, mapping parse errors to `InvalidRequestError`.
- `codecs({ adapter, draft, target, chunk, chunkErrorMessage })` — the encode/decode bundle each protocol needs (request body encode, draft → target validate, chunk decode).

If you find yourself copying a 3-to-5-line snippet between two protocols, lift it into `ProviderShared` next to these helpers rather than duplicating.
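For instance, a protocol's tool-call finalization might lean on these helpers like this (a sketch only; the surrounding `finishToolCall` shape is an assumption, but `parseToolInput` behaves as described above):

```ts
const finishToolCall = (raw: { id: string; name: string; arguments: string }) =>
  Effect.gen(function* () {
    // Schema-decodes the argument string; empty input is treated as {}.
    const input = yield* parseToolInput("openai-chat", raw.name, raw.arguments)
    return { id: raw.id, name: raw.name, input }
  })
```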
### Patches

Patches are the forcing function for provider/model quirks. If a behavior is not universal enough for common IR, keep it as a named patch with a trace entry. Good examples:

- OpenAI Chat streaming usage: `target.openai-chat.include-usage` adds `stream_options.include_usage`.
- Anthropic prompt caching: map common cache hints onto selected content/message blocks.
- Mistral/OpenAI-compatible prompt cleanup: normalize empty text content or tool-call IDs only for affected models.
- Reasoning models: map common reasoning intent to provider-specific effort, summary, or encrypted-content fields.

Do not grow common request schemas just to fit one provider. Prefer adapter-local target schemas plus patches selected by provider/model predicates.
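To make that concrete, here is what a named target patch could look like. This is a sketch, not the real API: the `Patch.target({ id, when, patch })` constructor shape below is an assumption (the actual constructors live in `src/patch.ts`), though the patch id is the real one from the list above:

```ts
// Hypothetical constructor shape; the patch only fires for OpenAI Chat models.
export const includeUsage = Patch.target({
  id: "target.openai-chat.include-usage",
  when: ({ model }) => model.protocol === "openai-chat",
  patch: (target) => ({ ...target, stream_options: { include_usage: true } }),
})
```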
### Tools

Tool loops are represented in common messages and events:

```ts
const call = LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })
const result = LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } })

const followUp = LLM.request({
  model,
  messages: [LLM.user("Weather?"), LLM.assistant([call]), result],
})
```

Adapters lower this into provider-native assistant tool-call messages and tool-result messages. Streaming providers should emit `tool-input-delta` events while arguments arrive, then a final `tool-call` event with parsed input.
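For one streamed tool call, the resulting common-event order looks like this (a sketch; field names other than `type`, `id`, `name`, and `input` are illustrative):

```ts
const expectedEvents = [
  // deltas while the provider streams the argument JSON...
  { type: "tool-input-delta", id: "call_1", delta: '{"query":' },
  { type: "tool-input-delta", id: "call_1", delta: '"weather"}' },
  // ...then one final tool-call event carrying the fully parsed input
  { type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } },
]
```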
### Tool runtime

`ToolRuntime.run(client, options)` orchestrates the tool loop with full type safety:

```ts
const get_weather = tool({
  description: "Get current weather for a city",
  parameters: Schema.Struct({ city: Schema.String }),
  success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }),
  execute: ({ city }) =>
    Effect.gen(function* () {
      // city: string — typed from parameters Schema
      const data = yield* WeatherApi.fetch(city)
      return { temperature: data.temp, condition: data.cond }
      // return type checked against success Schema
    }),
})

const events = yield* ToolRuntime.run(client, {
  request,
  tools: { get_weather, get_time, ... },
  maxSteps: 10,
  stopWhen: (state) => false,
}).pipe(Stream.runCollect)
```

The runtime:

- Adds tool definitions (derived from each tool's `parameters` Schema via `Schema.toJsonSchemaDocument`) onto `request.tools`.
- Streams the model.
- On `tool-call`: looks up the named tool, decodes input against `parameters` Schema, dispatches to the typed `execute`, encodes the result against `success` Schema, emits `tool-result`.
- Loops when the step finishes with `tool-calls`, appending the assistant + tool messages.
- Stops on a non-`tool-calls` finish, when `maxSteps` is reached, or when `stopWhen` returns `true`.

Handler dependencies (services, permissions, plugin hooks, abort handling) are closed over by the consumer at tool-construction time. The runtime's only environment requirement is `RequestExecutor.Service`. Build the tools record inside an `Effect.gen` once and reuse it across many runs:

```ts
const tools = Effect.gen(function* () {
  const fs = yield* FileSystem
  const permission = yield* Permission
  return {
    read_file: tool({
      ...
      execute: ({ path }) =>
        Effect.gen(function* () {
          yield* permission.ask({ tool: "read_file", path })
          return { content: yield* fs.readFile(path) }
        }),
    }),
  }
})
```

Errors must be expressed as `ToolFailure`. The runtime catches it and emits a `tool-error` event, then a `tool-result` of `type: "error"`, so the model can self-correct on the next step. Anything that is not a `ToolFailure` is treated as a defect and fails the stream. Three recoverable error paths produce `tool-error` events (a failing-handler sketch follows this list):

- The model called an unknown tool name.
- Input failed the `parameters` Schema.
- The handler returned a `ToolFailure`.
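A failing handler, sketched under the assumption that `ToolFailure` is a yieldable error class taking a message-like payload (its exact fields are defined by the package, not shown here; `fs` is the FileSystem service closed over as in the tools-record example above):

```ts
const read_file = tool({
  description: "Read a file from disk",
  parameters: Schema.Struct({ path: Schema.String }),
  success: Schema.Struct({ content: Schema.String }),
  execute: ({ path }) =>
    Effect.gen(function* () {
      const exists = yield* fs.exists(path)
      // Recoverable: surfaces as a tool-error event plus an error tool-result,
      // so the model can self-correct on the next step.
      if (!exists) return yield* new ToolFailure({ message: `no such file: ${path}` })
      return { content: yield* fs.readFileString(path) }
    }),
})
```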
Provider-defined / hosted tools (e.g. Anthropic `web_search` / `code_execution` / `web_fetch`, OpenAI Responses `web_search_call` / `file_search_call` / `code_interpreter_call` / `mcp_call` / `local_shell_call` / `image_generation_call` / `computer_use_call`) pass through the runtime untouched:

- Adapters surface the model's call as a `tool-call` event with `providerExecuted: true`, and the provider's result as a matching `tool-result` event with `providerExecuted: true`.
- The runtime detects `providerExecuted` on `tool-call` and **skips client dispatch** — no handler is invoked and no `tool-error` is raised for "unknown tool". The provider already executed it.
- Both events are appended to the assistant message in `assistantContent` so the next round's history carries the call + result for context. Anthropic encodes them back as `server_tool_use` + `web_search_tool_result` (or `code_execution_tool_result` / `web_fetch_tool_result`) blocks; OpenAI Responses callers typically use `previous_response_id` instead of resending hosted-tool items.

Add provider-defined tools to `request.tools` (no runtime entry needed). The matching adapter must know how to lower the tool definition into the provider-native shape; right now Anthropic accepts `web_search` / `code_execution` / `web_fetch` and OpenAI Responses accepts the hosted tool names listed above.
### Recording Tests
|
||||
|
||||
Recorded tests use one cassette file per scenario. A cassette holds an ordered array of `{ request, response }` interactions, so multi-step flows (tool loops, retries, polling) record into a single file. Use `recordedTests({ prefix, requires })` and let the helper derive cassette names from test names:
|
||||
|
||||
```ts
|
||||
const recorded = recordedTests({ prefix: "openai-chat", requires: ["OPENAI_API_KEY"] })
|
||||
|
||||
recorded.effect("streams text", () =>
|
||||
Effect.gen(function* () {
|
||||
// test body
|
||||
}),
|
||||
)
|
||||
```
|
||||
|
||||
Replay is the default. `RECORD=true` records fresh cassettes and requires the listed env vars. Cassettes are written as pretty-printed JSON so multi-interaction diffs stay reviewable.
|
||||
|
||||
Pass `provider`, `protocol`, and optional `tags` to `recordedTests(...)` / `recorded.effect.with(...)` so cassettes carry searchable metadata. Use recorded-test filters to replay or record a narrow subset without rewriting a whole file:
|
||||
|
||||
- `RECORDED_PROVIDER=openai` matches tests tagged with `provider:openai`; comma-separated values are allowed.
|
||||
- `RECORDED_TAGS=tool` requires all listed tags to be present, e.g. `RECORDED_TAGS=provider:togetherai,tool`.
|
||||
- `RECORDED_TEST="streams text"` matches by test name, kebab-case test id, or cassette path.
|
||||
|
||||
Filters apply in replay and record mode. Combine them with `RECORD=true` when refreshing only one provider or scenario.
|
||||
|
||||
**Binary response bodies.** Most providers stream text (SSE, JSON). AWS Bedrock streams binary AWS event-stream frames whose CRC32 fields would be mangled by a UTF-8 round-trip — those bodies are stored as base64 with `bodyEncoding: "base64"` on the response snapshot. Detection is by `Content-Type` in `@opencode-ai/http-recorder` (currently `application/vnd.amazon.eventstream` and `application/octet-stream`); cassettes for SSE/JSON adapters omit the field and decode as text.

**Matching strategies.** Replay defaults to structural matching, which finds an interaction by comparing method, URL, allow-listed headers, and the canonical JSON body. This is the right choice for tool loops because each round's request differs (the message history grows). For scenarios where successive requests are byte-identical and expect different responses (retries, polling), pass `dispatch: "sequential"` in `RecordReplayOptions` — replay then walks the cassette in record order via an internal cursor. `scriptedResponses` (in `test/lib/http.ts`) is the deterministic counterpart for tests that don't need a live provider; it scripts response bodies in order without reading from disk.
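A sketch of opting into sequential dispatch for a retry scenario; `dispatch: "sequential"` is the documented `RecordReplayOptions` field, while threading it through `recordedTests(...)` is an assumption:

```ts
// Successive retry requests are byte-identical, so structural matching
// cannot tell them apart; walk the cassette in record order instead.
const recorded = recordedTests({
  prefix: "openai-chat-retry",
  requires: ["OPENAI_API_KEY"],
  dispatch: "sequential",
})
```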
Do not blanket re-record an entire test file when adding one cassette. `RECORD=true` rewrites every recorded case that runs, and provider streams contain volatile IDs, timestamps, fingerprints, and obfuscation fields. Prefer deleting the one cassette you intend to refresh, or run a focused test pattern that only registers the scenario you want to record. Keep stable existing cassettes unchanged unless their request shape or expected behavior changed.
## TODO

### Completed Foundation

- [x] Add an adapter registry so `LLMClient.make(...)` can choose an adapter by provider/protocol instead of requiring a single adapter.
- [x] Add request/response convenience helpers where callsites still expose schema internals, but keep constructors returning canonical Schema class instances.
- [x] Expand OpenAI Chat support for assistant tool-call messages followed by tool-result messages.
- [x] Add OpenAI Chat recorded tests for tool-result follow-up and usage chunks.
- [x] Add deterministic fixture tests for unsupported content paths, including media in user messages and unsupported assistant content.
- [x] Add provider patch examples from real opencode quirks, starting with prompt normalization and target-level provider options.
- [x] Add an OpenAI Responses adapter once the Chat adapter shape feels stable.
- [x] Add Anthropic Messages adapter coverage after Responses, especially content block mapping, tool use/result mapping, and cache hints.
- [x] Add Gemini adapter coverage for text, media input, tool calls, reasoning deltas, finish reasons, usage, and recorded cassettes.
- [x] Extract or port OpenCode's `ProviderTransform.schema` Gemini sanitizer into a tested `packages/llm` tool-schema patch; do not keep a divergent adapter-local copy long term.

### Provider Coverage

- [x] Add a generic OpenAI-compatible Chat adapter for non-OpenAI providers that expose `/chat/completions`.
- [x] Keep OpenAI Responses as a separate first-class protocol for providers that actually implement `/responses`; do not treat generic OpenAI-compatible providers as Responses-capable by default.
- [x] Cover OpenAI-compatible provider families that can share the generic adapter first: DeepSeek, TogetherAI, Cerebras, Baseten, Fireworks, DeepInfra, and similar providers.
- [ ] Decide which providers need thin dedicated wrappers over OpenAI-compatible Chat because they have custom parsing/options: Mistral, Groq, xAI, Perplexity, and Cohere.
- [x] Add Bedrock Converse support: wire format (messages / system / inferenceConfig / toolConfig), AWS event stream binary framing via `@smithy/eventstream-codec`, SigV4 signing via `aws4fetch` (or Bearer API key path), text/reasoning/tool/usage/finish decoding, cache hints, image/document content, deterministic tests, and recorded basic text/tool cassettes. Additional model-specific fields are still TODO.
- [ ] Decide Vertex shape after Bedrock/OpenAI-compatible are stable: Vertex Gemini as Gemini target/http patch vs adapter, and Vertex Anthropic as Anthropic target/http patch vs adapter.
- [ ] Add Gateway/OpenRouter-style routing support only after the generic OpenAI-compatible adapter and provider option patch model are stable.

### OpenCode Parity Patches

- [ ] Port Anthropic tool-use ordering into a prompt patch.
- [ ] Finish Mistral/OpenAI-compatible cleanup patches, including message sequence repair after tool messages.
- [ ] Port DeepSeek reasoning handling and interleaved reasoning field mapping.
- [ ] Add unsupported attachment fallback patches keyed by model capabilities.
- [ ] Add cache hint patches for Anthropic, OpenRouter, Bedrock, OpenAI-compatible, Copilot, and Alibaba-style providers.
- [ ] Add provider option namespacing patches for Gateway, OpenRouter, Azure, OpenAI-compatible wrappers, and other provider-specific option bags.
- [ ] Add model-specific reasoning option patches for providers that need effort, summary, or native reasoning fields.
- [ ] Add provider-specific metadata extraction patches only where OpenCode needs returned reasoning, citations, usage details, or provider-native fields.

### OpenCode Bridge

- [x] Build a `Provider.Model` -> `LLM.ModelRef` bridge for OpenCode, including protocol selection, base URLs, headers, limits, capabilities, native provider metadata, and OpenAI-compatible provider family detection.
- [x] Build a pure `session.llm` -> `LLM.request(...)` bridge for system prompts, message history, tool definitions, tool choice, generation options, reasoning variants, cache hints, and attachments.
- [x] Add a typed `ToolRuntime` that drives the tool loop with Schema-typed parameters/success per tool, single-`ToolFailure` error channel, and `maxSteps`/`stopWhen` controls.
- [x] Provider-defined tool pass-through: `providerExecuted` flag on `tool-call`/`tool-result` events; Anthropic `server_tool_use` / `web_search_tool_result` / `code_execution_tool_result` / `web_fetch_tool_result` round-trip; OpenAI Responses hosted-tool items decoded as `tool-call` + `tool-result` pairs; runtime skips client dispatch when `providerExecuted: true`.
- [ ] Keep auth and deployment concerns in the OpenCode bridge where possible: Bedrock credentials/region/profile, Vertex project/location/token, Azure deployment/API version, and Gateway/OpenRouter routing headers.
- [ ] Keep initial OpenCode integration behind a local flag/path until request payload parity and stream event parity are proven against the existing `session/llm.test.ts` cases.

### Native OpenCode Rollout

- [x] Add a native event bridge that maps `LLMEvent` streams into the existing `SessionProcessor` event contract without creating a second processor.
- [ ] Extract runtime-neutral OpenCode tool resolution from `SessionPrompt.resolveTools`, then build both existing-stream and native `@opencode-ai/llm` tool adapters from the same resolved shape.
- [ ] Map `Permission.RejectedError`, `Permission.CorrectedError`, validation failures, thrown tool failures, and aborts into model-visible native tool error/results.
- [ ] Wire a native stream producer behind an explicit local flag and provider allowlist; the producer should consume `nativeMessages`, call `LLMNative.request(...)`, stream through `LLMClient.make(...)`, and feed `LLMNativeEvents.mapper()` into `SessionProcessor`.
- [ ] Add end-to-end native stream tests through the actual session loop for text, reasoning, tool-call streaming, tool success, rejected permission, corrected permission, thrown tool error, abort, and provider-executed tool history.
- [ ] Dogfood native streaming with the flag enabled for OpenAI first, then Anthropic, Gemini, OpenAI-compatible providers, Bedrock, and Copilot provider-by-provider.
- [ ] Flip native streaming to default only after request parity, stream parity, tool execution, typecheck, focused provider tests, recorded cassettes, and manual dogfood pass for the enabled provider set.
- [ ] Keep the existing stream path as an opt-out fallback during soak; remove it only after native default has proven stable.

### Test And Recording Gaps

- [x] Harden the generic HTTP recorder before adding more live cassettes: secret scanning before writes, sensitive header/query redaction, response/body secret scanning, and clear failure messages that identify the unsafe field without printing the secret.
- [x] Refactor the recorder toward extractable library boundaries: core HTTP cassette schema/matching/redaction/diffing should stay LLM-agnostic; LLM tests should supply metadata and semantic assertions from a thin wrapper.
- [x] Add cassette metadata support: recorder schema version, recorded timestamp, scenario name, tags, and caller-provided subject metadata such as provider/protocol/model/capabilities without making the core recorder depend on LLM concepts.
- [x] Improve replay mismatch diagnostics: show method/URL/header/body diffs and closest recorded interaction while keeping secrets redacted. Unused-interaction reporting is still TODO if a test needs it.
- [ ] Add semantic replay assertions for LLM cassettes: replay raw HTTP, parse provider streams, and compare normalized `LLMEvent[]` or `LLMResponse` snapshots in addition to request matching.
- [ ] Add stream chunk-boundary fuzzing for text/SSE cassettes so parser tests prove correctness independent of provider chunk boundaries.
- [ ] Keep deterministic coverage for malformed chunks and tool arguments that arrive in the first chunk unless a live provider reliably produces those shapes.
- [x] Cover provider-error and HTTP-status sad paths with deterministic fixtures across adapters (Anthropic mid-stream + 4xx; OpenAI Responses mid-stream + 4xx; OpenAI Chat 4xx). Live recordings of provider errors are still TODO when stable cassettes can be captured.
- [x] Improve cassette ergonomics for multi-interaction flows: pretty-printed JSON for diff-friendly cassettes, explicit sequential dispatch, and a recorded tool-loop scaffold (`openai-chat-tool-loop.recorded.test.ts`).
- [x] Mirror OpenCode request-body parity tests through the new LLM path for OpenAI Responses, Anthropic Messages, Gemini, OpenAI-compatible Chat, and Bedrock once supported.
- [x] Add adapter parity fixtures for generic OpenAI-compatible Chat before adding provider-specific wrappers.

### Recorded Cassette Backlog

- [x] DeepSeek OpenAI-compatible Chat basic streaming text.
- [ ] DeepSeek OpenAI-compatible Chat tool call and tool-result follow-up.
- [ ] DeepSeek reasoning output, including any interleaved reasoning fields the live API emits.
- [x] TogetherAI OpenAI-compatible Chat basic streaming text and tool-call flow.
- [ ] Cerebras OpenAI-compatible Chat basic streaming text and tool-call flow.
- [ ] Baseten OpenAI-compatible Chat basic streaming text and deployed-model request shape.
- [ ] Fireworks OpenAI-compatible Chat basic streaming text and tool-call flow.
- [ ] DeepInfra OpenAI-compatible Chat basic streaming text and tool-call flow.
- [ ] Provider-error cassettes for stable, non-secret error bodies where the provider returns deterministic 4xx/5xx payloads.
- [ ] Mistral, Groq, xAI, Perplexity, and Cohere basic/tool cassettes after deciding whether each stays generic OpenAI-compatible or gets a thin wrapper.
- [x] Bedrock Converse basic text and tool-call cassettes (recorded against `us.amazon.nova-micro-v1:0` in us-east-1). Cache-hint cassettes still TODO.
- [ ] Vertex Gemini and Vertex Anthropic basic/tool cassettes after the Vertex adapter/patch shape is decided.
- [ ] Gateway/OpenRouter routing-header cassettes after routing support lands.
29
packages/llm/package.json
Normal file
@@ -0,0 +1,29 @@
{
  "$schema": "https://json.schemastore.org/package.json",
  "version": "1.14.25",
  "name": "@opencode-ai/llm",
  "type": "module",
  "license": "MIT",
  "private": true,
  "scripts": {
    "test": "bun test --timeout 30000",
    "typecheck": "tsgo --noEmit"
  },
  "exports": {
    ".": "./src/index.ts",
    "./*": "./src/*.ts"
  },
  "devDependencies": {
    "@effect/platform-node": "catalog:",
    "@opencode-ai/http-recorder": "workspace:*",
    "@tsconfig/bun": "catalog:",
    "@types/bun": "catalog:",
    "@typescript/native-preview": "catalog:"
  },
  "dependencies": {
    "@smithy/eventstream-codec": "4.2.14",
    "@smithy/util-utf8": "4.2.2",
    "aws4fetch": "1.0.20",
    "effect": "catalog:"
  }
}
330
packages/llm/src/adapter.ts
Normal file
@@ -0,0 +1,330 @@
import { Effect, Stream } from "effect"
import { HttpClientRequest, type HttpClientResponse } from "effect/unstable/http"
import type { Auth } from "./auth"
import { bearer as authBearer } from "./auth"
import type { Endpoint } from "./endpoint"
import * as LLM from "./llm"
import { RequestExecutor } from "./executor"
import type { AnyPatch, Patch, PatchInput, PatchRegistry } from "./patch"
import { context, emptyRegistry, plan, registry as makePatchRegistry, target as targetPatch } from "./patch"
import type { Framing } from "./framing"
import type { Protocol } from "./protocol"
import { ProviderShared } from "./provider/shared"
import type {
  LLMError,
  LLMEvent,
  LLMRequest,
  ModelRef,
  PatchTrace,
  PreparedRequest,
  PreparedRequestOf,
  ProtocolID,
} from "./schema"
import { LLMResponse, NoAdapterError, PreparedRequest as PreparedRequestSchema } from "./schema"

interface RuntimeAdapter {
  readonly id: string
  readonly protocol: ProtocolID
  readonly patches: ReadonlyArray<Patch<unknown>>
  readonly redact: (target: unknown) => unknown
  readonly prepare: (request: LLMRequest) => Effect.Effect<unknown, LLMError>
  readonly validate: (draft: unknown) => Effect.Effect<unknown, LLMError>
  readonly toHttp: (target: unknown, context: HttpContext) => Effect.Effect<HttpClientRequest.HttpClientRequest, LLMError>
  readonly parse: (response: HttpClientResponse.HttpClientResponse) => Stream.Stream<LLMEvent, LLMError>
}

interface RuntimeAdapterSource {
  readonly runtime: RuntimeAdapter
}

export interface HttpContext {
  readonly request: LLMRequest
  readonly patchTrace: ReadonlyArray<PatchTrace>
}

export interface Adapter<Draft, Target> {
  readonly id: string
  readonly protocol: ProtocolID
  readonly patches: ReadonlyArray<Patch<Draft>>
  readonly redact: (target: Target) => unknown
  readonly prepare: (request: LLMRequest) => Effect.Effect<Draft, LLMError>
  readonly validate: (draft: Draft) => Effect.Effect<Target, LLMError>
  readonly toHttp: (target: Target, context: HttpContext) => Effect.Effect<HttpClientRequest.HttpClientRequest, LLMError>
  readonly parse: (response: HttpClientResponse.HttpClientResponse) => Stream.Stream<LLMEvent, LLMError>
}

export interface AdapterInput<Draft, Target> {
  readonly id: string
  readonly protocol: ProtocolID
  readonly patches?: ReadonlyArray<Patch<Draft>>
  readonly redact: (target: Target) => unknown
  readonly prepare: (request: LLMRequest) => Effect.Effect<Draft, LLMError>
  readonly validate: (draft: Draft) => Effect.Effect<Target, LLMError>
  readonly toHttp: (target: Target, context: HttpContext) => Effect.Effect<HttpClientRequest.HttpClientRequest, LLMError>
  readonly parse: (response: HttpClientResponse.HttpClientResponse) => Stream.Stream<LLMEvent, LLMError>
}

export interface AdapterDefinition<Draft, Target> extends Adapter<Draft, Target> {
  readonly runtime: RuntimeAdapter
  readonly patch: (id: string, input: PatchInput<Draft>) => Patch<Draft>
  readonly withPatches: (patches: ReadonlyArray<Patch<Draft>>) => AdapterDefinition<Draft, Target>
}

export interface LLMClient {
  /**
   * Compile a request through the adapter pipeline (patches, prepare, validate,
   * toHttp) without sending it. Returns the prepared request including the
   * provider-native target.
   *
   * Pass a `Target` type argument to statically expose the adapter's target
   * shape (e.g. `prepare<OpenAIChatTarget>(...)`) — the runtime payload is
   * identical, so this is a type-level assertion the caller makes about which
   * adapter the request will resolve to.
   */
  readonly prepare: <Target = unknown>(
    request: LLMRequest,
  ) => Effect.Effect<PreparedRequestOf<Target>, LLMError>
  readonly stream: (request: LLMRequest) => Stream.Stream<LLMEvent, LLMError, RequestExecutor.Service>
  readonly generate: (request: LLMRequest) => Effect.Effect<LLMResponse, LLMError, RequestExecutor.Service>
}

export interface ClientOptions {
  readonly adapters: ReadonlyArray<RuntimeAdapterSource>
  readonly patches?: PatchRegistry | ReadonlyArray<AnyPatch>
}

const noAdapter = (model: ModelRef) =>
  new NoAdapterError({ protocol: model.protocol, provider: model.provider, model: model.id })

const normalizeRegistry = (patches: PatchRegistry | ReadonlyArray<AnyPatch> | undefined): PatchRegistry => {
  if (!patches) return emptyRegistry
  if ("request" in patches) return patches
  return makePatchRegistry(patches)
}

/**
 * Lower-level adapter constructor. Reach for this only when the adapter
 * genuinely cannot fit `fromProtocol`'s four-axis model — for example, an
 * adapter that needs hand-rolled `toHttp` / `parse` because no `Protocol`,
 * `Endpoint`, `Auth`, or `Framing` value cleanly captures its behavior.
 *
 * Named `unsafe` to signal that you are escaping the safe abstraction; the
 * canonical path is `Adapter.fromProtocol(...)`. New adapters should start
 * there and prove they need otherwise before reaching for this.
 */
export function unsafe<Draft, Target>(input: AdapterInput<Draft, Target>): AdapterDefinition<Draft, Target> {
  const build = (patches: ReadonlyArray<Patch<Draft>>): AdapterDefinition<Draft, Target> => ({
    id: input.id,
    protocol: input.protocol,
    patches,
    get runtime() {
      // Runtime registry erases adapter draft/target generics after validation.
      // oxlint-disable-next-line typescript-eslint/no-unsafe-type-assertion
      return this as unknown as RuntimeAdapter
    },
    redact: input.redact,
    prepare: input.prepare,
    validate: input.validate,
    toHttp: input.toHttp,
    parse: input.parse,
    patch: (id, patchInput) => targetPatch(`${input.id}.${id}`, patchInput),
    withPatches: (next) => build([...patches, ...next]),
  })

  return build(input.patches ?? [])
}

export interface FromProtocolInput<Draft, Target, Frame, Chunk, State> {
  /** Adapter id used in registry lookup, error messages, and patch namespaces. */
  readonly id: string
  /** Semantic API contract — owns lowering, validation, encoding, and parsing. */
  readonly protocol: Protocol<Draft, Target, Frame, Chunk, State>
  /** Where the request is sent. */
  readonly endpoint: Endpoint<Target>
  /**
   * Per-request transport authentication. Defaults to `Auth.bearer`, which
   * sets `Authorization: Bearer <model.apiKey>` when `model.apiKey` is set
   * and is a no-op otherwise. Override with `Auth.apiKeyHeader(name)` for
   * providers that use a custom header (Anthropic, Gemini), or supply a
   * custom `Auth` for per-request signing (Bedrock SigV4).
   */
  readonly auth?: Auth
  /** Stream framing — bytes -> frames before `protocol.decode`. */
  readonly framing: Framing<Frame>
  /** Static / per-request headers added before `auth` runs. */
  readonly headers?: (input: { readonly request: LLMRequest }) => Record<string, string>
  /** Provider patches that target this adapter (e.g. include-usage). */
  readonly patches?: ReadonlyArray<Patch<Draft>>
  /**
   * Optional override for the adapter's protocol id. Defaults to
   * `protocol.id`. Only set when an adapter intentionally registers under a
   * different protocol than the wire it speaks (today: OpenAI-compatible Chat
   * uses OpenAI Chat protocol but registers under `openai-compatible-chat`).
   */
  readonly protocolId?: ProtocolID
}

/**
 * Build an `Adapter` by composing the four orthogonal pieces of a deployment:
 *
 * - `Protocol` — what is the API I'm speaking?
 * - `Endpoint` — where do I send the request?
 * - `Auth` — how do I authenticate it?
 * - `Framing` — how do I cut the response stream into protocol frames?
 *
 * Plus optional `headers` and `patches` for cross-cutting deployment concerns
 * (provider version pins, per-deployment quirks).
 *
 * This is the canonical adapter constructor. Reach for `unsafe(...)` only
 * when an adapter genuinely cannot fit the four-axis model.
 */
export function fromProtocol<Draft, Target, Frame, Chunk, State>(
  input: FromProtocolInput<Draft, Target, Frame, Chunk, State>,
): AdapterDefinition<Draft, Target> {
  const auth = input.auth ?? authBearer
  const protocol = input.protocol
  const buildHeaders = input.headers ?? (() => ({}))

  const toHttp = (target: Target, ctx: HttpContext) =>
    Effect.gen(function* () {
      const url = (yield* input.endpoint({ request: ctx.request, target })).toString()
      const body = protocol.encode(target)
      const merged = { ...buildHeaders({ request: ctx.request }), ...ctx.request.model.headers }
      const headers = yield* auth({
        request: ctx.request,
        method: "POST",
        url,
        body,
        headers: merged,
      })
      return ProviderShared.jsonPost({ url, body, headers })
    })

  const parse = (response: HttpClientResponse.HttpClientResponse) =>
    ProviderShared.framed({
      adapter: input.id,
      response,
      readError: protocol.streamReadError,
      framing: input.framing.frame,
      decodeChunk: protocol.decode,
      initial: protocol.initial,
      process: protocol.process,
      onHalt: protocol.onHalt,
    })

  return unsafe({
    id: input.id,
    protocol: input.protocolId ?? protocol.id,
    patches: input.patches,
    redact: protocol.redact,
    prepare: protocol.prepare,
    validate: protocol.validate,
    toHttp,
    parse,
  })
}

const makeClient = (options: ClientOptions): LLMClient => {
  const registry = normalizeRegistry(options.patches)
  const adapters = new Map(
    options.adapters.map((source) => [source.runtime.protocol, source.runtime] as const),
  )

  const compile = Effect.fn("LLM.compile")(function* (request: LLMRequest) {
    const adapter = adapters.get(request.model.protocol)
    if (!adapter) return yield* noAdapter(request.model)

    const requestPlan = plan({
      phase: "request",
      context: context({ request }),
      patches: registry.request,
    })
    const requestAfterRequestPatches = requestPlan.apply(request)
    const promptPlan = plan({
      phase: "prompt",
      context: context({ request: requestAfterRequestPatches }),
      patches: registry.prompt,
    })
    const requestBeforeToolPatches = promptPlan.apply(requestAfterRequestPatches)
    const toolSchemaPlan = plan({
      phase: "tool-schema",
      context: context({ request: requestBeforeToolPatches }),
      patches: registry.toolSchema,
    })
    const patchedRequest =
      requestBeforeToolPatches.tools.length === 0
        ? requestBeforeToolPatches
        : LLM.updateRequest(requestBeforeToolPatches, { tools: requestBeforeToolPatches.tools.map(toolSchemaPlan.apply) })
    const patchContext = context({ request: patchedRequest })
    const draft = yield* adapter.prepare(patchedRequest)
    const targetPlan = plan({
      phase: "target",
      context: patchContext,
      patches: [...adapter.patches, ...registry.target],
    })
    const target = yield* adapter.validate(targetPlan.apply(draft))
    const targetPatchTrace = [
      ...requestPlan.trace,
      ...promptPlan.trace,
      ...(requestBeforeToolPatches.tools.length === 0 ? [] : toolSchemaPlan.trace),
      ...targetPlan.trace,
    ]
    const http = yield* adapter.toHttp(target, { request: patchedRequest, patchTrace: targetPatchTrace })

    return { request: patchedRequest, adapter, target, http, patchTrace: targetPatchTrace }
  })

  const prepare = Effect.fn("LLM.prepare")(function* (request: LLMRequest) {
    const compiled = yield* compile(request)

    return new PreparedRequestSchema({
      id: compiled.request.id ?? "request",
      adapter: compiled.adapter.id,
      model: compiled.request.model,
      target: compiled.target,
      redactedTarget: compiled.adapter.redact(compiled.target),
      patchTrace: compiled.patchTrace,
    })
  })

  const stream = (request: LLMRequest) =>
    Stream.unwrap(
      Effect.gen(function* () {
        const compiled = yield* compile(request)
        const executor = yield* RequestExecutor.Service
        const response = yield* executor.execute(compiled.http)
        const streamPlan = plan({
          phase: "stream",
          context: context({ request: compiled.request }),
          patches: registry.stream,
        })
        const events = compiled.adapter.parse(response)
        if (streamPlan.patches.length === 0) return events
        return events.pipe(Stream.map(streamPlan.apply))
      }),
    )

  const generate = Effect.fn("LLM.generate")(function* (request: LLMRequest) {
    return new LLMResponse(
      yield* stream(request).pipe(
        Stream.runFold(
          () => ({ events: [] as LLMEvent[], usage: undefined as LLMResponse["usage"] }),
          (acc, event) => {
            acc.events.push(event)
            if ("usage" in event && event.usage !== undefined) acc.usage = event.usage
            return acc
          },
        ),
      ),
    )
  })

  // The runtime always emits a `PreparedRequest` (target: unknown). Callers
  // who supply a `Target` type argument assert the shape they expect from
  // their adapter; the cast hands them a typed view of the same payload.
  return { prepare: prepare as LLMClient["prepare"], stream, generate }
}

export const LLMClient = { make: makeClient }

export * as Adapter from "./adapter"
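For orientation, a minimal sketch of composing the four axes into an adapter and client, reusing `OpenAIChat.protocol` for a hypothetical OpenAI-compatible deployment (the adapter id and base URL are illustrative):

```ts
import { Adapter, Auth, Endpoint, Framing, LLMClient, OpenAIChat } from "@opencode-ai/llm"

const exampleChat = Adapter.fromProtocol({
  id: "example-chat",
  protocol: OpenAIChat.protocol, // what API am I speaking?
  endpoint: Endpoint.baseURL({
    default: "https://api.example.com/v1", // illustrative deployment URL
    path: "/chat/completions",
  }),
  auth: Auth.bearer, // the default, shown explicitly
  framing: Framing.sse, // bytes -> SSE frames -> protocol.decode
})

// `stream` / `generate` require a RequestExecutor in the environment;
// RequestExecutor.defaultLayer provides the fetch-backed one.
const client = LLMClient.make({ adapters: [exampleChat] })
```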
61
packages/llm/src/auth.ts
Normal file
@@ -0,0 +1,61 @@
import { Effect } from "effect"
import type { LLMError, LLMRequest } from "./schema"

/**
 * Per-request transport authentication.
 *
 * Receives the unsigned HTTP request shape (URL, method, body, headers) and
 * returns the headers to actually send.
 *
 * Most adapters use the default `Auth.bearer`, which reads
 * `request.model.apiKey` and sets `Authorization: Bearer ...`. Providers
 * that use a different header pick `Auth.apiKeyHeader(name)` (e.g.
 * Anthropic's `x-api-key`, Gemini's `x-goog-api-key`).
 *
 * Adapters that need per-request signing (AWS SigV4, future Vertex IAM,
 * future Azure AAD) implement `Auth` as a function that hashes the body,
 * mints a signature, and merges signed headers into the result.
 */
export type Auth = (input: AuthInput) => Effect.Effect<Record<string, string>, LLMError>

export interface AuthInput {
  readonly request: LLMRequest
  readonly method: "POST" | "GET"
  readonly url: string
  readonly body: string
  readonly headers: Record<string, string>
}

/**
 * Auth that returns the headers untouched. Use when authentication is
 * handled outside the LLM core (e.g. caller supplied `headers.authorization`
 * directly, or there is genuinely no auth).
 */
export const passthrough: Auth = ({ headers }) => Effect.succeed(headers)

/**
 * Builds an `Auth` that reads `request.model.apiKey` and merges the headers
 * produced by `from(apiKey)` into the outgoing headers. No-op when
 * `model.apiKey` is unset, so callers who pre-set their own auth header keep
 * working. The shared core for `bearer` and `apiKeyHeader`.
 */
const fromApiKey = (from: (apiKey: string) => Record<string, string>): Auth => ({ request, headers }) => {
  const key = request.model.apiKey
  if (!key) return Effect.succeed(headers)
  return Effect.succeed({ ...headers, ...from(key) })
}

/**
 * `Authorization: Bearer <apiKey>` from `request.model.apiKey`. No-op when
 * `model.apiKey` is unset. Used by OpenAI, OpenAI Responses, OpenAI-compatible
 * Chat, and (with Bedrock-specific fallback) Bedrock Converse.
 */
export const bearer: Auth = fromApiKey((key) => ({ authorization: `Bearer ${key}` }))

/**
 * Set a custom header to `request.model.apiKey`. No-op when `model.apiKey`
 * is unset. Used by Anthropic (`x-api-key`) and Gemini (`x-goog-api-key`).
 */
export const apiKeyHeader = (name: string): Auth => fromApiKey((key) => ({ [name]: key }))

export * as Auth from "./auth"
50
packages/llm/src/endpoint.ts
Normal file
@@ -0,0 +1,50 @@
import { Effect } from "effect"
import { ProviderShared } from "./provider/shared"
import type { LLMError, LLMRequest } from "./schema"

/**
 * URL construction for one adapter.
 *
 * `Endpoint` is the deployment-side answer to "where does this request go?"
 * It receives the `LLMRequest` (so it can read `model.id`, `model.baseURL`,
 * and `model.queryParams`) and the validated `Target` (so adapters
 * whose path depends on a target field — e.g. Bedrock's `modelId` segment —
 * can read it safely after target patches).
 *
 * The result is a `URL` object so query-param composition stays correct
 * regardless of caller-provided baseURL trailing slashes.
 */
export type Endpoint<Target> = (input: EndpointInput<Target>) => Effect.Effect<URL, LLMError>

export interface EndpointInput<Target> {
  readonly request: LLMRequest
  readonly target: Target
}

/**
 * Build a URL from the model's `baseURL` (or a default) plus a path. Appends
 * `model.queryParams` so adapters that need request-level query params
 * (Azure `api-version`, etc.) get them for free.
 *
 * Both `default` and `path` may be strings or functions of the
 * `EndpointInput`, for adapters whose URL embeds the model id, region, or
 * another target field.
 */
export const baseURL = <Target>(input: {
  readonly default?: string | ((input: EndpointInput<Target>) => string)
  readonly path: string | ((input: EndpointInput<Target>) => string)
  /** Error message used when neither `model.baseURL` nor `default` is set. */
  readonly required?: string
}): Endpoint<Target> => (ctx) =>
  Effect.gen(function* () {
    const fallback = typeof input.default === "function" ? input.default(ctx) : input.default
    const base = ctx.request.model.baseURL ?? fallback
    if (!base) return yield* ProviderShared.invalidRequest(input.required ?? "Missing baseURL")
    const path = typeof input.path === "string" ? input.path : input.path(ctx)
    const url = new URL(`${ProviderShared.trimBaseUrl(base)}${path}`)
    const params = ctx.request.model.queryParams
    if (params) for (const [key, value] of Object.entries(params)) url.searchParams.set(key, value)
    return url
  })

export * as Endpoint from "./endpoint"
54
packages/llm/src/executor.ts
Normal file
@@ -0,0 +1,54 @@
import { Cause, Context, Effect, Layer } from "effect"
import { FetchHttpClient, HttpClient, HttpClientError, HttpClientRequest, HttpClientResponse } from "effect/unstable/http"
import { ProviderRequestError, TransportError, type LLMError } from "./schema"

export interface Interface {
  readonly execute: (
    request: HttpClientRequest.HttpClientRequest,
  ) => Effect.Effect<HttpClientResponse.HttpClientResponse, LLMError>
}

export class Service extends Context.Service<Service, Interface>()("@opencode/LLM/RequestExecutor") {}

const statusError = (response: HttpClientResponse.HttpClientResponse) =>
  Effect.gen(function* () {
    if (response.status < 400) return response
    const body = yield* response.text.pipe(Effect.catch(() => Effect.succeed(undefined)))
    return yield* new ProviderRequestError({
      status: response.status,
      message: `Provider request failed with HTTP ${response.status}`,
      body,
    })
  })

const toHttpError = (error: unknown) => {
  if (Cause.isTimeoutError(error)) return new TransportError({ message: error.message, reason: "Timeout" })
  if (!HttpClientError.isHttpClientError(error)) return new TransportError({ message: "HTTP transport failed" })
  const url = "request" in error ? error.request.url : undefined
  if (error.reason._tag === "TransportError") {
    return new TransportError({
      message: error.reason.description ?? "HTTP transport failed",
      reason: error.reason._tag,
      url,
    })
  }
  return new TransportError({
    message: `HTTP transport failed: ${error.reason._tag}`,
    reason: error.reason._tag,
    url,
  })
}

export const layer: Layer.Layer<Service, never, HttpClient.HttpClient> = Layer.effect(
  Service,
  Effect.gen(function* () {
    const http = yield* HttpClient.HttpClient
    return Service.of({
      execute: (request) => http.execute(request).pipe(Effect.mapError(toHttpError), Effect.flatMap(statusError)),
    })
  }),
)

export const defaultLayer = layer.pipe(Layer.provide(FetchHttpClient.layer))

export * as RequestExecutor from "./executor"
29
packages/llm/src/framing.ts
Normal file
@@ -0,0 +1,29 @@
import type { Stream } from "effect"
import { ProviderShared } from "./provider/shared"
import type { ProviderChunkError } from "./schema"

/**
 * Decode a streaming HTTP response body into provider-protocol frames.
 *
 * `Framing` is the byte-stream-shaped seam between transport and protocol:
 *
 * - SSE (`Framing.sse`) — UTF-8 decode the body, run the SSE channel decoder,
 *   drop empty / `[DONE]` keep-alives. Each emitted frame is the JSON `data:`
 *   payload of one event.
 * - AWS event stream — length-prefixed binary frames with CRC checksums.
 *   Each emitted frame is one parsed binary event record.
 *
 * The frame type is opaque to this layer; the protocol's `decode` step turns
 * a frame into a typed chunk.
 */
export interface Framing<Frame> {
  readonly id: string
  readonly frame: (
    bytes: Stream.Stream<Uint8Array, ProviderChunkError>,
  ) => Stream.Stream<Frame, ProviderChunkError>
}

/** Server-Sent Events framing. Used by every JSON-streaming HTTP provider. */
export const sse: Framing<string> = { id: "sse", frame: ProviderShared.sseFraming }

export * as Framing from "./framing"
41
packages/llm/src/index.ts
Normal file
@@ -0,0 +1,41 @@
export * from "./adapter"
export * from "./executor"
export * from "./patch"
export * from "./schema"
export * from "./tool"
export * from "./tool-runtime"

export { Auth } from "./auth"
export { Endpoint } from "./endpoint"
export { Framing } from "./framing"
export { Protocol } from "./protocol"
export type { Auth as AuthFn, AuthInput } from "./auth"
export type { Endpoint as EndpointFn, EndpointInput } from "./endpoint"
export type { Framing as FramingDef } from "./framing"
export type { Protocol as ProtocolDef } from "./protocol"

export * as LLM from "./llm"
export * as ProviderPatch from "./provider/patch"
export * as Schema from "./schema"
export type { CapabilitiesInput } from "./llm"
export type {
  ProviderAuth,
  ProviderResolution,
  ProviderResolveInput,
  ProviderResolver as ProviderResolverShape,
} from "./provider-resolver"
export { AnthropicMessages } from "./provider/anthropic-messages"
export { AmazonBedrock } from "./provider/amazon-bedrock"
export { Anthropic } from "./provider/anthropic"
export { Azure } from "./provider/azure"
export { BedrockConverse } from "./provider/bedrock-converse"
export { Gemini } from "./provider/gemini"
export { Google } from "./provider/google"
export { GitHubCopilot } from "./provider/github-copilot"
export { OpenAI } from "./provider/openai"
export { OpenAIChat } from "./provider/openai-chat"
export { OpenAICompatibleChat } from "./provider/openai-compatible-chat"
export { OpenAICompatibleFamily } from "./provider/openai-compatible-family"
export { OpenAIResponses } from "./provider/openai-responses"
export { ProviderResolver } from "./provider-resolver"
export { XAI } from "./provider/xai"
213
packages/llm/src/llm.ts
Normal file
@@ -0,0 +1,213 @@
import {
  GenerationOptions,
  LLMEvent,
  LLMRequest,
  LLMResponse,
  Message,
  ModelCapabilities,
  ModelID,
  ModelLimits,
  ModelRef,
  ProviderID,
  ToolChoice,
  ToolDefinition,
  type ContentPart,
  type ModelID as ModelIDType,
  type ProviderID as ProviderIDType,
  type ReasoningEffort,
  type SystemPart,
  type ToolCallPart,
  type ToolResultPart,
  type ToolResultValue,
} from "./schema"

export type CapabilitiesInput = {
  readonly input?: Partial<ModelCapabilities["input"]>
  readonly output?: Partial<ModelCapabilities["output"]>
  readonly tools?: Partial<ModelCapabilities["tools"]>
  readonly cache?: Partial<ModelCapabilities["cache"]>
  readonly reasoning?: Partial<Omit<ModelCapabilities["reasoning"], "efforts">> & {
    readonly efforts?: ReadonlyArray<ReasoningEffort>
  }
}

export type ModelInput = Omit<ConstructorParameters<typeof ModelRef>[0], "id" | "provider" | "capabilities" | "limits"> & {
  readonly id: string | ModelIDType
  readonly provider: string | ProviderIDType
  readonly capabilities?: ModelCapabilities | CapabilitiesInput
  readonly limits?: ModelLimits | ConstructorParameters<typeof ModelLimits>[0]
}

export type MessageInput = Omit<ConstructorParameters<typeof Message>[0], "content"> & {
  readonly content: string | ContentPart | ReadonlyArray<ContentPart>
}

export type ToolChoiceInput =
  | ToolChoice
  | ConstructorParameters<typeof ToolChoice>[0]
  | ToolDefinition
  | string
export type ToolChoiceMode = Exclude<ToolChoice["type"], "tool">

export type ToolResultInput = Omit<ToolResultPart, "type" | "result"> & {
  readonly result: unknown
  readonly resultType?: ToolResultValue["type"]
}

export type RequestInput = Omit<
  ConstructorParameters<typeof LLMRequest>[0],
  "system" | "messages" | "tools" | "toolChoice" | "generation"
> & {
  readonly system?: string | SystemPart | ReadonlyArray<SystemPart>
  readonly prompt?: string | ContentPart | ReadonlyArray<ContentPart>
  readonly messages?: ReadonlyArray<Message | MessageInput>
  readonly tools?: ReadonlyArray<ToolDefinition | ConstructorParameters<typeof ToolDefinition>[0]>
  readonly toolChoice?: ToolChoiceInput
  readonly generation?: GenerationOptions | ConstructorParameters<typeof GenerationOptions>[0]
}

export const capabilities = (input: CapabilitiesInput = {}) =>
  new ModelCapabilities({
    input: { text: true, image: false, audio: false, video: false, pdf: false, ...input.input },
    output: { text: true, reasoning: false, ...input.output },
    tools: { calls: false, streamingInput: false, providerExecuted: false, ...input.tools },
    cache: { prompt: false, messageBlocks: false, contentBlocks: false, ...input.cache },
    reasoning: { efforts: [], summaries: false, encryptedContent: false, ...input.reasoning },
  })

export const limits = (input: ConstructorParameters<typeof ModelLimits>[0] = {}) => new ModelLimits(input)

export const text = (value: string): ContentPart => ({ type: "text", text: value })

export const system = (value: string): SystemPart => ({ type: "text", text: value })

const contentParts = (input: string | ContentPart | ReadonlyArray<ContentPart>) =>
  typeof input === "string" ? [text(input)] : Array.isArray(input) ? [...input] : [input]

const systemParts = (input?: string | SystemPart | ReadonlyArray<SystemPart>) => {
  if (input === undefined) return []
  return typeof input === "string" ? [system(input)] : Array.isArray(input) ? [...input] : [input]
}

export const message = (input: Message | MessageInput) => {
  if (input instanceof Message) return input
  return new Message({ ...input, content: contentParts(input.content) })
}

export const user = (content: string | ContentPart | ReadonlyArray<ContentPart>) =>
  message({ role: "user", content })

export const assistant = (content: string | ContentPart | ReadonlyArray<ContentPart>) =>
  message({ role: "assistant", content })

export const model = (input: ModelInput) => {
  const { capabilities: modelCapabilities, limits: modelLimits, ...rest } = input
  return new ModelRef({
    ...rest,
    id: ModelID.make(input.id),
    provider: ProviderID.make(input.provider),
    protocol: input.protocol,
    capabilities: modelCapabilities instanceof ModelCapabilities ? modelCapabilities : capabilities(modelCapabilities),
    limits: modelLimits instanceof ModelLimits ? modelLimits : limits(modelLimits),
  })
}

export const toolDefinition = (input: ToolDefinition | ConstructorParameters<typeof ToolDefinition>[0]) => {
  if (input instanceof ToolDefinition) return input
  return new ToolDefinition(input)
}

export const toolCall = (input: Omit<ToolCallPart, "type">): ToolCallPart => ({ type: "tool-call", ...input })

const isRecord = (value: unknown): value is Record<string, unknown> =>
  typeof value === "object" && value !== null && !Array.isArray(value)

const isToolResultValue = (value: unknown): value is ToolResultValue =>
  isRecord(value) && (value.type === "text" || value.type === "json" || value.type === "error") && "value" in value

const toolResultValue = (value: unknown, type: ToolResultValue["type"] = "json"): ToolResultValue => {
  if (isToolResultValue(value)) return value
  return { type, value }
}

export const toolResult = (input: ToolResultInput): ToolResultPart => ({
  type: "tool-result",
  id: input.id,
  name: input.name,
  result: toolResultValue(input.result, input.resultType),
  providerExecuted: input.providerExecuted,
  metadata: input.metadata,
})

export const toolMessage = (input: ToolResultPart | ToolResultInput) =>
  message({ role: "tool", content: ["type" in input ? input : toolResult(input)] })

export const toolChoiceName = (name: string) => new ToolChoice({ type: "tool", name })

const isToolChoiceMode = (value: string): value is ToolChoiceMode =>
  value === "auto" || value === "none" || value === "required"

export const toolChoice = (input: ToolChoiceInput) => {
  if (input instanceof ToolChoice) return input
  if (input instanceof ToolDefinition) return new ToolChoice({ type: "tool", name: input.name })
  if (typeof input === "string") return isToolChoiceMode(input) ? new ToolChoice({ type: input }) : toolChoiceName(input)
  return new ToolChoice(input)
}

export const generation = (input: GenerationOptions | ConstructorParameters<typeof GenerationOptions>[0] = {}) => {
  if (input instanceof GenerationOptions) return input
  return new GenerationOptions(input)
}

export const requestInput = (input: LLMRequest): RequestInput => ({
  id: input.id,
  model: input.model,
  system: input.system,
  messages: input.messages,
  tools: input.tools,
  toolChoice: input.toolChoice,
  generation: input.generation,
  reasoning: input.reasoning,
  cache: input.cache,
  responseFormat: input.responseFormat,
  metadata: input.metadata,
  native: input.native,
})

export const request = (input: RequestInput) => {
  const { system: requestSystem, prompt, messages, tools, toolChoice: requestToolChoice, generation: requestGeneration, ...rest } = input
  return new LLMRequest({
    ...rest,
    system: systemParts(requestSystem),
    messages: [...(messages?.map(message) ?? []), ...(prompt === undefined ? [] : [user(prompt)])],
    tools: tools?.map(toolDefinition) ?? [],
    toolChoice: requestToolChoice ? toolChoice(requestToolChoice) : undefined,
    generation: generation(requestGeneration),
  })
}

export const updateRequest = (input: LLMRequest, patch: Partial<RequestInput>) =>
  request({ ...requestInput(input), ...patch })

export const outputText = (response: LLMResponse | { readonly events: ReadonlyArray<LLMEvent> }) =>
  response.events
    .filter(LLMEvent.is.textDelta)
    .map((event) => event.text)
    .join("")

export const outputUsage = (response: LLMResponse | { readonly events: ReadonlyArray<LLMEvent> }) => {
  if (response instanceof LLMResponse) return response.usage
  return response.events.reduce<LLMResponse["usage"]>(
    (usage, event) => ("usage" in event && event.usage !== undefined ? event.usage : usage),
    undefined,
  )
}

export const outputToolCalls = (response: LLMResponse | { readonly events: ReadonlyArray<LLMEvent> }) =>
  response.events.filter(LLMEvent.is.toolCall)

export const outputReasoning = (response: LLMResponse | { readonly events: ReadonlyArray<LLMEvent> }) =>
  response.events
    .filter(LLMEvent.is.reasoningDelta)
    .map((event) => event.text)
    .join("")
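A short sketch of these helpers end to end; the model fields are illustrative, and the protocol id is an assumption that must match a registered adapter:

```ts
import { LLM } from "@opencode-ai/llm"

const model = LLM.model({
  id: "example-model", // illustrative
  provider: "example",
  protocol: "openai-chat", // assumed protocol id; must match the adapter registry
  capabilities: { tools: { calls: true } }, // partials merge over the defaults above
})

const req = LLM.request({
  model,
  system: "Answer tersely.",
  prompt: "Name one prime number.",
  toolChoice: "auto",
})

// After client.generate(req):
//   LLM.outputText(response)   // concatenated text deltas
//   LLM.outputUsage(response)  // last usage event, if any
```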
159
packages/llm/src/patch.ts
Normal file
@@ -0,0 +1,159 @@
import type { LLMEvent, LLMRequest, ModelRef, PatchPhase, ProtocolID, ToolDefinition } from "./schema"
import { PatchTrace } from "./schema"

export interface PatchContext {
  readonly request: LLMRequest
  readonly model: ModelRef
  readonly protocol: ModelRef["protocol"]
}

export interface Patch<A> {
  readonly id: string
  readonly phase: PatchPhase
  readonly reason: string
  readonly order?: number
  readonly when: (context: PatchContext) => boolean
  readonly apply: (value: A, context: PatchContext) => A
}

export interface AnyPatch {
  readonly id: string
  readonly phase: PatchPhase
  readonly reason: string
  readonly order?: number
  readonly when: (context: PatchContext) => boolean
  readonly apply: (value: never, context: PatchContext) => unknown
}

export interface PatchInput<A> {
  readonly reason: string
  readonly order?: number
  readonly when?: PatchPredicate | ((context: PatchContext) => boolean)
  readonly apply: (value: A, context: PatchContext) => A
}

export interface PatchPredicate {
  (context: PatchContext): boolean
  readonly and: (...predicates: ReadonlyArray<PatchPredicate>) => PatchPredicate
  readonly or: (...predicates: ReadonlyArray<PatchPredicate>) => PatchPredicate
  readonly not: () => PatchPredicate
}

export interface PatchPlan<A> {
  readonly phase: PatchPhase
  readonly patches: ReadonlyArray<Patch<A>>
  readonly trace: ReadonlyArray<PatchTrace>
  readonly apply: (value: A) => A
}

export interface PatchRegistry {
  readonly request: ReadonlyArray<Patch<LLMRequest>>
  readonly prompt: ReadonlyArray<Patch<LLMRequest>>
  readonly toolSchema: ReadonlyArray<Patch<ToolDefinition>>
  readonly target: ReadonlyArray<Patch<unknown>>
  readonly stream: ReadonlyArray<Patch<LLMEvent>>
}

export const emptyRegistry: PatchRegistry = {
  request: [],
  prompt: [],
  toolSchema: [],
  target: [],
  stream: [],
}

export const predicate = (run: (context: PatchContext) => boolean): PatchPredicate => {
  const self = Object.assign(run, {
    and: (...predicates: ReadonlyArray<PatchPredicate>) =>
      predicate((context) => self(context) && predicates.every((item) => item(context))),
    or: (...predicates: ReadonlyArray<PatchPredicate>) =>
      predicate((context) => self(context) || predicates.some((item) => item(context))),
    not: () => predicate((context) => !self(context)),
  })
  return self
}

export const Model = {
  provider: (provider: string) => predicate((context) => context.model.provider === provider),
  protocol: (protocol: ProtocolID) => predicate((context) => context.protocol === protocol),
  id: (id: string) => predicate((context) => context.model.id === id),
  idIncludes: (value: string) => predicate((context) => context.model.id.toLowerCase().includes(value.toLowerCase())),
}

export const make = <A>(id: string, phase: PatchPhase, input: PatchInput<A>): Patch<A> => ({
  id,
  phase,
  reason: input.reason,
  order: input.order,
  when: input.when ?? (() => true),
  apply: input.apply,
})

export const request = (id: string, input: PatchInput<LLMRequest>) => make(`request.${id}`, "request", input)

export const prompt = (id: string, input: PatchInput<LLMRequest>) => make(`prompt.${id}`, "prompt", input)

export const toolSchema = (id: string, input: PatchInput<ToolDefinition>) => make(`schema.${id}`, "tool-schema", input)

export const target = <A>(id: string, input: PatchInput<A>) => make(`target.${id}`, "target", input)

export const stream = (id: string, input: PatchInput<LLMEvent>) => make(`stream.${id}`, "stream", input)

export function registry(patches: ReadonlyArray<AnyPatch>): PatchRegistry {
  return {
    request: patches.filter((patch): patch is Patch<LLMRequest> => patch.phase === "request"),
    prompt: patches.filter((patch): patch is Patch<LLMRequest> => patch.phase === "prompt"),
    toolSchema: patches.filter((patch): patch is Patch<ToolDefinition> => patch.phase === "tool-schema"),
    target: patches.filter((patch) => patch.phase === "target") as unknown as ReadonlyArray<Patch<unknown>>,
    stream: patches.filter((patch): patch is Patch<LLMEvent> => patch.phase === "stream"),
  }
}

export function context(input: {
  readonly request: LLMRequest
}): PatchContext {
  return {
    request: input.request,
    model: input.request.model,
    protocol: input.request.model.protocol,
  }
}

export function plan<A>(input: {
  readonly phase: PatchPhase
  readonly context: PatchContext
  readonly patches: ReadonlyArray<Patch<A>>
}): PatchPlan<A> {
  const patches = input.patches
    .filter((patch) => patch.phase === input.phase && patch.when(input.context))
    .toSorted((left, right) => (left.order ?? 0) - (right.order ?? 0) || left.id.localeCompare(right.id))

  return {
    phase: input.phase,
    patches,
    trace: patches.map(
      (patch) =>
        new PatchTrace({
          id: patch.id,
          phase: patch.phase,
          reason: patch.reason,
        }),
    ),
    apply: (value) => patches.reduce((next, patch) => patch.apply(next, input.context), value),
  }
}

export function mergeRegistries(registries: ReadonlyArray<PatchRegistry>): PatchRegistry {
  return registries.reduce(
    (merged, registry) => ({
      request: [...merged.request, ...registry.request],
      prompt: [...merged.prompt, ...registry.prompt],
      toolSchema: [...merged.toolSchema, ...registry.toolSchema],
      target: [...merged.target, ...registry.target],
      stream: [...merged.stream, ...registry.stream],
    }),
    emptyRegistry,
  )
}

export * as Patch from "./patch"
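A sketch of defining and registering a request-phase patch with the combinators above; the provider id and the field being reset are illustrative:

```ts
import { LLM, LLMClient, Patch } from "@opencode-ai/llm"

declare const adapters: Parameters<typeof LLMClient.make>[0]["adapters"] // built elsewhere

// Reset generation options for one provider's reasoning models; `reason`
// is carried into PatchTrace for debuggability.
const resetGeneration = Patch.request("reset-generation", {
  reason: "example provider rejects sampling options on reasoning models",
  when: Patch.Model.provider("example").and(Patch.Model.idIncludes("reasoning")),
  apply: (request) => LLM.updateRequest(request, { generation: {} }),
})

// Runs in the "request" phase during compile, before prompt, tool-schema,
// and target patches.
const client = LLMClient.make({ adapters, patches: [resetGeneration] })
```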
72
packages/llm/src/protocol.ts
Normal file
72
packages/llm/src/protocol.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
import type { Effect } from "effect"
|
||||
import type { LLMError, LLMEvent, LLMRequest, ProtocolID, ProviderChunkError } from "./schema"
|
||||
|
||||
/**
|
||||
* The semantic API contract of one model server family.
|
||||
*
|
||||
* A `Protocol` owns the parts of an adapter that are intrinsic to "what does
|
||||
* this API look like": how a common `LLMRequest` lowers into a provider-native
|
||||
* shape, how that shape validates and encodes onto the wire, and how the
|
||||
* streaming response decodes back into common `LLMEvent`s.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* - `OpenAIChat.protocol` — chat completions style
|
||||
* - `OpenAIResponses.protocol` — responses API
|
||||
* - `AnthropicMessages.protocol` — messages API with content blocks
|
||||
* - `Gemini.protocol` — generateContent
|
||||
* - `BedrockConverse.protocol` — Converse with binary event-stream framing
|
||||
*
|
||||
* A `Protocol` is **not** a deployment. It does not know which URL, which
|
||||
* headers, or which auth scheme to use. Those are deployment concerns owned
|
||||
* by `Adapter.fromProtocol(...)` along with the chosen `Endpoint`, `Auth`,
|
||||
* and `Framing`. This separation is what lets DeepSeek, TogetherAI, Cerebras,
|
||||
* etc. all reuse `OpenAIChat.protocol` without forking 300 lines per provider.
|
||||
*
|
||||
* The five type parameters reflect the pipeline:
|
||||
*
|
||||
* - `Draft` — provider-native shape *before* target patches.
|
||||
* - `Target` — provider-native shape *after* target patches and Schema
|
||||
* validation. The body sent to the provider is `encode(target)`.
|
||||
* - `Frame` — one unit of the framed response stream. SSE: a JSON data
|
||||
* string. AWS event stream: a parsed binary frame.
|
||||
* - `Chunk` — schema-decoded provider chunk produced from one frame.
|
||||
* - `State` — accumulator threaded through `process` to translate chunk
|
||||
* sequences into `LLMEvent` sequences.
|
||||
*/
|
||||
export interface Protocol<Draft, Target, Frame, Chunk, State> {
|
||||
/** Stable id matching `ModelRef.protocol` for adapter registry lookup. */
|
||||
readonly id: ProtocolID
|
||||
/** Lower a common request into this protocol's draft shape. */
|
||||
readonly prepare: (request: LLMRequest) => Effect.Effect<Draft, LLMError>
|
||||
/** Validate the post-patch draft against the protocol's target schema. */
|
||||
readonly validate: (draft: Draft) => Effect.Effect<Target, LLMError>
|
||||
/** Serialize the validated target into a request body. */
|
||||
readonly encode: (target: Target) => string
|
||||
/** Produce a redacted copy for `PreparedRequest.redactedTarget`. */
|
||||
readonly redact: (target: Target) => unknown
|
||||
/** Decode one framed response unit into a typed provider chunk. */
|
||||
readonly decode: (frame: Frame) => Effect.Effect<Chunk, ProviderChunkError>
|
||||
/** Initial parser state. Called once per response. */
|
||||
readonly initial: () => State
|
||||
/** Translate one chunk into emitted events plus the next state. */
|
||||
readonly process: (
|
||||
state: State,
|
||||
chunk: Chunk,
|
||||
) => Effect.Effect<readonly [State, ReadonlyArray<LLMEvent>], ProviderChunkError>
|
||||
/** Optional flush emitted when the framed stream ends. */
|
||||
readonly onHalt?: (state: State) => ReadonlyArray<LLMEvent>
|
||||
/** Error message used when the underlying transport fails mid-stream. */
|
||||
readonly streamReadError: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a `Protocol` from its parts. Currently a typed identity, but kept
|
||||
* as the public constructor so future cross-cutting concerns (tracing spans,
|
||||
* default redaction, instrumentation) can be added in one place.
|
||||
*/
|
||||
export const define = <Draft, Target, Frame, Chunk, State>(
|
||||
input: Protocol<Draft, Target, Frame, Chunk, State>,
|
||||
): Protocol<Draft, Target, Frame, Chunk, State> => input
|
||||
|
||||
export * as Protocol from "./protocol"
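
// Illustrative sketch (assumption, not part of this diff): driving the chunk
// state machine by hand. `initial()` seeds the fold, every decoded frame
// yields zero or more common events plus the next state, and the optional
// `onHalt` flushes whatever the parser still holds when the stream ends.
import { Effect } from "effect"
import type { LLMEvent } from "./schema"
import type { Protocol } from "./protocol"

const drain = <Draft, Target, Frame, Chunk, State>(
  protocol: Protocol<Draft, Target, Frame, Chunk, State>,
  frames: ReadonlyArray<Frame>,
) =>
  Effect.gen(function* () {
    let state = protocol.initial()
    const events: LLMEvent[] = []
    for (const frame of frames) {
      const chunk = yield* protocol.decode(frame)
      const [next, emitted] = yield* protocol.process(state, chunk)
      state = next
      events.push(...emitted)
    }
    events.push(...(protocol.onHalt?.(state) ?? []))
    return events
  })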

packages/llm/src/provider-resolver.ts (Normal file, 65 lines)
@@ -0,0 +1,65 @@
import { ModelID, ProviderID, type ProtocolID } from "./schema"
import type { ModelID as ModelIDType, ProviderID as ProviderIDType } from "./schema"
import type { CapabilitiesInput } from "./llm"

/**
 * Whether a provider needs an API key at request time. The OpenCode bridge
 * consults this to decide whether to read `provider.key` and stamp it onto
 * `model.apiKey`; the adapter's `Auth` axis owns header placement so this
 * field does not need to distinguish bearer / x-api-key / x-goog-api-key.
 */
export type ProviderAuth = "key" | "none"

export interface ProviderResolution {
  readonly provider: ProviderIDType
  readonly protocol: ProtocolID
  readonly baseURL?: string
  readonly auth: ProviderAuth
  readonly queryParams?: Record<string, string>
  readonly capabilities?: CapabilitiesInput
}

export interface ProviderResolveInput {
  readonly modelID: ModelIDType
  readonly providerID: ProviderIDType
  readonly options: Record<string, unknown>
}

export interface ProviderResolver {
  readonly id: ProviderIDType
  readonly resolve: (input: ProviderResolveInput) => ProviderResolution | undefined
}

export const make = (
  provider: string | ProviderIDType,
  protocol: ProtocolID,
  options: Partial<Omit<ProviderResolution, "provider" | "protocol">> = {},
): ProviderResolution => ({
  provider: ProviderID.make(provider),
  protocol,
  ...options,
  auth: options.auth ?? "key",
})

export const define = (input: ProviderResolver): ProviderResolver => input

export const fixed = (
  provider: string | ProviderIDType,
  protocol: ProtocolID,
  options: Partial<Omit<ProviderResolution, "provider" | "protocol">> = {},
): ProviderResolver => {
  const resolution = make(provider, protocol, options)
  return define({ id: resolution.provider, resolve: () => resolution })
}

export const input = (
  modelID: string | ModelIDType,
  providerID: string | ProviderIDType,
  options: Record<string, unknown>,
): ProviderResolveInput => ({
  modelID: ModelID.make(modelID),
  providerID: ProviderID.make(providerID),
  options,
})

export * as ProviderResolver from "./provider-resolver"
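
// Illustrative sketch (assumption, not part of this diff): a fixed resolver
// returns the same resolution for every model. "openai-chat" matches the
// protocol azure.ts selects below; the DeepSeek baseURL is a made-up example.
const deepseek = fixed("deepseek", "openai-chat", { baseURL: "https://api.deepseek.com" })
const resolution = deepseek.resolve(input("deepseek-chat", "deepseek", {}))
// resolution?.protocol === "openai-chat"
// resolution?.auth === "key" (the `make` default when no auth option is given)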

packages/llm/src/provider/amazon-bedrock.ts (Normal file, 5 lines)
@@ -0,0 +1,5 @@
import { ProviderResolver } from "../provider-resolver"

export const resolver = ProviderResolver.fixed("amazon-bedrock", "bedrock-converse")

export * as AmazonBedrock from "./amazon-bedrock"

packages/llm/src/provider/anthropic-messages.ts (Normal file, 542 lines)
@@ -0,0 +1,542 @@
import { Effect, Schema } from "effect"
import { Adapter } from "../adapter"
import { Auth } from "../auth"
import { Endpoint } from "../endpoint"
import { Framing } from "../framing"
import { capabilities, model as llmModel, type ModelInput } from "../llm"
import { Protocol } from "../protocol"
import {
  Usage,
  type CacheHint,
  type FinishReason,
  type LLMEvent,
  type LLMRequest,
  type ToolCallPart,
  type ToolDefinition,
  type ToolResultPart,
} from "../schema"
import { ProviderShared } from "./shared"

const ADAPTER = "anthropic-messages"

export type AnthropicMessagesModelInput = Omit<ModelInput, "provider" | "protocol" | "headers"> & {
  readonly apiKey?: string
  readonly headers?: Record<string, string>
}

const AnthropicCacheControl = Schema.Struct({ type: Schema.Literal("ephemeral") })

const AnthropicTextBlock = Schema.Struct({
  type: Schema.Literal("text"),
  text: Schema.String,
  cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicTextBlock = Schema.Schema.Type<typeof AnthropicTextBlock>

const AnthropicThinkingBlock = Schema.Struct({
  type: Schema.Literal("thinking"),
  thinking: Schema.String,
  signature: Schema.optional(Schema.String),
  cache_control: Schema.optional(AnthropicCacheControl),
})

const AnthropicToolUseBlock = Schema.Struct({
  type: Schema.Literal("tool_use"),
  id: Schema.String,
  name: Schema.String,
  input: Schema.Unknown,
  cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicToolUseBlock = Schema.Schema.Type<typeof AnthropicToolUseBlock>

const AnthropicServerToolUseBlock = Schema.Struct({
  type: Schema.Literal("server_tool_use"),
  id: Schema.String,
  name: Schema.String,
  input: Schema.Unknown,
  cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicServerToolUseBlock = Schema.Schema.Type<typeof AnthropicServerToolUseBlock>

// Server tool result blocks: web_search_tool_result, code_execution_tool_result,
// and web_fetch_tool_result. The provider executes the tool and inlines the
// structured result into the assistant turn — there is no client tool_result
// round-trip. We round-trip the structured `content` payload as opaque JSON so
// the next request can echo it back when continuing the conversation.
const AnthropicServerToolResultType = Schema.Literals([
  "web_search_tool_result",
  "code_execution_tool_result",
  "web_fetch_tool_result",
])
type AnthropicServerToolResultType = Schema.Schema.Type<typeof AnthropicServerToolResultType>

const AnthropicServerToolResultBlock = Schema.Struct({
  type: AnthropicServerToolResultType,
  tool_use_id: Schema.String,
  content: Schema.Unknown,
  cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicServerToolResultBlock = Schema.Schema.Type<typeof AnthropicServerToolResultBlock>

const AnthropicToolResultBlock = Schema.Struct({
  type: Schema.Literal("tool_result"),
  tool_use_id: Schema.String,
  content: Schema.String,
  is_error: Schema.optional(Schema.Boolean),
  cache_control: Schema.optional(AnthropicCacheControl),
})

const AnthropicUserBlock = Schema.Union([AnthropicTextBlock, AnthropicToolResultBlock])
const AnthropicAssistantBlock = Schema.Union([
  AnthropicTextBlock,
  AnthropicThinkingBlock,
  AnthropicToolUseBlock,
  AnthropicServerToolUseBlock,
  AnthropicServerToolResultBlock,
])
type AnthropicAssistantBlock = Schema.Schema.Type<typeof AnthropicAssistantBlock>
type AnthropicToolResultBlock = Schema.Schema.Type<typeof AnthropicToolResultBlock>

const AnthropicMessage = Schema.Union([
  Schema.Struct({ role: Schema.Literal("user"), content: Schema.Array(AnthropicUserBlock) }),
  Schema.Struct({ role: Schema.Literal("assistant"), content: Schema.Array(AnthropicAssistantBlock) }),
])
type AnthropicMessage = Schema.Schema.Type<typeof AnthropicMessage>

const AnthropicTool = Schema.Struct({
  name: Schema.String,
  description: Schema.String,
  input_schema: Schema.Record(Schema.String, Schema.Unknown),
  cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicTool = Schema.Schema.Type<typeof AnthropicTool>

const AnthropicToolChoice = Schema.Union([
  Schema.Struct({ type: Schema.Literals(["auto", "any"]) }),
  Schema.Struct({ type: Schema.Literal("tool"), name: Schema.String }),
])

const AnthropicThinking = Schema.Struct({
  type: Schema.Literal("enabled"),
  budget_tokens: Schema.Number,
})

const AnthropicTargetFields = {
  model: Schema.String,
  system: Schema.optional(Schema.Array(AnthropicTextBlock)),
  messages: Schema.Array(AnthropicMessage),
  tools: Schema.optional(Schema.Array(AnthropicTool)),
  tool_choice: Schema.optional(AnthropicToolChoice),
  stream: Schema.Literal(true),
  max_tokens: Schema.Number,
  temperature: Schema.optional(Schema.Number),
  top_p: Schema.optional(Schema.Number),
  stop_sequences: Schema.optional(Schema.Array(Schema.String)),
  thinking: Schema.optional(AnthropicThinking),
}
const AnthropicMessagesDraft = Schema.Struct(AnthropicTargetFields)
type AnthropicMessagesDraft = Schema.Schema.Type<typeof AnthropicMessagesDraft>
const AnthropicMessagesTarget = Schema.Struct(AnthropicTargetFields)
export type AnthropicMessagesTarget = Schema.Schema.Type<typeof AnthropicMessagesTarget>

const AnthropicUsage = Schema.Struct({
  input_tokens: Schema.optional(Schema.Number),
  output_tokens: Schema.optional(Schema.Number),
  cache_creation_input_tokens: Schema.optional(Schema.NullOr(Schema.Number)),
  cache_read_input_tokens: Schema.optional(Schema.NullOr(Schema.Number)),
})
type AnthropicUsage = Schema.Schema.Type<typeof AnthropicUsage>

const AnthropicStreamBlock = Schema.Struct({
  type: Schema.String,
  id: Schema.optional(Schema.String),
  name: Schema.optional(Schema.String),
  text: Schema.optional(Schema.String),
  thinking: Schema.optional(Schema.String),
  input: Schema.optional(Schema.Unknown),
  // *_tool_result blocks arrive whole as content_block_start (no streaming
  // delta) with the structured payload in `content` and the originating
  // server_tool_use id in `tool_use_id`.
  tool_use_id: Schema.optional(Schema.String),
  content: Schema.optional(Schema.Unknown),
})

const AnthropicStreamDelta = Schema.Struct({
  type: Schema.optional(Schema.String),
  text: Schema.optional(Schema.String),
  thinking: Schema.optional(Schema.String),
  partial_json: Schema.optional(Schema.String),
  signature: Schema.optional(Schema.String),
  stop_reason: Schema.optional(Schema.NullOr(Schema.String)),
  stop_sequence: Schema.optional(Schema.NullOr(Schema.String)),
})

const AnthropicChunk = Schema.Struct({
  type: Schema.String,
  index: Schema.optional(Schema.Number),
  message: Schema.optional(Schema.Struct({ usage: Schema.optional(AnthropicUsage) })),
  content_block: Schema.optional(AnthropicStreamBlock),
  delta: Schema.optional(AnthropicStreamDelta),
  usage: Schema.optional(AnthropicUsage),
  error: Schema.optional(Schema.Struct({ type: Schema.String, message: Schema.String })),
})
type AnthropicChunk = Schema.Schema.Type<typeof AnthropicChunk>

interface ToolAccumulator extends ProviderShared.ToolAccumulator {
  readonly providerExecuted: boolean
}

interface ParserState {
  readonly tools: Record<number, ToolAccumulator>
  readonly usage?: Usage
}

const { encodeTarget, decodeTarget, decodeChunk } = ProviderShared.codecs({
  adapter: ADAPTER,
  draft: AnthropicMessagesDraft,
  target: AnthropicMessagesTarget,
  chunk: AnthropicChunk,
  chunkErrorMessage: "Invalid Anthropic Messages stream chunk",
})

const invalid = ProviderShared.invalidRequest

const cacheControl = (cache: CacheHint | undefined) =>
  cache?.type === "ephemeral" ? { type: "ephemeral" as const } : undefined

const lowerTool = (tool: ToolDefinition): AnthropicTool => ({
  name: tool.name,
  description: tool.description,
  input_schema: tool.inputSchema,
})

const lowerToolChoice = Effect.fn("AnthropicMessages.lowerToolChoice")(function* (
  toolChoice: NonNullable<LLMRequest["toolChoice"]>,
) {
  if (toolChoice.type === "none") return undefined
  if (toolChoice.type === "required") return { type: "any" as const }
  if (toolChoice.type !== "tool") return { type: "auto" as const }
  if (!toolChoice.name) return yield* invalid("Anthropic Messages tool choice requires a tool name")
  return { type: "tool" as const, name: toolChoice.name }
})

const lowerToolCall = (part: ToolCallPart): AnthropicToolUseBlock => ({
  type: "tool_use",
  id: part.id,
  name: part.name,
  input: part.input,
})

const lowerServerToolCall = (part: ToolCallPart): AnthropicServerToolUseBlock => ({
  type: "server_tool_use",
  id: part.id,
  name: part.name,
  input: part.input,
})

// Server tool result blocks are typed by name. Anthropic ships three today;
// extend this list when new server tools land. The block content is the
// structured payload returned by the provider, which we round-trip as-is.
const serverToolResultType = (name: string): AnthropicServerToolResultType | undefined => {
  if (name === "web_search") return "web_search_tool_result"
  if (name === "code_execution") return "code_execution_tool_result"
  if (name === "web_fetch") return "web_fetch_tool_result"
  return undefined
}

const lowerServerToolResult = Effect.fn("AnthropicMessages.lowerServerToolResult")(function* (part: ToolResultPart) {
  const wireType = serverToolResultType(part.name)
  if (!wireType) return yield* invalid(`Anthropic Messages does not know how to round-trip server tool result for ${part.name}`)
  return { type: wireType, tool_use_id: part.id, content: part.result.value } satisfies AnthropicServerToolResultBlock
})

const lowerMessages = Effect.fn("AnthropicMessages.lowerMessages")(function* (request: LLMRequest) {
  const messages: AnthropicMessage[] = []

  for (const message of request.messages) {
    if (message.role === "user") {
      const content: AnthropicTextBlock[] = []
      for (const part of message.content) {
        if (part.type !== "text") return yield* invalid(`Anthropic Messages user messages only support text content for now`)
        content.push({ type: "text", text: part.text, cache_control: cacheControl(part.cache) })
      }
      messages.push({ role: "user", content })
      continue
    }

    if (message.role === "assistant") {
      const content: AnthropicAssistantBlock[] = []
      for (const part of message.content) {
        if (part.type === "text") {
          content.push({ type: "text", text: part.text, cache_control: cacheControl(part.cache) })
          continue
        }
        if (part.type === "reasoning") {
          content.push({ type: "thinking", thinking: part.text, signature: part.encrypted })
          continue
        }
        if (part.type === "tool-call") {
          content.push(part.providerExecuted ? lowerServerToolCall(part) : lowerToolCall(part))
          continue
        }
        if (part.type === "tool-result" && part.providerExecuted) {
          content.push(yield* lowerServerToolResult(part))
          continue
        }
        return yield* invalid(`Anthropic Messages assistant messages only support text, reasoning, and tool-call content for now`)
      }
      messages.push({ role: "assistant", content })
      continue
    }

    const content: AnthropicToolResultBlock[] = []
    for (const part of message.content) {
      if (part.type !== "tool-result") return yield* invalid(`Anthropic Messages tool messages only support tool-result content`)
      content.push({
        type: "tool_result",
        tool_use_id: part.id,
        content: ProviderShared.toolResultText(part),
        is_error: part.result.type === "error" ? true : undefined,
      })
    }
    messages.push({ role: "user", content })
  }

  return messages
})

const thinkingBudget = (request: LLMRequest) => {
  if (!request.reasoning?.enabled) return undefined
  if (request.reasoning.effort === "minimal" || request.reasoning.effort === "low") return 1024
  if (request.reasoning.effort === "high") return 16000
  if (request.reasoning.effort === "xhigh") return 24576
  if (request.reasoning.effort === "max") return 32000
  return 8000
}

const prepare = Effect.fn("AnthropicMessages.prepare")(function* (request: LLMRequest) {
  const toolChoice = request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined
  const budget = thinkingBudget(request)
  return {
    model: request.model.id,
    system: request.system.length === 0
      ? undefined
      : request.system.map((part) => ({ type: "text" as const, text: part.text, cache_control: cacheControl(part.cache) })),
    messages: yield* lowerMessages(request),
    tools: request.tools.length === 0 || request.toolChoice?.type === "none" ? undefined : request.tools.map(lowerTool),
    tool_choice: toolChoice,
    stream: true as const,
    max_tokens: request.generation.maxTokens ?? request.model.limits.output ?? 4096,
    temperature: request.generation.temperature,
    top_p: request.generation.topP,
    stop_sequences: request.generation.stop,
    thinking: budget ? { type: "enabled" as const, budget_tokens: budget } : undefined,
  }
})

const mapFinishReason = (reason: string | null | undefined): FinishReason => {
  if (reason === "end_turn" || reason === "stop_sequence" || reason === "pause_turn") return "stop"
  if (reason === "max_tokens") return "length"
  if (reason === "tool_use") return "tool-calls"
  if (reason === "refusal") return "content-filter"
  return "unknown"
}

const mapUsage = (usage: AnthropicUsage | undefined): Usage | undefined => {
  if (!usage) return undefined
  return new Usage({
    inputTokens: usage.input_tokens,
    outputTokens: usage.output_tokens,
    cacheReadInputTokens: usage.cache_read_input_tokens ?? undefined,
    cacheWriteInputTokens: usage.cache_creation_input_tokens ?? undefined,
    totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, undefined),
    native: usage,
  })
}

// Anthropic emits usage on `message_start` and again on `message_delta` — the
// final delta carries the authoritative totals. Right-biased merge: each
// field prefers `right` when defined, falls back to `left`. `totalTokens` is
// recomputed from the merged input/output to stay consistent.
const mergeUsage = (left: Usage | undefined, right: Usage | undefined) => {
  if (!left) return right
  if (!right) return left
  const inputTokens = right.inputTokens ?? left.inputTokens
  const outputTokens = right.outputTokens ?? left.outputTokens
  return new Usage({
    inputTokens,
    outputTokens,
    cacheReadInputTokens: right.cacheReadInputTokens ?? left.cacheReadInputTokens,
    cacheWriteInputTokens: right.cacheWriteInputTokens ?? left.cacheWriteInputTokens,
    totalTokens: ProviderShared.totalTokens(inputTokens, outputTokens, undefined),
    native: { ...left.native, ...right.native },
  })
}
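
// Worked example (illustrative): `message_start` reports
// { input_tokens: 1200, output_tokens: 1 } and the final `message_delta`
// reports { output_tokens: 256 }. The right-biased merge keeps inputTokens
// 1200, takes outputTokens 256, and recomputes totalTokens as 1456.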

const finishToolCall = (tool: ToolAccumulator | undefined) =>
  Effect.gen(function* () {
    if (!tool) return [] as ReadonlyArray<LLMEvent>
    const input = yield* ProviderShared.parseToolInput(ADAPTER, tool.name, tool.input)
    const event: LLMEvent = tool.providerExecuted
      ? { type: "tool-call", id: tool.id, name: tool.name, input, providerExecuted: true }
      : { type: "tool-call", id: tool.id, name: tool.name, input }
    return [event]
  })

// Server tool result blocks come whole in `content_block_start` (no streaming
// delta sequence). We convert the payload to a `tool-result` event with
// `providerExecuted: true`. The runtime appends it to the assistant message
// for round-trip; downstream consumers can inspect `result.value` for the
// structured payload.
const SERVER_TOOL_RESULT_NAMES: Record<AnthropicServerToolResultType, string> = {
  web_search_tool_result: "web_search",
  code_execution_tool_result: "code_execution",
  web_fetch_tool_result: "web_fetch",
}

const isServerToolResultType = (type: string): type is AnthropicServerToolResultType =>
  type in SERVER_TOOL_RESULT_NAMES

const serverToolResultEvent = (block: NonNullable<AnthropicChunk["content_block"]>): LLMEvent | undefined => {
  if (!block.type || !isServerToolResultType(block.type)) return undefined
  const errorPayload =
    typeof block.content === "object" && block.content !== null && "type" in block.content
      ? String((block.content as Record<string, unknown>).type)
      : ""
  const isError = errorPayload.endsWith("_tool_result_error")
  return {
    type: "tool-result",
    id: block.tool_use_id ?? "",
    name: SERVER_TOOL_RESULT_NAMES[block.type],
    result: isError
      ? { type: "error", value: block.content }
      : { type: "json", value: block.content },
    providerExecuted: true,
  }
}

const processChunk = (state: ParserState, chunk: AnthropicChunk) =>
  Effect.gen(function* () {
    if (chunk.type === "message_start") {
      const usage = mapUsage(chunk.message?.usage)
      return [usage ? { ...state, usage: mergeUsage(state.usage, usage) } : state, []] as const
    }

    if (
      chunk.type === "content_block_start" &&
      chunk.index !== undefined &&
      (chunk.content_block?.type === "tool_use" || chunk.content_block?.type === "server_tool_use")
    ) {
      return [{
        ...state,
        tools: {
          ...state.tools,
          [chunk.index]: {
            id: chunk.content_block.id ?? String(chunk.index),
            name: chunk.content_block.name ?? "",
            input: "",
            providerExecuted: chunk.content_block.type === "server_tool_use",
          },
        },
      }, []] as const
    }

    if (chunk.type === "content_block_start" && chunk.content_block?.type === "text" && chunk.content_block.text) {
      return [state, [{ type: "text-delta", text: chunk.content_block.text }]] as const
    }

    if (chunk.type === "content_block_start" && chunk.content_block?.type === "thinking" && chunk.content_block.thinking) {
      return [state, [{ type: "reasoning-delta", text: chunk.content_block.thinking }]] as const
    }

    if (chunk.type === "content_block_start" && chunk.content_block) {
      const event = serverToolResultEvent(chunk.content_block)
      if (event) return [state, [event]] as const
    }

    if (chunk.type === "content_block_delta" && chunk.delta?.type === "text_delta" && chunk.delta.text) {
      return [state, [{ type: "text-delta", text: chunk.delta.text }]] as const
    }

    if (chunk.type === "content_block_delta" && chunk.delta?.type === "thinking_delta" && chunk.delta.thinking) {
      return [state, [{ type: "reasoning-delta", text: chunk.delta.thinking }]] as const
    }

    if (chunk.type === "content_block_delta" && chunk.delta?.type === "input_json_delta" && chunk.index !== undefined) {
      if (!chunk.delta.partial_json) return [state, []] as const
      const current = state.tools[chunk.index]
      if (!current) {
        return yield* ProviderShared.chunkError(ADAPTER, "Anthropic Messages tool argument delta is missing its tool call")
      }
      const next = { ...current, input: `${current.input}${chunk.delta.partial_json}` }
      return [{ ...state, tools: { ...state.tools, [chunk.index]: next } }, [
        { type: "tool-input-delta" as const, id: next.id, name: next.name, text: chunk.delta.partial_json },
      ]] as const
    }

    if (chunk.type === "content_block_stop" && chunk.index !== undefined) {
      const events = yield* finishToolCall(state.tools[chunk.index])
      const { [chunk.index]: _, ...tools } = state.tools
      return [{ ...state, tools }, events] as const
    }

    if (chunk.type === "message_delta") {
      const usage = mergeUsage(state.usage, mapUsage(chunk.usage))
      return [{ ...state, usage }, [{ type: "request-finish" as const, reason: mapFinishReason(chunk.delta?.stop_reason), usage }]] as const
    }

    if (chunk.type === "error") {
      return [state, [{ type: "provider-error" as const, message: chunk.error?.message ?? "Anthropic Messages stream error" }]] as const
    }

    return [state, []] as const
  })
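
// Worked trace (illustrative): a minimal text-only stream
//   message_start        { message: { usage: { input_tokens: 1200, output_tokens: 1 } } }
//   content_block_start  { index: 0, content_block: { type: "text" } }
//   content_block_delta  { index: 0, delta: { type: "text_delta", text: "Hi" } }
//   content_block_stop   { index: 0 }
//   message_delta        { delta: { stop_reason: "end_turn" }, usage: { output_tokens: 5 } }
// emits one `text-delta` ("Hi") followed by `request-finish` with reason
// "stop" and the merged usage { inputTokens: 1200, outputTokens: 5 }.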

/**
 * The Anthropic Messages protocol — request lowering, target validation,
 * body encoding, and the streaming-chunk state machine. Used by native
 * Anthropic Cloud and (once registered) Vertex Anthropic / Bedrock-hosted
 * Anthropic passthrough.
 */
export const protocol = Protocol.define<
  AnthropicMessagesDraft,
  AnthropicMessagesTarget,
  string,
  AnthropicChunk,
  ParserState
>({
  id: "anthropic-messages",
  prepare,
  validate: ProviderShared.validateWith(decodeTarget),
  encode: encodeTarget,
  redact: (target) => target,
  decode: decodeChunk,
  initial: () => ({ tools: {} }),
  process: processChunk,
  streamReadError: "Failed to read Anthropic Messages stream",
})

export const adapter = Adapter.fromProtocol({
  id: ADAPTER,
  protocol,
  endpoint: Endpoint.baseURL({ default: "https://api.anthropic.com/v1", path: "/messages" }),
  auth: Auth.apiKeyHeader("x-api-key"),
  framing: Framing.sse,
  headers: () => ({ "anthropic-version": "2023-06-01" }),
})

export const model = (input: AnthropicMessagesModelInput) =>
  llmModel({
    ...input,
    provider: "anthropic",
    protocol: "anthropic-messages",
    capabilities: input.capabilities ?? capabilities({
      output: { reasoning: true },
      tools: { calls: true, streamingInput: true },
      cache: { prompt: true, contentBlocks: true },
      reasoning: { efforts: ["low", "medium", "high", "xhigh", "max"], summaries: false, encryptedContent: true },
    }),
  })

export * as AnthropicMessages from "./anthropic-messages"
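
// Illustrative sketch (assumption, not part of this diff): the HTTP request
// the adapter composition above works out to, written as a plain fetch call.
// Each axis owns one concern: Endpoint the URL, Auth the x-api-key header,
// the headers() hook the API version pin, Framing the SSE response parsing.
declare const apiKey: string // hypothetical variable holding the Anthropic key
declare const target: AnthropicMessagesTarget // a validated target
const send = () =>
  fetch("https://api.anthropic.com/v1/messages", {
    method: "POST",
    headers: {
      "content-type": "application/json",
      "x-api-key": apiKey,
      "anthropic-version": "2023-06-01",
    },
    body: protocol.encode(target),
  })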

packages/llm/src/provider/anthropic.ts (Normal file, 5 lines)
@@ -0,0 +1,5 @@
import { ProviderResolver } from "../provider-resolver"

export const resolver = ProviderResolver.fixed("anthropic", "anthropic-messages")

export * as Anthropic from "./anthropic"

packages/llm/src/provider/azure.ts (Normal file, 27 lines)
@@ -0,0 +1,27 @@
import { ProviderResolver } from "../provider-resolver"
import { ProviderID } from "../schema"

export const id = ProviderID.make("azure")

const stringOption = (options: Record<string, unknown>, key: string) => {
  const value = options[key]
  if (typeof value === "string" && value.trim() !== "") return value
  return undefined
}

const baseURL = (options: Record<string, unknown>) => {
  const resource = stringOption(options, "resourceName")
  if (!resource) return undefined
  return `https://${resource}.openai.azure.com/openai/v1`
}

export const resolver = ProviderResolver.define({
  id,
  resolve: (input) =>
    ProviderResolver.make(id, input.options.useCompletionUrls === true ? "openai-chat" : "openai-responses", {
      baseURL: baseURL(input.options),
      queryParams: { "api-version": stringOption(input.options, "apiVersion") ?? "v1" },
    }),
})

export * as Azure from "./azure"
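
// Illustrative sketch (assumption, not part of this diff): option-driven
// resolution. `resourceName` ("my-resource" is a made-up value) selects the
// endpoint, `useCompletionUrls` flips the protocol, `apiVersion` defaults to "v1".
const resolution = resolver.resolve(
  ProviderResolver.input("gpt-4o", "azure", { resourceName: "my-resource" }),
)
// resolution?.protocol === "openai-responses"
// resolution?.baseURL === "https://my-resource.openai.azure.com/openai/v1"
// resolution?.queryParams?.["api-version"] === "v1"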

packages/llm/src/provider/bedrock-converse.ts (Normal file, 854 lines)
@@ -0,0 +1,854 @@
import { EventStreamCodec } from "@smithy/eventstream-codec"
import { fromUtf8, toUtf8 } from "@smithy/util-utf8"
import { AwsV4Signer } from "aws4fetch"
import { Effect, Option, Schema, Stream } from "effect"
import { Adapter } from "../adapter"
import { Auth } from "../auth"
import { Endpoint } from "../endpoint"
import type { Framing } from "../framing"
import { capabilities, model as llmModel, type ModelInput } from "../llm"
import { Protocol } from "../protocol"
import {
  Usage,
  type CacheHint,
  type FinishReason,
  type LLMEvent,
  type LLMRequest,
  type MediaPart,
  type ProviderChunkError,
  type ToolCallPart,
  type ToolDefinition,
  type ToolResultPart,
} from "../schema"
import { ProviderShared } from "./shared"

const ADAPTER = "bedrock-converse"

/**
 * AWS credentials for SigV4 signing. Bedrock also supports Bearer API key auth
 * — pass the key as `model.headers.authorization = "Bearer <key>"` to take that
 * path instead. STS-vended credentials should be refreshed by the consumer
 * (rebuild the model) before they expire; the adapter does not refresh.
 */
export interface BedrockCredentials {
  readonly region: string
  readonly accessKeyId: string
  readonly secretAccessKey: string
  readonly sessionToken?: string
}

export type BedrockConverseModelInput = Omit<ModelInput, "provider" | "protocol" | "headers"> & {
  /**
   * Bearer API key (Bedrock's newer API key auth). Sets the `Authorization`
   * header and bypasses SigV4 signing. Mutually exclusive with `credentials`.
   */
  readonly apiKey?: string
  /**
   * AWS credentials for SigV4 signing. The adapter signs each request at
   * `toHttp` time using `aws4fetch`. Mutually exclusive with `apiKey`.
   */
  readonly credentials?: BedrockCredentials
  readonly headers?: Record<string, string>
}

const BedrockTextBlock = Schema.Struct({
  text: Schema.String,
})
type BedrockTextBlock = Schema.Schema.Type<typeof BedrockTextBlock>

const BedrockToolUseBlock = Schema.Struct({
  toolUse: Schema.Struct({
    toolUseId: Schema.String,
    name: Schema.String,
    input: Schema.Unknown,
  }),
})
type BedrockToolUseBlock = Schema.Schema.Type<typeof BedrockToolUseBlock>

const BedrockToolResultContentItem = Schema.Union([
  Schema.Struct({ text: Schema.String }),
  Schema.Struct({ json: Schema.Unknown }),
])

const BedrockToolResultBlock = Schema.Struct({
  toolResult: Schema.Struct({
    toolUseId: Schema.String,
    content: Schema.Array(BedrockToolResultContentItem),
    status: Schema.optional(Schema.Literals(["success", "error"])),
  }),
})
type BedrockToolResultBlock = Schema.Schema.Type<typeof BedrockToolResultBlock>

const BedrockReasoningBlock = Schema.Struct({
  reasoningContent: Schema.Struct({
    reasoningText: Schema.optional(
      Schema.Struct({
        text: Schema.String,
        signature: Schema.optional(Schema.String),
      }),
    ),
  }),
})

// Image block. Bedrock Converse accepts `format` as the file extension and
// `source.bytes` as a base64 string (binary upload via base64 in the JSON
// wire format). Supported formats per the Converse docs: png, jpeg, gif, webp.
const BedrockImageFormat = Schema.Literals(["png", "jpeg", "gif", "webp"])
type BedrockImageFormat = Schema.Schema.Type<typeof BedrockImageFormat>
const BedrockImageBlock = Schema.Struct({
  image: Schema.Struct({
    format: BedrockImageFormat,
    source: Schema.Struct({ bytes: Schema.String }),
  }),
})
type BedrockImageBlock = Schema.Schema.Type<typeof BedrockImageBlock>

// Document block. Required `name` is the user-facing filename so the model
// can reference it. Supported formats per the Converse docs: pdf, csv, doc,
// docx, xls, xlsx, html, txt, md.
const BedrockDocumentFormat = Schema.Literals([
  "pdf",
  "csv",
  "doc",
  "docx",
  "xls",
  "xlsx",
  "html",
  "txt",
  "md",
])
type BedrockDocumentFormat = Schema.Schema.Type<typeof BedrockDocumentFormat>
const BedrockDocumentBlock = Schema.Struct({
  document: Schema.Struct({
    format: BedrockDocumentFormat,
    name: Schema.String,
    source: Schema.Struct({ bytes: Schema.String }),
  }),
})
type BedrockDocumentBlock = Schema.Schema.Type<typeof BedrockDocumentBlock>

// Cache breakpoint marker. Inserted positionally between content blocks (or
// after a system text / tool spec) to mark the prefix as cacheable. Bedrock
// Converse currently exposes `default` as the only cache-point type.
const BedrockCachePointBlock = Schema.Struct({
  cachePoint: Schema.Struct({ type: Schema.Literal("default") }),
})
type BedrockCachePointBlock = Schema.Schema.Type<typeof BedrockCachePointBlock>

const BedrockUserBlock = Schema.Union([
  BedrockTextBlock,
  BedrockImageBlock,
  BedrockDocumentBlock,
  BedrockToolResultBlock,
  BedrockCachePointBlock,
])
type BedrockUserBlock = Schema.Schema.Type<typeof BedrockUserBlock>

const BedrockAssistantBlock = Schema.Union([
  BedrockTextBlock,
  BedrockReasoningBlock,
  BedrockToolUseBlock,
  BedrockCachePointBlock,
])
type BedrockAssistantBlock = Schema.Schema.Type<typeof BedrockAssistantBlock>

const BedrockMessage = Schema.Union([
  Schema.Struct({ role: Schema.Literal("user"), content: Schema.Array(BedrockUserBlock) }),
  Schema.Struct({ role: Schema.Literal("assistant"), content: Schema.Array(BedrockAssistantBlock) }),
])
type BedrockMessage = Schema.Schema.Type<typeof BedrockMessage>

const BedrockSystemBlock = Schema.Union([BedrockTextBlock, BedrockCachePointBlock])
type BedrockSystemBlock = Schema.Schema.Type<typeof BedrockSystemBlock>

const BedrockTool = Schema.Struct({
  toolSpec: Schema.Struct({
    name: Schema.String,
    description: Schema.String,
    inputSchema: Schema.Struct({
      json: Schema.Record(Schema.String, Schema.Unknown),
    }),
  }),
})
type BedrockTool = Schema.Schema.Type<typeof BedrockTool>

const BedrockToolChoice = Schema.Union([
  Schema.Struct({ auto: Schema.Struct({}) }),
  Schema.Struct({ any: Schema.Struct({}) }),
  Schema.Struct({ tool: Schema.Struct({ name: Schema.String }) }),
])

const BedrockTargetFields = {
  modelId: Schema.String,
  messages: Schema.Array(BedrockMessage),
  system: Schema.optional(Schema.Array(BedrockSystemBlock)),
  inferenceConfig: Schema.optional(
    Schema.Struct({
      maxTokens: Schema.optional(Schema.Number),
      temperature: Schema.optional(Schema.Number),
      topP: Schema.optional(Schema.Number),
      stopSequences: Schema.optional(Schema.Array(Schema.String)),
    }),
  ),
  toolConfig: Schema.optional(
    Schema.Struct({
      tools: Schema.Array(BedrockTool),
      toolChoice: Schema.optional(BedrockToolChoice),
    }),
  ),
  additionalModelRequestFields: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}
const BedrockConverseDraft = Schema.Struct(BedrockTargetFields)
type BedrockConverseDraft = Schema.Schema.Type<typeof BedrockConverseDraft>
const BedrockConverseTarget = Schema.Struct(BedrockTargetFields)
export type BedrockConverseTarget = Schema.Schema.Type<typeof BedrockConverseTarget>

const BedrockUsageSchema = Schema.Struct({
  inputTokens: Schema.optional(Schema.Number),
  outputTokens: Schema.optional(Schema.Number),
  totalTokens: Schema.optional(Schema.Number),
  cacheReadInputTokens: Schema.optional(Schema.Number),
  cacheWriteInputTokens: Schema.optional(Schema.Number),
})
type BedrockUsageSchema = Schema.Schema.Type<typeof BedrockUsageSchema>

// Streaming chunk shape — the AWS event stream wraps each JSON payload by its
// `:event-type` header (e.g. `messageStart`, `contentBlockDelta`). We
// reconstruct that wrapping in `decodeFrames` below so the chunk schema can
// stay a plain discriminated record.
const BedrockChunk = Schema.Struct({
  messageStart: Schema.optional(Schema.Struct({ role: Schema.String })),
  contentBlockStart: Schema.optional(
    Schema.Struct({
      contentBlockIndex: Schema.Number,
      start: Schema.optional(
        Schema.Struct({
          toolUse: Schema.optional(
            Schema.Struct({ toolUseId: Schema.String, name: Schema.String }),
          ),
        }),
      ),
    }),
  ),
  contentBlockDelta: Schema.optional(
    Schema.Struct({
      contentBlockIndex: Schema.Number,
      delta: Schema.optional(
        Schema.Struct({
          text: Schema.optional(Schema.String),
          toolUse: Schema.optional(Schema.Struct({ input: Schema.String })),
          reasoningContent: Schema.optional(
            Schema.Struct({
              text: Schema.optional(Schema.String),
              signature: Schema.optional(Schema.String),
            }),
          ),
        }),
      ),
    }),
  ),
  contentBlockStop: Schema.optional(Schema.Struct({ contentBlockIndex: Schema.Number })),
  messageStop: Schema.optional(
    Schema.Struct({
      stopReason: Schema.String,
      additionalModelResponseFields: Schema.optional(Schema.Unknown),
    }),
  ),
  metadata: Schema.optional(
    Schema.Struct({
      usage: Schema.optional(BedrockUsageSchema),
      metrics: Schema.optional(Schema.Unknown),
    }),
  ),
  internalServerException: Schema.optional(Schema.Struct({ message: Schema.String })),
  modelStreamErrorException: Schema.optional(Schema.Struct({ message: Schema.String })),
  validationException: Schema.optional(Schema.Struct({ message: Schema.String })),
  throttlingException: Schema.optional(Schema.Struct({ message: Schema.String })),
  serviceUnavailableException: Schema.optional(Schema.Struct({ message: Schema.String })),
})
type BedrockChunk = Schema.Schema.Type<typeof BedrockChunk>

// The eventstream codec already gives us a UTF-8 payload that we parse once
// per frame; we then wrap it under the `:event-type` key and hand the parsed
// object to `decodeChunkSync`. This keeps a single JSON parse per frame —
// avoid `Schema.fromJsonString` here which would add an extra decode/encode
// roundtrip.
const decodeChunkSync = Schema.decodeUnknownSync(BedrockChunk)

const decodeChunk = (data: unknown) =>
  Effect.try({
    try: () => decodeChunkSync(data),
    catch: () =>
      ProviderShared.chunkError(
        ADAPTER,
        "Invalid Bedrock Converse stream chunk",
        typeof data === "string" ? data : ProviderShared.encodeJson(data),
      ),
  })

const encodeTarget = Schema.encodeSync(Schema.fromJsonString(BedrockConverseTarget))
const decodeTarget = Schema.decodeUnknownEffect(BedrockConverseDraft.pipe(Schema.decodeTo(BedrockConverseTarget)))

const invalid = ProviderShared.invalidRequest

const region = (request: LLMRequest) => {
  const fromNative = request.model.native?.aws_region
  if (typeof fromNative === "string" && fromNative !== "") return fromNative
  return "us-east-1"
}

const lowerTool = (tool: ToolDefinition): BedrockTool => ({
  toolSpec: {
    name: tool.name,
    description: tool.description,
    inputSchema: { json: tool.inputSchema },
  },
})

// Bedrock cache markers are positional — emit a `cachePoint` block right after
// the content the caller wants treated as a cacheable prefix. Bedrock currently
// exposes one cache-point type (`default`); both `ephemeral` and `persistent`
// hints from the common `CacheHint` shape map onto it. Other cache-hint types
// (none today) would need explicit handling.
//
// TODO: Bedrock recently added optional `ttl: "5m" | "1h"` on cachePoint —
// once we have a recorded cassette to validate the wire shape, map
// `CacheHint.ttlSeconds` here.
const CACHE_POINT_DEFAULT: BedrockCachePointBlock = { cachePoint: { type: "default" } }

const cachePointBlock = (cache: CacheHint | undefined): BedrockCachePointBlock | undefined => {
  if (cache?.type !== "ephemeral" && cache?.type !== "persistent") return undefined
  return CACHE_POINT_DEFAULT
}

// Emit a text block followed by an optional positional cache marker. Used by
// system, user-text, and assistant-text lowering — all three share the same
// "push text, push cachePoint if cache hint is present" shape. The return type
// is the lowest common denominator (text | cachePoint) so callers can spread
// it into any of the three block-union arrays.
const textWithCache = (
  text: string,
  cache: CacheHint | undefined,
): Array<BedrockTextBlock | BedrockCachePointBlock> => {
  const cachePoint = cachePointBlock(cache)
  return cachePoint ? [{ text }, cachePoint] : [{ text }]
}
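
// Worked example (illustrative): an ephemeral cache hint lowers into a text
// block followed by the positional marker, making the prefix cacheable:
//   textWithCache("You are terse.", { type: "ephemeral" })
//   // => [{ text: "You are terse." }, { cachePoint: { type: "default" } }]
// Without a hint the marker is omitted: [{ text: "You are terse." }]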

// MIME type → Bedrock format mapping. Bedrock distinguishes image vs document
// by the top-level block type, not the mediaType, so `lowerMedia` routes by
// the `image/` prefix and the leaf functions look up the format. `image/jpg`
// is included as a non-standard alias commonly seen in user-supplied data.
const IMAGE_FORMATS = {
  "image/png": "png",
  "image/jpeg": "jpeg",
  "image/jpg": "jpeg",
  "image/gif": "gif",
  "image/webp": "webp",
} as const satisfies Record<string, BedrockImageFormat>

const DOCUMENT_FORMATS = {
  "application/pdf": "pdf",
  "text/csv": "csv",
  "application/msword": "doc",
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
  "application/vnd.ms-excel": "xls",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
  "text/html": "html",
  "text/plain": "txt",
  "text/markdown": "md",
} as const satisfies Record<string, BedrockDocumentFormat>

// Bedrock document blocks require a name; default to the filename if the
// caller supplied one, otherwise generate a stable placeholder so the model
// still sees a valid block.
const lowerImage = (part: MediaPart, mime: string) => {
  const format = IMAGE_FORMATS[mime as keyof typeof IMAGE_FORMATS]
  if (!format) return invalid(`Bedrock Converse does not support image media type ${part.mediaType}`)
  return Effect.succeed<BedrockImageBlock>({
    image: { format, source: { bytes: ProviderShared.mediaBytes(part) } },
  })
}

const lowerDocument = (part: MediaPart, mime: string) => {
  const format = DOCUMENT_FORMATS[mime as keyof typeof DOCUMENT_FORMATS]
  if (!format) return invalid(`Bedrock Converse does not support document media type ${part.mediaType}`)
  return Effect.succeed<BedrockDocumentBlock>({
    document: {
      format,
      name: part.filename ?? `document.${format}`,
      source: { bytes: ProviderShared.mediaBytes(part) },
    },
  })
}

const lowerMedia = (part: MediaPart) => {
  const mime = part.mediaType.toLowerCase()
  return mime.startsWith("image/") ? lowerImage(part, mime) : lowerDocument(part, mime)
}

const lowerToolChoice = Effect.fn("BedrockConverse.lowerToolChoice")(function* (
  toolChoice: NonNullable<LLMRequest["toolChoice"]>,
) {
  if (toolChoice.type === "none") return undefined
  if (toolChoice.type === "required") return { any: {} } as const
  if (toolChoice.type !== "tool") return { auto: {} } as const
  if (!toolChoice.name) return yield* invalid("Bedrock Converse tool choice requires a tool name")
  return { tool: { name: toolChoice.name } } as const
})

const lowerToolCall = (part: ToolCallPart): BedrockToolUseBlock => ({
  toolUse: {
    toolUseId: part.id,
    name: part.name,
    input: part.input,
  },
})

const lowerToolResult = (part: ToolResultPart): BedrockToolResultBlock => ({
  toolResult: {
    toolUseId: part.id,
    content:
      part.result.type === "text" || part.result.type === "error"
        ? [{ text: String(part.result.value) }]
        : [{ json: part.result.value }],
    status: part.result.type === "error" ? "error" : "success",
  },
})

const lowerMessages = Effect.fn("BedrockConverse.lowerMessages")(function* (request: LLMRequest) {
  const messages: BedrockMessage[] = []

  for (const message of request.messages) {
    if (message.role === "user") {
      const content: BedrockUserBlock[] = []
      for (const part of message.content) {
        if (part.type === "text") {
          content.push(...textWithCache(part.text, part.cache))
          continue
        }
        if (part.type === "media") {
          content.push(yield* lowerMedia(part))
          continue
        }
        return yield* invalid("Bedrock Converse user messages only support text and media content for now")
      }
      messages.push({ role: "user", content })
      continue
    }

    if (message.role === "assistant") {
      const content: BedrockAssistantBlock[] = []
      for (const part of message.content) {
        if (part.type === "text") {
          content.push(...textWithCache(part.text, part.cache))
          continue
        }
        if (part.type === "reasoning") {
          content.push({
            reasoningContent: {
              reasoningText: { text: part.text, signature: part.encrypted },
            },
          })
          continue
        }
        if (part.type === "tool-call") {
          content.push(lowerToolCall(part))
          continue
        }
        return yield* invalid("Bedrock Converse assistant messages only support text, reasoning, and tool-call content for now")
      }
      messages.push({ role: "assistant", content })
      continue
    }

    const content: BedrockToolResultBlock[] = []
    for (const part of message.content) {
      if (part.type !== "tool-result")
        return yield* invalid("Bedrock Converse tool messages only support tool-result content")
      content.push(lowerToolResult(part))
    }
    messages.push({ role: "user", content })
  }

  return messages
})

// System prompts share the cache-point convention: emit the text block, then
// optionally a positional `cachePoint` marker.
const lowerSystem = (system: ReadonlyArray<LLMRequest["system"][number]>): BedrockSystemBlock[] =>
  system.flatMap((part) => textWithCache(part.text, part.cache))

const prepare = Effect.fn("BedrockConverse.prepare")(function* (request: LLMRequest) {
  const toolChoice = request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined
  return {
    modelId: request.model.id,
    messages: yield* lowerMessages(request),
    system: request.system.length === 0 ? undefined : lowerSystem(request.system),
    inferenceConfig:
      request.generation.maxTokens === undefined &&
      request.generation.temperature === undefined &&
      request.generation.topP === undefined &&
      (request.generation.stop === undefined || request.generation.stop.length === 0)
        ? undefined
        : {
            maxTokens: request.generation.maxTokens,
            temperature: request.generation.temperature,
            topP: request.generation.topP,
            stopSequences: request.generation.stop,
          },
    toolConfig:
      request.tools.length > 0 && request.toolChoice?.type !== "none"
        ? { tools: request.tools.map(lowerTool), toolChoice }
        : undefined,
  }
})

// Credentials live on `model.native.aws_credentials` so the OpenCode bridge
// can resolve them via `@aws-sdk/credential-providers` and stuff them in
// without exposing the auth machinery to the rest of the LLM core. Schema
// decode keeps this boundary honest — anything that doesn't match the shape
// is treated as "no credentials".
const NativeCredentials = Schema.Struct({
  accessKeyId: Schema.String,
  secretAccessKey: Schema.String,
  region: Schema.optional(Schema.String),
  sessionToken: Schema.optional(Schema.String),
})
const decodeNativeCredentials = Schema.decodeUnknownOption(NativeCredentials)

const credentialsFromInput = (request: LLMRequest): BedrockCredentials | undefined =>
  decodeNativeCredentials(request.model.native?.aws_credentials).pipe(
    Option.map((creds) => ({ ...creds, region: creds.region ?? region(request) })),
    Option.getOrUndefined,
  )

const signRequest = (input: {
  readonly url: string
  readonly body: string
  readonly headers: Record<string, string>
  readonly credentials: BedrockCredentials
}) =>
  Effect.tryPromise({
    try: async () => {
      const signed = await new AwsV4Signer({
        url: input.url,
        method: "POST",
        headers: Object.entries(input.headers),
        body: input.body,
        region: input.credentials.region,
        accessKeyId: input.credentials.accessKeyId,
        secretAccessKey: input.credentials.secretAccessKey,
        sessionToken: input.credentials.sessionToken,
        service: "bedrock",
      }).sign()
      return Object.fromEntries(signed.headers.entries())
    },
    catch: (error) =>
      invalid(`Bedrock Converse SigV4 signing failed: ${error instanceof Error ? error.message : String(error)}`),
  })

/**
 * Bedrock auth. `model.apiKey` (Bedrock's newer Bearer API key auth) wins if
 * set; otherwise we sign the request with SigV4 using AWS credentials from
 * `model.native.aws_credentials`. SigV4 must sign the exact bytes that get
 * sent, so the `content-type: application/json` header is included in the
 * signing input — `jsonPost` then sets the same value below and the signature
 * stays valid.
 */
const auth: Auth = (input) => {
  if (input.request.model.apiKey) return Auth.bearer(input)
  return Effect.gen(function* () {
    const credentials = credentialsFromInput(input.request)
    if (!credentials) {
      return yield* invalid(
        "Bedrock Converse requires either model.apiKey or AWS credentials in model.native.aws_credentials",
      )
    }
    const headersForSigning = { ...input.headers, "content-type": "application/json" }
    const signed = yield* signRequest({ url: input.url, body: input.body, headers: headersForSigning, credentials })
    return { ...headersForSigning, ...signed }
  })
}
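
// Illustrative precedence (not in the diff):
//   model.apiKey set                      -> Auth.bearer: Authorization header, no signing
//   model.native.aws_credentials decodes  -> SigV4: signed headers merged over the base set
//   neither                               -> invalid-request error before any network call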

const mapFinishReason = (reason: string): FinishReason => {
  if (reason === "end_turn" || reason === "stop_sequence") return "stop"
  if (reason === "max_tokens") return "length"
  if (reason === "tool_use") return "tool-calls"
  if (reason === "content_filtered" || reason === "guardrail_intervened") return "content-filter"
  return "unknown"
}

const mapUsage = (usage: BedrockUsageSchema | undefined): Usage | undefined => {
  if (!usage) return undefined
  return new Usage({
    inputTokens: usage.inputTokens,
    outputTokens: usage.outputTokens,
    totalTokens: ProviderShared.totalTokens(usage.inputTokens, usage.outputTokens, usage.totalTokens),
    cacheReadInputTokens: usage.cacheReadInputTokens,
    cacheWriteInputTokens: usage.cacheWriteInputTokens,
    native: usage,
  })
}

interface ParserState {
  readonly tools: Record<number, ProviderShared.ToolAccumulator>
  // Bedrock splits the finish into `messageStop` (carries `stopReason`) and
  // `metadata` (carries usage). The raw stop reason is held here until
  // `metadata` arrives, then mapped + emitted together as a single terminal
  // `request-finish` event so consumers see one event with both.
  readonly pendingStopReason: string | undefined
}

const finishToolCall = (tool: ProviderShared.ToolAccumulator | undefined) =>
  Effect.gen(function* () {
    if (!tool) return [] as ReadonlyArray<LLMEvent>
    const input = yield* ProviderShared.parseToolInput(ADAPTER, tool.name, tool.input)
    return [{ type: "tool-call" as const, id: tool.id, name: tool.name, input }]
  })

const processChunk = (state: ParserState, chunk: BedrockChunk) =>
  Effect.gen(function* () {
    if (chunk.contentBlockStart?.start?.toolUse) {
      const index = chunk.contentBlockStart.contentBlockIndex
      return [
        {
          ...state,
          tools: {
            ...state.tools,
            [index]: {
              id: chunk.contentBlockStart.start.toolUse.toolUseId,
              name: chunk.contentBlockStart.start.toolUse.name,
              input: "",
            },
          },
        },
        [],
      ] as const
    }

    if (chunk.contentBlockDelta?.delta?.text) {
      return [state, [{ type: "text-delta" as const, text: chunk.contentBlockDelta.delta.text }]] as const
    }

    if (chunk.contentBlockDelta?.delta?.reasoningContent?.text) {
      return [
        state,
        [{ type: "reasoning-delta" as const, text: chunk.contentBlockDelta.delta.reasoningContent.text }],
      ] as const
    }

    if (chunk.contentBlockDelta?.delta?.toolUse) {
      const index = chunk.contentBlockDelta.contentBlockIndex
      const current = state.tools[index]
      if (!current) {
        return yield* ProviderShared.chunkError(ADAPTER, "Bedrock Converse tool delta is missing its tool call")
      }
      const next = { ...current, input: `${current.input}${chunk.contentBlockDelta.delta.toolUse.input}` }
      return [
        { ...state, tools: { ...state.tools, [index]: next } },
        [
          {
            type: "tool-input-delta" as const,
            id: next.id,
            name: next.name,
            text: chunk.contentBlockDelta.delta.toolUse.input,
          },
        ],
      ] as const
    }

    if (chunk.contentBlockStop) {
      const events = yield* finishToolCall(state.tools[chunk.contentBlockStop.contentBlockIndex])
      const { [chunk.contentBlockStop.contentBlockIndex]: _, ...tools } = state.tools
      return [{ ...state, tools }, events] as const
    }

    if (chunk.messageStop) {
      // Stash the reason — emit `request-finish` once `metadata` arrives with
      // usage, so consumers see one terminal event carrying both. If metadata
      // never arrives the `onHalt` fallback emits a usage-less finish.
      return [{ ...state, pendingStopReason: chunk.messageStop.stopReason }, []] as const
    }

    if (chunk.metadata) {
      const reason = state.pendingStopReason ? mapFinishReason(state.pendingStopReason) : "stop"
      const usage = mapUsage(chunk.metadata.usage)
      return [
        { ...state, pendingStopReason: undefined },
        [{ type: "request-finish" as const, reason, usage }],
      ] as const
    }

    if (chunk.internalServerException || chunk.modelStreamErrorException || chunk.serviceUnavailableException) {
      const message =
        chunk.internalServerException?.message ??
        chunk.modelStreamErrorException?.message ??
        chunk.serviceUnavailableException?.message ??
        "Bedrock Converse stream error"
      return [state, [{ type: "provider-error" as const, message, retryable: true }]] as const
    }

    if (chunk.validationException || chunk.throttlingException) {
      const message =
        chunk.validationException?.message ?? chunk.throttlingException?.message ?? "Bedrock Converse error"
      return [
        state,
        [{ type: "provider-error" as const, message, retryable: chunk.throttlingException !== undefined }],
      ] as const
    }

    return [state, []] as const
  })
|
||||
|
||||
// Bedrock streams responses using the AWS event stream binary protocol — each
|
||||
// frame is `[length:4][headers-length:4][prelude-crc:4][headers][payload][crc:4]`.
|
||||
// We use `@smithy/eventstream-codec` to validate framing and CRCs, then
|
||||
// reconstruct the JSON wrapping by `:event-type` so the chunk schema can match.
|
||||
const eventCodec = new EventStreamCodec(toUtf8, fromUtf8)
|
||||
const utf8 = new TextDecoder()
|
||||
|
||||
// Cursor-tracking buffer state. Bytes accumulate in `buffer`; `offset` is the
|
||||
// read position. Reading by `subarray` is zero-copy. We only allocate a fresh
|
||||
// buffer when (a) a new network chunk arrives and we need to append, or (b)
|
||||
// the consumed prefix is more than half the buffer (compaction).
|
||||
interface FrameBufferState {
|
||||
readonly buffer: Uint8Array
|
||||
readonly offset: number
|
||||
}
|
||||
|
||||
const initialFrameBuffer: FrameBufferState = { buffer: new Uint8Array(0), offset: 0 }
|
||||
|
||||
const appendChunk = (state: FrameBufferState, chunk: Uint8Array): FrameBufferState => {
|
||||
const remaining = state.buffer.length - state.offset
|
||||
// Compact: drop the consumed prefix and append the new chunk in one alloc.
|
||||
// This bounds buffer growth to at most one network chunk past the live
|
||||
// window, regardless of stream length.
|
||||
const next = new Uint8Array(remaining + chunk.length)
|
||||
next.set(state.buffer.subarray(state.offset), 0)
|
||||
next.set(chunk, remaining)
|
||||
return { buffer: next, offset: 0 }
|
||||
}
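
// Worked example (hypothetical byte counts, not from this diff): with a
// 10-byte buffer and offset 6, appending a 4-byte network chunk copies the
// 4 unread bytes to the front of a fresh 8-byte buffer, appends the chunk
// after them, and resets the offset to 0, so the consumed prefix is dropped
// in the same allocation that grows the window.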

const consumeFrames = (state: FrameBufferState, chunk: Uint8Array) =>
  Effect.gen(function* () {
    let cursor = appendChunk(state, chunk)
    const out: object[] = []
    while (cursor.buffer.length - cursor.offset >= 4) {
      const view = cursor.buffer.subarray(cursor.offset)
      const totalLength = new DataView(view.buffer, view.byteOffset, view.byteLength).getUint32(0, false)
      if (view.length < totalLength) break

      const decoded = yield* Effect.try({
        try: () => eventCodec.decode(view.subarray(0, totalLength)),
        catch: (error) =>
          ProviderShared.chunkError(
            ADAPTER,
            `Failed to decode Bedrock Converse event-stream frame: ${
              error instanceof Error ? error.message : String(error)
            }`,
          ),
      })
      cursor = { buffer: cursor.buffer, offset: cursor.offset + totalLength }

      if (decoded.headers[":message-type"]?.value !== "event") continue
      const eventType = decoded.headers[":event-type"]?.value
      if (typeof eventType !== "string") continue
      const payload = utf8.decode(decoded.body)
      if (!payload) continue
      // The AWS event stream pads short payloads with a `p` field. Drop it
      // before handing the object to the chunk schema. JSON decode goes
      // through the shared Schema-driven codec to satisfy the package rule
      // against ad-hoc `JSON.parse` calls.
      const parsed = (yield* ProviderShared.parseJson(
        ADAPTER,
        payload,
        "Failed to parse Bedrock Converse event-stream payload",
      )) as Record<string, unknown>
      delete parsed.p
      out.push({ [eventType]: parsed })
    }
    return [cursor, out] as const
  })

/**
 * AWS event-stream framing for Bedrock Converse. Each frame is decoded by
 * `@smithy/eventstream-codec` (length + header + payload + CRC) and rewrapped
 * under its `:event-type` header so the chunk schema can match the JSON
 * payload directly. Reusable for any AWS service that wraps JSON payloads in
 * event-stream frames keyed by `:event-type`.
 */
const framing: Framing<object> = {
  id: "aws-event-stream",
  frame: (bytes) => bytes.pipe(Stream.mapAccumEffect(() => initialFrameBuffer, consumeFrames)),
}

// If a stream ends after `messageStop` but before `metadata` (rare but
// possible on truncated transports), still surface a terminal finish.
const onHalt = (state: ParserState): ReadonlyArray<LLMEvent> =>
  state.pendingStopReason
    ? [{ type: "request-finish", reason: mapFinishReason(state.pendingStopReason) }]
    : []

/**
 * The Bedrock Converse protocol — request lowering, target validation,
 * body encoding, and the streaming-chunk state machine.
 */
export const protocol = Protocol.define<
  BedrockConverseDraft,
  BedrockConverseTarget,
  object,
  BedrockChunk,
  ParserState
>({
  id: "bedrock-converse",
  prepare,
  validate: ProviderShared.validateWith(decodeTarget),
  encode: encodeTarget,
  redact: (target) => target,
  decode: decodeChunk,
  initial: () => ({ tools: {}, pendingStopReason: undefined }),
  process: processChunk,
  onHalt,
  streamReadError: "Failed to read Bedrock Converse stream",
})

export const adapter = Adapter.fromProtocol({
  id: ADAPTER,
  protocol,
  endpoint: Endpoint.baseURL({
    // Bedrock's URL embeds the region in the host and the validated modelId
    // in the path. We reach into the target after target patches so the URL
    // matches the body that gets signed.
    default: ({ request }) => `https://bedrock-runtime.${region(request)}.amazonaws.com`,
    path: ({ target }) => `/model/${encodeURIComponent(target.modelId)}/converse-stream`,
  }),
  auth,
  framing,
})

export const model = (input: BedrockConverseModelInput) => {
  const { credentials, ...rest } = input
  return llmModel({
    ...rest,
    provider: "bedrock",
    protocol: "bedrock-converse",
    capabilities:
      input.capabilities ??
      capabilities({
        output: { reasoning: true },
        tools: { calls: true, streamingInput: true },
        cache: { prompt: true, contentBlocks: true },
      }),
    native: credentials
      ? {
          ...input.native,
          aws_credentials: credentials,
          aws_region: credentials.region,
        }
      : input.native,
  })
}

export * as BedrockConverse from "./bedrock-converse"
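
// Usage sketch (hypothetical model id and credential shape, not from this
// diff): with no `apiKey` set, requests for this model take the SigV4 path
// in `auth` above, signing with whatever `credentials` puts into
// `model.native.aws_credentials`.
//
//   const claude = model({
//     id: "anthropic.claude-sonnet-4-5-v1:0", // hypothetical
//     credentials: { region: "us-east-1", /* ...AWS key material... */ },
//   })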
497
packages/llm/src/provider/gemini.ts
Normal file
@@ -0,0 +1,497 @@
import { Effect, Schema } from "effect"
import { Adapter } from "../adapter"
import { Auth } from "../auth"
import { Endpoint } from "../endpoint"
import { Framing } from "../framing"
import { capabilities, model as llmModel, type ModelInput } from "../llm"
import { Protocol } from "../protocol"
import {
  Usage,
  type FinishReason,
  type LLMEvent,
  type LLMRequest,
  type MediaPart,
  type ReasoningEffort,
  type TextPart,
  type ToolCallPart,
  type ToolDefinition,
} from "../schema"
import { ProviderShared } from "./shared"

const ADAPTER = "gemini"

export type GeminiModelInput = Omit<ModelInput, "provider" | "protocol" | "headers"> & {
  readonly apiKey?: string
  readonly headers?: Record<string, string>
}

const GeminiTextPart = Schema.Struct({
  text: Schema.String,
  thought: Schema.optional(Schema.Boolean),
  thoughtSignature: Schema.optional(Schema.String),
})

const GeminiInlineDataPart = Schema.Struct({
  inlineData: Schema.Struct({
    mimeType: Schema.String,
    data: Schema.String,
  }),
})

const GeminiFunctionCallPart = Schema.Struct({
  functionCall: Schema.Struct({
    name: Schema.String,
    args: Schema.Unknown,
  }),
  thoughtSignature: Schema.optional(Schema.String),
})

const GeminiFunctionResponsePart = Schema.Struct({
  functionResponse: Schema.Struct({
    name: Schema.String,
    response: Schema.Unknown,
  }),
})

const GeminiContentPart = Schema.Union([
  GeminiTextPart,
  GeminiInlineDataPart,
  GeminiFunctionCallPart,
  GeminiFunctionResponsePart,
])

const GeminiContent = Schema.Struct({
  role: Schema.Literals(["user", "model"]),
  parts: Schema.Array(GeminiContentPart),
})
type GeminiContent = Schema.Schema.Type<typeof GeminiContent>

const GeminiSystemInstruction = Schema.Struct({
  parts: Schema.Array(Schema.Struct({ text: Schema.String })),
})

const GeminiFunctionDeclaration = Schema.Struct({
  name: Schema.String,
  description: Schema.String,
  parameters: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
})

const GeminiTool = Schema.Struct({
  functionDeclarations: Schema.Array(GeminiFunctionDeclaration),
})

const GeminiToolConfig = Schema.Struct({
  functionCallingConfig: Schema.Struct({
    mode: Schema.Literals(["AUTO", "NONE", "ANY"]),
    allowedFunctionNames: Schema.optional(Schema.Array(Schema.String)),
  }),
})

const GeminiThinkingConfig = Schema.Struct({
  thinkingBudget: Schema.optional(Schema.Number),
  includeThoughts: Schema.optional(Schema.Boolean),
})

const GeminiGenerationConfig = Schema.Struct({
  maxOutputTokens: Schema.optional(Schema.Number),
  temperature: Schema.optional(Schema.Number),
  topP: Schema.optional(Schema.Number),
  stopSequences: Schema.optional(Schema.Array(Schema.String)),
  thinkingConfig: Schema.optional(GeminiThinkingConfig),
})

const GeminiTargetFields = {
  contents: Schema.Array(GeminiContent),
  systemInstruction: Schema.optional(GeminiSystemInstruction),
  tools: Schema.optional(Schema.Array(GeminiTool)),
  toolConfig: Schema.optional(GeminiToolConfig),
  generationConfig: Schema.optional(GeminiGenerationConfig),
}
const GeminiDraft = Schema.Struct(GeminiTargetFields)
type GeminiDraft = Schema.Schema.Type<typeof GeminiDraft>
const GeminiTarget = Schema.Struct(GeminiTargetFields)
export type GeminiTarget = Schema.Schema.Type<typeof GeminiTarget>

const GeminiUsage = Schema.Struct({
  cachedContentTokenCount: Schema.optional(Schema.Number),
  thoughtsTokenCount: Schema.optional(Schema.Number),
  promptTokenCount: Schema.optional(Schema.Number),
  candidatesTokenCount: Schema.optional(Schema.Number),
  totalTokenCount: Schema.optional(Schema.Number),
})
type GeminiUsage = Schema.Schema.Type<typeof GeminiUsage>

const GeminiCandidate = Schema.Struct({
  content: Schema.optional(GeminiContent),
  finishReason: Schema.optional(Schema.String),
})

const GeminiChunk = Schema.Struct({
  candidates: Schema.optional(Schema.Array(GeminiCandidate)),
  usageMetadata: Schema.optional(GeminiUsage),
})
type GeminiChunk = Schema.Schema.Type<typeof GeminiChunk>

interface ParserState {
  readonly finishReason?: string
  readonly hasToolCalls: boolean
  readonly nextToolCallId: number
  readonly usage?: Usage
}

const { encodeTarget, decodeTarget, decodeChunk } = ProviderShared.codecs({
  adapter: ADAPTER,
  draft: GeminiDraft,
  target: GeminiTarget,
  chunk: GeminiChunk,
  chunkErrorMessage: "Invalid Gemini stream chunk",
})

const invalid = ProviderShared.invalidRequest

const mediaData = ProviderShared.mediaBytes

const isRecord = ProviderShared.isRecord

// Tool-schema conversion has two distinct concerns:
//
// 1. Sanitize — fix common authoring mistakes Gemini rejects: integer/number
//    enums (must be strings), `required` entries that don't match a property,
//    untyped arrays (`items` must be present), and `properties`/`required`
//    keys on non-object scalars. Mirrors OpenCode's historical
//    `ProviderTransform.schema` Gemini rules.
//
// 2. Project — lossy mapping from JSON Schema to Gemini's schema dialect:
//    drop empty objects, derive `nullable: true` from `type: [..., "null"]`,
//    coerce `const` to `[const]` enum, recurse properties/items, propagate
//    only an allowlisted set of keys (description, required, format, type,
//    properties, items, allOf, anyOf, oneOf, minLength). Anything outside the
//    allowlist (e.g. `additionalProperties`, `$ref`) is silently dropped.
//
// Sanitize runs first, then project. Both passes live here so the adapter
// owns the full transformation; consumers don't need to register a patch.

const SCHEMA_INTENT_KEYS = [
  "type",
  "properties",
  "items",
  "prefixItems",
  "enum",
  "const",
  "$ref",
  "additionalProperties",
  "patternProperties",
  "required",
  "not",
  "if",
  "then",
  "else",
]

const hasCombiner = (schema: unknown) =>
  isRecord(schema) && (Array.isArray(schema.anyOf) || Array.isArray(schema.oneOf) || Array.isArray(schema.allOf))

const hasSchemaIntent = (schema: unknown) =>
  isRecord(schema) && (hasCombiner(schema) || SCHEMA_INTENT_KEYS.some((key) => key in schema))

const sanitizeToolSchemaNode = (schema: unknown): unknown => {
  if (!isRecord(schema)) return Array.isArray(schema) ? schema.map(sanitizeToolSchemaNode) : schema

  const result: Record<string, unknown> = Object.fromEntries(
    Object.entries(schema).map(([key, value]) => [
      key,
      key === "enum" && Array.isArray(value) ? value.map(String) : sanitizeToolSchemaNode(value),
    ]),
  )

  // Integer/number enums become string enums on the wire — Gemini rejects
  // numeric enum values. The `enum` map above already coerced the values;
  // this rewrites the type to match.
  if (Array.isArray(result.enum) && (result.type === "integer" || result.type === "number")) result.type = "string"

  // Filter `required` entries that don't appear in `properties` — Gemini
  // rejects dangling required field references.
  const properties = result.properties
  if (result.type === "object" && isRecord(properties) && Array.isArray(result.required)) {
    result.required = result.required.filter((field) => typeof field === "string" && field in properties)
  }

  // Default untyped arrays to string-typed items so Gemini has a concrete
  // schema to validate against.
  if (result.type === "array" && !hasCombiner(result)) {
    result.items = result.items ?? {}
    if (isRecord(result.items) && !hasSchemaIntent(result.items)) result.items = { ...result.items, type: "string" }
  }

  // Scalar schemas can't carry object-shaped keys.
  if (typeof result.type === "string" && result.type !== "object" && !hasCombiner(result)) {
    delete result.properties
    delete result.required
  }

  return result
}

const emptyObjectSchema = (schema: Record<string, unknown>) =>
  schema.type === "object" &&
  (!isRecord(schema.properties) || Object.keys(schema.properties).length === 0) &&
  !schema.additionalProperties

const projectToolSchemaNode = (schema: unknown): Record<string, unknown> | undefined => {
  if (!isRecord(schema)) return undefined
  if (emptyObjectSchema(schema)) return undefined
  return Object.fromEntries(
    [
      ["description", schema.description],
      ["required", schema.required],
      ["format", schema.format],
      ["type", Array.isArray(schema.type) ? schema.type.filter((type) => type !== "null")[0] : schema.type],
      ["nullable", Array.isArray(schema.type) && schema.type.includes("null") ? true : undefined],
      ["enum", schema.const !== undefined ? [schema.const] : schema.enum],
      [
        "properties",
        isRecord(schema.properties)
          ? Object.fromEntries(
              Object.entries(schema.properties).map(([key, value]) => [key, projectToolSchemaNode(value)]),
            )
          : undefined,
      ],
      [
        "items",
        Array.isArray(schema.items)
          ? schema.items.map(projectToolSchemaNode)
          : schema.items === undefined
            ? undefined
            : projectToolSchemaNode(schema.items),
      ],
      ["allOf", Array.isArray(schema.allOf) ? schema.allOf.map(projectToolSchemaNode) : undefined],
      ["anyOf", Array.isArray(schema.anyOf) ? schema.anyOf.map(projectToolSchemaNode) : undefined],
      ["oneOf", Array.isArray(schema.oneOf) ? schema.oneOf.map(projectToolSchemaNode) : undefined],
      ["minLength", schema.minLength],
    ].filter((entry) => entry[1] !== undefined),
  )
}

const convertToolSchema = (schema: unknown) => projectToolSchemaNode(sanitizeToolSchemaNode(schema))
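
// Example (illustrative input, not from the test suite): sanitize + project
// turn
//   { type: "integer", enum: [1, 2], description: "level", maxLength: 3 }
// into
//   { type: "string", enum: ["1", "2"], description: "level" }
// The enum values are stringified, the type is rewritten to match, and only
// allowlisted keys survive the projection (`maxLength` is dropped).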

const lowerTool = (tool: ToolDefinition) => ({
  name: tool.name,
  description: tool.description,
  parameters: convertToolSchema(tool.inputSchema),
})

const lowerToolConfig = Effect.fn("Gemini.lowerToolConfig")(function* (
  toolChoice: NonNullable<LLMRequest["toolChoice"]>,
) {
  if (toolChoice.type === "required") return { functionCallingConfig: { mode: "ANY" as const } }
  if (toolChoice.type === "none") return { functionCallingConfig: { mode: "NONE" as const } }
  if (toolChoice.type !== "tool") return { functionCallingConfig: { mode: "AUTO" as const } }
  if (!toolChoice.name) return yield* invalid("Gemini tool choice requires a tool name")
  return {
    functionCallingConfig: { mode: "ANY" as const, allowedFunctionNames: [toolChoice.name] },
  }
})

const lowerUserPart = (part: TextPart | MediaPart) =>
  part.type === "text"
    ? { text: part.text }
    : { inlineData: { mimeType: part.mediaType, data: mediaData(part) } }

const lowerToolCall = (part: ToolCallPart) => ({
  functionCall: { name: part.name, args: part.input },
})

const lowerMessages = Effect.fn("Gemini.lowerMessages")(function* (request: LLMRequest) {
  const contents: GeminiContent[] = []

  for (const message of request.messages) {
    if (message.role === "user") {
      const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
      for (const part of message.content) {
        if (part.type !== "text" && part.type !== "media")
          return yield* invalid("Gemini user messages only support text and media content for now")
        parts.push(lowerUserPart(part))
      }
      contents.push({ role: "user", parts })
      continue
    }

    if (message.role === "assistant") {
      const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
      for (const part of message.content) {
        if (part.type === "text") {
          parts.push({ text: part.text })
          continue
        }
        if (part.type === "reasoning") {
          parts.push({ text: part.text, thought: true })
          continue
        }
        if (part.type === "tool-call") {
          parts.push(lowerToolCall(part))
          continue
        }
        return yield* invalid("Gemini assistant messages only support text, reasoning, and tool-call content for now")
      }
      contents.push({ role: "model", parts })
      continue
    }

    const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
    for (const part of message.content) {
      if (part.type !== "tool-result") return yield* invalid("Gemini tool messages only support tool-result content")
      parts.push({
        functionResponse: {
          name: part.name,
          response: {
            name: part.name,
            content: ProviderShared.toolResultText(part),
          },
        },
      })
    }
    contents.push({ role: "user", parts })
  }

  return contents
})

const thinkingBudget = (effort: ReasoningEffort | undefined) => {
  if (effort === "minimal" || effort === "low") return 1024
  if (effort === "high") return 16000
  if (effort === "xhigh") return 24576
  if (effort === "max") return 32768
  return 8192
}

const prepare = Effect.fn("Gemini.prepare")(function* (request: LLMRequest) {
  const toolsEnabled = request.tools.length > 0 && request.toolChoice?.type !== "none"
  const generationConfig = {
    maxOutputTokens: request.generation.maxTokens,
    temperature: request.generation.temperature,
    topP: request.generation.topP,
    stopSequences: request.generation.stop,
    thinkingConfig: request.reasoning?.enabled
      ? {
          includeThoughts: true,
          thinkingBudget: thinkingBudget(request.reasoning.effort),
        }
      : undefined,
  }

  return {
    contents: yield* lowerMessages(request),
    systemInstruction:
      request.system.length === 0 ? undefined : { parts: [{ text: ProviderShared.joinText(request.system) }] },
    tools: toolsEnabled ? [{ functionDeclarations: request.tools.map(lowerTool) }] : undefined,
    toolConfig: toolsEnabled && request.toolChoice ? yield* lowerToolConfig(request.toolChoice) : undefined,
    generationConfig: Object.values(generationConfig).some((value) => value !== undefined)
      ? generationConfig
      : undefined,
  }
})

const mapUsage = (usage: GeminiUsage | undefined) => {
  if (!usage) return undefined
  return new Usage({
    inputTokens: usage.promptTokenCount,
    outputTokens: usage.candidatesTokenCount,
    reasoningTokens: usage.thoughtsTokenCount,
    cacheReadInputTokens: usage.cachedContentTokenCount,
    totalTokens: ProviderShared.totalTokens(usage.promptTokenCount, usage.candidatesTokenCount, usage.totalTokenCount),
    native: usage,
  })
}

const mapFinishReason = (finishReason: string | undefined, hasToolCalls: boolean): FinishReason => {
  if (finishReason === "STOP") return hasToolCalls ? "tool-calls" : "stop"
  if (finishReason === "MAX_TOKENS") return "length"
  if (
    finishReason === "IMAGE_SAFETY" ||
    finishReason === "RECITATION" ||
    finishReason === "SAFETY" ||
    finishReason === "BLOCKLIST" ||
    finishReason === "PROHIBITED_CONTENT" ||
    finishReason === "SPII"
  )
    return "content-filter"
  if (finishReason === "MALFORMED_FUNCTION_CALL") return "error"
  return "unknown"
}

const finish = (state: ParserState): ReadonlyArray<LLMEvent> =>
  state.finishReason || state.usage
    ? [{ type: "request-finish", reason: mapFinishReason(state.finishReason, state.hasToolCalls), usage: state.usage }]
    : []

const processChunk = (state: ParserState, chunk: GeminiChunk) => {
  const nextState = {
    ...state,
    usage: chunk.usageMetadata ? (mapUsage(chunk.usageMetadata) ?? state.usage) : state.usage,
  }
  const candidate = chunk.candidates?.[0]
  if (!candidate?.content) {
    return Effect.succeed([
      { ...nextState, finishReason: candidate?.finishReason ?? nextState.finishReason },
      [],
    ] as const)
  }

  const events: LLMEvent[] = []
  let hasToolCalls = nextState.hasToolCalls
  let nextToolCallId = nextState.nextToolCallId

  for (const part of candidate.content.parts) {
    if ("text" in part && part.text.length > 0) {
      events.push({ type: part.thought ? "reasoning-delta" : "text-delta", text: part.text })
      continue
    }

    if ("functionCall" in part) {
      const input = part.functionCall.args
      const id = `tool_${nextToolCallId++}`
      events.push({ type: "tool-call", id, name: part.functionCall.name, input })
      hasToolCalls = true
    }
  }

  return Effect.succeed([
    {
      ...nextState,
      hasToolCalls,
      nextToolCallId,
      finishReason: candidate.finishReason ?? nextState.finishReason,
    },
    events,
  ] as const)
}

/**
 * The Gemini protocol — request lowering, target validation, body encoding,
 * and the streaming-chunk state machine. Used by Google AI Studio Gemini and
 * (once registered) Vertex Gemini.
 */
export const protocol = Protocol.define<GeminiDraft, GeminiTarget, string, GeminiChunk, ParserState>({
  id: "gemini",
  prepare,
  validate: ProviderShared.validateWith(decodeTarget),
  encode: encodeTarget,
  redact: (target) => target,
  decode: decodeChunk,
  initial: () => ({ hasToolCalls: false, nextToolCallId: 0 }),
  process: processChunk,
  onHalt: finish,
  streamReadError: "Failed to read Gemini stream",
})

export const adapter = Adapter.fromProtocol({
  id: ADAPTER,
  protocol,
  endpoint: Endpoint.baseURL({
    default: "https://generativelanguage.googleapis.com/v1beta",
    // Gemini's path embeds the model id and pins SSE framing at the URL level.
    path: ({ request }) => `/models/${request.model.id}:streamGenerateContent?alt=sse`,
  }),
  auth: Auth.apiKeyHeader("x-goog-api-key"),
  framing: Framing.sse,
})

export const model = (input: GeminiModelInput) =>
  llmModel({
    ...input,
    provider: "google",
    protocol: "gemini",
    capabilities:
      input.capabilities ??
      capabilities({
        input: { image: true, audio: true, video: true, pdf: true },
        output: { reasoning: true },
        tools: { calls: true },
        reasoning: { efforts: ["minimal", "low", "medium", "high", "xhigh", "max"] },
      }),
  })

export * as Gemini from "./gemini"
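
// Usage sketch (hypothetical model id, not from this diff): an AI Studio
// Gemini model with the default capability set; `adapter` above routes it to
// `/models/gemini-2.5-pro:streamGenerateContent?alt=sse` with the API key in
// the `x-goog-api-key` header.
//
//   const gemini = model({ id: "gemini-2.5-pro", apiKey: process.env.GEMINI_API_KEY })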
18
packages/llm/src/provider/github-copilot.ts
Normal file
@@ -0,0 +1,18 @@
import { ProviderResolver } from "../provider-resolver"
import { ProviderID } from "../schema"

export const id = ProviderID.make("github-copilot")

export const shouldUseResponsesApi = (modelID: string) => {
  const match = /^gpt-(\d+)/.exec(modelID)
  if (!match) return false
  return Number(match[1]) >= 5 && !modelID.startsWith("gpt-5-mini")
}

export const resolver = ProviderResolver.define({
  id,
  resolve: (input) =>
    ProviderResolver.make(id, shouldUseResponsesApi(input.modelID) ? "openai-responses" : "openai-chat"),
})

export * as GitHubCopilot from "./github-copilot"
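
// Worked examples of the routing rule above:
//   shouldUseResponsesApi("gpt-4.1")    // false: major version below 5
//   shouldUseResponsesApi("gpt-5")      // true:  routed to openai-responses
//   shouldUseResponsesApi("gpt-5-mini") // false: explicitly kept on openai-chat
//   shouldUseResponsesApi("o4-mini")    // false: no `gpt-<n>` prefix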
5
packages/llm/src/provider/google.ts
Normal file
@@ -0,0 +1,5 @@
import { ProviderResolver } from "../provider-resolver"

export const resolver = ProviderResolver.fixed("google", "gemini")

export * as Google from "./google"
377
packages/llm/src/provider/openai-chat.ts
Normal file
@@ -0,0 +1,377 @@
import { Effect, Schema } from "effect"
import { Adapter } from "../adapter"
import { Endpoint } from "../endpoint"
import { Framing } from "../framing"
import { capabilities, model as llmModel, type ModelInput } from "../llm"
import { Protocol } from "../protocol"
import {
  Usage,
  type FinishReason,
  type LLMEvent,
  type LLMRequest,
  type TextPart,
  type ToolCallPart,
  type ToolDefinition,
} from "../schema"
import { ProviderShared } from "./shared"

const ADAPTER = "openai-chat"

export type OpenAIChatModelInput = Omit<ModelInput, "provider" | "protocol" | "headers"> & {
  readonly apiKey?: string
  readonly headers?: Record<string, string>
}

const OpenAIChatFunction = Schema.Struct({
  name: Schema.String,
  description: Schema.String,
  parameters: Schema.Record(Schema.String, Schema.Unknown),
})

const OpenAIChatTool = Schema.Struct({
  type: Schema.Literal("function"),
  function: OpenAIChatFunction,
})
type OpenAIChatTool = Schema.Schema.Type<typeof OpenAIChatTool>

const OpenAIChatAssistantToolCall = Schema.Struct({
  id: Schema.String,
  type: Schema.Literal("function"),
  function: Schema.Struct({
    name: Schema.String,
    arguments: Schema.String,
  }),
})
type OpenAIChatAssistantToolCall = Schema.Schema.Type<typeof OpenAIChatAssistantToolCall>

const OpenAIChatMessage = Schema.Union([
  Schema.Struct({ role: Schema.Literal("system"), content: Schema.String }),
  Schema.Struct({ role: Schema.Literal("user"), content: Schema.String }),
  Schema.Struct({
    role: Schema.Literal("assistant"),
    content: Schema.NullOr(Schema.String),
    tool_calls: Schema.optional(Schema.Array(OpenAIChatAssistantToolCall)),
  }),
  Schema.Struct({ role: Schema.Literal("tool"), tool_call_id: Schema.String, content: Schema.String }),
])
type OpenAIChatMessage = Schema.Schema.Type<typeof OpenAIChatMessage>

const OpenAIChatToolChoiceFunction = Schema.Struct({ name: Schema.String })

const OpenAIChatToolChoice = Schema.Union([
  Schema.Literals(["auto", "none", "required"]),
  Schema.Struct({
    type: Schema.Literal("function"),
    function: OpenAIChatToolChoiceFunction,
  }),
])

const OpenAIChatTargetFields = {
  model: Schema.String,
  messages: Schema.Array(OpenAIChatMessage),
  tools: Schema.optional(Schema.Array(OpenAIChatTool)),
  tool_choice: Schema.optional(OpenAIChatToolChoice),
  stream: Schema.Literal(true),
  stream_options: Schema.optional(Schema.Struct({ include_usage: Schema.Boolean })),
  max_tokens: Schema.optional(Schema.Number),
  temperature: Schema.optional(Schema.Number),
  top_p: Schema.optional(Schema.Number),
  stop: Schema.optional(Schema.Array(Schema.String)),
}
const OpenAIChatDraft = Schema.Struct(OpenAIChatTargetFields)
type OpenAIChatDraft = Schema.Schema.Type<typeof OpenAIChatDraft>
const OpenAIChatTarget = Schema.Struct(OpenAIChatTargetFields)
export type OpenAIChatTarget = Schema.Schema.Type<typeof OpenAIChatTarget>

const OpenAIChatUsage = Schema.Struct({
  prompt_tokens: Schema.optional(Schema.Number),
  completion_tokens: Schema.optional(Schema.Number),
  total_tokens: Schema.optional(Schema.Number),
  prompt_tokens_details: Schema.optional(
    Schema.NullOr(
      Schema.Struct({
        cached_tokens: Schema.optional(Schema.Number),
      }),
    ),
  ),
  completion_tokens_details: Schema.optional(
    Schema.NullOr(
      Schema.Struct({
        reasoning_tokens: Schema.optional(Schema.Number),
      }),
    ),
  ),
})

const OpenAIChatToolCallDeltaFunction = Schema.Struct({
  name: Schema.optional(Schema.NullOr(Schema.String)),
  arguments: Schema.optional(Schema.NullOr(Schema.String)),
})

const OpenAIChatToolCallDelta = Schema.Struct({
  index: Schema.Number,
  id: Schema.optional(Schema.NullOr(Schema.String)),
  function: Schema.optional(Schema.NullOr(OpenAIChatToolCallDeltaFunction)),
})
type OpenAIChatToolCallDelta = Schema.Schema.Type<typeof OpenAIChatToolCallDelta>

const OpenAIChatDelta = Schema.Struct({
  content: Schema.optional(Schema.NullOr(Schema.String)),
  tool_calls: Schema.optional(Schema.NullOr(Schema.Array(OpenAIChatToolCallDelta))),
})

const OpenAIChatChoice = Schema.Struct({
  delta: Schema.optional(Schema.NullOr(OpenAIChatDelta)),
  finish_reason: Schema.optional(Schema.NullOr(Schema.String)),
})

const OpenAIChatChunk = Schema.Struct({
  choices: Schema.Array(OpenAIChatChoice),
  usage: Schema.optional(Schema.NullOr(OpenAIChatUsage)),
})
type OpenAIChatChunk = Schema.Schema.Type<typeof OpenAIChatChunk>

const { encodeTarget, decodeTarget, decodeChunk } = ProviderShared.codecs({
  adapter: ADAPTER,
  draft: OpenAIChatDraft,
  target: OpenAIChatTarget,
  chunk: OpenAIChatChunk,
  chunkErrorMessage: "Invalid OpenAI Chat stream chunk",
})

interface ParsedToolCall {
  readonly id: string
  readonly name: string
  readonly input: unknown
}

interface ParserState {
  readonly tools: Record<number, ProviderShared.ToolAccumulator>
  readonly toolCalls: ReadonlyArray<ParsedToolCall>
  readonly usage?: Usage
  readonly finishReason?: FinishReason
}

const invalid = ProviderShared.invalidRequest

const lowerTool = (tool: ToolDefinition): OpenAIChatTool => ({
  type: "function",
  function: {
    name: tool.name,
    description: tool.description,
    parameters: tool.inputSchema,
  },
})

const lowerToolChoice = Effect.fn("OpenAIChat.lowerToolChoice")(function* (
  toolChoice: NonNullable<LLMRequest["toolChoice"]>,
) {
  if (toolChoice.type !== "tool") return toolChoice.type
  if (!toolChoice.name) return yield* invalid("OpenAI Chat tool choice requires a tool name")
  return { type: "function" as const, function: { name: toolChoice.name } }
})

const lowerToolCall = (part: ToolCallPart): OpenAIChatAssistantToolCall => ({
  id: part.id,
  type: "function",
  function: {
    name: part.name,
    arguments: ProviderShared.encodeJson(part.input),
  },
})

const lowerMessages = Effect.fn("OpenAIChat.lowerMessages")(function* (request: LLMRequest) {
  const system: OpenAIChatMessage[] =
    request.system.length === 0 ? [] : [{ role: "system", content: ProviderShared.joinText(request.system) }]
  const messages: OpenAIChatMessage[] = [...system]

  for (const message of request.messages) {
    if (message.role === "user") {
      const content: TextPart[] = []
      for (const part of message.content) {
        if (part.type !== "text") return yield* invalid(`OpenAI Chat user messages only support text content for now`)
        content.push(part)
      }
      messages.push({ role: "user", content: ProviderShared.joinText(content) })
      continue
    }

    if (message.role === "assistant") {
      const content: TextPart[] = []
      const toolCalls: OpenAIChatAssistantToolCall[] = []
      for (const part of message.content) {
        if (part.type === "text") {
          content.push(part)
          continue
        }
        if (part.type === "tool-call") {
          toolCalls.push(lowerToolCall(part))
          continue
        }
        return yield* invalid(`OpenAI Chat assistant messages only support text and tool-call content for now`)
      }
      messages.push({
        role: "assistant",
        content: content.length === 0 ? null : ProviderShared.joinText(content),
        tool_calls: toolCalls.length === 0 ? undefined : toolCalls,
      })
      continue
    }

    for (const part of message.content) {
      if (part.type !== "tool-result")
        return yield* invalid(`OpenAI Chat tool messages only support tool-result content`)
      messages.push({ role: "tool", tool_call_id: part.id, content: ProviderShared.toolResultText(part) })
    }
  }

  return messages
})

const prepare = Effect.fn("OpenAIChat.prepare")(function* (request: LLMRequest) {
  return {
    model: request.model.id,
    messages: yield* lowerMessages(request),
    tools: request.tools.length === 0 ? undefined : request.tools.map(lowerTool),
    tool_choice: request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined,
    stream: true as const,
    max_tokens: request.generation.maxTokens,
    temperature: request.generation.temperature,
    top_p: request.generation.topP,
    stop: request.generation.stop,
  }
})

const mapFinishReason = (reason: string | null | undefined): FinishReason => {
  if (reason === "stop") return "stop"
  if (reason === "length") return "length"
  if (reason === "content_filter") return "content-filter"
  if (reason === "function_call" || reason === "tool_calls") return "tool-calls"
  return "unknown"
}

const mapUsage = (usage: OpenAIChatChunk["usage"]): Usage | undefined => {
  if (!usage) return undefined
  return new Usage({
    inputTokens: usage.prompt_tokens,
    outputTokens: usage.completion_tokens,
    reasoningTokens: usage.completion_tokens_details?.reasoning_tokens,
    cacheReadInputTokens: usage.prompt_tokens_details?.cached_tokens,
    totalTokens: ProviderShared.totalTokens(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens),
    native: usage,
  })
}

const pushToolDelta = (tools: Record<number, ProviderShared.ToolAccumulator>, delta: OpenAIChatToolCallDelta) =>
  Effect.gen(function* () {
    const current = tools[delta.index]
    const id = delta.id ?? current?.id
    const name = delta.function?.name ?? current?.name
    if (!id || !name) {
      return yield* ProviderShared.chunkError(ADAPTER, "OpenAI Chat tool call delta is missing id or name")
    }
    return {
      id,
      name,
      input: `${current?.input ?? ""}${delta.function?.arguments ?? ""}`,
    }
  })

const finalizeToolCalls = (tools: Record<number, ProviderShared.ToolAccumulator>) =>
  Effect.forEach(Object.values(tools), (tool) =>
    Effect.gen(function* () {
      const input = yield* ProviderShared.parseToolInput(ADAPTER, tool.name, tool.input)
      return { id: tool.id, name: tool.name, input } satisfies ParsedToolCall
    }),
  )

const processChunk = (state: ParserState, chunk: OpenAIChatChunk) =>
  Effect.gen(function* () {
    const events: LLMEvent[] = []
    const usage = mapUsage(chunk.usage) ?? state.usage
    const choice = chunk.choices[0]
    const finishReason = choice?.finish_reason ? mapFinishReason(choice.finish_reason) : state.finishReason
    const delta = choice?.delta
    const toolDeltas = delta?.tool_calls ?? []
    const tools = toolDeltas.length === 0 ? state.tools : { ...state.tools }

    if (delta?.content) events.push({ type: "text-delta", text: delta.content })

    for (const tool of toolDeltas) {
      const current = yield* pushToolDelta(tools, tool)
      tools[tool.index] = current
      if (tool.function?.arguments) {
        events.push({ type: "tool-input-delta", id: current.id, name: current.name, text: tool.function.arguments })
      }
    }

    // Finalize accumulated tool inputs eagerly when finish_reason arrives so
    // JSON parse failures fail the stream at the boundary rather than at halt.
    const toolCalls =
      finishReason !== undefined && state.finishReason === undefined && Object.keys(tools).length > 0
        ? yield* finalizeToolCalls(tools)
        : state.toolCalls

    return [{ tools, toolCalls, usage, finishReason }, events] as const
  })

const finishEvents = (state: ParserState): ReadonlyArray<LLMEvent> => {
  const hasToolCalls = state.toolCalls.length > 0
  const reason = state.finishReason === "stop" && hasToolCalls ? "tool-calls" : state.finishReason
  return [
    ...state.toolCalls.map((call) => ({ type: "tool-call" as const, ...call })),
    ...(reason ? ([{ type: "request-finish", reason, usage: state.usage }] satisfies ReadonlyArray<LLMEvent>) : []),
  ]
}

/**
 * The OpenAI Chat protocol — request lowering, target validation, body
 * encoding, and the streaming-chunk state machine. Reused by every adapter
 * that speaks OpenAI Chat over HTTP+SSE: native OpenAI, DeepSeek, TogetherAI,
 * Cerebras, Baseten, Fireworks, DeepInfra, and (once added) Azure OpenAI Chat.
 */
export const protocol = Protocol.define<
  OpenAIChatDraft,
  OpenAIChatTarget,
  string,
  OpenAIChatChunk,
  ParserState
>({
  id: "openai-chat",
  prepare,
  validate: ProviderShared.validateWith(decodeTarget),
  encode: encodeTarget,
  redact: (target) => target,
  decode: decodeChunk,
  initial: () => ({ tools: {}, toolCalls: [] }),
  process: processChunk,
  onHalt: finishEvents,
  streamReadError: "Failed to read OpenAI Chat stream",
})

export const adapter = Adapter.fromProtocol({
  id: ADAPTER,
  protocol,
  endpoint: Endpoint.baseURL({ default: "https://api.openai.com/v1", path: "/chat/completions" }),
  framing: Framing.sse,
})

export const model = (input: OpenAIChatModelInput) =>
  llmModel({
    ...input,
    provider: "openai",
    protocol: "openai-chat",
    capabilities: input.capabilities ?? capabilities({ tools: { calls: true, streamingInput: true } }),
  })

export const includeUsage = adapter.patch("include-usage", {
  reason: "request final usage chunk from OpenAI Chat streaming responses",
  apply: (target) => ({
    ...target,
    stream_options: { ...target.stream_options, include_usage: true },
  }),
})

export * as OpenAIChat from "./openai-chat"
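
// Worked example of the `includeUsage` patch above: applied to a prepared
// target it yields
//   { ...target, stream_options: { include_usage: true } }
// so the final SSE chunk carries a `usage` object for `mapUsage` to fold
// into the terminal `request-finish` event.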
74
packages/llm/src/provider/openai-compatible-chat.ts
Normal file
@@ -0,0 +1,74 @@
import { Adapter } from "../adapter"
import { Endpoint } from "../endpoint"
import { Framing } from "../framing"
import { capabilities, model as llmModel, type ModelInput } from "../llm"
import { OpenAIChat } from "./openai-chat"
import { families, type ProviderFamily } from "./openai-compatible-family"

const ADAPTER = "openai-compatible-chat"

export type OpenAICompatibleChatModelInput = Omit<ModelInput, "protocol" | "headers" | "baseURL"> & {
  readonly baseURL: string
  readonly apiKey?: string
  readonly headers?: Record<string, string>
}

export type ProviderFamilyModelInput = Omit<OpenAICompatibleChatModelInput, "provider" | "baseURL"> & {
  readonly baseURL?: string
}

/**
 * Adapter for non-OpenAI providers that expose an OpenAI Chat-compatible
 * `/chat/completions` endpoint. Reuses `OpenAIChat.protocol` end-to-end and
 * only overrides:
 *
 * - the registered protocol id (`openai-compatible-chat`) so providers can be
 *   resolved per-family without colliding with native OpenAI;
 * - the endpoint, which requires `model.baseURL` (no provider default).
 */
export const adapter = Adapter.fromProtocol({
  id: ADAPTER,
  protocol: OpenAIChat.protocol,
  protocolId: "openai-compatible-chat",
  endpoint: Endpoint.baseURL({
    path: "/chat/completions",
    required: "OpenAI-compatible Chat requires a baseURL",
  }),
  framing: Framing.sse,
})

export const model = (input: OpenAICompatibleChatModelInput) =>
  llmModel({
    ...input,
    protocol: "openai-compatible-chat",
    capabilities: input.capabilities ?? capabilities({ tools: { calls: true, streamingInput: true } }),
  })

const familyModel = (family: ProviderFamily, input: ProviderFamilyModelInput) =>
  model({
    ...input,
    provider: family.provider,
    baseURL: input.baseURL ?? family.baseURL,
  })

export const baseten = (input: ProviderFamilyModelInput) => familyModel(families.baseten, input)

export const cerebras = (input: ProviderFamilyModelInput) => familyModel(families.cerebras, input)

export const deepinfra = (input: ProviderFamilyModelInput) => familyModel(families.deepinfra, input)

export const deepseek = (input: ProviderFamilyModelInput) => familyModel(families.deepseek, input)

export const fireworks = (input: ProviderFamilyModelInput) => familyModel(families.fireworks, input)

export const togetherai = (input: ProviderFamilyModelInput) => familyModel(families.togetherai, input)

export const includeUsage = adapter.patch("include-usage", {
  reason: "request final usage chunk from OpenAI-compatible Chat streaming responses",
  apply: (target) => ({
    ...target,
    stream_options: { ...target.stream_options, include_usage: true },
  }),
})

export * as OpenAICompatibleChat from "./openai-compatible-chat"
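
// Usage sketch (hypothetical model id, not from this diff): the family
// helpers fill in `provider` and the default `baseURL`, so a DeepSeek model
// needs only its id:
//
//   const v3 = deepseek({ id: "deepseek-chat", apiKey: process.env.DEEPSEEK_API_KEY })
//
// which resolves to provider "deepseek" with baseURL
// "https://api.deepseek.com/v1" unless an explicit `baseURL` overrides it.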
36
packages/llm/src/provider/openai-compatible-family.ts
Normal file
@@ -0,0 +1,36 @@
import { ProviderResolver } from "../provider-resolver"

export interface ProviderFamily {
  readonly provider: string
  readonly baseURL: string
}

export const families = {
  baseten: { provider: "baseten", baseURL: "https://inference.baseten.co/v1" },
  cerebras: { provider: "cerebras", baseURL: "https://api.cerebras.ai/v1" },
  deepinfra: { provider: "deepinfra", baseURL: "https://api.deepinfra.com/v1/openai" },
  deepseek: { provider: "deepseek", baseURL: "https://api.deepseek.com/v1" },
  fireworks: { provider: "fireworks", baseURL: "https://api.fireworks.ai/inference/v1" },
  togetherai: { provider: "togetherai", baseURL: "https://api.together.xyz/v1" },
} as const satisfies Record<string, ProviderFamily>

export const byProvider: Record<string, ProviderFamily> = Object.fromEntries(
  Object.values(families).map((family) => [family.provider, family]),
)

const resolutions = Object.fromEntries(
  Object.values(families).map((family) => [
    family.provider,
    ProviderResolver.make(family.provider, "openai-compatible-chat", { baseURL: family.baseURL }),
  ]),
)

export const resolve = (provider: string) =>
  resolutions[provider] ?? ProviderResolver.make(provider, "openai-compatible-chat")

export const resolver = ProviderResolver.define({
  id: ProviderResolver.make("openai-compatible", "openai-compatible-chat").provider,
  resolve: (input) => resolve(input.providerID),
})

export * as OpenAICompatibleFamily from "./openai-compatible-family"
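
// Worked examples of `resolve` above:
//   resolve("cerebras") // known family: "openai-compatible-chat" with
//                       // baseURL "https://api.cerebras.ai/v1"
//   resolve("acme-llm") // unknown provider (hypothetical id): same protocol,
//                       // no baseURL, so the model must supply its own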
|
||||
399
packages/llm/src/provider/openai-responses.ts
Normal file
399
packages/llm/src/provider/openai-responses.ts
Normal file
@@ -0,0 +1,399 @@
|
||||
import { Effect, Schema } from "effect"
|
||||
import { Adapter } from "../adapter"
|
||||
import { Endpoint } from "../endpoint"
|
||||
import { Framing } from "../framing"
|
||||
import { capabilities, model as llmModel, type ModelInput } from "../llm"
|
||||
import { Protocol } from "../protocol"
|
||||
import {
|
||||
Usage,
|
||||
type FinishReason,
|
||||
type LLMEvent,
|
||||
type LLMRequest,
|
||||
type TextPart,
|
||||
type ToolCallPart,
|
||||
type ToolDefinition,
|
||||
} from "../schema"
|
||||
import { ProviderShared } from "./shared"
|
||||
|
||||
const ADAPTER = "openai-responses"
|
||||
|
||||
export type OpenAIResponsesModelInput = Omit<ModelInput, "provider" | "protocol" | "headers"> & {
|
||||
readonly apiKey?: string
|
||||
readonly headers?: Record<string, string>
|
||||
}
|
||||
|
||||
const OpenAIResponsesInputText = Schema.Struct({
|
||||
type: Schema.Literal("input_text"),
|
||||
text: Schema.String,
|
||||
})
|
||||
|
||||
const OpenAIResponsesOutputText = Schema.Struct({
|
||||
type: Schema.Literal("output_text"),
|
||||
text: Schema.String,
|
||||
})
|
||||
|
||||
const OpenAIResponsesInputItem = Schema.Union([
|
||||
Schema.Struct({ role: Schema.Literal("system"), content: Schema.String }),
|
||||
Schema.Struct({ role: Schema.Literal("user"), content: Schema.Array(OpenAIResponsesInputText) }),
|
||||
Schema.Struct({ role: Schema.Literal("assistant"), content: Schema.Array(OpenAIResponsesOutputText) }),
|
||||
Schema.Struct({
|
||||
type: Schema.Literal("function_call"),
|
||||
call_id: Schema.String,
|
||||
name: Schema.String,
|
||||
arguments: Schema.String,
|
||||
}),
|
||||
Schema.Struct({
|
||||
type: Schema.Literal("function_call_output"),
|
||||
call_id: Schema.String,
|
||||
output: Schema.String,
|
||||
}),
|
||||
])
|
||||
type OpenAIResponsesInputItem = Schema.Schema.Type<typeof OpenAIResponsesInputItem>
|
||||
|
||||
const OpenAIResponsesTool = Schema.Struct({
|
||||
type: Schema.Literal("function"),
|
||||
name: Schema.String,
|
||||
description: Schema.String,
|
||||
parameters: Schema.Record(Schema.String, Schema.Unknown),
|
||||
strict: Schema.optional(Schema.Boolean),
|
||||
})
|
||||
type OpenAIResponsesTool = Schema.Schema.Type<typeof OpenAIResponsesTool>
|
||||
|
||||
const OpenAIResponsesToolChoice = Schema.Union([
|
||||
Schema.Literals(["auto", "none", "required"]),
|
||||
Schema.Struct({ type: Schema.Literal("function"), name: Schema.String }),
|
||||
])
|
||||
|
||||
const OpenAIResponsesTargetFields = {
|
||||
model: Schema.String,
|
||||
input: Schema.Array(OpenAIResponsesInputItem),
|
||||
tools: Schema.optional(Schema.Array(OpenAIResponsesTool)),
|
||||
tool_choice: Schema.optional(OpenAIResponsesToolChoice),
|
||||
stream: Schema.Literal(true),
|
||||
max_output_tokens: Schema.optional(Schema.Number),
|
||||
temperature: Schema.optional(Schema.Number),
|
||||
top_p: Schema.optional(Schema.Number),
|
||||
}
|
||||
const OpenAIResponsesDraft = Schema.Struct(OpenAIResponsesTargetFields)
|
||||
type OpenAIResponsesDraft = Schema.Schema.Type<typeof OpenAIResponsesDraft>
|
||||
const OpenAIResponsesTarget = Schema.Struct(OpenAIResponsesTargetFields)
|
||||
export type OpenAIResponsesTarget = Schema.Schema.Type<typeof OpenAIResponsesTarget>
|
||||
|
||||
const OpenAIResponsesUsage = Schema.Struct({
|
||||
input_tokens: Schema.optional(Schema.Number),
|
||||
input_tokens_details: Schema.optional(Schema.NullOr(Schema.Struct({ cached_tokens: Schema.optional(Schema.Number) }))),
|
||||
output_tokens: Schema.optional(Schema.Number),
|
||||
output_tokens_details: Schema.optional(Schema.NullOr(Schema.Struct({ reasoning_tokens: Schema.optional(Schema.Number) }))),
|
||||
total_tokens: Schema.optional(Schema.Number),
|
||||
})
|
||||
type OpenAIResponsesUsage = Schema.Schema.Type<typeof OpenAIResponsesUsage>
|
||||
|
||||
const OpenAIResponsesStreamItem = Schema.Struct({
|
||||
type: Schema.String,
|
||||
id: Schema.optional(Schema.String),
|
||||
call_id: Schema.optional(Schema.String),
|
||||
name: Schema.optional(Schema.String),
|
||||
arguments: Schema.optional(Schema.String),
|
||||
// Hosted (provider-executed) tool fields. Each hosted tool item carries its
|
||||
// own subset of these — we capture them generically so we can surface the
|
||||
// call's typed input portion and round-trip the full result payload without
|
||||
// hand-rolling a per-tool schema.
|
||||
status: Schema.optional(Schema.String),
|
||||
action: Schema.optional(Schema.Unknown),
|
||||
queries: Schema.optional(Schema.Unknown),
|
||||
results: Schema.optional(Schema.Unknown),
|
||||
code: Schema.optional(Schema.String),
|
||||
container_id: Schema.optional(Schema.String),
|
||||
outputs: Schema.optional(Schema.Unknown),
|
||||
server_label: Schema.optional(Schema.String),
|
||||
output: Schema.optional(Schema.Unknown),
|
||||
error: Schema.optional(Schema.Unknown),
|
||||
})
|
||||
type OpenAIResponsesStreamItem = Schema.Schema.Type<typeof OpenAIResponsesStreamItem>
|
||||
|
||||
const OpenAIResponsesChunk = Schema.Struct({
|
||||
type: Schema.String,
|
||||
delta: Schema.optional(Schema.String),
|
||||
item_id: Schema.optional(Schema.String),
|
||||
item: Schema.optional(OpenAIResponsesStreamItem),
|
||||
response: Schema.optional(
|
||||
Schema.Struct({
|
||||
incomplete_details: Schema.optional(Schema.NullOr(Schema.Struct({ reason: Schema.String }))),
|
||||
usage: Schema.optional(OpenAIResponsesUsage),
|
||||
}),
|
||||
),
|
||||
code: Schema.optional(Schema.String),
|
||||
message: Schema.optional(Schema.String),
|
||||
})
|
||||
type OpenAIResponsesChunk = Schema.Schema.Type<typeof OpenAIResponsesChunk>
|
||||
|
||||
const { encodeTarget, decodeTarget, decodeChunk } = ProviderShared.codecs({
|
||||
adapter: ADAPTER,
|
||||
draft: OpenAIResponsesDraft,
|
||||
target: OpenAIResponsesTarget,
|
||||
chunk: OpenAIResponsesChunk,
|
||||
chunkErrorMessage: "Invalid OpenAI Responses stream chunk",
|
||||
})
|
||||
|
||||
interface ParserState {
|
||||
readonly tools: Record<string, ProviderShared.ToolAccumulator>
|
||||
}
|
||||
|
||||
const invalid = ProviderShared.invalidRequest
|
||||
|
||||
|
||||
|
||||
const lowerTool = (tool: ToolDefinition): OpenAIResponsesTool => ({
  type: "function",
  name: tool.name,
  description: tool.description,
  parameters: tool.inputSchema,
})

const lowerToolChoice = Effect.fn("OpenAIResponses.lowerToolChoice")(function* (
  toolChoice: NonNullable<LLMRequest["toolChoice"]>,
) {
  if (toolChoice.type !== "tool") return toolChoice.type
  if (!toolChoice.name) return yield* invalid("OpenAI Responses tool choice requires a tool name")
  return { type: "function" as const, name: toolChoice.name }
})

const lowerToolCall = (part: ToolCallPart): OpenAIResponsesInputItem => ({
  type: "function_call",
  call_id: part.id,
  name: part.name,
  arguments: ProviderShared.encodeJson(part.input),
})

const lowerMessages = Effect.fn("OpenAIResponses.lowerMessages")(function* (request: LLMRequest) {
  const system: OpenAIResponsesInputItem[] =
    request.system.length === 0 ? [] : [{ role: "system", content: ProviderShared.joinText(request.system) }]
  const input: OpenAIResponsesInputItem[] = [...system]

  for (const message of request.messages) {
    if (message.role === "user") {
      const content: TextPart[] = []
      for (const part of message.content) {
        if (part.type !== "text") return yield* invalid(`OpenAI Responses user messages only support text content for now`)
        content.push(part)
      }
      input.push({ role: "user", content: content.map((part) => ({ type: "input_text", text: part.text })) })
      continue
    }

    if (message.role === "assistant") {
      const content: TextPart[] = []
      for (const part of message.content) {
        if (part.type === "text") {
          content.push(part)
          continue
        }
        if (part.type === "tool-call") {
          input.push(lowerToolCall(part))
          continue
        }
        return yield* invalid(`OpenAI Responses assistant messages only support text and tool-call content for now`)
      }
      if (content.length > 0)
        input.push({ role: "assistant", content: content.map((part) => ({ type: "output_text", text: part.text })) })
      continue
    }

    for (const part of message.content) {
      if (part.type !== "tool-result")
        return yield* invalid(`OpenAI Responses tool messages only support tool-result content`)
      input.push({ type: "function_call_output", call_id: part.id, output: ProviderShared.toolResultText(part) })
    }
  }

  return input
})

const prepare = Effect.fn("OpenAIResponses.prepare")(function* (request: LLMRequest) {
  return {
    model: request.model.id,
    input: yield* lowerMessages(request),
    tools: request.tools.length === 0 ? undefined : request.tools.map(lowerTool),
    tool_choice: request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined,
    stream: true as const,
    max_output_tokens: request.generation.maxTokens,
    temperature: request.generation.temperature,
    top_p: request.generation.topP,
  }
})

const mapUsage = (usage: OpenAIResponsesUsage | undefined) => {
  if (!usage) return undefined
  return new Usage({
    inputTokens: usage.input_tokens,
    outputTokens: usage.output_tokens,
    reasoningTokens: usage.output_tokens_details?.reasoning_tokens,
    cacheReadInputTokens: usage.input_tokens_details?.cached_tokens,
    totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, usage.total_tokens),
    native: usage,
  })
}

const mapFinishReason = (chunk: OpenAIResponsesChunk): FinishReason => {
  if (chunk.type === "response.completed") return "stop"
  if (chunk.response?.incomplete_details?.reason === "max_output_tokens") return "length"
  if (chunk.response?.incomplete_details?.reason === "content_filter") return "content-filter"
  return "unknown"
}

const pushToolDelta = (tools: Record<string, ProviderShared.ToolAccumulator>, itemId: string, delta: string) =>
  Effect.gen(function* () {
    const current = tools[itemId]
    if (!current) {
      return yield* ProviderShared.chunkError(ADAPTER, "OpenAI Responses tool argument delta is missing its tool call")
    }
    return { ...current, input: `${current.input}${delta}` }
  })

const finishToolCall = (tools: Record<string, ProviderShared.ToolAccumulator>, item: NonNullable<OpenAIResponsesChunk["item"]>) =>
  Effect.gen(function* () {
    if (item.type !== "function_call" || !item.id || !item.call_id || !item.name) return [] as ReadonlyArray<LLMEvent>
    const raw = item.arguments ?? tools[item.id]?.input ?? ""
    const input = yield* ProviderShared.parseToolInput(ADAPTER, item.name, raw)
    return [{ type: "tool-call" as const, id: item.call_id, name: item.name, input }]
  })

const withoutTool = (tools: Record<string, ProviderShared.ToolAccumulator>, id: string | undefined) =>
  id === undefined ? tools : Object.fromEntries(Object.entries(tools).filter(([key]) => key !== id))

// Hosted tool items (provider-executed) ship their typed input + status + result
// fields all in one item. We expose them as a `tool-call` + `tool-result` pair
// so consumers can treat them uniformly with client tools, only differentiated
// by `providerExecuted: true`.
//
// item.type → tool name. Each entry is the OpenAI Responses item type that
// represents a hosted (provider-executed) tool call.
const HOSTED_TOOL_NAMES: Record<string, string> = {
  web_search_call: "web_search",
  web_search_preview_call: "web_search_preview",
  file_search_call: "file_search",
  code_interpreter_call: "code_interpreter",
  computer_use_call: "computer_use",
  image_generation_call: "image_generation",
  mcp_call: "mcp",
  local_shell_call: "local_shell",
}

const isHostedToolItem = (item: OpenAIResponsesStreamItem): item is OpenAIResponsesStreamItem & { id: string } =>
  item.type in HOSTED_TOOL_NAMES && typeof item.id === "string" && item.id.length > 0

// Pick the input fields the model actually populated when invoking the tool.
// The shape is tool-specific. Keep this list explicit so each tool's input is
// reviewable at a glance — fall back to `{}` for tools we haven't typed yet.
const hostedToolInput = (item: OpenAIResponsesStreamItem): unknown => {
  if (item.type === "web_search_call" || item.type === "web_search_preview_call") return item.action ?? {}
  if (item.type === "file_search_call") return { queries: item.queries ?? [] }
  if (item.type === "code_interpreter_call") return { code: item.code, container_id: item.container_id }
  if (item.type === "computer_use_call") return item.action ?? {}
  if (item.type === "local_shell_call") return item.action ?? {}
  if (item.type === "mcp_call") return { server_label: item.server_label, name: item.name, arguments: item.arguments }
  return {}
}

// Round-trip the full item as the structured result so consumers can extract
// outputs / sources / status without re-decoding.
const hostedToolResult = (item: OpenAIResponsesStreamItem) => {
  const isError = typeof item.error !== "undefined" && item.error !== null
  return isError
    ? ({ type: "error" as const, value: item.error })
    : ({ type: "json" as const, value: item })
}

const hostedToolEvents = (item: OpenAIResponsesStreamItem & { id: string }): ReadonlyArray<LLMEvent> => {
  const name = HOSTED_TOOL_NAMES[item.type]
  return [
    { type: "tool-call", id: item.id, name, input: hostedToolInput(item), providerExecuted: true },
    { type: "tool-result", id: item.id, name, result: hostedToolResult(item), providerExecuted: true },
  ]
}
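For illustration, here is how one completed hosted item lowers into the event pair; a minimal sketch with hypothetical ids and payload:

// Sketch (hypothetical values): a completed web_search item becomes a
// providerExecuted tool-call/tool-result pair with the full item as the result.
const item = {
  type: "web_search_call",
  id: "ws_123",
  status: "completed",
  action: { type: "search", query: "effect streams" },
} as OpenAIResponsesStreamItem & { id: string }

hostedToolEvents(item)
// → [
//     { type: "tool-call",   id: "ws_123", name: "web_search",
//       input: { type: "search", query: "effect streams" }, providerExecuted: true },
//     { type: "tool-result", id: "ws_123", name: "web_search",
//       result: { type: "json", value: item }, providerExecuted: true },
//   ]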
const processChunk = (state: ParserState, chunk: OpenAIResponsesChunk) =>
  Effect.gen(function* () {
    if (chunk.type === "response.output_text.delta" && chunk.delta) {
      return [state, [{ type: "text-delta", id: chunk.item_id, text: chunk.delta }]] as const
    }

    if (chunk.type === "response.output_item.added" && chunk.item?.type === "function_call" && chunk.item.id) {
      return [{
        tools: {
          ...state.tools,
          [chunk.item.id]: {
            id: chunk.item.call_id ?? chunk.item.id,
            name: chunk.item.name ?? "",
            input: chunk.item.arguments ?? "",
          },
        },
      }, []] as const
    }

    if (chunk.type === "response.function_call_arguments.delta" && chunk.item_id && chunk.delta) {
      const current = yield* pushToolDelta(state.tools, chunk.item_id, chunk.delta)
      return [{ tools: { ...state.tools, [chunk.item_id]: current } }, [
        { type: "tool-input-delta" as const, id: current.id, name: current.name, text: chunk.delta },
      ]] as const
    }

    if (chunk.type === "response.output_item.done" && chunk.item?.type === "function_call") {
      const events = yield* finishToolCall(state.tools, chunk.item)
      return [{ tools: withoutTool(state.tools, chunk.item.id) }, events] as const
    }

    if (chunk.type === "response.output_item.done" && chunk.item && isHostedToolItem(chunk.item)) {
      return [state, hostedToolEvents(chunk.item)] as const
    }

    if (chunk.type === "response.completed" || chunk.type === "response.incomplete") {
      return [state, [{ type: "request-finish" as const, reason: mapFinishReason(chunk), usage: mapUsage(chunk.response?.usage) }]] as const
    }

    if (chunk.type === "error") {
      return [state, [{ type: "provider-error" as const, message: chunk.message ?? chunk.code ?? "OpenAI Responses stream error" }]] as const
    }

    return [state, []] as const
  })
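To make the chunk state machine concrete, a sketch of how one streamed function call folds through `processChunk` (chunk shapes abbreviated, ids hypothetical):

// 1. response.output_item.added  (function_call, id: "item_1", call_id: "call_1", name: "get_weather")
//    → state.tools["item_1"] = { id: "call_1", name: "get_weather", input: "" }; no events
// 2. response.function_call_arguments.delta  (item_id: "item_1", delta: '{"city":')
//    → emits { type: "tool-input-delta", id: "call_1", name: "get_weather", text: '{"city":' }
// 3. response.output_item.done  (function_call with arguments: '{"city":"Oslo"}')
//    → emits { type: "tool-call", id: "call_1", name: "get_weather", input: { city: "Oslo" } }
//      and drops "item_1" from state.tools
// 4. response.completed
//    → emits { type: "request-finish", reason: "stop", usage: mapUsage(chunk.response?.usage) }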
/**
 * The OpenAI Responses protocol — request lowering, target validation, body
 * encoding, and the streaming-chunk state machine. Used by native OpenAI and
 * (once registered) Azure OpenAI Responses.
 */
export const protocol = Protocol.define<
  OpenAIResponsesDraft,
  OpenAIResponsesTarget,
  string,
  OpenAIResponsesChunk,
  ParserState
>({
  id: "openai-responses",
  prepare,
  // decodeTarget already maps parse errors to InvalidRequestError via
  // validateWith inside ProviderShared.codecs, so it drops in directly.
  validate: decodeTarget,
  encode: encodeTarget,
  redact: (target) => target,
  decode: decodeChunk,
  initial: () => ({ tools: {} }),
  process: processChunk,
  streamReadError: "Failed to read OpenAI Responses stream",
})

export const adapter = Adapter.fromProtocol({
  id: ADAPTER,
  protocol,
  endpoint: Endpoint.baseURL({ default: "https://api.openai.com/v1", path: "/responses" }),
  framing: Framing.sse,
})
export const model = (input: OpenAIResponsesModelInput) =>
  llmModel({
    ...input,
    provider: "openai",
    protocol: "openai-responses",
    capabilities: input.capabilities ?? capabilities({ tools: { calls: true, streamingInput: true } }),
  })

export * as OpenAIResponses from "./openai-responses"

5 packages/llm/src/provider/openai.ts Normal file
@@ -0,0 +1,5 @@
import { ProviderResolver } from "../provider-resolver"

export const resolver = ProviderResolver.fixed("openai", "openai-responses")

export * as OpenAI from "./openai"
91 packages/llm/src/provider/patch.ts Normal file
@@ -0,0 +1,91 @@
import { Model, Patch, predicate } from "../patch"
import { CacheHint } from "../schema"
import type { ContentPart, LLMRequest } from "../schema"

const removeEmptyParts = (content: ReadonlyArray<ContentPart>) =>
  content.filter((part) => (part.type === "text" || part.type === "reasoning" ? part.text !== "" : true))

const rewriteToolIds = (request: LLMRequest, scrub: (id: string) => string): LLMRequest => ({
  ...request,
  messages: request.messages.map((message) => {
    if (message.role !== "assistant" && message.role !== "tool") return message
    return {
      ...message,
      content: message.content.map((part) => {
        if (part.type === "tool-call" || part.type === "tool-result") return { ...part, id: scrub(part.id) }
        return part
      }),
    }
  }),
})

export const removeEmptyAnthropicContent = Patch.prompt("anthropic.remove-empty-content", {
  reason: "remove empty text/reasoning blocks for providers that reject empty content",
  when: Model.provider("anthropic").or(Model.provider("bedrock"), Model.provider("amazon-bedrock")),
  apply: (request) => ({
    ...request,
    system: request.system.filter((part) => part.text !== ""),
    messages: request.messages
      .map((message) => ({ ...message, content: removeEmptyParts(message.content) }))
      .filter((message) => message.content.length > 0),
  }),
})

export const scrubClaudeToolIds = Patch.prompt("anthropic.scrub-tool-call-ids", {
  reason: "Claude tool_use ids only accept alphanumeric, underscore, and dash characters",
  when: Model.idIncludes("claude"),
  apply: (request) => rewriteToolIds(request, (id) => id.replace(/[^a-zA-Z0-9_-]/g, "_")),
})

export const scrubMistralToolIds = Patch.prompt("mistral.scrub-tool-call-ids", {
  reason: "Mistral tool call ids must be short alphanumeric identifiers",
  when: Model.provider("mistral").or(Model.idIncludes("mistral"), Model.idIncludes("devstral")),
  apply: (request) => rewriteToolIds(request, (id) => id.replace(/[^a-zA-Z0-9]/g, "").slice(0, 9).padEnd(9, "0")),
})
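Worked examples of the two scrub policies above (hypothetical ids):

const claude = (id: string) => id.replace(/[^a-zA-Z0-9_-]/g, "_")
const mistral = (id: string) => id.replace(/[^a-zA-Z0-9]/g, "").slice(0, 9).padEnd(9, "0")
claude("call:abc/123")  // → "call_abc_123" (disallowed characters become "_")
mistral("call:abc/123") // → "callabc12"    (strip, then cap at 9 characters)
mistral("x!y")          // → "xy0000000"    (short ids are zero-padded to 9)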
// Single shared CacheHint instance — the cache patch reuses this one object
// across every marked part. Adapters lower CacheHint structurally
// (`cache?.type === "ephemeral"`) so reference equality is incidental, but
// keeping a class instance preserves any consumer that checks
// `instanceof CacheHint`.
const EPHEMERAL_CACHE = new CacheHint({ type: "ephemeral" })

const withCacheOnLastText = (content: ReadonlyArray<ContentPart>): ReadonlyArray<ContentPart> => {
  const last = content.findLastIndex((part) => part.type === "text")
  if (last === -1) return content
  return content.map((part, index) =>
    index === last && part.type === "text" ? { ...part, cache: EPHEMERAL_CACHE } : part,
  )
}

// Anthropic and Bedrock both honor up to four positional cache breakpoints.
// We mark the first 2 system parts and the last 2 messages — the same policy
// OpenCode uses on the AI-SDK path (`session.applyCaching` in
// packages/opencode/src/provider/transform.ts). The capability gate makes
// this a no-op for adapters that don't advertise prompt-level caching, so
// non-cache providers (OpenAI Responses, Gemini, OpenAI-compatible Chat)
// are unaffected.
export const cachePromptHints = Patch.prompt("cache.prompt-hints", {
  reason: "mark first 2 system parts and last 2 messages with ephemeral cache hints on cache-capable adapters",
  when: predicate((context) => context.model.capabilities.cache?.prompt === true),
  apply: (request) => ({
    ...request,
    system: request.system.map((part, index) =>
      index < 2 ? { ...part, cache: EPHEMERAL_CACHE } : part,
    ),
    messages: request.messages.map((message, index) =>
      index < request.messages.length - 2
        ? message
        : { ...message, content: withCacheOnLastText(message.content) },
    ),
  }),
})
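The index arithmetic, spelled out for a hypothetical request with 3 system parts and 5 messages:

// system[0], system[1]      → marked (index < 2); system[2] untouched
// messages[0..2]            → untouched (index < length - 2)
// messages[3], messages[4]  → the last text part of each gets the ephemeral hint
// At most 2 + 2 = 4 positional breakpoints are spent, matching the provider limit.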
export const defaults = [
  removeEmptyAnthropicContent,
  scrubClaudeToolIds,
  scrubMistralToolIds,
  cachePromptHints,
]

export * as ProviderPatch from "./patch"

235 packages/llm/src/provider/shared.ts Normal file
@@ -0,0 +1,235 @@
import { Buffer } from "node:buffer"
import { Cause, Effect, Schema, Stream } from "effect"
import * as Sse from "effect/unstable/encoding/Sse"
import { HttpClientRequest, type HttpClientResponse } from "effect/unstable/http"
import { InvalidRequestError, ProviderChunkError, type MediaPart, type ToolResultPart } from "../schema"

export const Json = Schema.fromJsonString(Schema.Unknown)
export const decodeJson = Schema.decodeUnknownSync(Json)
export const encodeJson = Schema.encodeSync(Json)

/**
 * Plain-record narrowing. Excludes arrays so adapters checking nested JSON
 * Schema fragments don't accidentally treat a tuple as a key/value bag.
 */
export const isRecord = (value: unknown): value is Record<string, unknown> =>
  typeof value === "object" && value !== null && !Array.isArray(value)

/**
 * Streaming tool-call accumulator. Adapters that build a tool call across
 * multiple `tool-input-delta` chunks store the partial JSON input string here
 * and finalize it with `parseToolInput` once the call completes. Anthropic
 * extends this with a `providerExecuted` flag for hosted (server-side) tools;
 * it should be the only adapter to do so.
 */
export interface ToolAccumulator {
  readonly id: string
  readonly name: string
  readonly input: string
}

/**
 * Codec bundle for a streaming JSON adapter:
 *
 * - `encodeTarget(target)` produces the JSON string body for `jsonPost`.
 * - `decodeTarget(draft)` runs the Schema-driven `Draft → Target` decode
 *   inside an Effect, mapping parse errors to `InvalidRequestError` via
 *   `validateWith` so the result drops directly into a protocol's `validate`
 *   field.
 * - `decodeChunk(data)` decodes one streaming JSON chunk against the chunk
 *   schema. It expects a `string` (the SSE data field).
 *
 * Adapters that need a totally different decode shape (e.g. Bedrock's
 * event-stream payloads, whose framing already produces a parsed object)
 * should still hand-roll those pieces — the helper covers the common
 * SSE-JSON case used by 4 of 6 adapters today.
 */
export const codecs = <Draft, Target, Chunk>(input: {
  readonly adapter: string
  readonly draft: Schema.Codec<Draft, unknown>
  readonly target: Schema.Codec<Target, unknown>
  readonly chunk: Schema.Codec<Chunk, unknown>
  readonly chunkErrorMessage: string
}) => {
  const encodeTarget = Schema.encodeSync(Schema.fromJsonString(input.target))
  const decodeTarget = validateWith(
    Schema.decodeUnknownEffect(input.draft.pipe(Schema.decodeTo(input.target))),
  )
  const decodeChunkSync = Schema.decodeUnknownSync(Schema.fromJsonString(input.chunk))
  const decodeChunk = (data: string) =>
    Effect.try({
      try: () => decodeChunkSync(data),
      catch: () => chunkError(input.adapter, input.chunkErrorMessage, data),
    })
  return { encodeTarget, decodeTarget, decodeChunk }
}
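A stripped-down usage sketch, mirroring the OpenAI Responses call site (the struct schemas here are hypothetical):

const ExampleTarget = Schema.Struct({ model: Schema.String, stream: Schema.Literal(true) })
const ExampleChunk = Schema.Struct({ type: Schema.String, delta: Schema.optional(Schema.String) })

const example = codecs({
  adapter: "example",
  draft: ExampleTarget, // Draft and Target may share fields, as in openai-responses
  target: ExampleTarget,
  chunk: ExampleChunk,
  chunkErrorMessage: "Invalid example stream chunk",
})
// example.encodeTarget({ model: "m", stream: true })  → '{"model":"m","stream":true}'
// example.decodeTarget(draft)                         → Effect<Target, InvalidRequestError>
// example.decodeChunk('{"type":"x"}')                 → Effect<Chunk, ProviderChunkError>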
/**
 * `Usage.totalTokens` policy shared by every adapter. Honors a provider-
 * supplied total; otherwise falls back to `inputTokens + outputTokens` only
 * when at least one is defined. Returns `undefined` when neither input nor
 * output is known so adapters don't publish a misleading `0`.
 */
export const totalTokens = (
  inputTokens: number | undefined,
  outputTokens: number | undefined,
  total: number | undefined,
) => {
  if (total !== undefined) return total
  if (inputTokens === undefined && outputTokens === undefined) return undefined
  return (inputTokens ?? 0) + (outputTokens ?? 0)
}
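The policy at a glance:

totalTokens(10, 20, 35)                      // → 35 (provider total wins)
totalTokens(10, 20, undefined)               // → 30 (computed fallback)
totalTokens(10, undefined, undefined)        // → 10 (missing side treated as 0)
totalTokens(undefined, undefined, undefined) // → undefined (never a misleading 0)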
export const chunkError = (adapter: string, message: string, raw?: string) =>
  new ProviderChunkError({ adapter, message, raw })

export const parseJson = (adapter: string, input: string, message: string) =>
  Effect.try({
    try: () => decodeJson(input),
    catch: () => chunkError(adapter, message, input),
  })

/**
 * Join the `text` field of a list of parts with newlines. Used by adapters
 * that flatten system / message content arrays into a single provider string
 * (OpenAI Chat `system` content, OpenAI Responses `system` content, Gemini
 * `systemInstruction.parts[].text`).
 */
export const joinText = (parts: ReadonlyArray<{ readonly text: string }>) =>
  parts.map((part) => part.text).join("\n")

/**
 * Parse the streamed JSON input of a tool call. Treats an empty string as
 * `"{}"` — providers occasionally finish a tool call without ever emitting
 * input deltas (e.g. zero-arg tools). The error message is uniform across
 * adapters: `Invalid JSON input for <adapter> tool call <name>`.
 */
export const parseToolInput = (adapter: string, name: string, raw: string) =>
  parseJson(adapter, raw || "{}", `Invalid JSON input for ${adapter} tool call ${name}`)
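So for a zero-arg tool that streams no input deltas (a sketch):

parseToolInput("openai-responses", "list_files", "")
// succeeds with {} because the empty string is parsed as "{}"
parseToolInput("openai-responses", "list_files", "{bad")
// fails with ProviderChunkError:
//   "Invalid JSON input for openai-responses tool call list_files"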
/**
 * Encode a `MediaPart`'s raw bytes for inclusion in a JSON request body.
 * `data: string` is assumed to already be base64 (matches caller convention
 * across Gemini / Bedrock); `data: Uint8Array` is base64-encoded here. Used
 * by every adapter that supports image / document inputs.
 */
export const mediaBytes = (part: MediaPart) =>
  typeof part.data === "string" ? part.data : Buffer.from(part.data).toString("base64")

export const trimBaseUrl = (value: string) => value.replace(/\/+$/, "")

export const toolResultText = (part: ToolResultPart) => {
  if (part.result.type === "text" || part.result.type === "error") return String(part.result.value)
  return encodeJson(part.result.value)
}

const errorText = (error: unknown) => {
  if (error instanceof Error) return error.message
  if (typeof error === "string") return error
  if (typeof error === "number" || typeof error === "boolean" || typeof error === "bigint") return String(error)
  if (error === null) return "null"
  if (error === undefined) return "undefined"
  return "Unknown stream error"
}

const streamError = (adapter: string, message: string, cause: Cause.Cause<unknown>) => {
  const failed = cause.reasons.find(Cause.isFailReason)?.error
  if (failed instanceof ProviderChunkError) return failed
  return chunkError(adapter, message, Cause.pretty(cause))
}

/**
 * Generic streaming-response decoder used by `Adapter.fromProtocol`. Splits
 * the response stream into:
 *
 *   bytes → frames (caller-supplied) → chunk → (state, events)
 *
 * The `framing` step is the protocol-specific part — `Framing.sse` uses
 * `sseFraming` below; binary protocols (Bedrock event-stream) supply their
 * own byte-level decoder. Everything else (transport-error normalization,
 * schema decoding per chunk, stateful chunk → event mapping, `onHalt` flush,
 * terminal-error normalization) is shared.
 */
export const framed = <Frame, Chunk, State, Event>(input: {
  readonly adapter: string
  readonly response: HttpClientResponse.HttpClientResponse
  readonly readError: string
  readonly framing: (
    bytes: Stream.Stream<Uint8Array, ProviderChunkError>,
  ) => Stream.Stream<Frame, ProviderChunkError>
  readonly decodeChunk: (frame: Frame) => Effect.Effect<Chunk, ProviderChunkError>
  readonly initial: () => State
  readonly process: (
    state: State,
    chunk: Chunk,
  ) => Effect.Effect<readonly [State, ReadonlyArray<Event>], ProviderChunkError>
  readonly onHalt?: (state: State) => ReadonlyArray<Event>
}): Stream.Stream<Event, ProviderChunkError> => {
  const bytes = input.response.stream.pipe(
    Stream.mapError((error) => chunkError(input.adapter, input.readError, errorText(error))),
  )
  return input.framing(bytes).pipe(
    Stream.mapEffect(input.decodeChunk),
    Stream.mapAccumEffect(input.initial, input.process, input.onHalt ? { onHalt: input.onHalt } : undefined),
    Stream.catchCause((cause) => Stream.fail(streamError(input.adapter, input.readError, cause))),
  )
}

/**
 * `framing` step for Server-Sent Events. Decodes UTF-8, runs the SSE channel
 * decoder, and drops empty / `[DONE]` keep-alive events so the downstream
 * `decodeChunk` sees one JSON string per element. The SSE channel emits a
 * `Retry` control event on its error channel; we drop it here (we don't
 * implement client-driven retries) so the public error channel stays
 * `ProviderChunkError`.
 */
export const sseFraming = (
  bytes: Stream.Stream<Uint8Array, ProviderChunkError>,
): Stream.Stream<string, ProviderChunkError> =>
  bytes.pipe(
    Stream.decodeText(),
    Stream.pipeThroughChannel(Sse.decode()),
    Stream.catchTag("Retry", () => Stream.empty),
    Stream.filter((event) => event.data.length > 0 && event.data !== "[DONE]"),
    Stream.map((event) => event.data),
  )
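For example, given these raw SSE events on the wire (a sketch):

// data: {"type":"response.output_text.delta","delta":"Hi"}
// data: [DONE]
// (empty keep-alive event)
//
// sseFraming emits exactly one element:
//   '{"type":"response.output_text.delta","delta":"Hi"}'
// [DONE] and empty events are filtered out; a Retry control event is swallowed.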
/**
 * Canonical `InvalidRequestError` constructor. Lift one-line `const invalid =
 * (message) => new InvalidRequestError({ message })` aliases out of every
 * adapter so the error constructor lives in one place. If we ever extend
 * `InvalidRequestError` with adapter context or trace metadata, the change
 * lands here.
 */
export const invalidRequest = (message: string) => new InvalidRequestError({ message })

/**
 * Build a `validate` step from a Schema decoder. Replaces the per-adapter
 * lambda body `(draft) => decode(draft).pipe(Effect.mapError((e) =>
 * invalid(e.message)))`. Any decode error is translated into
 * `InvalidRequestError` carrying the original parse-error message.
 */
export const validateWith =
  <A, I, E extends { readonly message: string }>(decode: (input: I) => Effect.Effect<A, E>) =>
  (draft: I) =>
    decode(draft).pipe(Effect.mapError((error) => invalidRequest(error.message)))

/**
 * Build an HTTP POST with a JSON body. Sets `content-type: application/json`
 * automatically (callers can't override it — every adapter today places it
 * last so caller headers win on everything else) and merges caller-supplied
 * headers. The body is passed pre-encoded so adapters can choose between
 * `Schema.encodeSync(target)` and `ProviderShared.encodeJson(target)`.
 */
export const jsonPost = (input: {
  readonly url: string
  readonly body: string
  readonly headers?: Record<string, string>
}) =>
  HttpClientRequest.post(input.url).pipe(
    HttpClientRequest.setHeaders({ ...input.headers, "content-type": "application/json" }),
    HttpClientRequest.bodyText(input.body, "application/json"),
  )
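Usage sketch (hypothetical endpoint and body) showing the header-merge behavior:

const request = jsonPost({
  url: "https://api.example.com/v1/responses",
  body: '{"model":"m","stream":true}',
  headers: { authorization: "Bearer sk-…", "content-type": "text/plain" },
})
// The caller's content-type is overridden (the fixed value is spread last);
// the authorization header passes through untouched.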
export * as ProviderShared from "./shared"

5 packages/llm/src/provider/xai.ts Normal file
@@ -0,0 +1,5 @@
import { ProviderResolver } from "../provider-resolver"

export const resolver = ProviderResolver.fixed("xai", "openai-responses")

export * as XAI from "./xai"

471 packages/llm/src/schema.ts Normal file
@@ -0,0 +1,471 @@
import { Schema } from "effect"

/**
 * Stable string identifier for a protocol implementation. The discriminator
 * value lives on `ModelRef.protocol` and on the `Adapter.protocol` field;
 * the runtime registry keys lookups by it. The implementation type itself is
 * `Protocol` (see `protocol.ts`).
 */
export const ProtocolID = Schema.Literals([
  "openai-chat",
  "openai-compatible-chat",
  "openai-responses",
  "anthropic-messages",
  "gemini",
  "bedrock-converse",
])
export type ProtocolID = Schema.Schema.Type<typeof ProtocolID>

export const ModelID = Schema.String.pipe(Schema.brand("LLM.ModelID"))
export type ModelID = typeof ModelID.Type

export const ProviderID = Schema.String.pipe(Schema.brand("LLM.ProviderID"))
export type ProviderID = typeof ProviderID.Type

export const ReasoningEfforts = ["none", "minimal", "low", "medium", "high", "xhigh", "max"] as const
export const ReasoningEffort = Schema.Literals(ReasoningEfforts)
export type ReasoningEffort = Schema.Schema.Type<typeof ReasoningEffort>

export const PatchPhase = Schema.Literals(["request", "prompt", "tool-schema", "target", "stream"])
export type PatchPhase = Schema.Schema.Type<typeof PatchPhase>

export const MessageRole = Schema.Literals(["user", "assistant", "tool"])
export type MessageRole = Schema.Schema.Type<typeof MessageRole>

export const FinishReason = Schema.Literals(["stop", "length", "tool-calls", "content-filter", "error", "unknown"])
export type FinishReason = Schema.Schema.Type<typeof FinishReason>

export const JsonSchema = Schema.Record(Schema.String, Schema.Unknown)
export type JsonSchema = Schema.Schema.Type<typeof JsonSchema>

export class ModelCapabilities extends Schema.Class<ModelCapabilities>("LLM.ModelCapabilities")({
  input: Schema.Struct({
    text: Schema.Boolean,
    image: Schema.Boolean,
    audio: Schema.Boolean,
    video: Schema.Boolean,
    pdf: Schema.Boolean,
  }),
  output: Schema.Struct({
    text: Schema.Boolean,
    reasoning: Schema.Boolean,
  }),
  tools: Schema.Struct({
    calls: Schema.Boolean,
    streamingInput: Schema.Boolean,
    providerExecuted: Schema.Boolean,
  }),
  cache: Schema.Struct({
    prompt: Schema.Boolean,
    messageBlocks: Schema.Boolean,
    contentBlocks: Schema.Boolean,
  }),
  reasoning: Schema.Struct({
    efforts: Schema.Array(ReasoningEffort),
    summaries: Schema.Boolean,
    encryptedContent: Schema.Boolean,
  }),
}) {}

export class ModelLimits extends Schema.Class<ModelLimits>("LLM.ModelLimits")({
  context: Schema.optional(Schema.Number),
  output: Schema.optional(Schema.Number),
}) {}

export class ModelRef extends Schema.Class<ModelRef>("LLM.ModelRef")({
  id: ModelID,
  provider: ProviderID,
  protocol: ProtocolID,
  baseURL: Schema.optional(Schema.String),
  /**
   * Auth secret read by `Auth.bearer` / `Auth.apiKeyHeader` at request time.
   * Lives here so authentication is not baked into `headers` at construction
   * time and the `Auth` axis can actually do its job per request.
   */
  apiKey: Schema.optional(Schema.String),
  headers: Schema.optional(Schema.Record(Schema.String, Schema.String)),
  /**
   * Query params appended to the request URL by `Endpoint.baseURL`. Used for
   * deployment-level URL-scoped settings such as Azure's `api-version` or any
   * provider that requires a per-request key in the URL. Generic concern, so
   * lives as a typed first-class field instead of `native`.
   */
  queryParams: Schema.optional(Schema.Record(Schema.String, Schema.String)),
  capabilities: ModelCapabilities,
  limits: ModelLimits,
  /**
   * Provider-specific opaque options. Reach for this only when the value is
   * genuinely provider-private and does not fit a typed axis (e.g. Bedrock's
   * `aws_credentials` / `aws_region` for SigV4). Anything used by more than
   * one adapter should grow into a typed field instead.
   */
  native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}

export class CacheHint extends Schema.Class<CacheHint>("LLM.CacheHint")({
  type: Schema.Literals(["ephemeral", "persistent"]),
  ttlSeconds: Schema.optional(Schema.Number),
}) {}

export const SystemPart = Schema.Struct({
  type: Schema.Literal("text"),
  text: Schema.String,
  cache: Schema.optional(CacheHint),
  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}).annotate({ identifier: "LLM.SystemPart" })
export type SystemPart = Schema.Schema.Type<typeof SystemPart>

export const TextPart = Schema.Struct({
  type: Schema.Literal("text"),
  text: Schema.String,
  cache: Schema.optional(CacheHint),
  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}).annotate({ identifier: "LLM.Content.Text" })
export type TextPart = Schema.Schema.Type<typeof TextPart>

export const MediaPart = Schema.Struct({
  type: Schema.Literal("media"),
  mediaType: Schema.String,
  data: Schema.Union([Schema.String, Schema.Uint8Array]),
  filename: Schema.optional(Schema.String),
  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}).annotate({ identifier: "LLM.Content.Media" })
export type MediaPart = Schema.Schema.Type<typeof MediaPart>

export const ToolResultValue = Schema.Struct({
  type: Schema.Literals(["json", "text", "error"]),
  value: Schema.Unknown,
}).annotate({ identifier: "LLM.ToolResult" })
export type ToolResultValue = Schema.Schema.Type<typeof ToolResultValue>

export const ToolCallPart = Schema.Struct({
  type: Schema.Literal("tool-call"),
  id: Schema.String,
  name: Schema.String,
  input: Schema.Unknown,
  providerExecuted: Schema.optional(Schema.Boolean),
  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}).annotate({ identifier: "LLM.Content.ToolCall" })
export type ToolCallPart = Schema.Schema.Type<typeof ToolCallPart>

export const ToolResultPart = Schema.Struct({
  type: Schema.Literal("tool-result"),
  id: Schema.String,
  name: Schema.String,
  result: ToolResultValue,
  providerExecuted: Schema.optional(Schema.Boolean),
  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}).annotate({ identifier: "LLM.Content.ToolResult" })
export type ToolResultPart = Schema.Schema.Type<typeof ToolResultPart>

export const ReasoningPart = Schema.Struct({
  type: Schema.Literal("reasoning"),
  text: Schema.String,
  encrypted: Schema.optional(Schema.String),
  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}).annotate({ identifier: "LLM.Content.Reasoning" })
export type ReasoningPart = Schema.Schema.Type<typeof ReasoningPart>

export const ContentPart = Schema.Union([TextPart, MediaPart, ToolCallPart, ToolResultPart, ReasoningPart]).pipe(
  Schema.toTaggedUnion("type"),
)
export type ContentPart = Schema.Schema.Type<typeof ContentPart>

export class Message extends Schema.Class<Message>("LLM.Message")({
  id: Schema.optional(Schema.String),
  role: MessageRole,
  content: Schema.Array(ContentPart),
  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
  native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}

export class ToolDefinition extends Schema.Class<ToolDefinition>("LLM.ToolDefinition")({
  name: Schema.String,
  description: Schema.String,
  inputSchema: JsonSchema,
  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
  native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}

export class ToolChoice extends Schema.Class<ToolChoice>("LLM.ToolChoice")({
  type: Schema.Literals(["auto", "none", "required", "tool"]),
  name: Schema.optional(Schema.String),
}) {}

export class GenerationOptions extends Schema.Class<GenerationOptions>("LLM.GenerationOptions")({
  maxTokens: Schema.optional(Schema.Number),
  temperature: Schema.optional(Schema.Number),
  topP: Schema.optional(Schema.Number),
  stop: Schema.optional(Schema.Array(Schema.String)),
}) {}

export class ReasoningIntent extends Schema.Class<ReasoningIntent>("LLM.ReasoningIntent")({
  enabled: Schema.Boolean,
  effort: Schema.optional(ReasoningEffort),
  summary: Schema.optional(Schema.Boolean),
  encryptedContent: Schema.optional(Schema.Boolean),
}) {}

export class CacheIntent extends Schema.Class<CacheIntent>("LLM.CacheIntent")({
  enabled: Schema.Boolean,
  key: Schema.optional(Schema.String),
}) {}

export const ResponseFormat = Schema.Union([
  Schema.Struct({ type: Schema.Literal("text") }),
  Schema.Struct({ type: Schema.Literal("json"), schema: JsonSchema }),
  Schema.Struct({ type: Schema.Literal("tool"), tool: ToolDefinition }),
])
export type ResponseFormat = Schema.Schema.Type<typeof ResponseFormat>

export class LLMRequest extends Schema.Class<LLMRequest>("LLM.Request")({
  id: Schema.optional(Schema.String),
  model: ModelRef,
  system: Schema.Array(SystemPart),
  messages: Schema.Array(Message),
  tools: Schema.Array(ToolDefinition),
  toolChoice: Schema.optional(ToolChoice),
  generation: GenerationOptions,
  reasoning: Schema.optional(ReasoningIntent),
  cache: Schema.optional(CacheIntent),
  responseFormat: Schema.optional(ResponseFormat),
  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
  native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}

export class Usage extends Schema.Class<Usage>("LLM.Usage")({
  inputTokens: Schema.optional(Schema.Number),
  outputTokens: Schema.optional(Schema.Number),
  reasoningTokens: Schema.optional(Schema.Number),
  cacheReadInputTokens: Schema.optional(Schema.Number),
  cacheWriteInputTokens: Schema.optional(Schema.Number),
  totalTokens: Schema.optional(Schema.Number),
  native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}

export const RequestStart = Schema.Struct({
  type: Schema.Literal("request-start"),
  id: Schema.String,
  model: ModelRef,
}).annotate({ identifier: "LLM.Event.RequestStart" })
export type RequestStart = Schema.Schema.Type<typeof RequestStart>

export const StepStart = Schema.Struct({
  type: Schema.Literal("step-start"),
  index: Schema.Number,
}).annotate({ identifier: "LLM.Event.StepStart" })
export type StepStart = Schema.Schema.Type<typeof StepStart>

export const TextStart = Schema.Struct({
  type: Schema.Literal("text-start"),
  id: Schema.String,
}).annotate({ identifier: "LLM.Event.TextStart" })
export type TextStart = Schema.Schema.Type<typeof TextStart>

export const TextDelta = Schema.Struct({
  type: Schema.Literal("text-delta"),
  id: Schema.optional(Schema.String),
  text: Schema.String,
}).annotate({ identifier: "LLM.Event.TextDelta" })
export type TextDelta = Schema.Schema.Type<typeof TextDelta>

export const TextEnd = Schema.Struct({
  type: Schema.Literal("text-end"),
  id: Schema.String,
}).annotate({ identifier: "LLM.Event.TextEnd" })
export type TextEnd = Schema.Schema.Type<typeof TextEnd>

export const ReasoningDelta = Schema.Struct({
  type: Schema.Literal("reasoning-delta"),
  id: Schema.optional(Schema.String),
  text: Schema.String,
}).annotate({ identifier: "LLM.Event.ReasoningDelta" })
export type ReasoningDelta = Schema.Schema.Type<typeof ReasoningDelta>

export const ToolInputDelta = Schema.Struct({
  type: Schema.Literal("tool-input-delta"),
  id: Schema.String,
  name: Schema.String,
  text: Schema.String,
}).annotate({ identifier: "LLM.Event.ToolInputDelta" })
export type ToolInputDelta = Schema.Schema.Type<typeof ToolInputDelta>

export const ToolCall = Schema.Struct({
  type: Schema.Literal("tool-call"),
  id: Schema.String,
  name: Schema.String,
  input: Schema.Unknown,
  providerExecuted: Schema.optional(Schema.Boolean),
}).annotate({ identifier: "LLM.Event.ToolCall" })
export type ToolCall = Schema.Schema.Type<typeof ToolCall>

export const ToolResult = Schema.Struct({
  type: Schema.Literal("tool-result"),
  id: Schema.String,
  name: Schema.String,
  result: ToolResultValue,
  providerExecuted: Schema.optional(Schema.Boolean),
}).annotate({ identifier: "LLM.Event.ToolResult" })
export type ToolResult = Schema.Schema.Type<typeof ToolResult>

export const ToolError = Schema.Struct({
  type: Schema.Literal("tool-error"),
  id: Schema.String,
  name: Schema.String,
  message: Schema.String,
}).annotate({ identifier: "LLM.Event.ToolError" })
export type ToolError = Schema.Schema.Type<typeof ToolError>

export const StepFinish = Schema.Struct({
  type: Schema.Literal("step-finish"),
  index: Schema.Number,
  reason: FinishReason,
  usage: Schema.optional(Usage),
}).annotate({ identifier: "LLM.Event.StepFinish" })
export type StepFinish = Schema.Schema.Type<typeof StepFinish>

export const RequestFinish = Schema.Struct({
  type: Schema.Literal("request-finish"),
  reason: FinishReason,
  usage: Schema.optional(Usage),
}).annotate({ identifier: "LLM.Event.RequestFinish" })
export type RequestFinish = Schema.Schema.Type<typeof RequestFinish>

export const ProviderErrorEvent = Schema.Struct({
  type: Schema.Literal("provider-error"),
  message: Schema.String,
  retryable: Schema.optional(Schema.Boolean),
}).annotate({ identifier: "LLM.Event.ProviderError" })
export type ProviderErrorEvent = Schema.Schema.Type<typeof ProviderErrorEvent>

const llmEventTagged = Schema.Union([
  RequestStart,
  StepStart,
  TextStart,
  TextDelta,
  TextEnd,
  ReasoningDelta,
  ToolInputDelta,
  ToolCall,
  ToolResult,
  ToolError,
  StepFinish,
  RequestFinish,
  ProviderErrorEvent,
]).pipe(Schema.toTaggedUnion("type"))

/**
 * camelCase aliases for `LLMEvent.guards` (provided by `Schema.toTaggedUnion`).
 * Lets consumers write `events.filter(LLMEvent.is.toolCall)` instead of
 * `events.filter(LLMEvent.guards["tool-call"])`.
 */
export const LLMEvent = Object.assign(llmEventTagged, {
  is: {
    requestStart: llmEventTagged.guards["request-start"],
    stepStart: llmEventTagged.guards["step-start"],
    textStart: llmEventTagged.guards["text-start"],
    textDelta: llmEventTagged.guards["text-delta"],
    textEnd: llmEventTagged.guards["text-end"],
    reasoningDelta: llmEventTagged.guards["reasoning-delta"],
    toolInputDelta: llmEventTagged.guards["tool-input-delta"],
    toolCall: llmEventTagged.guards["tool-call"],
    toolResult: llmEventTagged.guards["tool-result"],
    toolError: llmEventTagged.guards["tool-error"],
    stepFinish: llmEventTagged.guards["step-finish"],
    requestFinish: llmEventTagged.guards["request-finish"],
    providerError: llmEventTagged.guards["provider-error"],
  },
})
export type LLMEvent = Schema.Schema.Type<typeof llmEventTagged>
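A usage sketch, assuming `events: ReadonlyArray<LLMEvent>` collected from a finished stream:

const text = events.filter(LLMEvent.is.textDelta).map((event) => event.text).join("")
const calls = events.filter(LLMEvent.is.toolCall) // elements narrow to the ToolCall shape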
export class PatchTrace extends Schema.Class<PatchTrace>("LLM.PatchTrace")({
  id: Schema.String,
  phase: PatchPhase,
  reason: Schema.String,
}) {}

export class PreparedRequest extends Schema.Class<PreparedRequest>("LLM.PreparedRequest")({
  id: Schema.String,
  adapter: Schema.String,
  model: ModelRef,
  target: Schema.Unknown,
  redactedTarget: Schema.Unknown,
  patchTrace: Schema.Array(PatchTrace),
  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}

/**
 * A `PreparedRequest` whose `target` is typed as `Target`. Use with the
 * generic on `LLMClient.prepare<Target>(...)` when the caller knows which
 * adapter their request will resolve to and wants its native shape statically
 * exposed (debug UIs, request previews, plan rendering).
 *
 * The runtime payload is identical — the adapter still emits `target: unknown`
 * — so this is a type-level assertion the caller makes about what they expect
 * to find. The prepare runtime does not validate the assertion.
 */
export type PreparedRequestOf<Target> = Omit<PreparedRequest, "target"> & {
  readonly target: Target
}

export class LLMResponse extends Schema.Class<LLMResponse>("LLM.Response")({
  events: Schema.Array(LLMEvent),
  usage: Schema.optional(Usage),
}) {}

export class InvalidRequestError extends Schema.TaggedErrorClass<InvalidRequestError>()("LLM.InvalidRequestError", {
  message: Schema.String,
}) {}

export class NoAdapterError extends Schema.TaggedErrorClass<NoAdapterError>()("LLM.NoAdapterError", {
  protocol: ProtocolID,
  provider: ProviderID,
  model: ModelID,
}) {
  override get message() {
    return `No LLM adapter for ${this.provider}/${this.model} using ${this.protocol}`
  }
}

export class ProviderChunkError extends Schema.TaggedErrorClass<ProviderChunkError>()("LLM.ProviderChunkError", {
  adapter: Schema.String,
  message: Schema.String,
  raw: Schema.optional(Schema.String),
}) {}

export class ProviderRequestError extends Schema.TaggedErrorClass<ProviderRequestError>()("LLM.ProviderRequestError", {
  status: Schema.Number,
  message: Schema.String,
  body: Schema.optional(Schema.String),
}) {}

export class TransportError extends Schema.TaggedErrorClass<TransportError>()("LLM.TransportError", {
  message: Schema.String,
  // Optional originating reason — populated for structured HTTP transport
  // failures (e.g. `RequestError`, `ResponseError`, `IsTimeoutError`) so
  // consumers can render the underlying cause without parsing the message.
  reason: Schema.optional(Schema.String),
  // Optional URL of the failing request when the transport layer surfaces it.
  url: Schema.optional(Schema.String),
}) {}

/**
 * Failure type for tool execute handlers. Handlers must map their internal
 * errors to this shape; the runtime catches `ToolFailure`s and surfaces them
 * as `tool-error` events plus a `tool-result` of `type: "error"` so the model
 * can self-correct.
 *
 * Anything thrown or yielded by a handler that is not a `ToolFailure` is
 * treated as a defect and fails the stream.
 */
export class ToolFailure extends Schema.TaggedErrorClass<ToolFailure>()("LLM.ToolFailure", {
  message: Schema.String,
  metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}

export type LLMError =
  | InvalidRequestError
  | NoAdapterError
  | ProviderChunkError
  | ProviderRequestError
  | TransportError

205 packages/llm/src/tool-runtime.ts Normal file
@@ -0,0 +1,205 @@
import { Effect, Stream } from "effect"
import type { Concurrency } from "effect/Types"
import type { LLMClient } from "./adapter"
import type { RequestExecutor } from "./executor"
import * as LLM from "./llm"
import {
  type ContentPart,
  type FinishReason,
  type LLMError,
  type LLMEvent,
  type LLMRequest,
  type ToolCallPart,
  type ToolResultValue,
} from "./schema"
import { ToolFailure } from "./schema"
import { type AnyTool, type Tools, toDefinitions } from "./tool"

export interface RuntimeState {
  readonly step: number
  readonly request: LLMRequest
}

export interface RunOptions<T extends Tools> {
  readonly request: LLMRequest
  readonly tools: T
  /**
   * Maximum number of model round-trips before the runtime stops emitting new
   * requests. Defaults to 10. Reaching this limit is not an error — the loop
   * simply stops and the last `request-finish` event is the terminal signal.
   */
  readonly maxSteps?: number
  /**
   * How many tool handlers to dispatch in parallel within a single step.
   * Defaults to 10. Use `"unbounded"` only when handlers do not share an
   * external dependency that can be saturated (rate-limited APIs, single
   * connections, etc).
   */
  readonly concurrency?: Concurrency
  /**
   * Optional predicate evaluated after each step's `request-finish` event. If
   * it returns `true`, the loop stops even if the model wanted to continue.
   */
  readonly stopWhen?: (state: RuntimeState) => boolean
}

/**
 * Run a model with a typed tool record. The runtime streams the model, on
 * each `tool-call` event decodes the input against the tool's `parameters`
 * Schema, dispatches to the matching handler, encodes the handler's result
 * against the tool's `success` Schema, and emits a `tool-result` event. When
 * the model finishes with `tool-calls`, the runtime appends the assistant +
 * tool messages and re-streams. Stops on a non-`tool-calls` finish, when
 * `maxSteps` is reached, or when `stopWhen` returns `true`.
 *
 * Tool handler dependencies are closed over at tool definition time, so the
 * runtime's only environment requirement is the `RequestExecutor.Service`.
 */
export const run = <T extends Tools>(
  client: LLMClient,
  options: RunOptions<T>,
): Stream.Stream<LLMEvent, LLMError, RequestExecutor.Service> => {
  const maxSteps = options.maxSteps ?? 10
  const concurrency = options.concurrency ?? 10
  const tools = options.tools as Tools
  const runtimeTools = toDefinitions(tools)
  const initialRequest = LLM.updateRequest(options.request, {
    tools: [
      ...options.request.tools.filter((tool) => !runtimeTools.some((runtimeTool) => runtimeTool.name === tool.name)),
      ...runtimeTools,
    ],
  })

  const loop = (request: LLMRequest, step: number): Stream.Stream<LLMEvent, LLMError, RequestExecutor.Service> =>
    Stream.unwrap(
      Effect.gen(function* () {
        const state: StepState = { assistantContent: [], toolCalls: [], finishReason: undefined }

        const modelStream = client.stream(request).pipe(
          Stream.tap((event) => Effect.sync(() => accumulate(state, event))),
        )

        const continuation = Stream.unwrap(
          Effect.gen(function* () {
            if (state.finishReason !== "tool-calls" || state.toolCalls.length === 0) return Stream.empty
            if (options.stopWhen?.({ step, request })) return Stream.empty
            if (step + 1 >= maxSteps) return Stream.empty

            const dispatched = yield* Effect.forEach(
              state.toolCalls,
              (call) => dispatch(tools, call).pipe(Effect.map((result) => [call, result] as const)),
              { concurrency },
            )
            const followUp = LLM.updateRequest(request, {
              messages: [
                ...request.messages,
                LLM.assistant(state.assistantContent),
                ...dispatched.map(([call, result]) =>
                  LLM.toolMessage({ id: call.id, name: call.name, result }),
                ),
              ],
            })

            return Stream.fromIterable(dispatched.flatMap(([call, result]) => emitEvents(call, result))).pipe(
              Stream.concat(loop(followUp, step + 1)),
            )
          }),
        )

        return modelStream.pipe(Stream.concat(continuation))
      }),
    )

  return loop(initialRequest, 0)
}
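A minimal end-to-end sketch (hypothetical `client` and `request`; `tool` is the constructor from ./tool shown below, and the `Schema` import is assumed):

const events = run(client, {
  request,
  tools: {
    // The record key becomes the wire name the model calls.
    get_weather: tool({
      description: "Get current weather",
      parameters: Schema.Struct({ city: Schema.String }),
      success: Schema.Struct({ temperature: Schema.Number }),
      execute: () => Effect.succeed({ temperature: 22 }),
    }),
  },
  maxSteps: 4,
})
// `events` is a Stream<LLMEvent, LLMError, RequestExecutor.Service>; tool-call
// rounds are folded in automatically until the model stops calling tools,
// maxSteps is reached, or stopWhen fires.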
interface StepState {
  assistantContent: ContentPart[]
  toolCalls: ToolCallPart[]
  finishReason: FinishReason | undefined
}

const accumulate = (state: StepState, event: LLMEvent) => {
  if (event.type === "text-delta") {
    appendStreamingText(state, "text", event.text)
    return
  }
  if (event.type === "reasoning-delta") {
    appendStreamingText(state, "reasoning", event.text)
    return
  }
  if (event.type === "tool-call") {
    const part = LLM.toolCall({
      id: event.id,
      name: event.name,
      input: event.input,
      providerExecuted: event.providerExecuted,
    })
    state.assistantContent.push(part)
    // Provider-executed tools are dispatched by the provider; the runtime must
    // not invoke a client handler. The matching `tool-result` event arrives
    // later in the same stream and is folded into `assistantContent` so the
    // next round's message history carries it.
    if (!event.providerExecuted) state.toolCalls.push(part)
    return
  }
  if (event.type === "tool-result" && event.providerExecuted) {
    state.assistantContent.push(LLM.toolResult({
      id: event.id,
      name: event.name,
      result: event.result,
      providerExecuted: true,
    }))
    return
  }
  if (event.type === "request-finish") {
    state.finishReason = event.reason
  }
}

const appendStreamingText = (state: StepState, type: "text" | "reasoning", text: string) => {
  const last = state.assistantContent.at(-1)
  if (last?.type === type) {
    state.assistantContent[state.assistantContent.length - 1] = { ...last, text: `${last.text}${text}` }
    return
  }
  state.assistantContent.push({ type, text })
}

const dispatch = (tools: Tools, call: ToolCallPart): Effect.Effect<ToolResultValue> => {
  const tool = tools[call.name]
  if (!tool) return Effect.succeed({ type: "error" as const, value: `Unknown tool: ${call.name}` })

  return decodeAndExecute(tool, call.input).pipe(
    Effect.catchTag("LLM.ToolFailure", (failure) =>
      Effect.succeed({ type: "error" as const, value: failure.message } satisfies ToolResultValue),
    ),
  )
}

const decodeAndExecute = (tool: AnyTool, input: unknown): Effect.Effect<ToolResultValue, ToolFailure> =>
  tool._decode(input).pipe(
    Effect.mapError((error) => new ToolFailure({ message: `Invalid tool input: ${error.message}` })),
    Effect.flatMap((decoded) => tool.execute(decoded)),
    Effect.flatMap((value) =>
      tool._encode(value).pipe(
        Effect.mapError(
          (error) =>
            new ToolFailure({
              message: `Tool returned an invalid value for its success schema: ${error.message}`,
            }),
        ),
      ),
    ),
    Effect.map((encoded): ToolResultValue => ({ type: "json", value: encoded })),
  )

const emitEvents = (call: ToolCallPart, result: ToolResultValue): ReadonlyArray<LLMEvent> =>
  result.type === "error"
    ? [
        { type: "tool-error", id: call.id, name: call.name, message: String(result.value) },
        { type: "tool-result", id: call.id, name: call.name, result },
      ]
    : [{ type: "tool-result", id: call.id, name: call.name, result }]

export * as ToolRuntime from "./tool-runtime"

107 packages/llm/src/tool.ts Normal file
@@ -0,0 +1,107 @@
import { Effect, Schema } from "effect"
|
||||
import type { ToolDefinition as ToolDefinitionClass } from "./schema"
|
||||
import { ToolDefinition, ToolFailure } from "./schema"
|
||||
|
||||
/**
|
||||
* Schema constraint for tool parameters / success values: no decoding or
|
||||
* encoding services are allowed. Tools should be self-contained — anything
|
||||
* beyond pure data transformation belongs in the handler closure.
|
||||
*/
|
||||
export type ToolSchema<T> = Schema.Codec<T, any, never, never>
|
||||
|
||||
/**
|
||||
* A type-safe LLM tool. Each tool bundles its own description, parameter
|
||||
* Schema, success Schema, and execute handler. The handler closes over any
|
||||
* services it needs at construction time, so the runtime never sees per-tool
|
||||
* dependencies.
|
||||
*
|
||||
* Errors must be expressed as `ToolFailure`. Unmapped errors and defects fail
|
||||
* the stream.
|
||||
*
|
||||
* Internally each tool also carries memoized codecs and a precomputed
|
||||
* `ToolDefinition` so the runtime doesn't rebuild them per invocation.
|
||||
*/
|
||||
export interface Tool<Parameters extends ToolSchema<any>, Success extends ToolSchema<any>> {
|
||||
readonly description: string
|
||||
readonly parameters: Parameters
|
||||
readonly success: Success
|
||||
readonly execute: (
|
||||
params: Schema.Schema.Type<Parameters>,
|
||||
) => Effect.Effect<Schema.Schema.Type<Success>, ToolFailure>
|
||||
/** @internal */
|
||||
readonly _decode: (input: unknown) => Effect.Effect<Schema.Schema.Type<Parameters>, Schema.SchemaError>
|
||||
/** @internal */
|
||||
readonly _encode: (value: Schema.Schema.Type<Success>) => Effect.Effect<unknown, Schema.SchemaError>
|
||||
/** @internal */
|
||||
readonly _definition: ToolDefinitionClass
|
||||
}
|
||||
|
||||
export type AnyTool = Tool<ToolSchema<any>, ToolSchema<any>>
|
||||
|
||||
/**
|
||||
* Constructs a typed tool. The Schema codecs and JSON-schema-shaped
|
||||
* `ToolDefinition` are derived once at this call site so the runtime can
|
||||
* reuse them across every invocation without recomputing.
|
||||
*
|
||||
* ```ts
|
||||
* const getWeather = tool({
|
||||
* description: "Get current weather",
|
||||
* parameters: Schema.Struct({ city: Schema.String }),
|
||||
* success: Schema.Struct({ temperature: Schema.Number }),
|
||||
* execute: ({ city }) => Effect.succeed({ temperature: 22 }),
|
||||
* })
|
||||
* ```
|
||||
*/
|
||||
export const tool = <Parameters extends ToolSchema<any>, Success extends ToolSchema<any>>(config: {
|
||||
readonly description: string
|
||||
readonly parameters: Parameters
|
||||
readonly success: Success
|
||||
readonly execute: (
|
||||
params: Schema.Schema.Type<Parameters>,
|
||||
) => Effect.Effect<Schema.Schema.Type<Success>, ToolFailure>
|
||||
}): Tool<Parameters, Success> => ({
|
||||
description: config.description,
|
||||
parameters: config.parameters,
|
||||
success: config.success,
|
||||
execute: config.execute,
|
||||
_decode: Schema.decodeUnknownEffect(config.parameters),
|
||||
_encode: Schema.encodeEffect(config.success),
|
||||
_definition: new ToolDefinition({
|
||||
name: "",
|
||||
description: config.description,
|
||||
inputSchema: toJsonSchema(config.parameters),
|
||||
}),
|
||||
})
|
||||
|
||||
/**
|
||||
* A record of named tools. The record key becomes the tool name on the wire.
|
||||
*/
|
||||
export type Tools = Record<string, AnyTool>
|
||||
|
||||
/**
|
||||
* Convert a tools record into the `ToolDefinition[]` shape that
|
||||
* `LLMRequest.tools` expects. The runtime calls this internally; consumers
|
||||
* that build `LLMRequest` themselves can use it too.
|
||||
*
|
||||
* Tool names come from the record keys, so the per-tool cached
|
||||
* `_definition` is rebuilt with the correct name here. The JSON Schema body
|
||||
* is reused.
|
||||
*/
|
||||
export const toDefinitions = (tools: Tools): ReadonlyArray<ToolDefinitionClass> =>
|
||||
Object.entries(tools).map(([name, item]) =>
|
||||
new ToolDefinition({
|
||||
name,
|
||||
description: item._definition.description,
|
||||
inputSchema: item._definition.inputSchema,
|
||||
}),
|
||||
)
|
||||
|
||||
const toJsonSchema = (schema: Schema.Top): Record<string, unknown> => {
|
||||
const document = Schema.toJsonSchemaDocument(schema)
|
||||
if (Object.keys(document.definitions).length === 0) return document.schema as Record<string, unknown>
|
||||
return { ...document.schema, $defs: document.definitions } as Record<string, unknown>
|
||||
}
|
||||
|
||||
export { ToolFailure }
|
||||
|
||||
export * as Tool from "./tool"
|
||||
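// Editor's sketch (not part of the diff): wiring `toDefinitions` into a
// hand-built request. `getWeather` is the tool from the doc-comment example
// above; the `LLM.request` / `LLM.updateRequest` calls mirror the tests below.
const base = LLM.request({
  id: "req_manual",
  model: LLM.model({ id: "fake-model", provider: "fake", protocol: "openai-chat" }),
  prompt: "What's the weather in Paris?",
})
// Record keys become the wire names, so the same tool value can be exposed
// under whatever name this request should advertise.
const withTools = LLM.updateRequest(base, { tools: toDefinitions({ get_weather: getWeather }) })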
312
packages/llm/test/adapter.test.ts
Normal file
@@ -0,0 +1,312 @@
import { describe, expect } from "bun:test"
import { Effect, Schema, Stream } from "effect"
import { HttpClientRequest } from "effect/unstable/http"
import { LLM } from "../src"
import { Adapter, LLMClient } from "../src/adapter"
import { Patch } from "../src/patch"
import type { LLMRequest, Message, ModelRef, ToolDefinition } from "../src/schema"
import { testEffect } from "./lib/effect"
import { dynamicResponse } from "./lib/http"

const updateMessageContent = (message: Message, content: Message["content"]) =>
  LLM.message({
    id: message.id,
    role: message.role,
    content,
    metadata: message.metadata,
    native: message.native,
  })

const updateModel = (model: ModelRef, patch: Partial<LLM.ModelInput>) =>
  LLM.model({
    id: model.id,
    provider: model.provider,
    protocol: model.protocol,
    baseURL: model.baseURL,
    headers: model.headers,
    capabilities: model.capabilities,
    limits: model.limits,
    native: model.native,
    ...patch,
  })

const updateToolDefinition = (tool: ToolDefinition, patch: Partial<ToolDefinition>) =>
  LLM.toolDefinition({
    name: tool.name,
    description: tool.description,
    inputSchema: tool.inputSchema,
    metadata: tool.metadata,
    native: tool.native,
    ...patch,
  })

const mapText = (fn: (text: string) => string) => (request: LLMRequest): LLMRequest =>
  LLM.updateRequest(request, {
    messages: request.messages.map((message) =>
      updateMessageContent(
        message,
        message.content.map((part) => (part.type === "text" ? { ...part, text: fn(part.text) } : part)),
      ),
    ),
  })

const Json = Schema.fromJsonString(Schema.Unknown)
const encodeJson = Schema.encodeSync(Json)

type FakeDraft = {
  readonly body: string
  readonly includeUsage?: boolean
}

const FakeChunk = Schema.Union([
  Schema.Struct({ type: Schema.Literal("text"), text: Schema.String }),
  Schema.Struct({ type: Schema.Literal("finish"), reason: Schema.Literal("stop") }),
])
type FakeChunk = Schema.Schema.Type<typeof FakeChunk>
const FakeChunks = Schema.Array(FakeChunk)

const request = LLM.request({
  id: "req_1",
  model: LLM.model({
    id: "fake-model",
    provider: "fake-provider",
    protocol: "openai-chat",
  }),
  prompt: "hello",
})

const raiseChunk = (chunk: FakeChunk): import("../src/schema").LLMEvent =>
  chunk.type === "finish"
    ? { type: "request-finish", reason: chunk.reason }
    : { type: "text-delta", text: chunk.text }

const fake = Adapter.unsafe<FakeDraft, FakeDraft>({
  id: "fake",
  protocol: "openai-chat",
  redact: (target) => ({ ...target, redacted: true }),
  validate: (draft) => Effect.succeed(draft),
  prepare: (request) =>
    Effect.succeed({
      body: [
        ...request.messages
          .flatMap((message) => message.content)
          .filter((part) => part.type === "text")
          .map((part) => part.text),
        ...request.tools.map((tool) => `tool:${tool.name}:${tool.description}`),
      ].join("\n"),
    }),
  toHttp: (target) =>
    Effect.succeed(
      HttpClientRequest.post("https://fake.local/chat").pipe(
        HttpClientRequest.setHeader("content-type", "application/json"),
        HttpClientRequest.bodyText(encodeJson(target), "application/json"),
      ),
    ),
  parse: (response) =>
    Stream.fromEffect(
      response.json.pipe(
        Effect.flatMap(Schema.decodeUnknownEffect(FakeChunks)),
        Effect.orDie,
      ),
    ).pipe(
      Stream.flatMap(Stream.fromIterable),
      Stream.map(raiseChunk),
    ),
})

const gemini = Adapter.unsafe<FakeDraft, FakeDraft>({
  ...fake,
  id: "gemini-fake",
  protocol: "gemini",
})

const echoLayer = dynamicResponse(({ text, respond }) =>
  Effect.succeed(
    respond(
      encodeJson([
        { type: "text", text: `echo:${text}` },
        { type: "finish", reason: "stop" },
      ]),
    ),
  ),
)

const it = testEffect(echoLayer)

describe("llm adapter", () => {
  it.effect("prepare applies target patches with trace", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({
        adapters: [
          fake.withPatches([
            fake.patch("include-usage", {
              reason: "fake target patch",
              apply: (draft) => ({ ...draft, includeUsage: true }),
            }),
          ]),
        ],
      }).prepare(request)

      expect(prepared.redactedTarget).toEqual({ body: "hello", includeUsage: true, redacted: true })
      expect(prepared.patchTrace.map((item) => item.id)).toEqual(["target.fake.include-usage"])
    }),
  )

  it.effect("stream and generate use the adapter pipeline", () =>
    Effect.gen(function* () {
      const llm = LLMClient.make({ adapters: [fake] })
      const events = Array.from(yield* llm.stream(request).pipe(Stream.runCollect))
      const response = yield* llm.generate(request)

      expect(events.map((event) => event.type)).toEqual(["text-delta", "request-finish"])
      expect(response.events.map((event) => event.type)).toEqual(["text-delta", "request-finish"])
    }),
  )

  it.effect("selects adapters by request protocol", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [fake, gemini] }).prepare(
        LLM.updateRequest(request, { model: updateModel(request.model, { protocol: "gemini" }) }),
      )

      expect(prepared.adapter).toBe("gemini-fake")
    }),
  )

  it.effect("request, prompt, and tool-schema patches run before adapter prepare", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({
        adapters: [fake],
        patches: [
          Patch.request("test.id", {
            reason: "rewrite request id",
            apply: (request) => LLM.updateRequest(request, { id: "req_patched" }),
          }),
          Patch.prompt("test.message", {
            reason: "rewrite prompt text",
            apply: mapText(() => "patched"),
          }),
          Patch.toolSchema("test.description", {
            reason: "rewrite tool description",
            apply: (tool) => updateToolDefinition(tool, { description: "patched tool" }),
          }),
        ],
      }).prepare(
        LLM.updateRequest(request, {
          tools: [{ name: "lookup", description: "original", inputSchema: {} }],
        }),
      )

      expect(prepared.id).toBe("req_patched")
      expect(prepared.target).toEqual({ body: "patched\ntool:lookup:patched tool" })
      expect(prepared.patchTrace.map((item) => item.id)).toEqual([
        "request.test.id",
        "prompt.test.message",
        "schema.test.description",
      ])
    }),
  )

  it.effect("request patches feed into prompt-patch predicates so phases see updated context", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({
        adapters: [fake],
        patches: [
          // Earlier phase rewrites the provider, later phase only fires for the
          // rewritten provider. If `compile` re-uses a stale PatchContext this
          // test fails because the prompt patch's `when` would not match.
          Patch.request("rewrite-provider", {
            reason: "swap provider before prompt phase",
            apply: (request) => LLM.updateRequest(request, { model: updateModel(request.model, { provider: "rewritten" }) }),
          }),
          Patch.prompt("rewrite-only-when-rewritten", {
            reason: "rewrite prompt text only after provider swap",
            when: (ctx) => ctx.model.provider === "rewritten",
            apply: mapText((text) => `rewrote-${text}`),
          }),
        ],
      }).prepare(request)

      expect(prepared.target).toEqual({ body: "rewrote-hello" })
      expect(prepared.patchTrace.map((item) => item.id)).toEqual([
        "request.rewrite-provider",
        "prompt.rewrite-only-when-rewritten",
      ])
    }),
  )

  it.effect("patches with the same order sort by id for deterministic application", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({
        adapters: [fake],
        patches: [
          Patch.prompt("zeta", {
            reason: "later id",
            order: 1,
            apply: mapText((text) => `${text}|zeta`),
          }),
          Patch.prompt("alpha", {
            reason: "earlier id",
            order: 1,
            apply: mapText((text) => `${text}|alpha`),
          }),
        ],
      }).prepare(request)

      expect(prepared.target).toEqual({ body: "hello|alpha|zeta" })
    }),
  )

  it.effect("stream patches transform raised events", () =>
    Effect.gen(function* () {
      const llm = LLMClient.make({
        adapters: [fake],
        patches: [
          Patch.stream("test.uppercase", {
            reason: "uppercase text deltas",
            apply: (event) => (event.type === "text-delta" ? { ...event, text: event.text.toUpperCase() } : event),
          }),
        ],
      })

      const events = Array.from(yield* llm.stream(request).pipe(Stream.runCollect))

      expect(events[0]).toEqual({ type: "text-delta", text: 'ECHO:{"BODY":"HELLO"}' })
    }),
  )

  it.effect("stream patches transform multiple events per stream", () =>
    Effect.gen(function* () {
      // Verifies stream patches run on every event, not just the first.
      const seen: string[] = []
      const llm = LLMClient.make({
        adapters: [fake],
        patches: [
          Patch.stream("test.tap", {
            reason: "record every event type",
            apply: (event) => {
              seen.push(event.type)
              return event
            },
          }),
        ],
      })

      yield* llm.stream(request).pipe(Stream.runDrain)

      expect(seen).toEqual(["text-delta", "request-finish"])
    }),
  )

  it.effect("rejects protocol mismatch", () =>
    Effect.gen(function* () {
      const error = yield* LLMClient.make({ adapters: [fake] })
        .prepare(
          LLM.updateRequest(request, { model: updateModel(request.model, { protocol: "gemini" }) }),
        )
        .pipe(Effect.flip)

      expect(error.message).toContain("No LLM adapter")
    }),
  )
})
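// Editor's sketch (not part of the diff): the same client used outside a
// test, reading the final text with LLM.outputText (exercised in
// llm.test.ts); assumes outputText accepts any value carrying `events`.
const program = Effect.gen(function* () {
  const llm = LLMClient.make({ adapters: [fake] })
  const response = yield* llm.generate(request)
  return LLM.outputText(response)
})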
32
packages/llm/test/fixtures/recordings/anthropic-messages/streams-text.json
vendored
Normal file
@@ -0,0 +1,32 @@
{
"version": 1,
"metadata": {
"name": "anthropic-messages/streams-text",
"recordedAt": "2026-04-28T21:18:45.535Z",
"tags": [
"prefix:anthropic-messages",
"provider:anthropic",
"protocol:anthropic-messages"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-haiku-4-5-20251001\",\"system\":[{\"type\":\"text\",\"text\":\"You are concise.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Reply with exactly: Hello!\"}]}],\"stream\":true,\"max_tokens\":20,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-haiku-4-5-20251001\",\"id\":\"msg_01UodR8c3ezAK8rAfi8HAs8g\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":18,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":2,\"service_tier\":\"standard\",\"inference_geo\":\"not_available\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"Hello!\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":18,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":5} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n"
}
}
]
}
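// Editor's sketch (not part of the diff): the shape the recording fixtures in
// this directory follow, inferred from the JSON itself. The field names are
// taken from the fixtures; the type is illustrative, not the http-recorder's
// published API.
interface Recording {
  version: 1
  metadata: { name: string; recordedAt: string; tags: string[] }
  interactions: Array<{
    request: { method: string; url: string; headers: Record<string, string>; body: string }
    response: { status: number; headers: Record<string, string>; body: string; bodyEncoding?: "base64" }
  }>
}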
33
packages/llm/test/fixtures/recordings/anthropic-messages/streams-tool-call.json
vendored
Normal file
@@ -0,0 +1,33 @@
{
"version": 1,
"metadata": {
"name": "anthropic-messages/streams-tool-call",
"recordedAt": "2026-04-28T21:18:46.878Z",
"tags": [
"prefix:anthropic-messages",
"provider:anthropic",
"protocol:anthropic-messages",
"tool"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-haiku-4-5-20251001\",\"system\":[{\"type\":\"text\",\"text\":\"Call tools exactly as requested.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}],\"tool_choice\":{\"type\":\"tool\",\"name\":\"get_weather\"},\"stream\":true,\"max_tokens\":80,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-haiku-4-5-20251001\",\"id\":\"msg_01RYgU7NUPMK4B9v8S7gVpCS\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":677,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":16,\"service_tier\":\"standard\",\"inference_geo\":\"not_available\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"tool_use\",\"id\":\"toolu_012rmAruviySvUXSjgCPWVRu\",\"name\":\"get_weather\",\"input\":{},\"caller\":{\"type\":\"direct\"}} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"{\\\"city\\\":\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\" \\\"Paris\\\"}\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"tool_use\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":677,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":33} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n"
}
}
]
}
33
packages/llm/test/fixtures/recordings/bedrock-converse/streams-a-tool-call.json
vendored
Normal file
@@ -0,0 +1,33 @@
{
"version": 1,
"metadata": {
"name": "bedrock-converse/streams-a-tool-call",
"recordedAt": "2026-04-28T21:18:46.929Z",
"tags": [
"prefix:bedrock-converse",
"provider:amazon-bedrock",
"protocol:bedrock-converse",
"tool"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream",
"headers": {
"content-type": "application/json"
},
"body": "{\"modelId\":\"us.amazon.nova-micro-v1:0\",\"messages\":[{\"role\":\"user\",\"content\":[{\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"system\":[{\"text\":\"Call tools exactly as requested.\"}],\"inferenceConfig\":{\"maxTokens\":80,\"temperature\":0},\"toolConfig\":{\"tools\":[{\"toolSpec\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"inputSchema\":{\"json\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}}],\"toolChoice\":{\"tool\":{\"name\":\"get_weather\"}}}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/vnd.amazon.eventstream"
},
"body": "AAAAuQAAAFL9kIXUCzpldmVudC10eXBlBwAMbWVzc2FnZVN0YXJ0DTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzNDU2NyIsInJvbGUiOiJhc3Npc3RhbnQifWf51EkAAAEMAAAAV56BJZoLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tTdGFydA06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFUiLCJzdGFydCI6eyJ0b29sVXNlIjp7Im5hbWUiOiJnZXRfd2VhdGhlciIsInRvb2xVc2VJZCI6InRvb2x1c2VfNmExcFB2bmM5OUdMS08zS0drVUEyTiJ9fX2LR7PFAAAA4gAAAFfCOY+BCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidG9vbFVzZSI6eyJpbnB1dCI6IntcImNpdHlcIjpcIlBhcmlzXCJ9In19LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTiJ9RkW+2gAAAIcAAABW5OxHKgs6ZXZlbnQtdHlwZQcAEGNvbnRlbnRCbG9ja1N0b3ANOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwicCI6ImFiYyJ9y6nrtwAAAK4AAABRtlmf/As6ZXZlbnQtdHlwZQcAC21lc3NhZ2VTdG9wDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSUyIsInN0b3BSZWFzb24iOiJ0b29sX3VzZSJ9MTlQawAAAOIAAABOplInQQs6ZXZlbnQtdHlwZQcACG1ldGFkYXRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsibWV0cmljcyI6eyJsYXRlbmN5TXMiOjM1NX0sInAiOiJhYmNkZWZnaGlqayIsInVzYWdlIjp7ImlucHV0VG9rZW5zIjo0MTksIm91dHB1dFRva2VucyI6MTYsInNlcnZlclRvb2xVc2FnZSI6e30sInRvdGFsVG9rZW5zIjo0MzV9fU1tVJc=",
"bodyEncoding": "base64"
}
}
]
}
32
packages/llm/test/fixtures/recordings/bedrock-converse/streams-text.json
vendored
Normal file
@@ -0,0 +1,32 @@
{
"version": 1,
"metadata": {
"name": "bedrock-converse/streams-text",
"recordedAt": "2026-04-28T21:18:46.553Z",
"tags": [
"prefix:bedrock-converse",
"provider:amazon-bedrock",
"protocol:bedrock-converse"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream",
"headers": {
"content-type": "application/json"
},
"body": "{\"modelId\":\"us.amazon.nova-micro-v1:0\",\"messages\":[{\"role\":\"user\",\"content\":[{\"text\":\"Say hello.\"}]}],\"system\":[{\"text\":\"Reply with the single word 'Hello'.\"}],\"inferenceConfig\":{\"maxTokens\":16,\"temperature\":0}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/vnd.amazon.eventstream"
},
"body": "AAAAmQAAAFI8UarQCzpldmVudC10eXBlBwAMbWVzc2FnZVN0YXJ0DTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUIiLCJyb2xlIjoiYXNzaXN0YW50In3SL1jNAAAAvQAAAFd4etebCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IkhlbGxvIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFIn2B0NR6AAAAxgAAAFf2eAZFCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTIn3XaHMvAAAAhwAAAFbk7EcqCzpldmVudC10eXBlBwAQY29udGVudEJsb2NrU3RvcA06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJwIjoiYWJjIn3Lqeu3AAAAjwAAAFFK+JlICzpldmVudC10eXBlBwALbWVzc2FnZVN0b3ANOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJwIjoiYWJjZGVmZ2hpamtsbW4iLCJzdG9wUmVhc29uIjoiZW5kX3R1cm4ifZ+RQqEAAAECAAAATkXaMzsLOmV2ZW50LXR5cGUHAAhtZXRhZGF0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7Im1ldHJpY3MiOnsibGF0ZW5jeU1zIjozMDZ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVCIsInVzYWdlIjp7ImlucHV0VG9rZW5zIjoxMiwib3V0cHV0VG9rZW5zIjoyLCJzZXJ2ZXJUb29sVXNhZ2UiOnt9LCJ0b3RhbFRva2VucyI6MTR9fSnnkUk=",
"bodyEncoding": "base64"
}
}
]
}
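// Editor's sketch (not part of the diff): the Bedrock recordings above mark
// their eventstream payloads with "bodyEncoding": "base64", so a replayer
// presumably decodes before handing bytes to the parser. A minimal helper,
// assuming the Recording shape sketched earlier:
const bodyBytes = (response: { body: string; bodyEncoding?: "base64" }): Uint8Array =>
  response.bodyEncoding === "base64"
    ? Uint8Array.from(Buffer.from(response.body, "base64"))
    : new TextEncoder().encode(response.body)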
31
packages/llm/test/fixtures/recordings/gemini/streams-text.json
vendored
Normal file
@@ -0,0 +1,31 @@
{
"version": 1,
"metadata": {
"name": "gemini/streams-text",
"recordedAt": "2026-04-28T21:18:47.483Z",
"tags": [
"prefix:gemini",
"provider:google",
"protocol:gemini"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse",
"headers": {
"content-type": "application/json"
},
"body": "{\"contents\":[{\"role\":\"user\",\"parts\":[{\"text\":\"Reply with exactly: Hello!\"}]}],\"systemInstruction\":{\"parts\":[{\"text\":\"You are concise.\"}]},\"generationConfig\":{\"maxOutputTokens\":80,\"temperature\":0}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream"
},
"body": "data: {\"candidates\": [{\"content\": {\"parts\": [{\"text\": \"Hello!\"}],\"role\": \"model\"},\"finishReason\": \"STOP\",\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": 11,\"candidatesTokenCount\": 2,\"totalTokenCount\": 29,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": 11}],\"thoughtsTokenCount\": 16},\"modelVersion\": \"gemini-2.5-flash\",\"responseId\": \"NyTxaczMAZ-b_uMP6u--iQg\"}\r\n\r\n"
}
}
]
}
32
packages/llm/test/fixtures/recordings/gemini/streams-tool-call.json
vendored
Normal file
@@ -0,0 +1,32 @@
{
"version": 1,
"metadata": {
"name": "gemini/streams-tool-call",
"recordedAt": "2026-04-28T21:18:48.285Z",
"tags": [
"prefix:gemini",
"provider:google",
"protocol:gemini",
"tool"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse",
"headers": {
"content-type": "application/json"
},
"body": "{\"contents\":[{\"role\":\"user\",\"parts\":[{\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"systemInstruction\":{\"parts\":[{\"text\":\"Call tools exactly as requested.\"}]},\"tools\":[{\"functionDeclarations\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"required\":[\"city\"],\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}}}}]}],\"toolConfig\":{\"functionCallingConfig\":{\"mode\":\"ANY\",\"allowedFunctionNames\":[\"get_weather\"]}},\"generationConfig\":{\"maxOutputTokens\":80,\"temperature\":0}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream"
},
"body": "data: {\"candidates\": [{\"content\": {\"parts\": [{\"functionCall\": {\"name\": \"get_weather\",\"args\": {\"city\": \"Paris\"}},\"thoughtSignature\": \"CiQBDDnWx5RcSsS1UMbykQ5HWlrMu6wrxXGUhmZ0uRKLaMhDZaEKXwEMOdbHVoJAlfbOQyKB378pDZ/gkjWr3HP+dWw1us1kMG22g4G3oJvuTq/SrWS+7KYtSlvOxCKhW2l/2/TczpyGyGmANmsusDcxF1SKOYA5/8Hg0nI24MAlT3+91V/MCoUBAQw51seClFLy3E71v2H44F1kpmjgz8FeTRZofrjbaazfrT+w8Yxgdr3UgGagLMY4OadZemQTWckq9IAqRum78hrBg6NGtQvn15SbtfTNqI4PcxX/+qPo4/g4/ZT5kVORDhVqO8BVP/RA5GQ3ce3sRK8hSkvQlXSoXIPpHh6x7hBezIGXzw==\"}],\"role\": \"model\"},\"finishReason\": \"STOP\",\"index\": 0,\"finishMessage\": \"Model generated function call(s).\"}],\"usageMetadata\": {\"promptTokenCount\": 55,\"candidatesTokenCount\": 15,\"totalTokenCount\": 115,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": 55}],\"thoughtsTokenCount\": 45},\"modelVersion\": \"gemini-2.5-flash\",\"responseId\": \"NyTxaYuTJ_OW_uMPgIPKgAg\"}\r\n\r\n"
}
}
]
}
32
packages/llm/test/fixtures/recordings/openai-chat/continues-after-tool-result.json
vendored
Normal file
File diff suppressed because one or more lines are too long
50
packages/llm/test/fixtures/recordings/openai-chat/drives-a-tool-loop-end-to-end.json
vendored
Normal file
File diff suppressed because one or more lines are too long
31
packages/llm/test/fixtures/recordings/openai-chat/streams-text.json
vendored
Normal file
@@ -0,0 +1,31 @@
{
"version": 1,
"metadata": {
"name": "openai-chat/streams-text",
"recordedAt": "2026-04-28T21:18:36.916Z",
"tags": [
"prefix:openai-chat",
"provider:openai",
"protocol:openai-chat"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Say hello in one short sentence.\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":20,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "data: {\"id\":\"chatcmpl-DZk7AS6GyEHvGu6oglm0lRAVPLKVl\",\"object\":\"chat.completion.chunk\",\"created\":1777411116,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_c42fec8f39\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\",\"refusal\":null},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"e2lwm6DLm\"}\n\ndata: {\"id\":\"chatcmpl-DZk7AS6GyEHvGu6oglm0lRAVPLKVl\",\"object\":\"chat.completion.chunk\",\"created\":1777411116,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_c42fec8f39\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"LMrPYw\"}\n\ndata: {\"id\":\"chatcmpl-DZk7AS6GyEHvGu6oglm0lRAVPLKVl\",\"object\":\"chat.completion.chunk\",\"created\":1777411116,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_c42fec8f39\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"bJfqjLPNB4\"}\n\ndata: {\"id\":\"chatcmpl-DZk7AS6GyEHvGu6oglm0lRAVPLKVl\",\"object\":\"chat.completion.chunk\",\"created\":1777411116,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_c42fec8f39\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":null,\"obfuscation\":\"P3gO2\"}\n\ndata: {\"id\":\"chatcmpl-DZk7AS6GyEHvGu6oglm0lRAVPLKVl\",\"object\":\"chat.completion.chunk\",\"created\":1777411116,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_c42fec8f39\",\"choices\":[],\"usage\":{\"prompt_tokens\":22,\"completion_tokens\":2,\"total_tokens\":24,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"audio_tokens\":0,\"accepted_prediction_tokens\":0,\"rejected_prediction_tokens\":0}},\"obfuscation\":\"lVqas0bcjNx\"}\n\ndata: [DONE]\n\n"
}
}
]
}
32
packages/llm/test/fixtures/recordings/openai-chat/streams-tool-call.json
vendored
Normal file
@@ -0,0 +1,32 @@
{
"version": 1,
"metadata": {
"name": "openai-chat/streams-tool-call",
"recordedAt": "2026-04-28T21:18:38.053Z",
"tags": [
"prefix:openai-chat",
"provider:openai",
"protocol:openai-chat",
"tool"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"Call tools exactly as requested.\"},{\"role\":\"user\",\"content\":\"Call get_weather with city exactly Paris.\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"tool_choice\":{\"type\":\"function\",\"function\":{\"name\":\"get_weather\"}},\"stream\":true,\"max_tokens\":80,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "data: {\"id\":\"chatcmpl-DZk7BOHcY0wpwDDyT46mnFuldPW7H\",\"object\":\"chat.completion.chunk\",\"created\":1777411117,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b86b5e7355\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":null,\"tool_calls\":[{\"index\":0,\"id\":\"call_63S0l2F1i8sv9LmBLJ2eNAYS\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}],\"refusal\":null},\"logprobs\":null,\"finish_reason\":null}],\"obfuscation\":\"0\"}\n\ndata: {\"id\":\"chatcmpl-DZk7BOHcY0wpwDDyT46mnFuldPW7H\",\"object\":\"chat.completion.chunk\",\"created\":1777411117,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b86b5e7355\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"obfuscation\":\"2MSm0yVFD22\"}\n\ndata: {\"id\":\"chatcmpl-DZk7BOHcY0wpwDDyT46mnFuldPW7H\",\"object\":\"chat.completion.chunk\",\"created\":1777411117,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b86b5e7355\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"city\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"obfuscation\":\"47VRigngpL\"}\n\ndata: {\"id\":\"chatcmpl-DZk7BOHcY0wpwDDyT46mnFuldPW7H\",\"object\":\"chat.completion.chunk\",\"created\":1777411117,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b86b5e7355\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\":\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"obfuscation\":\"ZDLNnsyrQ\"}\n\ndata: {\"id\":\"chatcmpl-DZk7BOHcY0wpwDDyT46mnFuldPW7H\",\"object\":\"chat.completion.chunk\",\"created\":1777411117,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b86b5e7355\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"Paris\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"obfuscation\":\"EnjgG1OLD\"}\n\ndata: {\"id\":\"chatcmpl-DZk7BOHcY0wpwDDyT46mnFuldPW7H\",\"object\":\"chat.completion.chunk\",\"created\":1777411117,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b86b5e7355\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"}\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"obfuscation\":\"fnJiTWAyEwL\"}\n\ndata: {\"id\":\"chatcmpl-DZk7BOHcY0wpwDDyT46mnFuldPW7H\",\"object\":\"chat.completion.chunk\",\"created\":1777411117,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b86b5e7355\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"obfuscation\":\"V8\"}\n\ndata: [DONE]\n\n"
}
}
]
}
31
packages/llm/test/fixtures/recordings/openai-compatible-chat/deepseek-streams-text.json
vendored
Normal file
@@ -0,0 +1,31 @@
{
"version": 1,
"metadata": {
"name": "openai-compatible-chat/deepseek-streams-text",
"recordedAt": "2026-04-28T21:18:49.498Z",
"tags": [
"prefix:openai-compatible-chat",
"protocol:openai-compatible-chat",
"provider:deepseek"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://api.deepseek.com/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"deepseek-chat\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Reply with exactly: Hello!\"}],\"stream\":true,\"max_tokens\":20,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "data: {\"id\":\"0c811926-1e0c-4160-baf8-6e71247c8ad7\",\"object\":\"chat.completion.chunk\",\"created\":1777411128,\"model\":\"deepseek-v4-flash\",\"system_fingerprint\":\"fp_058df29938_prod0820_fp8_kvcache_20260402\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"0c811926-1e0c-4160-baf8-6e71247c8ad7\",\"object\":\"chat.completion.chunk\",\"created\":1777411128,\"model\":\"deepseek-v4-flash\",\"system_fingerprint\":\"fp_058df29938_prod0820_fp8_kvcache_20260402\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"0c811926-1e0c-4160-baf8-6e71247c8ad7\",\"object\":\"chat.completion.chunk\",\"created\":1777411128,\"model\":\"deepseek-v4-flash\",\"system_fingerprint\":\"fp_058df29938_prod0820_fp8_kvcache_20260402\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"0c811926-1e0c-4160-baf8-6e71247c8ad7\",\"object\":\"chat.completion.chunk\",\"created\":1777411128,\"model\":\"deepseek-v4-flash\",\"system_fingerprint\":\"fp_058df29938_prod0820_fp8_kvcache_20260402\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\"},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":14,\"completion_tokens\":2,\"total_tokens\":16,\"prompt_tokens_details\":{\"cached_tokens\":0},\"prompt_cache_hit_tokens\":0,\"prompt_cache_miss_tokens\":14}}\n\ndata: [DONE]\n\n"
}
}
]
}
31
packages/llm/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-text.json
vendored
Normal file
@@ -0,0 +1,31 @@
{
"version": 1,
"metadata": {
"name": "openai-compatible-chat/togetherai-streams-text",
"recordedAt": "2026-04-28T21:18:55.266Z",
"tags": [
"prefix:openai-compatible-chat",
"protocol:openai-compatible-chat",
"provider:togetherai"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Reply with exactly: Hello!\"}],\"stream\":true,\"max_tokens\":20,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream;charset=utf-8"
},
"body": "data: {\"id\":\"ogzjdpL-6Ng1vN-9f391a08f8af75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411129,\"choices\":[{\"index\":0,\"text\":\"Hello\",\"logprobs\":null,\"finish_reason\":null,\"seed\":null,\"delta\":{\"token_id\":9906,\"role\":\"assistant\",\"content\":\"Hello\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"usage\":null}\n\ndata: {\"id\":\"ogzjdpL-6Ng1vN-9f391a08f8af75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411129,\"choices\":[{\"index\":0,\"text\":\"!\",\"logprobs\":null,\"finish_reason\":null,\"seed\":null,\"delta\":{\"token_id\":null,\"role\":\"assistant\",\"content\":\"!\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"usage\":null}\n\ndata: {\"id\":\"ogzjdpL-6Ng1vN-9f391a08f8af75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411129,\"choices\":[{\"index\":0,\"text\":\"\",\"logprobs\":null,\"finish_reason\":\"stop\",\"seed\":15924764223251450000,\"delta\":{\"token_id\":128009,\"role\":\"assistant\",\"content\":\"\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"usage\":{\"prompt_tokens\":45,\"completion_tokens\":3,\"total_tokens\":48,\"cached_tokens\":0}}\n\ndata: [DONE]\n\n"
}
}
]
}
32
packages/llm/test/fixtures/recordings/openai-compatible-chat/togetherai-streams-tool-call.json
vendored
Normal file
@@ -0,0 +1,32 @@
{
"version": 1,
"metadata": {
"name": "openai-compatible-chat/togetherai-streams-tool-call",
"recordedAt": "2026-04-28T21:18:59.123Z",
"tags": [
"prefix:openai-compatible-chat",
"protocol:openai-compatible-chat",
"provider:togetherai",
"tool"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"messages\":[{\"role\":\"system\",\"content\":\"Call tools exactly as requested.\"},{\"role\":\"user\",\"content\":\"Call get_weather with city exactly Paris.\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"tool_choice\":{\"type\":\"function\",\"function\":{\"name\":\"get_weather\"}},\"stream\":true,\"max_tokens\":80,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream;charset=utf-8"
},
"body": "data: {\"id\":\"ogzjfRD-6Ng1vN-9f391a2bb8ca75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411135,\"choices\":[{\"index\":0,\"role\":\"assistant\",\"text\":\"\",\"logprobs\":null,\"finish_reason\":null,\"delta\":{\"token_id\":null,\"role\":\"assistant\",\"content\":\"\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\"}\n\ndata: {\"id\":\"ogzjfRD-6Ng1vN-9f391a2bb8ca75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411135,\"choices\":[{\"index\":0,\"text\":\"\",\"logprobs\":null,\"finish_reason\":null,\"delta\":{\"token_id\":null,\"role\":\"assistant\",\"content\":\"\",\"tool_calls\":[{\"index\":0,\"id\":\"call_yu1mxtmex7x48nximi9c8jpo\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}]}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\"}\n\ndata: {\"id\":\"ogzjfRD-6Ng1vN-9f391a2bb8ca75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411135,\"choices\":[{\"index\":0,\"text\":\"\",\"logprobs\":null,\"finish_reason\":\"tool_calls\",\"delta\":{\"token_id\":null,\"role\":\"assistant\",\"content\":\"\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\"}}]}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\"}\n\ndata: {\"id\":\"ogzjfRD-6Ng1vN-9f391a2bb8ca75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411135,\"choices\":[{\"index\":0,\"text\":\"\",\"logprobs\":null,\"finish_reason\":\"tool_calls\",\"seed\":9033012299842426000,\"delta\":{\"token_id\":128009,\"role\":\"assistant\",\"content\":\"\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"usage\":{\"prompt_tokens\":194,\"completion_tokens\":19,\"total_tokens\":213,\"cached_tokens\":0}}\n\ndata: [DONE]\n\n"
}
}
]
}
50
packages/llm/test/lib/effect.ts
Normal file
@@ -0,0 +1,50 @@
import { test, type TestOptions } from "bun:test"
import { Cause, Effect, Exit, Layer } from "effect"
import type * as Scope from "effect/Scope"
import * as TestClock from "effect/testing/TestClock"
import * as TestConsole from "effect/testing/TestConsole"

type Body<A, E, R> = Effect.Effect<A, E, R> | (() => Effect.Effect<A, E, R>)

const body = <A, E, R>(value: Body<A, E, R>) => Effect.suspend(() => (typeof value === "function" ? value() : value))

const run = <A, E, R, E2>(value: Body<A, E, R | Scope.Scope>, layer: Layer.Layer<R, E2>) =>
  Effect.gen(function* () {
    const exit = yield* body(value).pipe(Effect.scoped, Effect.provide(layer), Effect.exit)
    if (Exit.isFailure(exit)) {
      for (const err of Cause.prettyErrors(exit.cause)) {
        yield* Effect.logError(err)
      }
    }
    return yield* exit
  }).pipe(Effect.runPromise)

const make = <R, E>(testLayer: Layer.Layer<R, E>, liveLayer: Layer.Layer<R, E>) => {
  const effect = <A, E2>(name: string, value: Body<A, E2, R | Scope.Scope>, opts?: number | TestOptions) =>
    test(name, () => run(value, testLayer), opts)

  effect.only = <A, E2>(name: string, value: Body<A, E2, R | Scope.Scope>, opts?: number | TestOptions) =>
    test.only(name, () => run(value, testLayer), opts)

  effect.skip = <A, E2>(name: string, value: Body<A, E2, R | Scope.Scope>, opts?: number | TestOptions) =>
    test.skip(name, () => run(value, testLayer), opts)

  const live = <A, E2>(name: string, value: Body<A, E2, R | Scope.Scope>, opts?: number | TestOptions) =>
    test(name, () => run(value, liveLayer), opts)

  live.only = <A, E2>(name: string, value: Body<A, E2, R | Scope.Scope>, opts?: number | TestOptions) =>
    test.only(name, () => run(value, liveLayer), opts)

  live.skip = <A, E2>(name: string, value: Body<A, E2, R | Scope.Scope>, opts?: number | TestOptions) =>
    test.skip(name, () => run(value, liveLayer), opts)

  return { effect, live }
}

const testEnv = Layer.mergeAll(TestConsole.layer, TestClock.layer())
const liveEnv = TestConsole.layer

export const it = make(testEnv, liveEnv)

export const testEffect = <R, E>(layer: Layer.Layer<R, E>) =>
  make(Layer.provideMerge(layer, testEnv), Layer.provideMerge(layer, liveEnv))
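// Editor's sketch (not part of the diff): what a consumer of this helper
// looks like. `it.effect` runs under the deterministic test environment
// (TestClock + TestConsole); `it.live` keeps the real clock.
import { Effect } from "effect"
import { it } from "./lib/effect"

it.effect("runs an effect as a bun test", () =>
  Effect.sync(() => {
    // assertions go here
  }),
)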
86
packages/llm/test/lib/http.ts
Normal file
@@ -0,0 +1,86 @@
import { Effect, Layer, Ref } from "effect"
import { HttpClient, HttpClientRequest, HttpClientResponse } from "effect/unstable/http"
import { RequestExecutor } from "../../src/executor"

export type HandlerInput = {
  readonly request: HttpClientRequest.HttpClientRequest
  readonly text: string
  readonly respond: (body: ConstructorParameters<typeof Response>[0], init?: ResponseInit) => HttpClientResponse.HttpClientResponse
}

export type Handler = (input: HandlerInput) => Effect.Effect<HttpClientResponse.HttpClientResponse>

const handlerLayer = (handler: Handler): Layer.Layer<HttpClient.HttpClient> =>
  Layer.succeed(
    HttpClient.HttpClient,
    HttpClient.make((request) =>
      Effect.gen(function* () {
        const web = yield* HttpClientRequest.toWeb(request).pipe(Effect.orDie)
        const text = yield* Effect.promise(() => web.text())
        return yield* handler({
          request,
          text,
          respond: (body, init) => HttpClientResponse.fromWeb(request, new Response(body, init)),
        })
      }),
    ),
  )

const executorWith = (layer: Layer.Layer<HttpClient.HttpClient>) =>
  RequestExecutor.layer.pipe(Layer.provide(layer))

const SSE_HEADERS = { "content-type": "text/event-stream" } as const

/**
 * Layer that returns a single fixed response body. Use for stream-parser
 * fixture tests where the request shape is irrelevant. The body type widens
 * to whatever `Response` accepts so binary fixtures (`Uint8Array`,
 * `ReadableStream`, etc.) flow through without casts.
 */
export const fixedResponse = (
  body: ConstructorParameters<typeof Response>[0],
  init: ResponseInit = { headers: SSE_HEADERS },
) => executorWith(handlerLayer((input) => Effect.succeed(input.respond(body, init))))

/**
 * Layer that builds a response per request. Useful for echo servers.
 */
export const dynamicResponse = (handler: Handler) => executorWith(handlerLayer(handler))

/**
 * Layer that emits the supplied SSE chunks and then aborts mid-stream. Used to
 * exercise transport errors that surface during parsing.
 */
export const truncatedStream = (chunks: ReadonlyArray<string>) =>
  dynamicResponse((input) =>
    Effect.sync(() => {
      const encoder = new TextEncoder()
      const stream = new ReadableStream({
        start(controller) {
          for (const chunk of chunks) controller.enqueue(encoder.encode(chunk))
          controller.error(new Error("connection reset"))
        },
      })
      return input.respond(stream, { headers: SSE_HEADERS })
    }),
  )

/**
 * Layer that returns successive bodies on each request. Useful for scripting
 * multi-step model exchanges (e.g. tool-call loops). The last body in the
 * array is reused if the test makes more requests than scripted.
 */
export const scriptedResponses = (bodies: ReadonlyArray<string>, init: ResponseInit = { headers: SSE_HEADERS }) => {
  if (bodies.length === 0) throw new Error("scriptedResponses requires at least one body")
  return Layer.unwrap(
    Effect.gen(function* () {
      const cursor = yield* Ref.make(0)
      return dynamicResponse((input) =>
        Effect.gen(function* () {
          const index = yield* Ref.getAndUpdate(cursor, (n) => n + 1)
          return input.respond(bodies[index] ?? bodies[bodies.length - 1], init)
        }),
      )
    }),
  )
}
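// Editor's sketch (not part of the diff): a parser-fixture layer built from
// `fixedResponse` plus the `sseEvents` helper from lib/sse.ts below; the
// chunk builders come from lib/openai-chunks.ts.
const textLayer = fixedResponse(
  sseEvents(
    deltaChunk({ role: "assistant", content: "Hello" }),
    finishChunk("stop"),
  ),
)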
27
packages/llm/test/lib/openai-chunks.ts
Normal file
@@ -0,0 +1,27 @@
/**
 * Shared chunk shapes for OpenAI Chat / OpenAI-compatible Chat fixture tests.
 * Multiple test files build the same `{ id, choices: [{ delta, finish_reason }], usage }`
 * envelope; consolidating here keeps tool-call event shapes consistent.
 */

const FIXTURE_ID = "chatcmpl_fixture"

export const deltaChunk = (delta: object, finishReason: string | null = null) => ({
  id: FIXTURE_ID,
  choices: [{ delta, finish_reason: finishReason }],
  usage: null,
})

export const usageChunk = (usage: object) => ({
  id: FIXTURE_ID,
  choices: [],
  usage,
})

export const finishChunk = (reason: string) => deltaChunk({}, reason)

export const toolCallChunk = (id: string, name: string, args: string, index = 0) =>
  deltaChunk({
    role: "assistant",
    tool_calls: [{ index, id, function: { name, arguments: args } }],
  })
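// Editor's sketch (not part of the diff): scripting a two-step tool loop with
// these builders, `sseEvents` (lib/sse.ts), and `scriptedResponses`
// (lib/http.ts). The ids and argument JSON are illustrative.
const toolLoopLayer = scriptedResponses([
  sseEvents(toolCallChunk("call_1", "get_weather", '{"city":"Paris"}'), finishChunk("tool_calls")),
  sseEvents(deltaChunk({ role: "assistant", content: "Sunny in Paris." }), finishChunk("stop")),
])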
20
packages/llm/test/lib/sse.ts
Normal file
@@ -0,0 +1,20 @@
/**
 * Helpers for building deterministic SSE bodies in tests.
 *
 * Inline template-literal SSE strings are hard to write and review when chunks
 * contain JSON; this helper accepts plain values and serializes them, so test
 * authors only think about the chunk shapes, not the wire format.
 */
export const sseEvents = (
  ...chunks: ReadonlyArray<unknown>
): string => `${chunks.map(formatChunk).join("")}data: [DONE]\n\n`

const formatChunk = (chunk: unknown) =>
  `data: ${typeof chunk === "string" ? chunk : JSON.stringify(chunk)}\n\n`

/**
 * Build an SSE body from already-serialized strings (used when the chunk shape
 * itself is part of what's being tested, e.g. malformed chunks).
 */
export const sseRaw = (...lines: ReadonlyArray<string>): string =>
  lines.map((line) => `${line}\n\n`).join("")
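// Editor's note (not part of the diff): for reference, the exact wire text
// the helpers above produce.
//   sseEvents({ a: 1 })      => 'data: {"a":1}\n\ndata: [DONE]\n\n'
//   sseRaw("data: not-json") => 'data: not-json\n\n'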
74
packages/llm/test/llm.test.ts
Normal file
@@ -0,0 +1,74 @@
import { describe, expect, test } from "bun:test"
import { LLM } from "../src"
import { LLMRequest, Message, ModelRef, ToolChoice, ToolDefinition } from "../src/schema"

describe("llm constructors", () => {
  test("builds canonical schema classes from ergonomic input", () => {
    const request = LLM.request({
      id: "req_1",
      model: LLM.model({ id: "fake-model", provider: "fake", protocol: "openai-chat" }),
      system: "You are concise.",
      prompt: "Say hello.",
    })

    expect(request).toBeInstanceOf(LLMRequest)
    expect(request.model).toBeInstanceOf(ModelRef)
    expect(request.messages[0]).toBeInstanceOf(Message)
    expect(request.system).toEqual([{ type: "text", text: "You are concise." }])
    expect(request.messages[0]?.content).toEqual([{ type: "text", text: "Say hello." }])
    expect(request.generation).toEqual({})
    expect(request.tools).toEqual([])
  })

  test("updates requests without spreading schema class instances", () => {
    const base = LLM.request({
      id: "req_1",
      model: LLM.model({ id: "fake-model", provider: "fake", protocol: "openai-chat" }),
      prompt: "Say hello.",
    })
    const updated = LLM.updateRequest(base, {
      generation: { maxTokens: 20 },
      messages: [...base.messages, LLM.assistant("Hi.")],
    })

    expect(updated).toBeInstanceOf(LLMRequest)
    expect(updated.id).toBe("req_1")
    expect(updated.model).toEqual(base.model)
    expect(updated.generation).toEqual({ maxTokens: 20 })
    expect(updated.messages.map((message) => message.role)).toEqual(["user", "assistant"])
  })

  test("builds tool choices from names and tools", () => {
    const tool = LLM.toolDefinition({ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } })

    expect(tool).toBeInstanceOf(ToolDefinition)
    expect(LLM.toolChoice("lookup")).toEqual(new ToolChoice({ type: "tool", name: "lookup" }))
    expect(LLM.toolChoiceName("required")).toEqual(new ToolChoice({ type: "tool", name: "required" }))
    expect(LLM.toolChoice(tool)).toEqual(new ToolChoice({ type: "tool", name: "lookup" }))
  })

  test("builds tool choice modes from reserved strings", () => {
    expect(LLM.toolChoice("auto")).toEqual(new ToolChoice({ type: "auto" }))
    expect(LLM.toolChoice("none")).toEqual(new ToolChoice({ type: "none" }))
    expect(LLM.toolChoice("required")).toEqual(new ToolChoice({ type: "required" }))
    expect(LLM.request({
      model: LLM.model({ id: "fake-model", provider: "fake", protocol: "openai-chat" }),
      prompt: "Use tools if needed.",
      toolChoice: "required",
    }).toolChoice).toEqual(new ToolChoice({ type: "required" }))
  })

  test("builds assistant tool calls and tool result messages", () => {
    const call = LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })
    const result = LLM.toolResult({ id: "call_1", name: "lookup", result: { temperature: 72 } })

    expect(LLM.assistant([call]).content).toEqual([call])
    expect(LLM.toolMessage(result).content).toEqual([
      { type: "tool-result", id: "call_1", name: "lookup", result: { type: "json", value: { temperature: 72 } } },
    ])
  })

  test("extracts output text from responses", () => {
    expect(LLM.outputText({ events: [{ type: "text-delta", text: "hi" }, { type: "request-finish", reason: "stop" }] })).toBe("hi")
  })
})
223
packages/llm/test/patch.test.ts
Normal file
@@ -0,0 +1,223 @@
import { describe, expect, test } from "bun:test"
import { LLM, ProviderPatch } from "../src"
import { Model, Patch, context, plan } from "../src/patch"

const request = LLM.request({
  id: "req_1",
  model: LLM.model({
    id: "devstral-small",
    provider: "mistral",
    protocol: "openai-chat",
  }),
  prompt: "hi",
})

describe("llm patch", () => {
  test("constructors prefix ids and registry groups by phase", () => {
    const prompt = Patch.prompt("mistral.test", {
      reason: "test prompt",
      when: Model.provider("mistral"),
      apply: (request) => request,
    })
    const target = Patch.target("fake.test", {
      reason: "test target",
      apply: (draft: { value: number }) => draft,
    })

    const registry = Patch.registry([prompt, target])

    expect(prompt.id).toBe("prompt.mistral.test")
    expect(target.id).toBe("target.fake.test")
    expect(registry.prompt).toEqual([prompt])
    expect(registry.target.map((item) => item.id)).toEqual([target.id])
  })

  test("predicates compose", () => {
    const ctx = context({ request })

    expect(Model.provider("mistral").and(Model.protocol("openai-chat"))(ctx)).toBe(true)
    expect(Model.provider("anthropic").or(Model.idIncludes("devstral"))(ctx)).toBe(true)
    expect(Model.provider("mistral").not()(ctx)).toBe(false)
  })

  test("plan filters, sorts, applies, and traces deterministically", () => {
    const patches = [
      Patch.prompt("b", {
        reason: "second alphabetically",
        order: 1,
        apply: (request) => ({ ...request, metadata: { ...request.metadata, b: true } }),
      }),
      Patch.prompt("a", {
        reason: "first alphabetically",
        order: 1,
        apply: (request) => ({ ...request, metadata: { ...request.metadata, a: true } }),
      }),
      Patch.prompt("skip", {
        reason: "not selected",
        when: Model.provider("anthropic"),
        apply: (request) => ({ ...request, metadata: { ...request.metadata, skip: true } }),
      }),
    ]

    const patchPlan = plan({ phase: "prompt", context: context({ request }), patches })
    const output = patchPlan.apply(request)

    expect(patchPlan.trace.map((item) => item.id)).toEqual(["prompt.a", "prompt.b"])
    expect(output.metadata).toEqual({ a: true, b: true })
  })

  test("provider patch examples remove empty Anthropic content", () => {
    const input = LLM.request({
      id: "anthropic_empty",
      model: LLM.model({ id: "claude-sonnet", provider: "anthropic", protocol: "anthropic-messages" }),
      system: "",
      messages: [
        LLM.user([{ type: "text", text: "" }, { type: "text", text: "hello" }]),
        LLM.assistant({ type: "reasoning", text: "" }),
      ],
    })
    const output = plan({
      phase: "prompt",
      context: context({ request: input }),
      patches: [ProviderPatch.removeEmptyAnthropicContent],
    }).apply(input)

    expect(output.system).toEqual([])
    expect(output.messages).toHaveLength(1)
    expect(output.messages[0]?.content).toEqual([{ type: "text", text: "hello" }])
  })

  test("provider patch examples scrub model-specific tool call ids", () => {
    const input = LLM.request({
      id: "mistral_tool_ids",
      model: LLM.model({ id: "devstral-small", provider: "mistral", protocol: "openai-chat" }),
      messages: [
        LLM.assistant([LLM.toolCall({ id: "call.bad/value-long", name: "lookup", input: {} })]),
        LLM.toolMessage({ id: "call.bad/value-long", name: "lookup", result: "ok", resultType: "text" }),
      ],
    })
    const output = plan({
      phase: "prompt",
      context: context({ request: input }),
      patches: [ProviderPatch.scrubMistralToolIds],
    }).apply(input)

    expect(output.messages[0]?.content[0]).toMatchObject({ type: "tool-call", id: "callbadva" })
    expect(output.messages[1]?.content[0]).toMatchObject({ type: "tool-result", id: "callbadva" })
  })

  // Cache hint policy: mark first-2 system + last-2 messages with ephemeral
  // cache hints, gated on `model.capabilities.cache.prompt`. Adapters
  // (Anthropic, Bedrock) lower the hint to `cache_control` / `cachePoint`.
  describe("cachePromptHints", () => {
    const cacheCapableModel = (overrides: { provider: string; protocol: "anthropic-messages" | "bedrock-converse" }) =>
      LLM.model({
        id: "test-model",
        provider: overrides.provider,
        protocol: overrides.protocol,
        capabilities: LLM.capabilities({ cache: { prompt: true, contentBlocks: true } }),
      })

    const runCachePatch = (input: ReturnType<typeof LLM.request>) =>
      plan({
        phase: "prompt",
        context: context({ request: input }),
        patches: [ProviderPatch.cachePromptHints],
      }).apply(input)

    test("marks first 2 system parts with an ephemeral cache hint", () => {
      const input = LLM.request({
        id: "cache_system",
        model: cacheCapableModel({ provider: "anthropic", protocol: "anthropic-messages" }),
        system: ["First", "Second", "Third"].map(LLM.system),
        prompt: "hello",
      })
      const output = runCachePatch(input)

      expect(output.system).toHaveLength(3)
      expect(output.system[0]).toMatchObject({ text: "First", cache: { type: "ephemeral" } })
      expect(output.system[1]).toMatchObject({ text: "Second", cache: { type: "ephemeral" } })
      expect(output.system[2]).toMatchObject({ text: "Third" })
      expect(output.system[2]?.cache).toBeUndefined()
    })

    test("marks the last text part of the last 2 messages on cache-capable models", () => {
      const input = LLM.request({
        id: "cache_messages",
        model: cacheCapableModel({ provider: "anthropic", protocol: "anthropic-messages" }),
        messages: [
          LLM.user([{ type: "text", text: "m0" }]),
          LLM.user([{ type: "text", text: "m1" }]),
          LLM.user([{ type: "text", text: "m2" }]),
        ],
      })
      const output = runCachePatch(input)

      expect(output.messages).toHaveLength(3)
      // First message untouched.
      const first = output.messages[0].content[0]
      expect(first).toMatchObject({ type: "text", text: "m0" })
      expect("cache" in first ? first.cache : undefined).toBeUndefined()
      // Last 2 messages: cache on the (only) text part.
      expect(output.messages[1].content[0]).toMatchObject({ type: "text", text: "m1", cache: { type: "ephemeral" } })
      expect(output.messages[2].content[0]).toMatchObject({ type: "text", text: "m2", cache: { type: "ephemeral" } })
    })

    test("targets the last text part when a message has trailing non-text content", () => {
      const input = LLM.request({
        id: "cache_trailing_tool",
        model: cacheCapableModel({ provider: "anthropic", protocol: "anthropic-messages" }),
        messages: [
          LLM.assistant([
            { type: "text", text: "calling tool" },
            LLM.toolCall({ id: "call_1", name: "lookup", input: { q: "weather" } }),
          ]),
        ],
      })
      const output = runCachePatch(input)

      const content = output.messages[0].content
      expect(content[0]).toMatchObject({ type: "text", text: "calling tool", cache: { type: "ephemeral" } })
      expect(content[1]).toMatchObject({ type: "tool-call", id: "call_1" })
    })

    test("returns the message unchanged when it has no text part", () => {
      const input = LLM.request({
        id: "cache_no_text",
        model: cacheCapableModel({ provider: "anthropic", protocol: "anthropic-messages" }),
        messages: [
          LLM.toolMessage({ id: "call_1", name: "lookup", result: { ok: true } }),
        ],
      })
      const output = runCachePatch(input)

      expect(output.messages[0].content[0]).toMatchObject({ type: "tool-result", id: "call_1" })
      // No text part to mark, so the content array is identity-equal — the
      // `findLastIndex === -1` short-circuit avoids reallocating.
      expect(output.messages[0].content).toBe(input.messages[0].content)
    })

    test("is a no-op when the model does not advertise prompt caching", () => {
      const input = LLM.request({
        id: "cache_no_capability",
        model: LLM.model({
          id: "gpt-5",
          provider: "openai",
          protocol: "openai-responses",
          // capabilities.cache.prompt defaults to false
        }),
        system: ["A", "B"].map(LLM.system),
        messages: [LLM.user([{ type: "text", text: "hi" }])],
      })
      const output = runCachePatch(input)

      // Every text part should be free of cache hints.
      for (const part of output.system) expect(part.cache).toBeUndefined()
      for (const message of output.messages) {
        for (const part of message.content) {
          if (part.type === "text") expect(part.cache).toBeUndefined()
        }
      }
    })
  })
})
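// End-to-end sketch (hypothetical patch id and behavior) composing the pieces
// exercised above: a gated prompt patch, a plan, and its trace.
//
//   const dropTemperature = Patch.prompt("example.drop-temperature", {
//     reason: "example: strip temperature for mistral models",
//     when: Model.provider("mistral"),
//     apply: (request) => ({ ...request, generation: { ...request.generation, temperature: undefined } }),
//   })
//   const patchPlan = plan({ phase: "prompt", context: context({ request }), patches: [dropTemperature] })
//   patchPlan.apply(request)                 // generation.temperature removed
//   patchPlan.trace.map((item) => item.id)   // ["prompt.example.drop-temperature"]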
51
packages/llm/test/provider-resolver.test.ts
Normal file
@@ -0,0 +1,51 @@
import { describe, expect, test } from "bun:test"
import { Azure, GitHubCopilot, OpenAI, OpenAICompatibleFamily, ProviderResolver } from "../src"

describe("provider resolver", () => {
  test("fixed providers resolve protocol and auth defaults", () => {
    expect(OpenAI.resolver.resolve(ProviderResolver.input("gpt-5", "openai", {}))).toMatchObject({
      provider: "openai",
      protocol: "openai-responses",
      auth: "key",
    })
  })

  test("dynamic providers can select protocols from model metadata", () => {
    expect(GitHubCopilot.resolver.resolve(ProviderResolver.input("gpt-5", "github-copilot", {}))).toMatchObject({
      provider: "github-copilot",
      protocol: "openai-responses",
      auth: "key",
    })
    expect(GitHubCopilot.resolver.resolve(ProviderResolver.input("gpt-5-mini", "github-copilot", {}))).toMatchObject({
      provider: "github-copilot",
      protocol: "openai-chat",
      auth: "key",
    })
  })

  test("OpenAI-compatible families carry provider-specific defaults", () => {
    expect(OpenAICompatibleFamily.resolver.resolve(ProviderResolver.input("llama", "togetherai", {}))).toMatchObject({
      provider: "togetherai",
      protocol: "openai-compatible-chat",
      baseURL: "https://api.together.xyz/v1",
      auth: "key",
    })
  })

  test("Azure resolves resource URLs and API-version query params", () => {
    expect(
      Azure.resolver.resolve(
        ProviderResolver.input("gpt-5", "azure", { resourceName: "opencode-test", apiVersion: "2025-04-01-preview" }),
      ),
    ).toMatchObject({
      provider: "azure",
      protocol: "openai-responses",
      baseURL: "https://opencode-test.openai.azure.com/openai/v1",
      queryParams: { "api-version": "2025-04-01-preview" },
    })
    expect(Azure.resolver.resolve(ProviderResolver.input("gpt-4.1", "azure", { useCompletionUrls: true }))).toMatchObject({
      protocol: "openai-chat",
      queryParams: { "api-version": "v1" },
    })
  })
})
@@ -0,0 +1,46 @@
import { describe, expect } from "bun:test"
import { Effect } from "effect"
import { LLM } from "../../src"
import { LLMClient } from "../../src/adapter"
import { AnthropicMessages } from "../../src/provider/anthropic-messages"
import { expectFinish, expectWeatherToolCall, textRequest, weatherToolRequest } from "../recorded-scenarios"
import { recordedTests } from "../recorded-test"

const model = AnthropicMessages.model({
  id: "claude-haiku-4-5-20251001",
  apiKey: process.env.ANTHROPIC_API_KEY ?? "fixture",
})

const request = textRequest({ id: "recorded_anthropic_messages_text", model })
const toolRequest = weatherToolRequest({ id: "recorded_anthropic_messages_tool_call", model })

const recorded = recordedTests({
  prefix: "anthropic-messages",
  provider: "anthropic",
  protocol: "anthropic-messages",
  requires: ["ANTHROPIC_API_KEY"],
  options: { requestHeaders: ["content-type", "anthropic-version"] },
})
const anthropic = LLMClient.make({ adapters: [AnthropicMessages.adapter] })

describe("Anthropic Messages recorded", () => {
  recorded.effect("streams text", () =>
    Effect.gen(function* () {
      const response = yield* anthropic.generate(request)

      expect(LLM.outputText(response)).toBe("Hello!")
      expect(response.usage?.totalTokens).toBeGreaterThan(0)
      expectFinish(response.events, "stop")
    }),
  )

  recorded.effect.with("streams tool call", { tags: ["tool"] }, () =>
    Effect.gen(function* () {
      const response = yield* anthropic.generate(toolRequest)

      expect(response.events.some((event) => event.type === "tool-input-delta")).toBe(true)
      expectWeatherToolCall(response)
      expectFinish(response.events, "tool-calls")
    }),
  )
})
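// Replay note: like the Bedrock suite below, these tests replay committed
// cassettes by default and need no credentials; refreshing presumably follows
// the same convention, e.g. `RECORD=true ANTHROPIC_API_KEY=... bun run test`.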
346
packages/llm/test/provider/anthropic-messages.test.ts
Normal file
@@ -0,0 +1,346 @@
import { describe, expect } from "bun:test"
import { Effect, Layer } from "effect"
import { CacheHint, LLM, ProviderRequestError } from "../../src"
import { LLMClient } from "../../src/adapter"
import { AnthropicMessages } from "../../src/provider/anthropic-messages"
import { testEffect } from "../lib/effect"
import { fixedResponse } from "../lib/http"
import { sseEvents } from "../lib/sse"

const model = AnthropicMessages.model({
  id: "claude-sonnet-4-5",
  baseURL: "https://api.anthropic.test/v1/",
  headers: { "x-api-key": "test" },
})

const request = LLM.request({
  id: "req_1",
  model,
  system: { type: "text", text: "You are concise.", cache: new CacheHint({ type: "ephemeral" }) },
  prompt: "Say hello.",
  generation: { maxTokens: 20, temperature: 0 },
})

const it = testEffect(Layer.empty)

describe("Anthropic Messages adapter", () => {
  it.effect("prepares Anthropic Messages target", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] }).prepare(request)

      expect(prepared.target).toEqual({
        model: "claude-sonnet-4-5",
        system: [{ type: "text", text: "You are concise.", cache_control: { type: "ephemeral" } }],
        messages: [{ role: "user", content: [{ type: "text", text: "Say hello." }] }],
        stream: true,
        max_tokens: 20,
        temperature: 0,
      })
    }),
  )

  it.effect("prepares tool call and tool result messages", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] }).prepare(
        LLM.request({
          id: "req_tool_result",
          model,
          messages: [
            LLM.user("What is the weather?"),
            LLM.assistant([LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })]),
            LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } }),
          ],
        }),
      )

      expect(prepared.target).toEqual({
        model: "claude-sonnet-4-5",
        messages: [
          { role: "user", content: [{ type: "text", text: "What is the weather?" }] },
          { role: "assistant", content: [{ type: "tool_use", id: "call_1", name: "lookup", input: { query: "weather" } }] },
          { role: "user", content: [{ type: "tool_result", tool_use_id: "call_1", content: '{"forecast":"sunny"}' }] },
        ],
        stream: true,
        max_tokens: 4096,
      })
    }),
  )

  it.effect("parses text, reasoning, and usage stream fixtures", () =>
    Effect.gen(function* () {
      const body = sseEvents(
        { type: "message_start", message: { usage: { input_tokens: 5, cache_read_input_tokens: 1 } } },
        { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } },
        { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Hello" } },
        { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "!" } },
        { type: "content_block_stop", index: 0 },
        { type: "content_block_start", index: 1, content_block: { type: "thinking", thinking: "" } },
        { type: "content_block_delta", index: 1, delta: { type: "thinking_delta", thinking: "thinking" } },
        { type: "content_block_stop", index: 1 },
        { type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 2 } },
        { type: "message_stop" },
      )
      const response = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
        .generate(request)
        .pipe(Effect.provide(fixedResponse(body)))

      expect(LLM.outputText(response)).toBe("Hello!")
      expect(LLM.outputReasoning(response)).toBe("thinking")
      expect(LLM.outputUsage(response)).toMatchObject({
        inputTokens: 5,
        outputTokens: 2,
        cacheReadInputTokens: 1,
        totalTokens: 7,
      })
      expect(response.events.at(-1)).toMatchObject({ type: "request-finish", reason: "stop" })
    }),
  )

  it.effect("assembles streamed tool call input", () =>
    Effect.gen(function* () {
      const body = sseEvents(
        { type: "message_start", message: { usage: { input_tokens: 5 } } },
        { type: "content_block_start", index: 0, content_block: { type: "tool_use", id: "call_1", name: "lookup" } },
        { type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"query"' } },
        { type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: ':"weather"}' } },
        { type: "content_block_stop", index: 0 },
        { type: "message_delta", delta: { stop_reason: "tool_use" }, usage: { output_tokens: 1 } },
      )
      const response = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
        .generate(
          LLM.updateRequest(request, {
            tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }],
          }),
        )
        .pipe(Effect.provide(fixedResponse(body)))

      expect(LLM.outputToolCalls(response)).toEqual([{ type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } }])
      expect(response.events).toEqual([
        { type: "tool-input-delta", id: "call_1", name: "lookup", text: '{"query"' },
        { type: "tool-input-delta", id: "call_1", name: "lookup", text: ':"weather"}' },
        { type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } },
        {
          type: "request-finish",
          reason: "tool-calls",
          usage: { inputTokens: 5, outputTokens: 1, totalTokens: 6, native: { input_tokens: 5, output_tokens: 1 } },
        },
      ])
    }),
  )

  it.effect("emits provider-error events for mid-stream provider errors", () =>
    Effect.gen(function* () {
      const response = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
        .generate(request)
        .pipe(
          Effect.provide(
            fixedResponse(sseEvents({ type: "error", error: { type: "overloaded_error", message: "Overloaded" } })),
          ),
        )

      expect(response.events).toEqual([{ type: "provider-error", message: "Overloaded" }])
    }),
  )

  it.effect("fails HTTP provider errors before stream parsing", () =>
    Effect.gen(function* () {
      const error = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
        .generate(request)
        .pipe(
          Effect.provide(
            fixedResponse('{"type":"error","error":{"type":"invalid_request_error","message":"Bad request"}}', {
              status: 400,
              headers: { "content-type": "application/json" },
            }),
          ),
          Effect.flip,
        )

      expect(error).toBeInstanceOf(ProviderRequestError)
      expect(error).toMatchObject({ status: 400 })
      expect(error.message).toContain("HTTP 400")
    }),
  )

  it.effect("decodes server_tool_use + web_search_tool_result as provider-executed events", () =>
    Effect.gen(function* () {
      const body = sseEvents(
        { type: "message_start", message: { usage: { input_tokens: 5 } } },
        { type: "content_block_start", index: 0, content_block: { type: "server_tool_use", id: "srvtoolu_abc", name: "web_search" } },
        { type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"query":"effect 4"}' } },
        { type: "content_block_stop", index: 0 },
        {
          type: "content_block_start",
          index: 1,
          content_block: {
            type: "web_search_tool_result",
            tool_use_id: "srvtoolu_abc",
            content: [{ type: "web_search_result", url: "https://example.com", title: "Example" }],
          },
        },
        { type: "content_block_stop", index: 1 },
        { type: "content_block_start", index: 2, content_block: { type: "text", text: "" } },
        { type: "content_block_delta", index: 2, delta: { type: "text_delta", text: "Found it." } },
        { type: "content_block_stop", index: 2 },
        { type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 8 } },
      )
      const response = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
        .generate(
          LLM.updateRequest(request, {
            tools: [{ name: "web_search", description: "Web search", inputSchema: { type: "object" } }],
          }),
        )
        .pipe(Effect.provide(fixedResponse(body)))

      const toolCall = response.events.find((event) => event.type === "tool-call")
      expect(toolCall).toEqual({
        type: "tool-call",
        id: "srvtoolu_abc",
        name: "web_search",
        input: { query: "effect 4" },
        providerExecuted: true,
      })
      const toolResult = response.events.find((event) => event.type === "tool-result")
      expect(toolResult).toEqual({
        type: "tool-result",
        id: "srvtoolu_abc",
        name: "web_search",
        result: { type: "json", value: [{ type: "web_search_result", url: "https://example.com", title: "Example" }] },
        providerExecuted: true,
      })
      expect(LLM.outputText(response)).toBe("Found it.")
      expect(response.events.at(-1)).toMatchObject({ type: "request-finish", reason: "stop" })
    }),
  )

  it.effect("decodes web_search_tool_result_error as provider-executed error result", () =>
    Effect.gen(function* () {
      const body = sseEvents(
        { type: "message_start", message: { usage: { input_tokens: 5 } } },
        { type: "content_block_start", index: 0, content_block: { type: "server_tool_use", id: "srvtoolu_x", name: "web_search" } },
        { type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"query":"q"}' } },
        { type: "content_block_stop", index: 0 },
        {
          type: "content_block_start",
          index: 1,
          content_block: {
            type: "web_search_tool_result",
            tool_use_id: "srvtoolu_x",
            content: { type: "web_search_tool_result_error", error_code: "max_uses_exceeded" },
          },
        },
        { type: "content_block_stop", index: 1 },
        { type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 1 } },
      )
      const response = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
        .generate(
          LLM.updateRequest(request, {
            tools: [{ name: "web_search", description: "Web search", inputSchema: { type: "object" } }],
          }),
        )
        .pipe(Effect.provide(fixedResponse(body)))

      const toolResult = response.events.find((event) => event.type === "tool-result")
      expect(toolResult).toMatchObject({
        type: "tool-result",
        id: "srvtoolu_x",
        name: "web_search",
        result: { type: "error" },
        providerExecuted: true,
      })
    }),
  )

  it.effect("round-trips provider-executed assistant content into server tool blocks", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] }).prepare(
        LLM.request({
          id: "req_round_trip",
          model,
          messages: [
            LLM.user("Search for something."),
            LLM.assistant([
              {
                type: "tool-call",
                id: "srvtoolu_abc",
                name: "web_search",
                input: { query: "effect 4" },
                providerExecuted: true,
              },
              {
                type: "tool-result",
                id: "srvtoolu_abc",
                name: "web_search",
                result: { type: "json", value: [{ url: "https://example.com" }] },
                providerExecuted: true,
              },
              { type: "text", text: "Found it." },
            ]),
            LLM.user("Thanks."),
          ],
        }),
      )

      expect(prepared.target).toMatchObject({
        messages: [
          { role: "user", content: [{ type: "text", text: "Search for something." }] },
          {
            role: "assistant",
            content: [
              { type: "server_tool_use", id: "srvtoolu_abc", name: "web_search", input: { query: "effect 4" } },
              {
                type: "web_search_tool_result",
                tool_use_id: "srvtoolu_abc",
                content: [{ url: "https://example.com" }],
              },
              { type: "text", text: "Found it." },
            ],
          },
          { role: "user", content: [{ type: "text", text: "Thanks." }] },
        ],
      })
    }),
  )

  it.effect("rejects round-trip for unknown server tool names", () =>
    Effect.gen(function* () {
      const error = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
        .prepare(
          LLM.request({
            id: "req_unknown_server_tool",
            model,
            messages: [
              LLM.assistant([
                {
                  type: "tool-result",
                  id: "srvtoolu_abc",
                  name: "future_server_tool",
                  result: { type: "json", value: {} },
                  providerExecuted: true,
                },
              ]),
            ],
          }),
        )
        .pipe(Effect.flip)

      expect(error.message).toContain("future_server_tool")
    }),
  )

  it.effect("rejects unsupported user media content", () =>
    Effect.gen(function* () {
      const error = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
        .prepare(
          LLM.request({
            id: "req_media",
            model,
            messages: [LLM.user({ type: "media", mediaType: "image/png", data: "AAECAw==" })],
          }),
        )
        .pipe(Effect.flip)

      expect(error.message).toContain("Anthropic Messages user messages only support text content for now")
    }),
  )
})
533
packages/llm/test/provider/bedrock-converse.test.ts
Normal file
@@ -0,0 +1,533 @@
import { EventStreamCodec } from "@smithy/eventstream-codec"
import { fromUtf8, toUtf8 } from "@smithy/util-utf8"
import { describe, expect } from "bun:test"
import { Effect, Layer } from "effect"
import { CacheHint, LLM } from "../../src"
import { LLMClient } from "../../src/adapter"
import { BedrockConverse } from "../../src/provider/bedrock-converse"
import { testEffect } from "../lib/effect"
import { fixedResponse } from "../lib/http"
import { expectFinish, expectWeatherToolCall, weatherTool } from "../recorded-scenarios"
import { recordedTests } from "../recorded-test"

const codec = new EventStreamCodec(toUtf8, fromUtf8)
const utf8Encoder = new TextEncoder()

// Build a single AWS event-stream frame for a Converse stream event. Each
// frame carries `:message-type=event` + `:event-type=<name>` headers and a
// JSON payload body.
const eventFrame = (type: string, payload: object) =>
  codec.encode({
    headers: {
      ":message-type": { type: "string", value: "event" },
      ":event-type": { type: "string", value: type },
      ":content-type": { type: "string", value: "application/json" },
    },
    body: utf8Encoder.encode(JSON.stringify(payload)),
  })

const concat = (frames: ReadonlyArray<Uint8Array>) => {
  const total = frames.reduce((sum, frame) => sum + frame.length, 0)
  const out = new Uint8Array(total)
  let offset = 0
  for (const frame of frames) {
    out.set(frame, offset)
    offset += frame.length
  }
  return out
}

const eventStreamBody = (...payloads: ReadonlyArray<readonly [string, object]>) =>
  concat(payloads.map(([type, payload]) => eventFrame(type, payload)))

// Override the default SSE content-type with the binary event-stream type so
// the cassette layer treats the body as bytes when recording.
const fixedBytes = (bytes: Uint8Array) =>
  fixedResponse(bytes, { headers: { "content-type": "application/vnd.amazon.eventstream" } })
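// Round-trip sketch: `EventStreamCodec.decode` is the inverse of `encode`, so
// a frame built by `eventFrame` can be checked directly (assuming the codec's
// decode API; not used by the tests below):
//
//   const decoded = codec.decode(eventFrame("messageStop", { stopReason: "end_turn" }))
//   decoded.headers[":event-type"].value // "messageStop"
//   JSON.parse(toUtf8(decoded.body))     // { stopReason: "end_turn" }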
const model = BedrockConverse.model({
  id: "anthropic.claude-3-5-sonnet-20240620-v1:0",
  baseURL: "https://bedrock-runtime.test",
  apiKey: "test-bearer",
})

const baseRequest = LLM.request({
  id: "req_1",
  model,
  system: "You are concise.",
  prompt: "Say hello.",
  generation: { maxTokens: 64, temperature: 0 },
})

const it = testEffect(Layer.empty)

describe("Bedrock Converse adapter", () => {
  it.effect("prepares Converse target with system, inference config, and messages", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(baseRequest)

      expect(prepared.target).toEqual({
        modelId: "anthropic.claude-3-5-sonnet-20240620-v1:0",
        system: [{ text: "You are concise." }],
        messages: [{ role: "user", content: [{ text: "Say hello." }] }],
        inferenceConfig: { maxTokens: 64, temperature: 0 },
      })
    }),
  )

  it.effect("prepares tool config with toolSpec and toolChoice", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(
        LLM.updateRequest(baseRequest, {
          tools: [
            {
              name: "lookup",
              description: "Lookup data",
              inputSchema: { type: "object", properties: { query: { type: "string" } }, required: ["query"] },
            },
          ],
          toolChoice: LLM.toolChoice({ type: "required" }),
        }),
      )

      expect(prepared.target).toMatchObject({
        toolConfig: {
          tools: [
            {
              toolSpec: {
                name: "lookup",
                description: "Lookup data",
                inputSchema: {
                  json: { type: "object", properties: { query: { type: "string" } }, required: ["query"] },
                },
              },
            },
          ],
          toolChoice: { any: {} },
        },
      })
    }),
  )

  it.effect("lowers assistant tool-call + tool-result message history", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(
        LLM.request({
          id: "req_history",
          model,
          messages: [
            LLM.user("What is the weather?"),
            LLM.assistant([LLM.toolCall({ id: "tool_1", name: "lookup", input: { query: "weather" } })]),
            LLM.toolMessage({ id: "tool_1", name: "lookup", result: { forecast: "sunny" } }),
          ],
        }),
      )

      expect(prepared.target).toMatchObject({
        messages: [
          { role: "user", content: [{ text: "What is the weather?" }] },
          {
            role: "assistant",
            content: [{ toolUse: { toolUseId: "tool_1", name: "lookup", input: { query: "weather" } } }],
          },
          {
            role: "user",
            content: [
              {
                toolResult: {
                  toolUseId: "tool_1",
                  content: [{ json: { forecast: "sunny" } }],
                  status: "success",
                },
              },
            ],
          },
        ],
      })
    }),
  )

  it.effect("decodes text-delta + messageStop + metadata usage from binary event stream", () =>
    Effect.gen(function* () {
      const body = eventStreamBody(
        ["messageStart", { role: "assistant" }],
        ["contentBlockDelta", { contentBlockIndex: 0, delta: { text: "Hello" } }],
        ["contentBlockDelta", { contentBlockIndex: 0, delta: { text: "!" } }],
        ["contentBlockStop", { contentBlockIndex: 0 }],
        ["messageStop", { stopReason: "end_turn" }],
        ["metadata", { usage: { inputTokens: 5, outputTokens: 2, totalTokens: 7 } }],
      )
      const response = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] })
        .generate(baseRequest)
        .pipe(Effect.provide(fixedBytes(body)))

      expect(LLM.outputText(response)).toBe("Hello!")
      const finishes = response.events.filter((event) => event.type === "request-finish")
      // Bedrock splits the finish across `messageStop` (carries reason) and
      // `metadata` (carries usage). We consolidate them into a single
      // terminal `request-finish` event with both.
      expect(finishes).toHaveLength(1)
      expect(finishes[0]).toMatchObject({ type: "request-finish", reason: "stop" })
      expect(LLM.outputUsage(response)).toMatchObject({
        inputTokens: 5,
        outputTokens: 2,
        totalTokens: 7,
      })
    }),
  )

  it.effect("assembles streamed tool call input", () =>
    Effect.gen(function* () {
      const body = eventStreamBody(
        ["messageStart", { role: "assistant" }],
        [
          "contentBlockStart",
          {
            contentBlockIndex: 0,
            start: { toolUse: { toolUseId: "tool_1", name: "lookup" } },
          },
        ],
        ["contentBlockDelta", { contentBlockIndex: 0, delta: { toolUse: { input: '{"query"' } } }],
        ["contentBlockDelta", { contentBlockIndex: 0, delta: { toolUse: { input: ':"weather"}' } } }],
        ["contentBlockStop", { contentBlockIndex: 0 }],
        ["messageStop", { stopReason: "tool_use" }],
      )
      const response = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] })
        .generate(
          LLM.updateRequest(baseRequest, {
            tools: [{ name: "lookup", description: "Lookup", inputSchema: { type: "object" } }],
          }),
        )
        .pipe(Effect.provide(fixedBytes(body)))

      expect(LLM.outputToolCalls(response)).toEqual([
        { type: "tool-call", id: "tool_1", name: "lookup", input: { query: "weather" } },
      ])
      const events = response.events.filter((event) => event.type === "tool-input-delta")
      expect(events).toEqual([
        { type: "tool-input-delta", id: "tool_1", name: "lookup", text: '{"query"' },
        { type: "tool-input-delta", id: "tool_1", name: "lookup", text: ':"weather"}' },
      ])
      expect(response.events.at(-1)).toMatchObject({ type: "request-finish", reason: "tool-calls" })
    }),
  )

  it.effect("decodes reasoning deltas", () =>
    Effect.gen(function* () {
      const body = eventStreamBody(
        ["messageStart", { role: "assistant" }],
        [
          "contentBlockDelta",
          { contentBlockIndex: 0, delta: { reasoningContent: { text: "Let me think." } } },
        ],
        ["contentBlockStop", { contentBlockIndex: 0 }],
        ["messageStop", { stopReason: "end_turn" }],
      )
      const response = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] })
        .generate(baseRequest)
        .pipe(Effect.provide(fixedBytes(body)))

      expect(LLM.outputReasoning(response)).toBe("Let me think.")
    }),
  )

  it.effect("emits provider-error for throttlingException", () =>
    Effect.gen(function* () {
      const body = eventStreamBody(
        ["messageStart", { role: "assistant" }],
        ["throttlingException", { message: "Slow down" }],
      )
      const response = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] })
        .generate(baseRequest)
        .pipe(Effect.provide(fixedBytes(body)))

      expect(response.events.find((event) => event.type === "provider-error")).toEqual({
        type: "provider-error",
        message: "Slow down",
        retryable: true,
      })
    }),
  )

  it.effect("rejects requests with no auth path", () =>
    Effect.gen(function* () {
      const unsignedModel = BedrockConverse.model({
        id: "anthropic.claude-3-5-sonnet-20240620-v1:0",
        baseURL: "https://bedrock-runtime.test",
      })
      const error = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] })
        .generate(LLM.updateRequest(baseRequest, { model: unsignedModel }))
        .pipe(Effect.provide(fixedBytes(eventStreamBody(["messageStop", { stopReason: "end_turn" }]))), Effect.flip)

      expect(error.message).toContain("Bedrock Converse requires either model.apiKey")
    }),
  )

  it.effect("signs requests with SigV4 when AWS credentials are provided (deterministic plumbing check)", () =>
    Effect.gen(function* () {
      const signed = BedrockConverse.model({
        id: "anthropic.claude-3-5-sonnet-20240620-v1:0",
        baseURL: "https://bedrock-runtime.test",
        credentials: {
          region: "us-east-1",
          accessKeyId: "AKIAIOSFODNN7EXAMPLE",
          secretAccessKey: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
        },
      })
      const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(
        LLM.updateRequest(baseRequest, { model: signed }),
      )

      expect(prepared.adapter).toBe("bedrock-converse")
      // The prepare phase doesn't sign — toHttp does. We assert the credential
      // is plumbed onto the model native field for the signer to find.
      expect(prepared.model.native).toMatchObject({
        aws_credentials: { region: "us-east-1", accessKeyId: "AKIAIOSFODNN7EXAMPLE" },
        aws_region: "us-east-1",
      })
    }),
  )

  it.effect("emits cachePoint markers after system, user-text, and assistant-text with cache hints", () =>
    Effect.gen(function* () {
      const cache = new CacheHint({ type: "ephemeral" })
      const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(
        LLM.request({
          id: "req_cache",
          model,
          system: [{ type: "text", text: "System prefix.", cache }],
          messages: [
            LLM.user([{ type: "text", text: "User prefix.", cache }]),
            LLM.assistant([{ type: "text", text: "Assistant prefix.", cache }]),
          ],
          generation: { maxTokens: 16, temperature: 0 },
        }),
      )

      expect(prepared.target).toMatchObject({
        // System: text block followed by cachePoint marker.
        system: [{ text: "System prefix." }, { cachePoint: { type: "default" } }],
        messages: [
          {
            role: "user",
            content: [{ text: "User prefix." }, { cachePoint: { type: "default" } }],
          },
          {
            role: "assistant",
            content: [{ text: "Assistant prefix." }, { cachePoint: { type: "default" } }],
          },
        ],
      })
    }),
  )

  it.effect("does not emit cachePoint when no cache hint is set", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(baseRequest)
      expect(prepared.target).toMatchObject({
        system: [{ text: "You are concise." }],
        messages: [{ role: "user", content: [{ text: "Say hello." }] }],
      })
    }),
  )

  it.effect("lowers image media into Bedrock image blocks", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(
        LLM.request({
          id: "req_image",
          model,
          messages: [
            LLM.user([
              { type: "text", text: "What is in this image?" },
              { type: "media", mediaType: "image/png", data: "AAAA" },
              { type: "media", mediaType: "image/jpeg", data: "BBBB" },
              { type: "media", mediaType: "image/jpg", data: "CCCC" },
              { type: "media", mediaType: "image/webp", data: "DDDD" },
            ]),
          ],
        }),
      )

      expect(prepared.target).toMatchObject({
        messages: [
          {
            role: "user",
            content: [
              { text: "What is in this image?" },
              { image: { format: "png", source: { bytes: "AAAA" } } },
              { image: { format: "jpeg", source: { bytes: "BBBB" } } },
              // image/jpg is a non-standard alias; we map it to jpeg.
              { image: { format: "jpeg", source: { bytes: "CCCC" } } },
              { image: { format: "webp", source: { bytes: "DDDD" } } },
            ],
          },
        ],
      })
    }),
  )

  it.effect("base64-encodes Uint8Array image bytes", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(
        LLM.request({
          id: "req_image_bytes",
          model,
          messages: [
            LLM.user([
              { type: "media", mediaType: "image/png", data: new Uint8Array([1, 2, 3, 4, 5]) },
            ]),
          ],
        }),
      )

      // Buffer.from([1,2,3,4,5]).toString("base64") === "AQIDBAU="
      expect(prepared.target).toMatchObject({
        messages: [
          {
            role: "user",
            content: [{ image: { format: "png", source: { bytes: "AQIDBAU=" } } }],
          },
        ],
      })
    }),
  )

  it.effect("lowers document media into Bedrock document blocks with format and name", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(
        LLM.request({
          id: "req_doc",
          model,
          messages: [
            LLM.user([
              { type: "media", mediaType: "application/pdf", data: "PDFDATA", filename: "report.pdf" },
              { type: "media", mediaType: "text/csv", data: "CSVDATA" },
            ]),
          ],
        }),
      )

      expect(prepared.target).toMatchObject({
        messages: [
          {
            role: "user",
            content: [
              // Filename round-trips when supplied.
              { document: { format: "pdf", name: "report.pdf", source: { bytes: "PDFDATA" } } },
              // Falls back to a stable placeholder when filename is missing.
              { document: { format: "csv", name: "document.csv", source: { bytes: "CSVDATA" } } },
            ],
          },
        ],
      })
    }),
  )

  it.effect("rejects unsupported image media types", () =>
    Effect.gen(function* () {
      const error = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] })
        .prepare(
          LLM.request({
            id: "req_bad_image",
            model,
            messages: [LLM.user([{ type: "media", mediaType: "image/svg+xml", data: "x" }])],
          }),
        )
        .pipe(Effect.flip)

      expect(error.message).toContain("Bedrock Converse does not support image media type image/svg+xml")
    }),
  )

  it.effect("rejects unsupported document media types", () =>
    Effect.gen(function* () {
      const error = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] })
        .prepare(
          LLM.request({
            id: "req_bad_doc",
            model,
            messages: [
              LLM.user([{ type: "media", mediaType: "application/x-tar", data: "x", filename: "a.tar" }]),
            ],
          }),
        )
        .pipe(Effect.flip)

      expect(error.message).toContain("Bedrock Converse does not support document media type application/x-tar")
    }),
  )
})

// Live recorded integration tests. Run with `RECORD=true AWS_ACCESS_KEY_ID=...
// AWS_SECRET_ACCESS_KEY=... [AWS_SESSION_TOKEN=...] bun run test ...` to refresh
// cassettes; replay is the default and works without credentials.
//
// Region is pinned to us-east-1 in tests so the request URL is stable across
// machines on replay. If you need to record from a different region (e.g. your
// account has access elsewhere), pass `BEDROCK_RECORDING_REGION=eu-west-1` —
// but then commit the resulting cassette and others should record from the
// same region too.
const RECORDING_REGION = process.env.BEDROCK_RECORDING_REGION ?? "us-east-1"

const recordedModel = () =>
  BedrockConverse.model({
    // Most newer Anthropic models on Bedrock require a cross-region inference
    // profile (`us.` prefix). Nova does not require an Anthropic use-case form
    // and is on-demand-throughput accessible by default for most accounts.
    id: process.env.BEDROCK_MODEL_ID ?? "us.amazon.nova-micro-v1:0",
    credentials: {
      region: RECORDING_REGION,
      accessKeyId: process.env.AWS_ACCESS_KEY_ID ?? "fixture",
      secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY ?? "fixture",
      sessionToken: process.env.AWS_SESSION_TOKEN,
    },
  })

const recorded = recordedTests({
  prefix: "bedrock-converse",
  provider: "amazon-bedrock",
  protocol: "bedrock-converse",
  requires: ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
})

describe("Bedrock Converse recorded", () => {
  recorded.effect("streams text", () =>
    Effect.gen(function* () {
      const llm = LLMClient.make({ adapters: [BedrockConverse.adapter] })
      const response = yield* llm.generate(
        LLM.request({
          id: "recorded_bedrock_text",
          model: recordedModel(),
          system: "Reply with the single word 'Hello'.",
          prompt: "Say hello.",
          generation: { maxTokens: 16, temperature: 0 },
        }),
      )

      expect(LLM.outputText(response)).toMatch(/hello/i)
      expect(response.events.at(-1)).toMatchObject({ type: "request-finish" })
    }),
  )

  recorded.effect.with("streams a tool call", { tags: ["tool"] }, () =>
    Effect.gen(function* () {
      const llm = LLMClient.make({ adapters: [BedrockConverse.adapter] })
      const response = yield* llm.generate(
        LLM.request({
          id: "recorded_bedrock_tool_call",
          model: recordedModel(),
          system: "Call tools exactly as requested.",
          prompt: "Call get_weather with city exactly Paris.",
          tools: [weatherTool],
          toolChoice: LLM.toolChoice(weatherTool),
          generation: { maxTokens: 80, temperature: 0 },
        }),
      )

      expect(response.events.some((event) => event.type === "tool-input-delta")).toBe(true)
      expectWeatherToolCall(response)
      expectFinish(response.events, "tool-calls")
    }),
  )
})
44
packages/llm/test/provider/gemini.recorded.test.ts
Normal file
@@ -0,0 +1,44 @@
import { describe, expect } from "bun:test"
import { Effect } from "effect"
import { LLM } from "../../src"
import { LLMClient } from "../../src/adapter"
import { Gemini } from "../../src/provider/gemini"
import { expectFinish, expectWeatherToolCall, textRequest, weatherToolRequest } from "../recorded-scenarios"
import { recordedTests } from "../recorded-test"

const model = Gemini.model({
  id: "gemini-2.5-flash",
  apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY ?? "fixture",
})

const request = textRequest({ id: "recorded_gemini_text", model, maxTokens: 80 })
const toolRequest = weatherToolRequest({ id: "recorded_gemini_tool_call", model })

const recorded = recordedTests({
  prefix: "gemini",
  provider: "google",
  protocol: "gemini",
  requires: ["GOOGLE_GENERATIVE_AI_API_KEY"],
})
const gemini = LLMClient.make({ adapters: [Gemini.adapter] })

describe("Gemini recorded", () => {
  recorded.effect("streams text", () =>
    Effect.gen(function* () {
      const response = yield* gemini.generate(request)

      expect(LLM.outputText(response)).toMatch(/^Hello!?$/)
      expect(response.usage?.totalTokens).toBeGreaterThan(0)
      expectFinish(response.events, "stop")
    }),
  )

  recorded.effect.with("streams tool call", { tags: ["tool"] }, () =>
    Effect.gen(function* () {
      const response = yield* gemini.generate(toolRequest)

      expectWeatherToolCall(response)
      expectFinish(response.events, "tool-calls")
    }),
  )
})
347
packages/llm/test/provider/gemini.test.ts
Normal file
@@ -0,0 +1,347 @@
import { describe, expect } from "bun:test"
|
||||
import { Effect, Layer } from "effect"
|
||||
import { LLM, ProviderChunkError } from "../../src"
|
||||
import { LLMClient } from "../../src/adapter"
|
||||
import { Gemini } from "../../src/provider/gemini"
|
||||
import { testEffect } from "../lib/effect"
|
||||
import { fixedResponse } from "../lib/http"
|
||||
import { sseEvents, sseRaw } from "../lib/sse"

const model = Gemini.model({
  id: "gemini-2.5-flash",
  baseURL: "https://generativelanguage.test/v1beta/",
  headers: { "x-goog-api-key": "test" },
})

const request = LLM.request({
  id: "req_1",
  model,
  system: "You are concise.",
  prompt: "Say hello.",
  generation: { maxTokens: 20, temperature: 0 },
})

const it = testEffect(Layer.empty)

describe("Gemini adapter", () => {
  it.effect("prepares Gemini target", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [Gemini.adapter] }).prepare(request)

      expect(prepared.target).toEqual({
        contents: [{ role: "user", parts: [{ text: "Say hello." }] }],
        systemInstruction: { parts: [{ text: "You are concise." }] },
        generationConfig: { maxOutputTokens: 20, temperature: 0 },
      })
    }),
  )

  it.effect("prepares multimodal user input and tool history", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [Gemini.adapter] }).prepare(
        LLM.request({
          id: "req_tool_result",
          model,
          tools: [{
            name: "lookup",
            description: "Lookup data",
            inputSchema: { type: "object", properties: { query: { type: "string" } } },
          }],
          toolChoice: { type: "tool", name: "lookup" },
          messages: [
            LLM.user([
              { type: "text", text: "What is in this image?" },
              { type: "media", mediaType: "image/png", data: "AAECAw==" },
            ]),
            LLM.assistant([LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })]),
            LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } }),
          ],
        }),
      )

      expect(prepared.target).toEqual({
        contents: [
          {
            role: "user",
            parts: [
              { text: "What is in this image?" },
              { inlineData: { mimeType: "image/png", data: "AAECAw==" } },
            ],
          },
          {
            role: "model",
            parts: [{ functionCall: { name: "lookup", args: { query: "weather" } } }],
          },
          {
            role: "user",
            parts: [{ functionResponse: { name: "lookup", response: { name: "lookup", content: '{"forecast":"sunny"}' } } }],
          },
        ],
        tools: [{
          functionDeclarations: [{
            name: "lookup",
            description: "Lookup data",
            parameters: { type: "object", properties: { query: { type: "string" } } },
          }],
        }],
        toolConfig: { functionCallingConfig: { mode: "ANY", allowedFunctionNames: ["lookup"] } },
      })
    }),
  )

  it.effect("omits tools when tool choice is none", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [Gemini.adapter] }).prepare(
        LLM.request({
          id: "req_no_tools",
          model,
          prompt: "Say hello.",
          tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }],
          toolChoice: { type: "none" },
        }),
      )

      expect(prepared.target).toEqual({
        contents: [{ role: "user", parts: [{ text: "Say hello." }] }],
      })
    }),
  )

  it.effect("sanitizes integer enums, dangling required, untyped arrays, and scalar object keys", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [Gemini.adapter] }).prepare(
        LLM.request({
          id: "req_schema_patch",
          model,
          prompt: "Use the tool.",
          tools: [{
            name: "lookup",
            description: "Lookup data",
            inputSchema: {
              type: "object",
              required: ["status", "missing"],
              properties: {
                status: { type: "integer", enum: [1, 2] },
                tags: { type: "array" },
                name: { type: "string", properties: { ignored: { type: "string" } }, required: ["ignored"] },
              },
            },
          }],
        }),
      )

      expect(prepared.target).toMatchObject({
        tools: [{
          functionDeclarations: [{
            parameters: {
              type: "object",
              required: ["status"],
              properties: {
                status: { type: "string", enum: ["1", "2"] },
                tags: { type: "array", items: { type: "string" } },
                name: { type: "string" },
              },
            },
          }],
        }],
      })
    }),
  )

  it.effect("parses text, reasoning, and usage stream fixtures", () =>
    Effect.gen(function* () {
      const body = sseEvents(
        {
          candidates: [{
            content: { role: "model", parts: [{ text: "thinking", thought: true }] },
          }],
        },
        {
          candidates: [{
            content: { role: "model", parts: [{ text: "Hello" }] },
          }],
        },
        {
          candidates: [{
            content: { role: "model", parts: [{ text: "!" }] },
            finishReason: "STOP",
          }],
        },
        {
          usageMetadata: {
            promptTokenCount: 5,
            candidatesTokenCount: 2,
            totalTokenCount: 7,
            thoughtsTokenCount: 1,
            cachedContentTokenCount: 1,
          },
        },
      )
      const response = yield* LLMClient.make({ adapters: [Gemini.adapter] })
        .generate(request)
        .pipe(Effect.provide(fixedResponse(body)))

      expect(LLM.outputText(response)).toBe("Hello!")
      expect(LLM.outputReasoning(response)).toBe("thinking")
      expect(LLM.outputUsage(response)).toMatchObject({
        inputTokens: 5,
        outputTokens: 2,
        reasoningTokens: 1,
        cacheReadInputTokens: 1,
        totalTokens: 7,
      })
      expect(response.events).toEqual([
        { type: "reasoning-delta", text: "thinking" },
        { type: "text-delta", text: "Hello" },
        { type: "text-delta", text: "!" },
        {
          type: "request-finish",
          reason: "stop",
          usage: {
            inputTokens: 5,
            outputTokens: 2,
            reasoningTokens: 1,
            cacheReadInputTokens: 1,
            totalTokens: 7,
            native: {
              promptTokenCount: 5,
              candidatesTokenCount: 2,
              totalTokenCount: 7,
              thoughtsTokenCount: 1,
              cachedContentTokenCount: 1,
            },
          },
        },
      ])
    }),
  )

  it.effect("emits streamed tool calls and maps finish reason", () =>
    Effect.gen(function* () {
      const body = sseEvents(
        {
          candidates: [{
            content: {
              role: "model",
              parts: [{ functionCall: { name: "lookup", args: { query: "weather" } } }],
            },
            finishReason: "STOP",
          }],
          usageMetadata: { promptTokenCount: 5, candidatesTokenCount: 1 },
        },
      )
      const response = yield* LLMClient.make({ adapters: [Gemini.adapter] })
        .generate(
          LLM.updateRequest(request, {
            tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }],
          }),
        )
        .pipe(Effect.provide(fixedResponse(body)))

      expect(LLM.outputToolCalls(response)).toEqual([{ type: "tool-call", id: "tool_0", name: "lookup", input: { query: "weather" } }])
      expect(response.events).toEqual([
        { type: "tool-call", id: "tool_0", name: "lookup", input: { query: "weather" } },
        {
          type: "request-finish",
          reason: "tool-calls",
          usage: { inputTokens: 5, outputTokens: 1, totalTokens: 6, native: { promptTokenCount: 5, candidatesTokenCount: 1 } },
        },
      ])
    }),
  )

  it.effect("assigns unique ids to multiple streamed tool calls", () =>
    Effect.gen(function* () {
      const body = sseEvents(
        {
          candidates: [{
            content: {
              role: "model",
              parts: [
                { functionCall: { name: "lookup", args: { query: "weather" } } },
                { functionCall: { name: "lookup", args: { query: "news" } } },
              ],
            },
            finishReason: "STOP",
          }],
        },
      )
      const response = yield* LLMClient.make({ adapters: [Gemini.adapter] })
        .generate(
          LLM.updateRequest(request, {
            tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }],
          }),
        )
        .pipe(Effect.provide(fixedResponse(body)))

      expect(LLM.outputToolCalls(response)).toEqual([
        { type: "tool-call", id: "tool_0", name: "lookup", input: { query: "weather" } },
        { type: "tool-call", id: "tool_1", name: "lookup", input: { query: "news" } },
      ])
      expect(response.events.at(-1)).toMatchObject({ type: "request-finish", reason: "tool-calls" })
    }),
  )

  it.effect("maps length and content-filter finish reasons", () =>
    Effect.gen(function* () {
      const length = yield* LLMClient.make({ adapters: [Gemini.adapter] })
        .generate(request)
        .pipe(
          Effect.provide(
            fixedResponse(sseEvents({ candidates: [{ content: { role: "model", parts: [] }, finishReason: "MAX_TOKENS" }] })),
          ),
        )
      const filtered = yield* LLMClient.make({ adapters: [Gemini.adapter] })
        .generate(request)
        .pipe(
          Effect.provide(
            fixedResponse(sseEvents({ candidates: [{ content: { role: "model", parts: [] }, finishReason: "SAFETY" }] })),
          ),
        )

      expect(length.events).toEqual([{ type: "request-finish", reason: "length" }])
      expect(filtered.events).toEqual([{ type: "request-finish", reason: "content-filter" }])
    }),
  )

  it.effect("leaves total usage undefined when component counts are missing", () =>
    Effect.gen(function* () {
      const response = yield* LLMClient.make({ adapters: [Gemini.adapter] })
        .generate(request)
        .pipe(Effect.provide(fixedResponse(sseEvents({ usageMetadata: { thoughtsTokenCount: 1 } }))))

      expect(response.usage).toMatchObject({ reasoningTokens: 1 })
      expect(response.usage?.totalTokens).toBeUndefined()
    }),
  )

  it.effect("fails invalid stream chunks", () =>
    Effect.gen(function* () {
      const error = yield* LLMClient.make({ adapters: [Gemini.adapter] })
        .generate(request)
        .pipe(
          Effect.provide(fixedResponse(sseRaw("data: {not json}"))),
          Effect.flip,
        )

      expect(error).toBeInstanceOf(ProviderChunkError)
      expect(error.message).toContain("Invalid Gemini stream chunk")
    }),
  )

  it.effect("rejects unsupported assistant media content", () =>
    Effect.gen(function* () {
      const error = yield* LLMClient.make({ adapters: [Gemini.adapter] })
        .prepare(
          LLM.request({
            id: "req_media",
            model,
            messages: [LLM.assistant({ type: "media", mediaType: "image/png", data: "AAECAw==" })],
          }),
        )
        .pipe(Effect.flip)

      expect(error.message).toContain("Gemini assistant messages only support text, reasoning, and tool-call content for now")
    }),
  )
})
@@ -0,0 +1,62 @@
import { describe, expect } from "bun:test"
import { Effect, Stream } from "effect"
import { LLM, LLMEvent } from "../../src"
import { LLMClient } from "../../src/adapter"
import { OpenAIChat } from "../../src/provider/openai-chat"
import { ToolRuntime } from "../../src/tool-runtime"
import { weatherRuntimeTool } from "../recorded-scenarios"
import { recordedTests } from "../recorded-test"

// Multi-interaction recorded test: drives the typed `ToolRuntime` against a
// live OpenAI Chat endpoint so the cassette captures every model round in
// order (model -> tool dispatch -> model). The cassette is only created with
// `RECORD=true OPENAI_API_KEY=...`. In replay mode the test is skipped if the
// cassette is missing — see `recordedTests` for the gate.
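// Illustrative commands (the exact invocation may differ per setup):
//   RECORD=true OPENAI_API_KEY=sk-... bun test   # record against the live API
//   bun test                                     # replay from the cassette, offline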

const model = OpenAIChat.model({
  id: "gpt-4o-mini",
  apiKey: process.env.OPENAI_API_KEY ?? "fixture",
})

const request = LLM.request({
  id: "recorded_openai_chat_tool_loop",
  model,
  system: "Use the get_weather tool, then answer in one short sentence.",
  prompt: "What is the weather in Paris?",
  generation: { maxTokens: 80, temperature: 0 },
})

const recorded = recordedTests({
  prefix: "openai-chat",
  provider: "openai",
  protocol: "openai-chat",
  requires: ["OPENAI_API_KEY"],
})
const openai = LLMClient.make({ adapters: [OpenAIChat.adapter] })

describe("OpenAI Chat tool-loop recorded", () => {
  recorded.effect.with("drives a tool loop end-to-end", { tags: ["tool", "tool-loop"] }, () =>
    Effect.gen(function* () {
      const events = Array.from(
        yield* ToolRuntime.run(openai, { request, tools: { get_weather: weatherRuntimeTool } }).pipe(Stream.runCollect),
      )

      // Two model rounds: tool-call + tool-result + final answer. Two
      // `request-finish` events confirm both interactions in the cassette
      // were dispatched in order.
      const finishes = events.filter(LLMEvent.is.requestFinish)
      expect(finishes).toHaveLength(2)
      expect(finishes[0]?.reason).toBe("tool-calls")
      expect(finishes.at(-1)?.reason).toBe("stop")

      const toolResult = events.find(LLMEvent.is.toolResult)
      expect(toolResult).toMatchObject({
        type: "tool-result",
        name: "get_weather",
        result: { type: "json", value: { temperature: 22, condition: "sunny" } },
      })

      expect(LLM.outputText({ events })).toContain("Paris")
    }),
  )
})
87
packages/llm/test/provider/openai-chat.recorded.test.ts
Normal file
@@ -0,0 +1,87 @@
import { describe, expect } from "bun:test"
import { Effect } from "effect"
import { LLM } from "../../src"
import { LLMClient } from "../../src/adapter"
import { OpenAIChat } from "../../src/provider/openai-chat"
import { expectFinish, textRequest, weatherTool, weatherToolName, weatherToolRequest } from "../recorded-scenarios"
import { recordedTests } from "../recorded-test"

const model = OpenAIChat.model({
  id: "gpt-4o-mini",
  apiKey: process.env.OPENAI_API_KEY ?? "fixture",
})

const request = textRequest({ id: "recorded_openai_chat_text", model, prompt: "Say hello in one short sentence." })
const toolCallId = "call_weather"
const toolRequest = weatherToolRequest({ id: "recorded_openai_chat_tool_call", model })

const toolResultRequest = LLM.request({
  id: "recorded_openai_chat_tool_result",
  model,
  system: "Answer using only the provided tool result.",
  messages: [
    LLM.user("What is the weather in Paris?"),
    LLM.assistant([LLM.toolCall({ id: toolCallId, name: weatherToolName, input: { city: "Paris" } })]),
    LLM.toolMessage({ id: toolCallId, name: weatherToolName, result: { forecast: "sunny", temperature_c: 22 } }),
  ],
  generation: { maxTokens: 40, temperature: 0 },
})

// Cassettes are deterministic — assert exact stream contents instead of fuzzy
// `length > 0` checks so adapter parsing regressions surface immediately.
// Re-record (`RECORD=true`) only when intentionally refreshing a cassette.
const recorded = recordedTests({
  prefix: "openai-chat",
  provider: "openai",
  protocol: "openai-chat",
  requires: ["OPENAI_API_KEY"],
})
const openai = LLMClient.make({ adapters: [OpenAIChat.adapter] })
const openaiWithUsage = LLMClient.make({ adapters: [OpenAIChat.adapter.withPatches([OpenAIChat.includeUsage])] })

describe("OpenAI Chat recorded", () => {
  recorded.effect("streams text", () =>
    Effect.gen(function* () {
      const response = yield* openaiWithUsage.generate(request)

      expect(LLM.outputText(response)).toBe("Hello!")
      expect(response.usage).toMatchObject({
        inputTokens: 22,
        outputTokens: 2,
        totalTokens: 24,
        cacheReadInputTokens: 0,
        reasoningTokens: 0,
      })
      expect(response.events.map((event) => event.type)).toEqual([
        "text-delta",
        "text-delta",
        "request-finish",
      ])
      expectFinish(response.events, "stop")
    }),
  )

  recorded.effect.with("streams tool call", { tags: ["tool"] }, () =>
    Effect.gen(function* () {
      const response = yield* openai.generate(toolRequest)

      expect(response.events.some((event) => event.type === "tool-input-delta")).toBe(true)
      expect(response.events.find((event) => event.type === "tool-call")).toMatchObject({
        type: "tool-call",
        name: weatherTool.name,
        input: { city: "Paris" },
      })
      expectFinish(response.events, "tool-calls")
    }),
  )

  recorded.effect.with("continues after tool result", { tags: ["tool"] }, () =>
    Effect.gen(function* () {
      const response = yield* openaiWithUsage.generate(toolResultRequest)

      expect(LLM.outputText(response)).toBe("The weather in Paris is sunny with a temperature of 22°C.")
      expect(response.usage).toMatchObject({ inputTokens: 59, outputTokens: 14, totalTokens: 73 })
      expectFinish(response.events, "stop")
    }),
  )
})
318
packages/llm/test/provider/openai-chat.test.ts
Normal file
@@ -0,0 +1,318 @@
import { describe, expect } from "bun:test"
import { Effect, Layer, Schema, Stream } from "effect"
import { HttpClientRequest } from "effect/unstable/http"
import { LLM, ProviderRequestError } from "../../src"
import { LLMClient } from "../../src/adapter"
import { OpenAIChat } from "../../src/provider/openai-chat"
import { testEffect } from "../lib/effect"
import { dynamicResponse, fixedResponse, truncatedStream } from "../lib/http"
import { sseEvents } from "../lib/sse"

const TargetJson = Schema.fromJsonString(Schema.Unknown)
const encodeJson = Schema.encodeSync(TargetJson)

const model = OpenAIChat.model({
  id: "gpt-4o-mini",
  baseURL: "https://api.openai.test/v1/",
  headers: { authorization: "Bearer test" },
})

const request = LLM.request({
  id: "req_1",
  model,
  system: "You are concise.",
  prompt: "Say hello.",
  generation: { maxTokens: 20, temperature: 0 },
})

const it = testEffect(Layer.empty)

const deltaChunk = (delta: object, finishReason: string | null = null) => ({
  id: "chatcmpl_fixture",
  choices: [{ delta, finish_reason: finishReason }],
  usage: null,
})

const usageChunk = (usage: object) => ({
  id: "chatcmpl_fixture",
  choices: [],
  usage,
})

describe("OpenAI Chat adapter", () => {
  it.effect("prepares OpenAI Chat target", () =>
    Effect.gen(function* () {
      // Pass the OpenAIChat target type so `prepared.target` is statically
      // typed to the adapter's native shape — the assertions below read field
      // names without `unknown` casts.
      const prepared = yield* LLMClient.make({
        adapters: [OpenAIChat.adapter.withPatches([OpenAIChat.includeUsage])],
      }).prepare<OpenAIChat.OpenAIChatTarget>(request)
      const _typed: { readonly model: string; readonly stream: true } = prepared.target

      expect(prepared.target).toEqual({
        model: "gpt-4o-mini",
        messages: [
          { role: "system", content: "You are concise." },
          { role: "user", content: "Say hello." },
        ],
        stream: true,
        stream_options: { include_usage: true },
        max_tokens: 20,
        temperature: 0,
      })
      expect(prepared.patchTrace.map((item) => item.id)).toEqual(["target.openai-chat.include-usage"])
    }),
  )

  it.effect("adds native query params to the Chat Completions URL", () =>
    Effect.gen(function* () {
      yield* LLMClient.make({ adapters: [OpenAIChat.adapter] })
        .generate(LLM.updateRequest(request, { model: LLM.model({ ...model, queryParams: { "api-version": "v1" } }) }))
        .pipe(
          Effect.provide(
            dynamicResponse((input) =>
              Effect.gen(function* () {
                const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie)
                expect(web.url).toBe("https://api.openai.test/v1/chat/completions?api-version=v1")
                return input.respond(sseEvents(deltaChunk({}, "stop")), { headers: { "content-type": "text/event-stream" } })
              }),
            ),
          ),
        )
    }),
  )

  it.effect("prepares assistant tool-call and tool-result messages", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [OpenAIChat.adapter] }).prepare(
        LLM.request({
          id: "req_tool_result",
          model,
          messages: [
            LLM.user("What is the weather?"),
            LLM.assistant([LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })]),
            LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } }),
          ],
        }),
      )

      expect(prepared.target).toEqual({
        model: "gpt-4o-mini",
        messages: [
          { role: "user", content: "What is the weather?" },
          {
            role: "assistant",
            content: null,
            tool_calls: [
              {
                id: "call_1",
                type: "function",
                function: { name: "lookup", arguments: encodeJson({ query: "weather" }) },
              },
            ],
          },
          { role: "tool", tool_call_id: "call_1", content: encodeJson({ forecast: "sunny" }) },
        ],
        stream: true,
      })
    }),
  )

  it.effect("rejects unsupported user media content", () =>
    Effect.gen(function* () {
      const error = yield* LLMClient.make({ adapters: [OpenAIChat.adapter] })
        .prepare(
          LLM.request({
            id: "req_media",
            model,
            messages: [LLM.user({ type: "media", mediaType: "image/png", data: "AAECAw==" })],
          }),
        )
        .pipe(Effect.flip)

      expect(error.message).toContain("OpenAI Chat user messages only support text content for now")
    }),
  )

  it.effect("rejects unsupported assistant reasoning content", () =>
    Effect.gen(function* () {
      const error = yield* LLMClient.make({ adapters: [OpenAIChat.adapter] })
        .prepare(
          LLM.request({
            id: "req_reasoning",
            model,
            messages: [LLM.assistant({ type: "reasoning", text: "hidden" })],
          }),
        )
        .pipe(Effect.flip)

      expect(error.message).toContain("OpenAI Chat assistant messages only support text and tool-call content for now")
    }),
  )

  it.effect("parses text and usage stream fixtures", () =>
    Effect.gen(function* () {
      const body = sseEvents(
        deltaChunk({ role: "assistant", content: "Hello" }),
        deltaChunk({ content: "!" }),
        deltaChunk({}, "stop"),
        usageChunk({
          prompt_tokens: 5,
          completion_tokens: 2,
          total_tokens: 7,
          prompt_tokens_details: { cached_tokens: 1 },
          completion_tokens_details: { reasoning_tokens: 0 },
        }),
      )
      const response = yield* LLMClient.make({ adapters: [OpenAIChat.adapter] })
        .generate(request)
        .pipe(Effect.provide(fixedResponse(body)))

      expect(LLM.outputText(response)).toBe("Hello!")
      expect(response.events).toEqual([
        { type: "text-delta", text: "Hello" },
        { type: "text-delta", text: "!" },
        {
          type: "request-finish",
          reason: "stop",
          usage: {
            inputTokens: 5,
            outputTokens: 2,
            reasoningTokens: 0,
            cacheReadInputTokens: 1,
            totalTokens: 7,
            native: {
              prompt_tokens: 5,
              completion_tokens: 2,
              total_tokens: 7,
              prompt_tokens_details: { cached_tokens: 1 },
              completion_tokens_details: { reasoning_tokens: 0 },
            },
          },
        },
      ])
    }),
  )

  it.effect("assembles streamed tool call input", () =>
    Effect.gen(function* () {
      const body = sseEvents(
        deltaChunk({
          role: "assistant",
          tool_calls: [
            { index: 0, id: "call_1", function: { name: "lookup", arguments: '{"query"' } },
          ],
        }),
        deltaChunk({ tool_calls: [{ index: 0, function: { arguments: ':"weather"}' } }] }),
        deltaChunk({}, "tool_calls"),
      )
      const response = yield* LLMClient.make({ adapters: [OpenAIChat.adapter] })
        .generate(
          LLM.updateRequest(request, {
            tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }],
          }),
        )
        .pipe(Effect.provide(fixedResponse(body)))

      expect(response.events).toEqual([
        { type: "tool-input-delta", id: "call_1", name: "lookup", text: '{"query"' },
        { type: "tool-input-delta", id: "call_1", name: "lookup", text: ':"weather"}' },
        { type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } },
        { type: "request-finish", reason: "tool-calls", usage: undefined },
      ])
    }),
  )

  it.effect("does not finalize streamed tool calls without a finish reason", () =>
    Effect.gen(function* () {
      const body = sseEvents(
        deltaChunk({
          role: "assistant",
          tool_calls: [
            { index: 0, id: "call_1", function: { name: "lookup", arguments: '{"query"' } },
          ],
        }),
        deltaChunk({ tool_calls: [{ index: 0, function: { arguments: ':"weather"}' } }] }),
      )
      const response = yield* LLMClient.make({ adapters: [OpenAIChat.adapter] })
        .generate(
          LLM.updateRequest(request, {
            tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }],
          }),
        )
        .pipe(Effect.provide(fixedResponse(body)))

      expect(response.events).toEqual([
        { type: "tool-input-delta", id: "call_1", name: "lookup", text: '{"query"' },
        { type: "tool-input-delta", id: "call_1", name: "lookup", text: ':"weather"}' },
      ])
      expect(LLM.outputToolCalls(response)).toEqual([])
    }),
  )

  it.effect("fails on malformed stream chunks", () =>
    Effect.gen(function* () {
      const body = sseEvents(deltaChunk({ content: 123 }))
      const error = yield* LLMClient.make({ adapters: [OpenAIChat.adapter] })
        .generate(request)
        .pipe(Effect.provide(fixedResponse(body)), Effect.flip)

      expect(error.message).toContain("Invalid OpenAI Chat stream chunk")
    }),
  )

  it.effect("surfaces transport errors that occur mid-stream", () =>
    Effect.gen(function* () {
      const layer = truncatedStream([
        `data: ${JSON.stringify(deltaChunk({ role: "assistant", content: "Hello" }))}\n\n`,
      ])
      const error = yield* LLMClient.make({ adapters: [OpenAIChat.adapter] })
        .generate(request)
        .pipe(Effect.provide(layer), Effect.flip)

      expect(error.message).toContain("Failed to read OpenAI Chat stream")
    }),
  )

  it.effect("fails HTTP provider errors before stream parsing", () =>
    Effect.gen(function* () {
      const error = yield* LLMClient.make({ adapters: [OpenAIChat.adapter] })
        .generate(request)
        .pipe(
          Effect.provide(
            fixedResponse('{"error":{"message":"Bad request","type":"invalid_request_error"}}', {
              status: 400,
              headers: { "content-type": "application/json" },
            }),
          ),
          Effect.flip,
        )

      expect(error).toBeInstanceOf(ProviderRequestError)
      expect(error).toMatchObject({ status: 400 })
      expect(error.message).toContain("HTTP 400")
    }),
  )

  it.effect("short-circuits the upstream stream when the consumer takes a prefix", () =>
    Effect.gen(function* () {
      const llm = LLMClient.make({ adapters: [OpenAIChat.adapter] })
      // The body has more chunks than we'll consume. If `Stream.take(1)` did
      // not interrupt the upstream HTTP body the test would hang waiting for
      // the rest of the stream to drain.
      const body = sseEvents(
        deltaChunk({ role: "assistant", content: "Hello" }),
        deltaChunk({ content: " world" }),
        deltaChunk({}, "stop"),
      )

      const events = Array.from(
        yield* llm
          .stream(request)
          .pipe(Stream.take(1), Stream.runCollect, Effect.provide(fixedResponse(body))),
      )
      expect(events.map((event) => event.type)).toEqual(["text-delta"])
    }),
  )
})
@@ -0,0 +1,55 @@
import { describe, expect } from "bun:test"
import { Effect } from "effect"
import { LLM } from "../../src"
import { LLMClient } from "../../src/adapter"
import { OpenAICompatibleChat } from "../../src/provider/openai-compatible-chat"
import { expectFinish, expectWeatherToolCall, textRequest, weatherToolRequest } from "../recorded-scenarios"
import { recordedTests } from "../recorded-test"

const deepseekModel = OpenAICompatibleChat.deepseek({
  id: "deepseek-chat",
  apiKey: process.env.DEEPSEEK_API_KEY ?? "fixture",
})

const deepseekRequest = textRequest({ id: "recorded_deepseek_text", model: deepseekModel })

const togetherModel = OpenAICompatibleChat.togetherai({
  id: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
  apiKey: process.env.TOGETHER_AI_API_KEY ?? "fixture",
})

const togetherRequest = textRequest({ id: "recorded_togetherai_text", model: togetherModel })
const togetherToolRequest = weatherToolRequest({ id: "recorded_togetherai_tool_call", model: togetherModel })

const recorded = recordedTests({ prefix: "openai-compatible-chat", protocol: "openai-compatible-chat" })
const llm = LLMClient.make({ adapters: [OpenAICompatibleChat.adapter] })

describe("OpenAI-compatible Chat recorded", () => {
  recorded.effect.with("deepseek streams text", { provider: "deepseek", requires: ["DEEPSEEK_API_KEY"] }, () =>
    Effect.gen(function* () {
      const response = yield* llm.generate(deepseekRequest)

      expect(LLM.outputText(response)).toMatch(/^Hello!?$/)
      expectFinish(response.events, "stop")
    }),
  )

  recorded.effect.with("togetherai streams text", { provider: "togetherai", requires: ["TOGETHER_AI_API_KEY"] }, () =>
    Effect.gen(function* () {
      const response = yield* llm.generate(togetherRequest)

      expect(LLM.outputText(response)).toMatch(/^Hello!?$/)
      expectFinish(response.events, "stop")
    }),
  )

  recorded.effect.with("togetherai streams tool call", { provider: "togetherai", requires: ["TOGETHER_AI_API_KEY"], tags: ["tool"] }, () =>
    Effect.gen(function* () {
      const response = yield* llm.generate(togetherToolRequest)

      expect(response.events.some((event) => event.type === "tool-input-delta")).toBe(true)
      expectWeatherToolCall(response)
      expectFinish(response.events, "tool-calls")
    }),
  )
})
230
packages/llm/test/provider/openai-compatible-chat.test.ts
Normal file
@@ -0,0 +1,230 @@
import { describe, expect } from "bun:test"
import { Effect, Layer, Schema } from "effect"
import { HttpClientRequest } from "effect/unstable/http"
import { LLM } from "../../src"
import { LLMClient } from "../../src/adapter"
import { OpenAICompatibleChat } from "../../src/provider/openai-compatible-chat"
import { testEffect } from "../lib/effect"
import { dynamicResponse } from "../lib/http"
import { sseEvents } from "../lib/sse"

const Json = Schema.fromJsonString(Schema.Unknown)
const decodeJson = Schema.decodeUnknownSync(Json)

const model = OpenAICompatibleChat.model({
  id: "deepseek-chat",
  provider: "deepseek",
  baseURL: "https://api.deepseek.test/v1/",
  apiKey: "test-key",
  queryParams: { "api-version": "2026-01-01" },
})

const request = LLM.request({
  id: "req_1",
  model,
  system: "You are concise.",
  prompt: "Say hello.",
  generation: { maxTokens: 20, temperature: 0 },
})

const it = testEffect(Layer.empty)

const deltaChunk = (delta: object, finishReason: string | null = null) => ({
  id: "chatcmpl_fixture",
  choices: [{ delta, finish_reason: finishReason }],
  usage: null,
})

const usageChunk = (usage: object) => ({
  id: "chatcmpl_fixture",
  choices: [],
  usage,
})

const providerFamilies = [
  ["baseten", OpenAICompatibleChat.baseten, "https://inference.baseten.co/v1"],
  ["cerebras", OpenAICompatibleChat.cerebras, "https://api.cerebras.ai/v1"],
  ["deepinfra", OpenAICompatibleChat.deepinfra, "https://api.deepinfra.com/v1/openai"],
  ["deepseek", OpenAICompatibleChat.deepseek, "https://api.deepseek.com/v1"],
  ["fireworks", OpenAICompatibleChat.fireworks, "https://api.fireworks.ai/inference/v1"],
  ["togetherai", OpenAICompatibleChat.togetherai, "https://api.together.xyz/v1"],
] as const

describe("OpenAI-compatible Chat adapter", () => {
  it.effect("prepares generic Chat target", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [OpenAICompatibleChat.adapter] }).prepare(
        LLM.updateRequest(request, {
          tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }],
          toolChoice: { type: "required" },
        }),
      )

      expect(prepared.adapter).toBe("openai-compatible-chat")
      expect(prepared.model).toMatchObject({
        id: "deepseek-chat",
        provider: "deepseek",
        protocol: "openai-compatible-chat",
        baseURL: "https://api.deepseek.test/v1/",
        apiKey: "test-key",
        queryParams: { "api-version": "2026-01-01" },
      })
      expect(prepared.target).toEqual({
        model: "deepseek-chat",
        messages: [
          { role: "system", content: "You are concise." },
          { role: "user", content: "Say hello." },
        ],
        tools: [{ type: "function", function: { name: "lookup", description: "Lookup data", parameters: { type: "object" } } }],
        tool_choice: "required",
        stream: true,
        max_tokens: 20,
        temperature: 0,
      })
    }),
  )

  it.effect("provides model helpers for compatible provider families", () =>
    Effect.gen(function* () {
      expect(
        providerFamilies.map(([provider, makeModel]) => {
          const model = makeModel({ id: `${provider}-model`, apiKey: "test-key" })
          return {
            id: String(model.id),
            provider: String(model.provider),
            protocol: model.protocol,
            baseURL: model.baseURL,
            apiKey: model.apiKey,
          }
        }),
      ).toEqual(
        providerFamilies.map(([provider, _, baseURL]) => ({
          id: `${provider}-model`,
          provider,
          protocol: "openai-compatible-chat",
          baseURL,
          apiKey: "test-key",
        })),
      )

      const custom = OpenAICompatibleChat.deepseek({
        id: "deepseek-chat",
        apiKey: "test-key",
        baseURL: "https://custom.deepseek.test/v1",
      })
      expect(custom).toMatchObject({
        provider: "deepseek",
        protocol: "openai-compatible-chat",
        baseURL: "https://custom.deepseek.test/v1",
      })
    }),
  )

  it.effect("matches AI SDK compatible basic request body fixture", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [OpenAICompatibleChat.adapter] }).prepare(request)

      expect(prepared.target).toEqual({
        model: "deepseek-chat",
        messages: [
          { role: "system", content: "You are concise." },
          { role: "user", content: "Say hello." },
        ],
        stream: true,
        max_tokens: 20,
        temperature: 0,
      })
    }),
  )

  it.effect("matches AI SDK compatible tool request body fixture", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [OpenAICompatibleChat.adapter] }).prepare(
        LLM.request({
          id: "req_tool_parity",
          model,
          tools: [{
            name: "lookup",
            description: "Lookup data",
            inputSchema: { type: "object", properties: { query: { type: "string" } }, required: ["query"] },
          }],
          toolChoice: "lookup",
          messages: [
            LLM.user("What is the weather?"),
            LLM.assistant([LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })]),
            LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } }),
          ],
        }),
      )

      expect(prepared.target).toEqual({
        model: "deepseek-chat",
        messages: [
          { role: "user", content: "What is the weather?" },
          {
            role: "assistant",
            content: null,
            tool_calls: [{
              id: "call_1",
              type: "function",
              function: { name: "lookup", arguments: '{"query":"weather"}' },
            }],
          },
          { role: "tool", tool_call_id: "call_1", content: '{"forecast":"sunny"}' },
        ],
        tools: [{
          type: "function",
          function: {
            name: "lookup",
            description: "Lookup data",
            parameters: { type: "object", properties: { query: { type: "string" } }, required: ["query"] },
          },
        }],
        tool_choice: { type: "function", function: { name: "lookup" } },
        stream: true,
      })
    }),
  )

  it.effect("posts to the configured compatible endpoint and parses text usage", () =>
    Effect.gen(function* () {
      const response = yield* LLMClient.make({
        adapters: [OpenAICompatibleChat.adapter.withPatches([OpenAICompatibleChat.includeUsage])],
      })
        .generate(request)
        .pipe(
          Effect.provide(
            dynamicResponse((input) =>
              Effect.gen(function* () {
                const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie)
                expect(web.url).toBe("https://api.deepseek.test/v1/chat/completions?api-version=2026-01-01")
                expect(web.headers.get("authorization")).toBe("Bearer test-key")
                expect(decodeJson(input.text)).toMatchObject({
                  model: "deepseek-chat",
                  stream: true,
                  stream_options: { include_usage: true },
                  messages: [
                    { role: "system", content: "You are concise." },
                    { role: "user", content: "Say hello." },
                  ],
                })
                return input.respond(
                  sseEvents(
                    deltaChunk({ role: "assistant", content: "Hello" }),
                    deltaChunk({ content: "!" }),
                    deltaChunk({}, "stop"),
                    usageChunk({ prompt_tokens: 5, completion_tokens: 2, total_tokens: 7 }),
                  ),
                  { headers: { "content-type": "text/event-stream" } },
                )
              }),
            ),
          ),
        )

      expect(LLM.outputText(response)).toBe("Hello!")
      expect(LLM.outputUsage(response)).toMatchObject({ inputTokens: 5, outputTokens: 2, totalTokens: 7 })
      expect(response.events.at(-1)).toMatchObject({ type: "request-finish", reason: "stop" })
    }),
  )
})
314
packages/llm/test/provider/openai-responses.test.ts
Normal file
@@ -0,0 +1,314 @@
import { describe, expect } from "bun:test"
import { Effect, Layer } from "effect"
import { HttpClientRequest } from "effect/unstable/http"
import { LLM, ProviderRequestError } from "../../src"
import { LLMClient } from "../../src/adapter"
import { OpenAIResponses } from "../../src/provider/openai-responses"
import { testEffect } from "../lib/effect"
import { dynamicResponse, fixedResponse } from "../lib/http"
import { sseEvents } from "../lib/sse"

const model = OpenAIResponses.model({
  id: "gpt-4.1-mini",
  baseURL: "https://api.openai.test/v1/",
  headers: { authorization: "Bearer test" },
})

const request = LLM.request({
  id: "req_1",
  model,
  system: "You are concise.",
  prompt: "Say hello.",
  generation: { maxTokens: 20, temperature: 0 },
})

const it = testEffect(Layer.empty)

describe("OpenAI Responses adapter", () => {
  it.effect("prepares OpenAI Responses target", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [OpenAIResponses.adapter] }).prepare(request)

      expect(prepared.target).toEqual({
        model: "gpt-4.1-mini",
        input: [
          { role: "system", content: "You are concise." },
          { role: "user", content: [{ type: "input_text", text: "Say hello." }] },
        ],
        stream: true,
        max_output_tokens: 20,
        temperature: 0,
      })
    }),
  )

  it.effect("adds native query params to the Responses URL", () =>
    Effect.gen(function* () {
      yield* LLMClient.make({ adapters: [OpenAIResponses.adapter] })
        .generate(LLM.updateRequest(request, { model: LLM.model({ ...model, queryParams: { "api-version": "v1" } }) }))
        .pipe(
          Effect.provide(
            dynamicResponse((input) =>
              Effect.gen(function* () {
                const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie)
                expect(web.url).toBe("https://api.openai.test/v1/responses?api-version=v1")
                return input.respond(sseEvents({ type: "response.completed", response: {} }), {
                  headers: { "content-type": "text/event-stream" },
                })
              }),
            ),
          ),
        )
    }),
  )

  it.effect("prepares function call and function output input items", () =>
    Effect.gen(function* () {
      const prepared = yield* LLMClient.make({ adapters: [OpenAIResponses.adapter] }).prepare(
        LLM.request({
          id: "req_tool_result",
          model,
          messages: [
            LLM.user("What is the weather?"),
            LLM.assistant([LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })]),
            LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } }),
          ],
        }),
      )

      expect(prepared.target).toEqual({
        model: "gpt-4.1-mini",
        input: [
          { role: "user", content: [{ type: "input_text", text: "What is the weather?" }] },
          { type: "function_call", call_id: "call_1", name: "lookup", arguments: '{"query":"weather"}' },
          { type: "function_call_output", call_id: "call_1", output: '{"forecast":"sunny"}' },
        ],
        stream: true,
      })
    }),
  )

  it.effect("parses text and usage stream fixtures", () =>
    Effect.gen(function* () {
      const body = sseEvents(
        { type: "response.output_text.delta", item_id: "msg_1", delta: "Hello" },
        { type: "response.output_text.delta", item_id: "msg_1", delta: "!" },
        {
          type: "response.completed",
          response: {
            usage: {
              input_tokens: 5,
              output_tokens: 2,
              total_tokens: 7,
              input_tokens_details: { cached_tokens: 1 },
              output_tokens_details: { reasoning_tokens: 0 },
            },
          },
        },
      )
      const response = yield* LLMClient.make({ adapters: [OpenAIResponses.adapter] })
        .generate(request)
        .pipe(Effect.provide(fixedResponse(body)))

      expect(LLM.outputText(response)).toBe("Hello!")
      expect(response.events).toEqual([
        { type: "text-delta", id: "msg_1", text: "Hello" },
        { type: "text-delta", id: "msg_1", text: "!" },
        {
          type: "request-finish",
          reason: "stop",
          usage: {
            inputTokens: 5,
            outputTokens: 2,
            reasoningTokens: 0,
            cacheReadInputTokens: 1,
            totalTokens: 7,
            native: {
              input_tokens: 5,
              output_tokens: 2,
              total_tokens: 7,
              input_tokens_details: { cached_tokens: 1 },
              output_tokens_details: { reasoning_tokens: 0 },
            },
          },
        },
      ])
    }),
  )

  it.effect("assembles streamed function call input", () =>
    Effect.gen(function* () {
      const body = sseEvents(
        {
          type: "response.output_item.added",
          item: { type: "function_call", id: "item_1", call_id: "call_1", name: "lookup", arguments: "" },
        },
        { type: "response.function_call_arguments.delta", item_id: "item_1", delta: '{"query"' },
        { type: "response.function_call_arguments.delta", item_id: "item_1", delta: ':"weather"}' },
        {
          type: "response.output_item.done",
          item: {
            type: "function_call",
            id: "item_1",
            call_id: "call_1",
            name: "lookup",
            arguments: '{"query":"weather"}',
          },
        },
        { type: "response.completed", response: { usage: { input_tokens: 5, output_tokens: 1 } } },
      )
      const response = yield* LLMClient.make({ adapters: [OpenAIResponses.adapter] })
        .generate(
          LLM.updateRequest(request, {
            tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }],
          }),
        )
        .pipe(Effect.provide(fixedResponse(body)))

      expect(response.events).toEqual([
        { type: "tool-input-delta", id: "call_1", name: "lookup", text: '{"query"' },
        { type: "tool-input-delta", id: "call_1", name: "lookup", text: ':"weather"}' },
        { type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } },
        {
          type: "request-finish",
          reason: "stop",
          usage: { inputTokens: 5, outputTokens: 1, totalTokens: 6, native: { input_tokens: 5, output_tokens: 1 } },
        },
      ])
    }),
  )

  it.effect("decodes web_search_call as provider-executed tool-call + tool-result", () =>
    Effect.gen(function* () {
      const item = {
        type: "web_search_call",
        id: "ws_1",
        status: "completed",
        action: { type: "search", query: "effect 4" },
      }
      const body = sseEvents(
        { type: "response.output_item.added", item },
        { type: "response.output_item.done", item },
        { type: "response.completed", response: { usage: { input_tokens: 5, output_tokens: 1 } } },
      )
      const response = yield* LLMClient.make({ adapters: [OpenAIResponses.adapter] })
        .generate(request)
        .pipe(Effect.provide(fixedResponse(body)))

      const callsAndResults = response.events.filter((event) => event.type === "tool-call" || event.type === "tool-result")
      expect(callsAndResults).toEqual([
        {
          type: "tool-call",
          id: "ws_1",
          name: "web_search",
          input: { type: "search", query: "effect 4" },
          providerExecuted: true,
        },
        {
          type: "tool-result",
          id: "ws_1",
          name: "web_search",
          result: { type: "json", value: item },
          providerExecuted: true,
        },
      ])
    }),
  )

  it.effect("decodes code_interpreter_call as provider-executed events with code input", () =>
    Effect.gen(function* () {
      const item = {
        type: "code_interpreter_call",
        id: "ci_1",
        status: "completed",
        code: "print(1+1)",
        container_id: "cnt_xyz",
        outputs: [{ type: "logs", logs: "2\n" }],
      }
      const body = sseEvents(
        { type: "response.output_item.done", item },
        { type: "response.completed", response: { usage: { input_tokens: 5, output_tokens: 1 } } },
      )
      const response = yield* LLMClient.make({ adapters: [OpenAIResponses.adapter] })
        .generate(request)
        .pipe(Effect.provide(fixedResponse(body)))

      const toolCall = response.events.find((event) => event.type === "tool-call")
      expect(toolCall).toEqual({
        type: "tool-call",
        id: "ci_1",
        name: "code_interpreter",
        input: { code: "print(1+1)", container_id: "cnt_xyz" },
        providerExecuted: true,
      })
      const toolResult = response.events.find((event) => event.type === "tool-result")
      expect(toolResult).toEqual({
        type: "tool-result",
        id: "ci_1",
        name: "code_interpreter",
        result: { type: "json", value: item },
        providerExecuted: true,
      })
    }),
  )

  it.effect("rejects unsupported user media content", () =>
    Effect.gen(function* () {
      const error = yield* LLMClient.make({ adapters: [OpenAIResponses.adapter] })
        .prepare(
          LLM.request({
            id: "req_media",
            model,
            messages: [LLM.user({ type: "media", mediaType: "image/png", data: "AAECAw==" })],
          }),
        )
        .pipe(Effect.flip)

      expect(error.message).toContain("OpenAI Responses user messages only support text content for now")
    }),
  )

  it.effect("emits provider-error events for mid-stream provider errors", () =>
    Effect.gen(function* () {
      const response = yield* LLMClient.make({ adapters: [OpenAIResponses.adapter] })
        .generate(request)
        .pipe(
          Effect.provide(
            fixedResponse(sseEvents({ type: "error", code: "rate_limit_exceeded", message: "Slow down" })),
          ),
        )

      expect(response.events).toEqual([{ type: "provider-error", message: "Slow down" }])
    }),
  )

  it.effect("falls back to error code when no message is present", () =>
    Effect.gen(function* () {
      const response = yield* LLMClient.make({ adapters: [OpenAIResponses.adapter] })
        .generate(request)
        .pipe(Effect.provide(fixedResponse(sseEvents({ type: "error", code: "internal_error" }))))

      expect(response.events).toEqual([{ type: "provider-error", message: "internal_error" }])
    }),
  )

  it.effect("fails HTTP provider errors before stream parsing", () =>
    Effect.gen(function* () {
      const error = yield* LLMClient.make({ adapters: [OpenAIResponses.adapter] })
        .generate(request)
        .pipe(
          Effect.provide(
            fixedResponse('{"error":{"type":"invalid_request_error","message":"Bad request"}}', {
              status: 400,
              headers: { "content-type": "application/json" },
            }),
          ),
          Effect.flip,
        )

      expect(error).toBeInstanceOf(ProviderRequestError)
      expect(error).toMatchObject({ status: 400 })
      expect(error.message).toContain("HTTP 400")
    }),
  )
})
68
packages/llm/test/recorded-scenarios.ts
Normal file
@@ -0,0 +1,68 @@
import { expect } from "bun:test"
import { Effect, Schema } from "effect"
import { LLM, type LLMEvent, type LLMResponse, type ModelRef } from "../src"
import { tool } from "../src/tool"

export const weatherToolName = "get_weather"

export const weatherTool = LLM.toolDefinition({
  name: weatherToolName,
  description: "Get current weather for a city.",
  inputSchema: {
    type: "object",
    properties: { city: { type: "string" } },
    required: ["city"],
    additionalProperties: false,
  },
})

export const weatherRuntimeTool = tool({
  description: weatherTool.description,
  parameters: Schema.Struct({ city: Schema.String }),
  success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }),
  execute: ({ city }) =>
    Effect.succeed(
      city === "Paris"
        ? { temperature: 22, condition: "sunny" }
        : { temperature: 0, condition: "unknown" },
    ),
})

export const textRequest = (input: {
  readonly id: string
  readonly model: ModelRef
  readonly prompt?: string
  readonly maxTokens?: number
}) =>
  LLM.request({
    id: input.id,
    model: input.model,
    system: "You are concise.",
    prompt: input.prompt ?? "Reply with exactly: Hello!",
    generation: { maxTokens: input.maxTokens ?? 20, temperature: 0 },
  })

export const weatherToolRequest = (input: {
  readonly id: string
  readonly model: ModelRef
  readonly maxTokens?: number
}) =>
  LLM.request({
    id: input.id,
    model: input.model,
    system: "Call tools exactly as requested.",
    prompt: "Call get_weather with city exactly Paris.",
    tools: [weatherTool],
    toolChoice: LLM.toolChoice(weatherTool),
    generation: { maxTokens: input.maxTokens ?? 80, temperature: 0 },
  })

export const expectFinish = (
  events: ReadonlyArray<LLMEvent>,
  reason: Extract<LLMEvent, { readonly type: "request-finish" }>["reason"],
) => expect(events.at(-1)).toMatchObject({ type: "request-finish", reason })

export const expectWeatherToolCall = (response: LLMResponse) =>
  expect(LLM.outputToolCalls(response)).toEqual([
    { type: "tool-call", id: expect.any(String), name: weatherToolName, input: { city: "Paris" } },
  ])
163
packages/llm/test/recorded-test.ts
Normal file
@@ -0,0 +1,163 @@
import { HttpRecorder } from "@opencode-ai/http-recorder"
import { test, type TestOptions } from "bun:test"
import { Effect, Layer } from "effect"
import * as path from "node:path"
import { fileURLToPath } from "node:url"
import { RequestExecutor } from "../src/executor"
import { testEffect } from "./lib/effect"

const __dirname = path.dirname(fileURLToPath(import.meta.url))
const FIXTURES_DIR = path.resolve(__dirname, "fixtures", "recordings")

type Body<A, E, R> = Effect.Effect<A, E, R> | (() => Effect.Effect<A, E, R>)

type RecordedTestsOptions = {
  readonly prefix: string
  readonly provider?: string
  readonly protocol?: string
  readonly requires?: ReadonlyArray<string>
  readonly options?: HttpRecorder.RecordReplayOptions
  readonly tags?: ReadonlyArray<string>
}

type RecordedCaseOptions = {
  readonly cassette?: string
  readonly id?: string
  readonly provider?: string
  readonly protocol?: string
  readonly requires?: ReadonlyArray<string>
  readonly options?: HttpRecorder.RecordReplayOptions
  readonly tags?: ReadonlyArray<string>
}

const kebab = (value: string) =>
  value
    .trim()
    .replace(/['"]/g, "")
    .replace(/[^a-zA-Z0-9]+/g, "-")
    .replace(/^-|-$/g, "")
    .toLowerCase()
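// Illustrative: kebab("streams tool call") === "streams-tool-call", so
// cassette file names stay shell-friendly regardless of how a test is titled.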

const missingEnv = (names: ReadonlyArray<string>) => names.filter((name) => !process.env[name])

const envList = (name: string) =>
  (process.env[name] ?? "")
    .split(",")
    .map((item) => item.trim().toLowerCase())
    .filter((item) => item !== "")

const unique = (items: ReadonlyArray<string>) => Array.from(new Set(items))

const classifiedTags = (input: {
  readonly prefix?: string
  readonly provider?: string
  readonly protocol?: string
  readonly tags?: ReadonlyArray<string>
}) =>
  unique([
    ...(input.prefix ? [`prefix:${input.prefix}`] : []),
    ...(input.provider ? [`provider:${input.provider}`] : []),
    ...(input.protocol ? [`protocol:${input.protocol}`] : []),
    ...(input.tags ?? []),
  ])

const matchesSelected = (input: {
  readonly prefix: string
  readonly name: string
  readonly cassette: string
  readonly tags: ReadonlyArray<string>
}) => {
  const providers = envList("RECORDED_PROVIDER")
  const requiredTags = envList("RECORDED_TAGS")
  const tests = envList("RECORDED_TEST")
  const tags = input.tags.map((tag) => tag.toLowerCase())
  const names = [input.name, kebab(input.name), input.cassette].map((item) => item.toLowerCase())

  if (providers.length > 0 && !providers.some((provider) => tags.includes(`provider:${provider}`) || input.prefix.toLowerCase() === provider)) {
    return false
  }
  if (requiredTags.length > 0 && !requiredTags.every((tag) => tags.includes(tag))) return false
  if (tests.length > 0 && !tests.some((test) => names.some((name) => name.includes(test)))) return false
  return true
}
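// Illustrative selection runs (env values are examples; semantics follow the
// checks above):
//   RECORDED_PROVIDER=togetherai  -> keep tests tagged provider:togetherai or whose prefix is "togetherai"
//   RECORDED_TAGS=tool,tool-loop  -> every listed tag must be present on the test
//   RECORDED_TEST=weather         -> substring match against the name, kebab(name), or cassette path
// Unset variables impose no filter, so every recorded test is considered.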

const cassetteName = (prefix: string, name: string, options: RecordedCaseOptions) =>
  options.cassette ?? `${prefix}/${options.id ?? kebab(name)}`
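// Illustrative: with prefix "openai-chat" and test name "streams tool call"
// the default cassette is "openai-chat/streams-tool-call"; per-case `cassette`
// or `id` options override this.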

const mergeOptions = (
  base: HttpRecorder.RecordReplayOptions | undefined,
  override: HttpRecorder.RecordReplayOptions | undefined,
) => {
  if (!base) return override
  if (!override) return base
  return {
    ...base,
    ...override,
    metadata: base.metadata || override.metadata ? { ...(base.metadata ?? {}), ...(override.metadata ?? {}) } : undefined,
  }
}

export const recordedTests = (options: RecordedTestsOptions) => {
  // Scoped to this `recordedTests` group rather than module-global so two
  // describe files using different prefixes don't collide and parallelization
  // at the file level stays safe.
  const cassettes = new Set<string>()

  const run = <A, E>(
    name: string,
    caseOptions: RecordedCaseOptions,
    body: Body<A, E, RequestExecutor.Service>,
    testOptions?: number | TestOptions,
  ) => {
    const cassette = cassetteName(options.prefix, name, caseOptions)
    if (cassettes.has(cassette)) throw new Error(`Duplicate recorded cassette "${cassette}"`)
    cassettes.add(cassette)
    const tags = unique([
      ...classifiedTags(options),
      ...classifiedTags({
        provider: caseOptions.provider,
        protocol: caseOptions.protocol,
        tags: caseOptions.tags,
      }),
    ])

    if (!matchesSelected({ prefix: options.prefix, name, cassette, tags })) return test.skip(name, () => {}, testOptions)

    const recorderOptions = mergeOptions(options.options, caseOptions.options)
    const layerOptions = {
      directory: FIXTURES_DIR,
      ...recorderOptions,
      metadata: {
        ...recorderOptions?.metadata,
        tags,
      },
    }

    if (process.env.RECORD === "true") {
      if (missingEnv([...(options.requires ?? []), ...(caseOptions.requires ?? [])]).length > 0) {
        return test.skip(name, () => {}, testOptions)
      }
    } else if (!HttpRecorder.hasCassetteSync(cassette, layerOptions)) {
      return test.skip(name, () => {}, testOptions)
    }

    return testEffect(
      RequestExecutor.layer.pipe(Layer.provide(HttpRecorder.cassetteLayer(cassette, layerOptions))),
    ).live(name, body, testOptions)
  }

  const effect = <A, E>(
    name: string,
    body: Body<A, E, RequestExecutor.Service>,
    testOptions?: number | TestOptions,
  ) => run(name, {}, body, testOptions)

  effect.with = <A, E>(
    name: string,
    caseOptions: RecordedCaseOptions,
    body: Body<A, E, RequestExecutor.Service>,
    testOptions?: number | TestOptions,
  ) => run(name, caseOptions, body, testOptions)

  return { effect }
}
|
||||
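For orientation, a minimal sketch of how a spec file might consume this helper. The option names (`prefix`, `requires`, `tags`, `id`) are the ones read by `run` above; the cassette id and env var are illustrative:

// Hypothetical spec file; cassette names default to `anthropic/<kebab-cased test name>`.
const { effect } = recordedTests({ prefix: "anthropic", requires: ["ANTHROPIC_API_KEY"] })

effect("simple completion", () => Effect.gen(function* () { /* drive the client here */ }))

// Tagged case: selected at runtime by the RECORDED_TAGS / RECORDED_TEST env filters.
effect.with("tool streaming", { tags: ["tools"], id: "tool-streaming" }, () =>
  Effect.gen(function* () { /* ... */ }),
)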
packages/llm/test/schema.test.ts (new file, 58 lines)
@@ -0,0 +1,58 @@
import { describe, expect, test } from "bun:test"
import { Schema } from "effect"
import { ContentPart, LLMEvent, LLMRequest, ModelCapabilities, ModelID, ModelLimits, ModelRef, ProviderID } from "../src/schema"

const capabilities = new ModelCapabilities({
  input: { text: true, image: false, audio: false, video: false, pdf: false },
  output: { text: true, reasoning: false },
  tools: { calls: true, streamingInput: true, providerExecuted: false },
  cache: { prompt: false, messageBlocks: false, contentBlocks: false },
  reasoning: { efforts: [], summaries: false, encryptedContent: false },
})

const model = new ModelRef({
  id: ModelID.make("fake-model"),
  provider: ProviderID.make("fake-provider"),
  protocol: "openai-chat",
  capabilities,
  limits: new ModelLimits({}),
})

describe("llm schema", () => {
  test("decodes a minimal request", () => {
    const input: unknown = {
      id: "req_1",
      model,
      system: [{ type: "text", text: "You are terse." }],
      messages: [{ role: "user", content: [{ type: "text", text: "hi" }] }],
      tools: [],
      generation: {},
    }

    const decoded = Schema.decodeUnknownSync(LLMRequest)(input)

    expect(decoded.id).toBe("req_1")
    expect(decoded.messages[0]?.content[0]?.type).toBe("text")
  })

  test("rejects invalid protocol", () => {
    expect(() =>
      Schema.decodeUnknownSync(LLMRequest)({
        model: { ...model, protocol: "bogus" },
        system: [],
        messages: [],
        tools: [],
        generation: {},
      }),
    ).toThrow()
  })

  test("rejects invalid event type", () => {
    expect(() => Schema.decodeUnknownSync(LLMEvent)({ type: "bogus" })).toThrow()
  })

  test("content part tagged union exposes guards", () => {
    expect(ContentPart.guards.text({ type: "text", text: "hi" })).toBe(true)
    expect(ContentPart.guards.media({ type: "text", text: "hi" })).toBe(false)
  })
})
packages/llm/test/tool-runtime.test.ts (new file, 284 lines)
@@ -0,0 +1,284 @@
import { describe, expect } from "bun:test"
import { Effect, Layer, Schema, Stream } from "effect"
import { LLM, LLMEvent } from "../src"
import { LLMClient } from "../src/adapter"
import { RequestExecutor } from "../src/executor"
import { OpenAIChat } from "../src/provider/openai-chat"
import { tool, ToolFailure } from "../src/tool"
import { ToolRuntime } from "../src/tool-runtime"
import { testEffect } from "./lib/effect"
import { scriptedResponses } from "./lib/http"
import { deltaChunk, finishChunk, toolCallChunk } from "./lib/openai-chunks"
import { sseEvents } from "./lib/sse"

const model = OpenAIChat.model({
  id: "gpt-4o-mini",
  baseURL: "https://api.openai.test/v1/",
  headers: { authorization: "Bearer test" },
})

const baseRequest = LLM.request({
  id: "req_1",
  model,
  prompt: "Use the tool.",
})

const it = testEffect(Layer.empty)

const get_weather = tool({
  description: "Get current weather for a city.",
  parameters: Schema.Struct({ city: Schema.String }),
  success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }),
  execute: ({ city }) =>
    Effect.gen(function* () {
      if (city === "FAIL") return yield* new ToolFailure({ message: `Weather lookup failed for ${city}` })
      return { temperature: 22, condition: "sunny" }
    }),
})

describe("ToolRuntime", () => {
  it.effect("dispatches a tool call, appends results, and resumes streaming", () =>
    Effect.gen(function* () {
      const llm = LLMClient.make({ adapters: [OpenAIChat.adapter] })
      const layer = scriptedResponses([
        sseEvents(toolCallChunk("call_1", "get_weather", '{"city":"Paris"}'), finishChunk("tool_calls")),
        sseEvents(deltaChunk({ role: "assistant", content: "It's sunny in Paris." }), finishChunk("stop")),
      ])

      const events = Array.from(
        yield* ToolRuntime.run(llm, { request: baseRequest, tools: { get_weather } }).pipe(
          Stream.runCollect,
          Effect.provide(layer),
        ),
      )

      const result = events.find(LLMEvent.is.toolResult)
      expect(result).toMatchObject({
        type: "tool-result",
        id: "call_1",
        name: "get_weather",
        result: { type: "json", value: { temperature: 22, condition: "sunny" } },
      })
      expect(events.at(-1)?.type).toBe("request-finish")
      expect(LLM.outputText({ events })).toBe("It's sunny in Paris.")
    }),
  )

  it.effect("emits tool-error for unknown tools so the model can self-correct", () =>
    Effect.gen(function* () {
      const llm = LLMClient.make({ adapters: [OpenAIChat.adapter] })
      const layer = scriptedResponses([
        sseEvents(toolCallChunk("call_1", "missing_tool", "{}"), finishChunk("tool_calls")),
        sseEvents(deltaChunk({ role: "assistant", content: "Sorry." }), finishChunk("stop")),
      ])

      const events = Array.from(
        yield* ToolRuntime.run(llm, { request: baseRequest, tools: { get_weather } }).pipe(
          Stream.runCollect,
          Effect.provide(layer),
        ),
      )

      const toolError = events.find(LLMEvent.is.toolError)
      expect(toolError).toMatchObject({ type: "tool-error", id: "call_1", name: "missing_tool" })
      expect(toolError?.message).toContain("Unknown tool")
      expect(events.find(LLMEvent.is.toolResult)).toMatchObject({
        type: "tool-result",
        id: "call_1",
        name: "missing_tool",
        result: { type: "error", value: "Unknown tool: missing_tool" },
      })
    }),
  )

  it.effect("emits tool-error when the LLM input fails the parameters schema", () =>
    Effect.gen(function* () {
      const llm = LLMClient.make({ adapters: [OpenAIChat.adapter] })
      const layer = scriptedResponses([
        sseEvents(toolCallChunk("call_1", "get_weather", '{"city":42}'), finishChunk("tool_calls")),
        sseEvents(deltaChunk({ role: "assistant", content: "Done." }), finishChunk("stop")),
      ])

      const events = Array.from(
        yield* ToolRuntime.run(llm, { request: baseRequest, tools: { get_weather } }).pipe(
          Stream.runCollect,
          Effect.provide(layer),
        ),
      )

      const toolError = events.find(LLMEvent.is.toolError)
      expect(toolError).toMatchObject({ type: "tool-error", id: "call_1", name: "get_weather" })
      expect(toolError?.message).toContain("Invalid tool input")
    }),
  )

  it.effect("emits tool-error when the handler returns a ToolFailure", () =>
    Effect.gen(function* () {
      const llm = LLMClient.make({ adapters: [OpenAIChat.adapter] })
      const layer = scriptedResponses([
        sseEvents(toolCallChunk("call_1", "get_weather", '{"city":"FAIL"}'), finishChunk("tool_calls")),
        sseEvents(deltaChunk({ role: "assistant", content: "Sorry." }), finishChunk("stop")),
      ])

      const events = Array.from(
        yield* ToolRuntime.run(llm, { request: baseRequest, tools: { get_weather } }).pipe(
          Stream.runCollect,
          Effect.provide(layer),
        ),
      )

      const toolError = events.find(LLMEvent.is.toolError)
      expect(toolError).toMatchObject({ type: "tool-error", id: "call_1", name: "get_weather" })
      expect(toolError?.message).toBe("Weather lookup failed for FAIL")
    }),
  )

  it.effect("stops when the model finishes without requesting more tools", () =>
    Effect.gen(function* () {
      const llm = LLMClient.make({ adapters: [OpenAIChat.adapter] })
      const layer = scriptedResponses([sseEvents(deltaChunk({ role: "assistant", content: "Done." }), finishChunk("stop"))])

      const events = Array.from(
        yield* ToolRuntime.run(llm, { request: baseRequest, tools: { get_weather } }).pipe(
          Stream.runCollect,
          Effect.provide(layer),
        ),
      )

      expect(events.map((event) => event.type)).toEqual(["text-delta", "request-finish"])
      expect(LLM.outputText({ events })).toBe("Done.")
    }),
  )

  it.effect("respects maxSteps and stops the loop", () =>
    Effect.gen(function* () {
      const llm = LLMClient.make({ adapters: [OpenAIChat.adapter] })
      // Every script entry asks for another tool call. With maxSteps: 2 the
      // runtime should run at most two model rounds and then exit even though
      // the model still wants to keep going.
      const toolCallStep = sseEvents(toolCallChunk("call_x", "get_weather", '{"city":"Paris"}'), finishChunk("tool_calls"))
      const layer = scriptedResponses([toolCallStep, toolCallStep, toolCallStep])

      const events = Array.from(
        yield* ToolRuntime.run(llm, { request: baseRequest, tools: { get_weather }, maxSteps: 2 }).pipe(
          Stream.runCollect,
          Effect.provide(layer),
        ),
      )

      expect(events.filter(LLMEvent.is.requestFinish)).toHaveLength(2)
    }),
  )

  it.effect("stops when stopWhen returns true after the first step", () =>
    Effect.gen(function* () {
      const llm = LLMClient.make({ adapters: [OpenAIChat.adapter] })
      const layer = scriptedResponses([
        sseEvents(toolCallChunk("call_1", "get_weather", '{"city":"Paris"}'), finishChunk("tool_calls")),
        sseEvents(deltaChunk({ role: "assistant", content: "Should not run." }), finishChunk("stop")),
      ])

      const events = Array.from(
        yield* ToolRuntime.run(llm, {
          request: baseRequest,
          tools: { get_weather },
          stopWhen: (state) => state.step >= 0,
        }).pipe(Stream.runCollect, Effect.provide(layer)),
      )

      expect(events.filter(LLMEvent.is.requestFinish)).toHaveLength(1)
      expect(events.find(LLMEvent.is.toolResult)).toBeUndefined()
    }),
  )

  it.effect("does not dispatch provider-executed tool calls", () =>
    Effect.gen(function* () {
      // Stub client emits a provider-executed tool-call followed by its
      // tool-result and a stop. The runtime must not dispatch a handler (no
      // tool-error for unknown name) and must not loop (no second stream).
      let streams = 0
      const stub: LLMClient = {
        prepare: () => Effect.die("not used"),
        generate: () => Effect.die("not used"),
        stream: () => {
          streams++
          return Stream.fromIterable<LLMEvent>([
            { type: "request-start", id: "req_1", model: baseRequest.model },
            {
              type: "tool-call",
              id: "srvtoolu_abc",
              name: "web_search",
              input: { query: "x" },
              providerExecuted: true,
            },
            {
              type: "tool-result",
              id: "srvtoolu_abc",
              name: "web_search",
              result: { type: "json", value: { results: [] } },
              providerExecuted: true,
            },
            { type: "text-delta", text: "Done." },
            { type: "request-finish", reason: "stop" },
          ])
        },
      }

      // The runtime's stream type carries `RequestExecutor.Service` because
      // adapters use it. Our stub never executes HTTP, but the type still
      // demands the service — provide a noop so the test compiles.
      const noopExecutor = Layer.succeed(RequestExecutor.Service, {
        execute: () => Effect.die("stub client never executes HTTP"),
      })
      const events = Array.from(
        yield* ToolRuntime.run(stub, { request: baseRequest, tools: {} }).pipe(
          Stream.runCollect,
          Effect.provide(noopExecutor),
        ),
      )

      expect(streams).toBe(1)
      expect(events.find(LLMEvent.is.toolError)).toBeUndefined()
      expect(events.filter(LLMEvent.is.toolCall)).toEqual([
        {
          type: "tool-call",
          id: "srvtoolu_abc",
          name: "web_search",
          input: { query: "x" },
          providerExecuted: true,
        },
      ])
      expect(LLM.outputText({ events })).toBe("Done.")
    }),
  )

  it.effect("dispatches multiple tool calls in one step concurrently", () =>
    Effect.gen(function* () {
      const llm = LLMClient.make({ adapters: [OpenAIChat.adapter] })
      const layer = scriptedResponses([
        sseEvents(
          deltaChunk({
            role: "assistant",
            tool_calls: [
              { index: 0, id: "c1", function: { name: "get_weather", arguments: '{"city":"Paris"}' } },
              { index: 1, id: "c2", function: { name: "get_weather", arguments: '{"city":"Tokyo"}' } },
            ],
          }),
          finishChunk("tool_calls"),
        ),
        sseEvents(deltaChunk({ role: "assistant", content: "Both done." }), finishChunk("stop")),
      ])

      const events = Array.from(
        yield* ToolRuntime.run(llm, { request: baseRequest, tools: { get_weather } }).pipe(
          Stream.runCollect,
          Effect.provide(layer),
        ),
      )

      const results = events.filter(LLMEvent.is.toolResult)
      expect(results).toHaveLength(2)
      expect(results.map((event) => event.id).toSorted()).toEqual(["c1", "c2"])
    }),
  )
})
packages/llm/tsconfig.json (new file, 14 lines)
@@ -0,0 +1,14 @@
{
  "$schema": "https://json.schemastore.org/tsconfig",
  "extends": "@tsconfig/bun/tsconfig.json",
  "compilerOptions": {
    "noUncheckedIndexedAccess": false,
    "plugins": [
      {
        "name": "@effect/language-service",
        "transform": "@effect/language-service/transform",
        "namespaceImportPackages": ["effect", "@effect/*"]
      }
    ]
  }
}
packages/opencode/package.json
@@ -110,6 +110,7 @@
     "@octokit/graphql": "9.0.2",
     "@octokit/rest": "catalog:",
     "@openauthjs/openauth": "catalog:",
+    "@opencode-ai/llm": "workspace:*",
     "@opencode-ai/plugin": "workspace:*",
     "@opencode-ai/script": "workspace:*",
     "@opencode-ai/sdk": "workspace:*",
packages/opencode/src/provider/llm-bridge.ts (new file, 143 lines)
@@ -0,0 +1,143 @@
import {
  AmazonBedrock,
  Anthropic,
  Azure,
  GitHubCopilot,
  Google,
  LLM,
  OpenAI,
  OpenAICompatibleFamily,
  ProviderResolver,
  ReasoningEfforts,
  XAI,
  type CapabilitiesInput,
  type ModelRef,
  type ProviderAuth,
  type ProviderResolution,
  type ProviderResolverShape,
  type ReasoningEffort,
} from "@opencode-ai/llm"
import { isRecord } from "@/util/record"
import type * as Provider from "./provider"

type Input = {
  readonly provider: Provider.Info
  readonly model: Provider.Model
}

const PROVIDERS: Record<string, ProviderResolverShape> = {
  "@ai-sdk/amazon-bedrock": AmazonBedrock.resolver,
  "@ai-sdk/anthropic": Anthropic.resolver,
  "@ai-sdk/azure": Azure.resolver,
  "@ai-sdk/baseten": OpenAICompatibleFamily.resolver,
  "@ai-sdk/cerebras": OpenAICompatibleFamily.resolver,
  "@ai-sdk/deepinfra": OpenAICompatibleFamily.resolver,
  "@ai-sdk/fireworks": OpenAICompatibleFamily.resolver,
  "@ai-sdk/github-copilot": GitHubCopilot.resolver,
  "@ai-sdk/google": Google.resolver,
  "@ai-sdk/openai": OpenAI.resolver,
  "@ai-sdk/openai-compatible": OpenAICompatibleFamily.resolver,
  "@ai-sdk/togetherai": OpenAICompatibleFamily.resolver,
  "@ai-sdk/xai": XAI.resolver,
}

const REASONING_EFFORTS = new Set<ReasoningEffort>(ReasoningEfforts)

const stringOption = (options: Record<string, unknown>, key: string) => {
  const value = options[key]
  if (typeof value === "string" && value.trim() !== "") return value
  return undefined
}

const recordOption = (options: Record<string, unknown>, key: string): Record<string, string> => {
  const value = options[key]
  if (!isRecord(value)) return {}
  return Object.fromEntries(Object.entries(value).filter((entry): entry is [string, string] => typeof entry[1] === "string"))
}

export const resolve = (
  input: Input,
  options: Record<string, unknown> = { ...input.provider.options, ...input.model.options },
): ProviderResolution | undefined =>
  PROVIDERS[input.model.api.npm]?.resolve(ProviderResolver.input(input.model.api.id, input.model.providerID, options))

const baseURL = (input: Input, resolution: ProviderResolution, options: Record<string, unknown>) => {
  const configured = stringOption(options, "baseURL") ?? input.model.api.url
  if (configured) return configured
  return resolution.baseURL
}

const apiKey = (input: Input, resolution: ProviderResolution, options: Record<string, unknown>) => {
  if (resolution.auth === "none") return undefined
  return stringOption(options, "apiKey") ?? input.provider.key
}

const headers = (input: Input, options: Record<string, unknown>) => {
  const result = { ...recordOption(options, "headers"), ...input.model.headers }
  return Object.keys(result).length === 0 ? undefined : result
}

const reasoningEfforts = (input: Input) =>
  Object.keys(input.model.variants ?? {}).filter((effort): effort is ReasoningEffort =>
    REASONING_EFFORTS.has(effort as ReasoningEffort),
  )

const mergeCapabilities = (base: CapabilitiesInput, override: CapabilitiesInput): CapabilitiesInput => ({
  input: { ...base.input, ...override?.input },
  output: { ...base.output, ...override?.output },
  tools: { ...base.tools, ...override?.tools },
  cache: { ...base.cache, ...override?.cache },
  reasoning: { ...base.reasoning, ...override?.reasoning },
})

const capabilities = (input: Input, resolution: ProviderResolution) => {
  const base: CapabilitiesInput = {
    input: {
      text: input.model.capabilities.input.text,
      image: input.model.capabilities.input.image,
      audio: input.model.capabilities.input.audio,
      video: input.model.capabilities.input.video,
      pdf: input.model.capabilities.input.pdf,
    },
    output: {
      text: input.model.capabilities.output.text,
      reasoning: input.model.capabilities.reasoning,
    },
    tools: {
      calls: input.model.capabilities.toolcall,
      streamingInput: resolution.protocol !== "gemini" && input.model.capabilities.toolcall,
    },
    cache: {
      // Both Anthropic Messages and Bedrock Converse honour positional cache
      // markers — Anthropic via `cache_control` on content blocks, Bedrock via
      // its `cachePoint` marker block (added to BedrockConverse in 9d7d518ac).
      prompt: ["anthropic-messages", "bedrock-converse"].includes(resolution.protocol),
      contentBlocks: ["anthropic-messages", "bedrock-converse"].includes(resolution.protocol),
    },
    reasoning: {
      efforts: reasoningEfforts(input),
      summaries: resolution.protocol === "openai-responses",
      encryptedContent: resolution.protocol === "openai-responses" || resolution.protocol === "anthropic-messages",
    },
  }
  return LLM.capabilities(resolution.capabilities ? mergeCapabilities(base, resolution.capabilities) : base)
}

export const toModelRef = (input: Input): ModelRef | undefined => {
  const options = { ...input.provider.options, ...input.model.options }
  const resolution = resolve(input, options)
  if (!resolution) return undefined
  return LLM.model({
    id: input.model.api.id,
    provider: resolution.provider,
    protocol: resolution.protocol,
    baseURL: baseURL(input, resolution, options),
    apiKey: apiKey(input, resolution, options),
    headers: headers(input, options),
    queryParams: resolution.queryParams,
    capabilities: capabilities(input, resolution),
    limits: LLM.limits({ context: input.model.limit.context, output: input.model.limit.output }),
  })
}

export * as ProviderLLMBridge from "./llm-bridge"
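For orientation, a sketch of the intended call site for this bridge; the `provider` and `model` bindings are illustrative and would come from the existing Provider lookups:

// Hypothetical caller. `toModelRef` returns undefined when no resolver is
// registered for `model.api.npm`, so callers can fall back to the AI SDK path.
const ref = ProviderLLMBridge.toModelRef({ provider, model })
if (ref) {
  // ref.protocol, ref.capabilities, and ref.limits now drive the native adapters.
}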
packages/opencode/src/provider/provider.ts
@@ -25,18 +25,13 @@ import { InstanceState } from "@/effect/instance-state"
 import { AppFileSystem } from "@opencode-ai/core/filesystem"
 import { isRecord } from "@/util/record"
 import { optionalOmitUndefined, withStatics } from "@/util/schema"
+import { GitHubCopilot } from "@opencode-ai/llm/provider/github-copilot"

 import * as ProviderTransform from "./transform"
 import { ModelID, ProviderID } from "./schema"

 const log = Log.create({ service: "provider" })

-function shouldUseCopilotResponsesApi(modelID: string): boolean {
-  const match = /^gpt-(\d+)/.exec(modelID)
-  if (!match) return false
-  return Number(match[1]) >= 5 && !modelID.startsWith("gpt-5-mini")
-}
-
 function wrapSSE(res: Response, ms: number, ctl: AbortController) {
   if (typeof ms !== "number" || ms <= 0) return res
   if (!res.body) return res
@@ -193,7 +188,7 @@ function custom(dep: CustomDep): Record<string, CustomLoader> {
       autoload: false,
       async getModel(sdk: any, modelID: string, _options?: Record<string, any>) {
         if (useLanguageModel(sdk)) return sdk.languageModel(modelID)
-        return shouldUseCopilotResponsesApi(modelID) ? sdk.responses(modelID) : sdk.chat(modelID)
+        return GitHubCopilot.shouldUseResponsesApi(modelID) ? sdk.responses(modelID) : sdk.chat(modelID)
       },
       options: {},
     }),
packages/opencode/src/session/llm-native-events.ts (new file, 206 lines)
@@ -0,0 +1,206 @@
import type { LLMEvent, ToolResultValue, Usage } from "@opencode-ai/llm"
import type { Event as SessionEvent } from "./llm"

type MapperState = {
  readonly text: Set<string>
  readonly reasoning: Set<string>
  readonly toolInput: Set<string>
  readonly toolInputs: Map<string, unknown>
}

const textID = (event: { readonly id?: string }) => event.id ?? "text"

const reasoningID = (event: { readonly id?: string }) => event.id ?? "reasoning"

const usage = (input: Usage | undefined) =>
  ({
    inputTokens: input?.inputTokens ?? 0,
    outputTokens: input?.outputTokens ?? 0,
    totalTokens: input?.totalTokens,
    reasoningTokens: input?.reasoningTokens,
    cachedInputTokens: input?.cacheReadInputTokens,
    inputTokenDetails: {
      noCacheTokens: Math.max(0, (input?.inputTokens ?? 0) - (input?.cacheReadInputTokens ?? 0) - (input?.cacheWriteInputTokens ?? 0)),
      cacheReadTokens: input?.cacheReadInputTokens,
      cacheWriteTokens: input?.cacheWriteInputTokens,
    },
    outputTokenDetails: {
      textTokens: Math.max(0, (input?.outputTokens ?? 0) - (input?.reasoningTokens ?? 0)),
      reasoningTokens: input?.reasoningTokens,
    },
  })

const stringifyResult = (result: ToolResultValue) => {
  if (typeof result.value === "string") return result.value
  return JSON.stringify(result.value)
}

// Recognize the opencode `Tool.ExecuteResult` shape inside a `tool-result`
// event's `result.value`. Native-path tool dispatchers wrap their handler
// output in this shape so the AI-SDK-shaped session event carries the
// real `title`, `metadata`, and `output` fields rather than the JSON
// encoding of the whole record. Provider-executed tools (Anthropic
// `web_search` etc.) and synthetic results that don't follow the shape
// still go through `stringifyResult` below.
type ExecuteShape = {
  readonly title?: unknown
  readonly metadata?: unknown
  readonly output?: unknown
}

const isExecuteResult = (value: unknown): value is ExecuteShape => {
  if (typeof value !== "object" || value === null || Array.isArray(value)) return false
  const v = value as ExecuteShape
  return typeof v.output === "string"
}

const toolResultOutput = (result: ToolResultValue) => {
  if (result.type !== "json" || !isExecuteResult(result.value)) {
    return { title: "", metadata: {}, output: stringifyResult(result) }
  }
  const value = result.value
  return {
    title: typeof value.title === "string" ? value.title : "",
    metadata: typeof value.metadata === "object" && value.metadata !== null ? (value.metadata as Record<string, unknown>) : {},
    output: typeof value.output === "string" ? value.output : "",
  }
}

const response = () => ({ id: "", timestamp: new Date(0), modelId: "" })

const finishReason = (reason: Extract<LLMEvent, { type: "request-finish" | "step-finish" }>["reason"]) =>
  reason === "unknown" ? "error" : reason

const closeOpenParts = (state: MapperState) => [
  ...Array.from(state.text, (id) => ({ type: "text-end" as const, id })),
  ...Array.from(state.reasoning, (id) => ({ type: "reasoning-end" as const, id })),
  ...Array.from(state.toolInput, (id) => ({ type: "tool-input-end" as const, id })),
]

export const mapper = () => {
  const state: MapperState = { text: new Set(), reasoning: new Set(), toolInput: new Set(), toolInputs: new Map() }

  const startText = (id: string) => {
    if (state.text.has(id)) return []
    state.text.add(id)
    return [{ type: "text-start" as const, id }]
  }

  const endText = (id: string) => {
    if (!state.text.has(id)) return []
    state.text.delete(id)
    return [{ type: "text-end" as const, id }]
  }

  const startReasoning = (id: string) => {
    if (state.reasoning.has(id)) return []
    state.reasoning.add(id)
    return [{ type: "reasoning-start" as const, id }]
  }

  const startToolInput = (id: string, toolName: string, providerExecuted?: boolean) => {
    if (state.toolInput.has(id)) return []
    state.toolInput.add(id)
    return [{ type: "tool-input-start" as const, id, toolName, providerExecuted }]
  }

  const endToolInput = (id: string) => {
    if (!state.toolInput.has(id)) return []
    state.toolInput.delete(id)
    return [{ type: "tool-input-end" as const, id }]
  }

  const finish = (event: Extract<LLMEvent, { type: "request-finish" | "step-finish" }>, includeFinal: boolean) => {
    const reason = finishReason(event.reason)
    const events = [
      ...closeOpenParts(state),
      {
        type: "finish-step" as const,
        finishReason: reason,
        rawFinishReason: event.reason,
        usage: usage(event.usage),
        response: response(),
        providerMetadata: undefined,
      },
      ...(includeFinal
        ? [{ type: "finish" as const, finishReason: reason, rawFinishReason: event.reason, usage: usage(event.usage), totalUsage: usage(event.usage), response: response(), providerMetadata: undefined }]
        : []),
    ]
    state.text.clear()
    state.reasoning.clear()
    state.toolInput.clear()
    return events
  }

  const map = (event: LLMEvent): ReadonlyArray<SessionEvent> => {
    switch (event.type) {
      case "request-start":
        return [{ type: "start" }]
      case "step-start":
        return [{ type: "start-step", request: {}, warnings: [] }]
      case "text-start":
        return startText(event.id)
      case "text-delta": {
        const id = textID(event)
        return [...startText(id), { type: "text-delta", id, text: event.text }]
      }
      case "text-end":
        return endText(event.id)
      case "reasoning-delta": {
        const id = reasoningID(event)
        return [...startReasoning(id), { type: "reasoning-delta", id, text: event.text }]
      }
      case "tool-input-delta":
        return [
          ...startToolInput(event.id, event.name),
          { type: "tool-input-delta", id: event.id, delta: event.text },
        ]
      case "tool-call":
        state.toolInputs.set(event.id, event.input)
        return [
          ...startToolInput(event.id, event.name, event.providerExecuted),
          ...endToolInput(event.id),
          {
            type: "tool-call",
            toolCallId: event.id,
            toolName: event.name,
            input: event.input,
            providerExecuted: event.providerExecuted,
          },
        ]
      case "tool-result":
        if (event.result.type === "error") {
          return [{ type: "tool-error", toolCallId: event.id, toolName: event.name, input: state.toolInputs.get(event.id) ?? {}, error: stringifyResult(event.result) }]
        }
        return [
          {
            type: "tool-result",
            toolCallId: event.id,
            toolName: event.name,
            input: state.toolInputs.get(event.id) ?? {},
            output: toolResultOutput(event.result),
          },
        ]
      case "tool-error":
        return [{ type: "tool-error", toolCallId: event.id, toolName: event.name, input: state.toolInputs.get(event.id) ?? {}, error: event.message }]
      case "step-finish":
        return finish(event, false)
      case "request-finish":
        return finish(event, true)
      case "provider-error":
        return [{ type: "error", error: new Error(event.message) }]
    }
    return []
  }

  const flush = (): ReadonlyArray<SessionEvent> => closeOpenParts(state)

  return { map, flush }
}

export const toSessionEvents = (events: Iterable<LLMEvent>) => {
  const m = mapper()
  return [...Array.from(events, (event) => m.map(event)).flat(), ...m.flush()]
}

export * as LLMNativeEvents from "./llm-native-events"
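A sketch of the incremental form the `mapper` enables (batch callers use `toSessionEvents` above); `llmEvents` here is an assumed `Stream` of `LLMEvent`, and the `Stream` combinators come from effect:

const m = mapper()
const sessionEvents = llmEvents.pipe(
  Stream.mapConcat((event) => m.map(event)),                            // fan each LLMEvent out to 0..n session events
  Stream.concat(Stream.suspend(() => Stream.fromIterable(m.flush()))), // close any parts still open at end-of-stream
)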
packages/opencode/src/session/llm-native-tools.ts (new file, 248 lines)
@@ -0,0 +1,248 @@
import {
  LLM,
  type LLMClient,
  type LLMError,
  type LLMEvent,
  type LLMRequest,
  type FinishReason,
  type ContentPart,
  type RequestExecutor,
} from "@opencode-ai/llm"
import { Cause, Deferred, Effect, FiberSet, Queue, Stream, type Scope } from "effect"
import type { Tool, ToolExecutionOptions } from "ai"

// Maximum number of model rounds before the streaming-dispatch loop stops.
// Mirrors `ToolRuntime.run`'s default; tweak via `maxSteps` if a caller needs
// a different ceiling.
export const DEFAULT_MAX_STEPS = 10

// What we care about from the round's events to (a) decide whether to start
// another round and (b) build the continuation request's message history.
interface RoundState {
  finishReason: FinishReason | undefined
  // Echoed back as the next round's assistant message — text deltas merged
  // into a single text part, reasoning deltas into a single reasoning part,
  // tool calls appended in order. Provider-executed tool results are also
  // appended here so the provider sees the full hosted-tool round-trip.
  assistantContent: ContentPart[]
  // Client-side tool dispatches. One entry per `tool-call` event we forked
  // a handler for, populated when the handler completes.
  toolResults: Array<{ id: string; name: string; result: unknown }>
}

const appendStreamingText = (state: RoundState, type: "text" | "reasoning", text: string) => {
  const last = state.assistantContent.at(-1)
  if (last?.type === type) {
    state.assistantContent[state.assistantContent.length - 1] = { ...last, text: `${last.text}${text}` }
    return
  }
  state.assistantContent.push({ type, text })
}

const accumulate = (state: RoundState, event: LLMEvent) => {
  if (event.type === "text-delta") return appendStreamingText(state, "text", event.text)
  if (event.type === "reasoning-delta") return appendStreamingText(state, "reasoning", event.text)
  if (event.type === "tool-call") {
    state.assistantContent.push(
      LLM.toolCall({
        id: event.id,
        name: event.name,
        input: event.input,
        providerExecuted: event.providerExecuted,
      }),
    )
    return
  }
  if (event.type === "tool-result" && event.providerExecuted) {
    state.assistantContent.push(
      LLM.toolResult({
        id: event.id,
        name: event.name,
        result: event.result,
        providerExecuted: true,
      }),
    )
    return
  }
  if (event.type === "request-finish") {
    state.finishReason = event.reason
  }
}

// Dispatch a single client-side tool call. Returns the synthetic LLMEvent
// that should be injected back into the round's stream — either a
// `tool-result` (success) or `tool-error` (handler threw / unknown tool).
// Errors from the AI SDK execute handler are caught and turned into
// `tool-error` so the round survives and the model can self-correct on
// the next step.
const dispatchTool = (
  call: { readonly id: string; readonly name: string; readonly input: unknown },
  tools: Record<string, Tool>,
  abort: AbortSignal,
): Effect.Effect<LLMEvent> =>
  Effect.gen(function* () {
    const tool = tools[call.name]
    if (!tool || typeof tool.execute !== "function") {
      return {
        type: "tool-error",
        id: call.id,
        name: call.name,
        message: `Unknown tool: ${call.name}`,
      } satisfies LLMEvent
    }
    const options: ToolExecutionOptions = {
      toolCallId: call.id,
      messages: [],
      abortSignal: abort,
    }
    return yield* Effect.tryPromise({
      try: () => Promise.resolve(tool.execute!(call.input as never, options)),
      catch: (err) => err,
    }).pipe(
      Effect.map(
        (result): LLMEvent => ({
          type: "tool-result",
          id: call.id,
          name: call.name,
          result: { type: "json", value: result },
        }),
      ),
      Effect.catch(
        (err): Effect.Effect<LLMEvent> =>
          Effect.succeed({
            type: "tool-error",
            id: call.id,
            name: call.name,
            message: err instanceof Error ? err.message : String(err),
          }),
      ),
    )
  })

// Drive one model round. Streams every LLM event in real time; each
// non-provider-executed `tool-call` event forks a dispatcher fiber that
// pushes the resulting `tool-result` (or `tool-error`) event back into the
// same stream as soon as the handler completes. The round ends when:
//   1. the LLM stream completes, AND
//   2. every forked dispatcher has finished.
// At that point the queue is closed (consumers see end-of-stream) and
// `done` resolves with the accumulated state so the multi-round driver can
// decide whether to recurse.
const runOneRound = (
  client: LLMClient,
  request: LLMRequest,
  tools: Record<string, Tool>,
  abort: AbortSignal,
): Effect.Effect<
  {
    readonly events: Stream.Stream<LLMEvent, LLMError>
    readonly done: Deferred.Deferred<RoundState>
  },
  never,
  Scope.Scope | RequestExecutor.Service
> =>
  Effect.gen(function* () {
    const queue = yield* Queue.unbounded<LLMEvent, LLMError | Cause.Done>()
    const fiberSet = yield* FiberSet.make<unknown, never>()
    const state: RoundState = { finishReason: undefined, assistantContent: [], toolResults: [] }
    const done = yield* Deferred.make<RoundState>()

    yield* Effect.forkScoped(
      Effect.gen(function* () {
        yield* client.stream(request).pipe(
          Stream.runForEach((event) =>
            Effect.gen(function* () {
              accumulate(state, event)
              yield* Queue.offer(queue, event)
              if (event.type === "tool-call" && !event.providerExecuted) {
                yield* FiberSet.run(
                  fiberSet,
                  dispatchTool(event, tools, abort).pipe(
                    Effect.flatMap((resultEvent) =>
                      Effect.gen(function* () {
                        if (resultEvent.type === "tool-result") {
                          state.toolResults.push({
                            id: resultEvent.id,
                            name: resultEvent.name,
                            result: (resultEvent.result as { readonly value: unknown }).value,
                          })
                        }
                        yield* Queue.offer(queue, resultEvent)
                      }),
                    ),
                  ),
                )
              }
            }),
          ),
          Effect.catchCause((cause) =>
            Effect.gen(function* () {
              yield* Queue.failCause(queue, cause)
              yield* Deferred.succeed(done, state)
            }),
          ),
        )
        yield* FiberSet.awaitEmpty(fiberSet)
        yield* Queue.end(queue)
        yield* Deferred.succeed(done, state)
      }),
    )

    return { events: Stream.fromQueue(queue), done }
  })

// Build the next round's `LLMRequest` by appending the assistant message that
// echoes everything the round produced (text, reasoning, tool calls, hosted
// tool results) plus a `tool` role message per dispatched result. Lowering
// of these LLM-shaped messages back to the provider wire format is handled
// inside the existing adapter `prepare` step.
const continuationRequest = (request: LLMRequest, state: RoundState): LLMRequest => {
  const assistant = LLM.message({ role: "assistant", content: state.assistantContent })
  const toolMessages = state.toolResults.map((entry) =>
    LLM.toolMessage({ id: entry.id, name: entry.name, result: entry.result }),
  )
  return LLM.updateRequest(request, {
    messages: [...request.messages, assistant, ...toolMessages],
  })
}

/**
 * Run a multi-round model+tool stream with streaming dispatch within each
 * round. As each `tool-call` event arrives, the matching AI SDK tool's
 * `execute` runs in a forked fiber and its result is injected back into the
 * stream as a synthetic `tool-result` event. This matches the AI SDK's
 * `streamText` UX: long-running tools don't block subsequent tool-call
 * streaming, and consumers see results land as they complete.
 *
 * Stops when the model finishes a round with anything other than
 * `tool-calls`, when `maxSteps` is reached, or when the underlying scope is
 * interrupted (e.g. via the abort signal).
 */
export const runWithTools = (input: {
  readonly client: LLMClient
  readonly request: LLMRequest
  readonly tools: Record<string, Tool>
  readonly abort: AbortSignal
  readonly maxSteps?: number
}): Stream.Stream<LLMEvent, LLMError, RequestExecutor.Service> => {
  const maxSteps = input.maxSteps ?? DEFAULT_MAX_STEPS
  const round = (request: LLMRequest, step: number): Stream.Stream<LLMEvent, LLMError, RequestExecutor.Service> =>
    Stream.unwrap(
      Effect.gen(function* () {
        const { events, done } = yield* runOneRound(input.client, request, input.tools, input.abort)
        const continuation = Stream.unwrap(
          Effect.gen(function* () {
            const state = yield* Deferred.await(done)
            if (state.finishReason !== "tool-calls") return Stream.empty
            if (state.toolResults.length === 0) return Stream.empty
            if (step + 1 >= maxSteps) return Stream.empty
            return round(continuationRequest(request, state), step + 1)
          }),
        )
        return events.pipe(Stream.concat(continuation))
      }),
    )
  return round(input.request, 0)
}

export * as LLMNativeTools from "./llm-native-tools"
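A sketch of the intended call shape, using names from this file; the client, request, and tool record are assumed to be assembled by the surrounding session service:

const events = runWithTools({
  client,                   // LLMClient built elsewhere from the protocol adapters
  request,                  // LLMRequest, e.g. produced by LLMNative.request
  tools,                    // AI-SDK-shaped Record<string, Tool>
  abort: controller.signal,
  maxSteps: 4,              // optional; DEFAULT_MAX_STEPS (10) otherwise
})
// `events` is a Stream<LLMEvent, LLMError, RequestExecutor.Service>; consume it
// with Stream.runForEach and satisfy the executor requirement with a layer.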
packages/opencode/src/session/llm-native.ts (new file, 259 lines)
@@ -0,0 +1,259 @@
import { LLM, type ContentPart, type MediaPart } from "@opencode-ai/llm"
import { Effect, Schema } from "effect"
import { ProviderLLMBridge } from "@/provider/llm-bridge"
import * as EffectZod from "@/util/effect-zod"
import type { Provider } from "@/provider/provider"
import type { Tool } from "@/tool/tool"
import type { MessageV2 } from "./message-v2"

export class UnsupportedModelError extends Schema.TaggedErrorClass<UnsupportedModelError>()(
  "LLMNative.UnsupportedModelError",
  {
    providerID: Schema.String,
    modelID: Schema.String,
  },
) {
  override get message() {
    return `No native LLM route for ${this.providerID}/${this.modelID}`
  }
}

export class UnsupportedContentError extends Schema.TaggedErrorClass<UnsupportedContentError>()(
  "LLMNative.UnsupportedContentError",
  {
    messageID: Schema.String,
    partType: Schema.String,
    reason: Schema.optional(Schema.String),
  },
) {
  override get message() {
    const base = `Native LLM request conversion does not support ${this.partType} parts in message ${this.messageID}`
    return this.reason ? `${base}: ${this.reason}` : base
  }
}

export type RequestInput = {
  readonly id?: string
  readonly provider: Provider.Info
  readonly model: Provider.Model
  readonly system?: ReadonlyArray<string>
  readonly messages: ReadonlyArray<MessageV2.WithParts>
  readonly tools?: ReadonlyArray<Tool.Def>
  readonly toolChoice?: LLM.RequestInput["toolChoice"]
  readonly generation?: LLM.RequestInput["generation"]
  readonly headers?: Record<string, string>
  readonly metadata?: Record<string, unknown>
  readonly native?: Record<string, unknown>
}

const isDefined = <T>(value: T | undefined): value is T => value !== undefined

const isRecord = (value: unknown): value is Record<string, unknown> =>
  typeof value === "object" && value !== null && !Array.isArray(value)

// Match `data:<mediaType>[;param=value]*[;base64],<payload>`. Captures only the
// payload — the bridge passes it through to `MediaPart.data` (already-base64
// per the convention `ProviderShared.mediaBytes` follows). Non-data URLs
// (http(s):, file:, relative paths) are out of scope for now and rejected
// upstream so a future fetch / filesystem-read path can plug in cleanly.
const DATA_URL_PATTERN = /^data:[^,]*,(.*)$/s

const lowerFilePart = (message: MessageV2.WithParts, part: MessageV2.FilePart) =>
  Effect.gen(function* () {
    const match = DATA_URL_PATTERN.exec(part.url)
    if (!match) {
      return yield* new UnsupportedContentError({
        messageID: message.info.id,
        partType: "file",
        reason: `file URL must be a data: URL (got ${part.url})`,
      })
    }
    return {
      type: "media",
      mediaType: part.mime,
      data: match[1],
      filename: part.filename,
    } satisfies MediaPart
  })

const nativeMessage = (message: MessageV2.WithParts) => ({
  opencodeMessageID: message.info.id,
})

const providerMeta = (metadata: Record<string, unknown> | undefined) => {
  if (!metadata) return undefined
  const { providerExecuted: _, ...rest } = metadata
  return Object.keys(rest).length > 0 ? rest : undefined
}

const providerExecuted = (metadata: Record<string, unknown> | undefined) =>
  metadata?.providerExecuted === true ? true : undefined

const encryptedReasoning = (metadata: Record<string, unknown> | undefined) => {
  if (!metadata) return undefined
  if (typeof metadata.encrypted === "string") return metadata.encrypted
  if (isRecord(metadata.anthropic) && typeof metadata.anthropic.signature === "string") return metadata.anthropic.signature
  if (isRecord(metadata.openai) && typeof metadata.openai.reasoningEncryptedContent === "string") {
    return metadata.openai.reasoningEncryptedContent
  }
  return undefined
}

const isToolPart = (part: MessageV2.Part): part is MessageV2.ToolPart => part.type === "tool"

const supportsPart = (message: MessageV2.WithParts, part: MessageV2.Part) => {
  if (part.type === "text") return true
  if (part.type === "file") return message.info.role === "user"
  if (message.info.role !== "assistant") return false
  return part.type === "reasoning" || part.type === "tool"
}

const unsupportedPart = (input: RequestInput) =>
  input.messages
    .flatMap((message) => message.parts.map((part) => ({ message, part })))
    .find((entry) => !supportsPart(entry.message, entry.part))

const toolResultValue = (part: MessageV2.ToolPart) => {
  if (part.state.status === "completed") {
    return {
      type: "text" as const,
      value: part.state.time.compacted ? "[Old tool result content cleared]" : part.state.output,
    }
  }
  if (part.state.status === "error") {
    const output = part.state.metadata?.interrupted === true ? part.state.metadata.output : undefined
    if (typeof output === "string") return { type: "text" as const, value: output }
    return { type: "error" as const, value: part.state.error }
  }
  return { type: "error" as const, value: "[Tool execution was interrupted]" }
}

const assistantContent = (part: MessageV2.Part): ReadonlyArray<ContentPart> => {
  if (part.type === "text" && !part.ignored) return [LLM.text(part.text)]
  if (part.type === "reasoning") return [{ type: "reasoning", text: part.text, encrypted: encryptedReasoning(part.metadata), metadata: part.metadata }]
  if (part.type !== "tool") return []

  return [
    LLM.toolCall({
      id: part.callID,
      name: part.tool,
      input: part.state.input,
      providerExecuted: providerExecuted(part.metadata),
      metadata: providerMeta(part.metadata),
    }),
    ...(providerExecuted(part.metadata) ? [toolResultPart(part)] : []),
  ]
}

const toolResultMessage = (part: MessageV2.ToolPart) =>
  LLM.toolMessage({
    id: part.callID,
    name: part.tool,
    result: toolResultValue(part),
    providerExecuted: providerExecuted(part.metadata),
    metadata: providerMeta(part.metadata),
  })

const toolResultPart = (part: MessageV2.ToolPart) =>
  LLM.toolResult({
    id: part.callID,
    name: part.tool,
    result: toolResultValue(part),
    providerExecuted: true,
    metadata: providerMeta(part.metadata),
  })

const assistantMessages = (input: MessageV2.WithParts) => {
  const content = input.parts.flatMap(assistantContent)
  const assistant = content.length
    ? LLM.message({
        id: input.info.id,
        role: "assistant",
        content,
        native: nativeMessage(input),
      })
    : undefined

  return [
    assistant,
    ...input.parts.filter(isToolPart).filter((part) => !providerExecuted(part.metadata)).map(toolResultMessage),
  ].filter(isDefined)
}

// User-role parts that pass the static gate: text and file. Text becomes a
// `LLM.text(...)` ContentPart; file becomes a `MediaPart` via `lowerFilePart`,
// which can yield `UnsupportedContentError` for non-data URLs.
const lowerUserPart = (message: MessageV2.WithParts, part: MessageV2.Part) =>
  Effect.gen(function* () {
    if (part.type === "text") return part.ignored ? [] : [LLM.text(part.text)]
    if (part.type === "file") return [yield* lowerFilePart(message, part)]
    return []
  })

const userMessage = Effect.fnUntraced(function* (input: MessageV2.WithParts) {
  const content: ContentPart[] = []
  for (const part of input.parts) {
    content.push(...(yield* lowerUserPart(input, part)))
  }
  if (content.length === 0) return []
  return [
    LLM.message({
      id: input.info.id,
      role: input.info.role,
      content,
      native: nativeMessage(input),
    }),
  ]
})

const lowerMessage = Effect.fnUntraced(function* (input: MessageV2.WithParts) {
  if (input.info.role === "assistant") return assistantMessages(input)
  return yield* userMessage(input)
})

export const toolDefinition = (input: { readonly model: Provider.Model; readonly tool: Tool.Def }) =>
  LLM.toolDefinition({
    name: input.tool.id,
    description: input.tool.description,
    inputSchema: EffectZod.toJsonSchema(input.tool.parameters),
    native: {
      opencodeToolID: input.tool.id,
    },
  })

export const request = Effect.fn("LLMNative.request")(function* (input: RequestInput) {
  const unsupported = unsupportedPart(input)
  if (unsupported) {
    return yield* new UnsupportedContentError({
      messageID: unsupported.message.info.id,
      partType: unsupported.part.type,
    })
  }

  const model = ProviderLLMBridge.toModelRef({ provider: input.provider, model: input.model })
  if (!model) {
    return yield* new UnsupportedModelError({
      providerID: input.provider.id,
      modelID: input.model.id,
    })
  }
  const headers = { ...model.headers, ...input.headers }
  const requestModel = Object.keys(headers).length === 0 ? model : LLM.model({ ...model, headers })
  // Cache hints, tool-id scrubbing, and other adapter-aware patches live in
  // `@opencode-ai/llm`'s `ProviderPatch` registry. Callers wire them in at
  // `client({ adapters, patches: ProviderPatch.defaults })` time so the
  // bridge stays focused on shape conversion.
  return LLM.request({
    id: input.id,
    model: requestModel,
    system: input.system?.filter((part) => part.trim() !== "").map(LLM.system) ?? [],
    messages: (yield* Effect.forEach(input.messages, lowerMessage)).flat(),
    tools: input.tools?.map((tool) => toolDefinition({ model: input.model, tool })) ?? [],
    toolChoice: input.toolChoice,
    generation: input.generation,
    metadata: input.metadata,
    native: input.native,
  })
})

export * as LLMNative from "./llm-native"
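Tying the file together, a hedged sketch of how a caller is expected to build a native request; all bindings here are illustrative:

const program = Effect.gen(function* () {
  // Fails with UnsupportedContentError / UnsupportedModelError before any
  // network traffic when a part or model has no native lowering.
  const req = yield* LLMNative.request({
    provider,                 // Provider.Info
    model,                    // Provider.Model
    system: ["You are terse."],
    messages,                 // MessageV2.WithParts[]
    tools: toolDefs,          // Tool.Def[]
  })
  return req
})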
@@ -3,13 +3,28 @@ import * as Log from "@opencode-ai/core/util/log"
|
||||
import { Context, Effect, Layer, Record } from "effect"
|
||||
import * as Stream from "effect/Stream"
|
||||
import { streamText, wrapLanguageModel, type ModelMessage, type Tool, tool, jsonSchema } from "ai"
|
||||
import type { LanguageModelV3 } from "@ai-sdk/provider"
|
||||
import { mergeDeep } from "remeda"
|
||||
import { GitLabWorkflowLanguageModel } from "gitlab-ai-provider"
|
||||
import {
|
||||
AnthropicMessages,
|
||||
BedrockConverse,
|
||||
Gemini,
|
||||
LLMClient,
|
||||
OpenAIChat,
|
||||
OpenAICompatibleChat,
|
||||
OpenAIResponses,
|
||||
ProviderPatch,
|
||||
RequestExecutor,
|
||||
type ProtocolID,
|
||||
} from "@opencode-ai/llm"
|
||||
import { ProviderTransform } from "@/provider/transform"
|
||||
import { Config } from "@/config/config"
|
||||
import { InstanceState } from "@/effect/instance-state"
|
||||
import type { Agent } from "@/agent/agent"
|
||||
import type { MessageV2 } from "./message-v2"
|
||||
// Aliased to avoid a name clash with the AI SDK `Tool` type imported above.
|
||||
import type { Tool as OpenCodeTool } from "@/tool/tool"
|
||||
import { Plugin } from "@/plugin"
|
||||
import { SystemPrompt } from "./system"
|
||||
import { Flag } from "@opencode-ai/core/flag/flag"
|
||||
@@ -19,16 +34,36 @@ import { Bus } from "@/bus"
|
||||
import { Wildcard } from "@/util/wildcard"
|
||||
import { SessionID } from "@/session/schema"
|
||||
import { Auth } from "@/auth"
|
||||
import { Installation } from "@/installation"
|
||||
import { InstallationVersion } from "@opencode-ai/core/installation/version"
|
||||
import { EffectBridge } from "@/effect/bridge"
|
||||
import * as Option from "effect/Option"
|
||||
import * as OtelTracer from "@effect/opentelemetry/Tracer"
|
||||
import { LLMNative } from "./llm-native"
|
||||
import { LLMNativeEvents } from "./llm-native-events"
|
||||
import { LLMNativeTools } from "./llm-native-tools"
|
||||
|
||||
const log = Log.create({ service: "llm" })
|
||||
export const OUTPUT_TOKEN_MAX = ProviderTransform.OUTPUT_TOKEN_MAX
|
||||
type Result = Awaited<ReturnType<typeof streamText>>
|
||||
|
||||
type PreparedStream = {
|
||||
readonly language: LanguageModelV3
|
||||
readonly cfg: Config.Info
|
||||
readonly item: Provider.Info
|
||||
readonly system: string[]
|
||||
readonly options: Record<string, any>
|
||||
readonly messages: ModelMessage[]
|
||||
readonly params: {
|
||||
readonly temperature?: number
|
||||
readonly topP?: number
|
||||
readonly topK?: number
|
||||
readonly maxOutputTokens?: number
|
||||
readonly options: Record<string, any>
|
||||
}
|
||||
readonly headers: Record<string, string>
|
||||
readonly tools: Record<string, Tool>
|
||||
}
|
||||
|
||||
// Avoid re-instantiating remeda's deep merge types in this hot LLM path; the runtime behavior is still mergeDeep.
|
||||
const mergeOptions = (target: Record<string, any>, source: Record<string, any> | undefined): Record<string, any> =>
|
||||
mergeDeep(target, source ?? {}) as Record<string, any>
|
||||
@@ -46,6 +81,12 @@ export type StreamInput = {
|
||||
tools: Record<string, Tool>
|
||||
retries?: number
|
||||
toolChoice?: "auto" | "required" | "none"
|
||||
nativeMessages?: ReadonlyArray<MessageV2.WithParts>
|
||||
// OpenCode-native `Tool.Def[]` parallel to `tools` (AI SDK shape). When
|
||||
// populated alongside `tools`, the LLM-native path forwards definitions to
|
||||
// the model and can dispatch multi-round tool loops without changing the
|
||||
// existing AI SDK path.
|
||||
nativeTools?: ReadonlyArray<OpenCodeTool.Def>
|
||||
}
|
||||

export type StreamRequest = StreamInput & {
@@ -63,7 +104,12 @@ export class Service extends Context.Service<Service, Interface>()("@opencode/LL
const live: Layer.Layer<
  Service,
  never,
  Auth.Service | Config.Service | Provider.Service | Plugin.Service | Permission.Service
  | Auth.Service
  | Config.Service
  | Provider.Service
  | Plugin.Service
  | Permission.Service
  | RequestExecutor.Service
> = Layer.effect(
  Service,
  Effect.gen(function* () {
@@ -72,21 +118,13 @@ const live: Layer.Layer<
    const provider = yield* Provider.Service
    const plugin = yield* Plugin.Service
    const perm = yield* Permission.Service
    // Required by the LLM-native stream path. The default layer wires it on
    // top of `FetchHttpClient.layer`. Yielded here (not inside `runNative`)
    // so the executor instance is shared across every native stream the
    // service hands out.
    const executor = yield* RequestExecutor.Service

    const run = Effect.fn("LLM.run")(function* (input: StreamRequest) {
      const l = log
        .clone()
        .tag("providerID", input.model.providerID)
        .tag("modelID", input.model.id)
        .tag("session.id", input.sessionID)
        .tag("small", (input.small ?? false).toString())
        .tag("agent", input.agent.name)
        .tag("mode", input.agent.mode)
      l.info("stream", {
        modelID: input.model.id,
        providerID: input.model.providerID,
      })

    const prepare = Effect.fn("LLM.prepareStream")(function* (input: StreamRequest) {
      const [language, cfg, item, info] = yield* Effect.all(
        [
          provider.getLanguage(input.model),
@@ -226,24 +264,65 @@ const live: Layer.Layer<
        })
      }

      return { language, cfg, item, system, options, messages, params, headers, tools } satisfies PreparedStream
    })

    const transportHeaders = Effect.fn("LLM.transportHeaders")(function* (
      input: StreamRequest,
      headers: Record<string, string>,
    ) {
      if (input.model.providerID.startsWith("opencode")) {
        return {
          "x-opencode-project": (yield* InstanceState.context).project.id,
          "x-opencode-session": input.sessionID,
          "x-opencode-request": input.user.id,
          "x-opencode-client": Flag.OPENCODE_CLIENT,
          "User-Agent": `opencode/${InstallationVersion}`,
          ...input.model.headers,
          ...headers,
        }
      }
      return {
        "x-session-affinity": input.sessionID,
        ...(input.parentSessionID ? { "x-parent-session-id": input.parentSessionID } : {}),
        "User-Agent": `opencode/${InstallationVersion}`,
        ...input.model.headers,
        ...headers,
      }
    })
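    // Illustrative sketch (not part of the diff): spread order encodes header
    // precedence — call-site `headers` override `input.model.headers`, which
    // override the transport defaults. A plain-object rendering of the
    // non-opencode branch with hypothetical values:
    const precedence = {
      "x-session-affinity": "ses_example", // hypothetical session id
      "User-Agent": "opencode/0.0.0", // hypothetical version
      ...{ "x-custom": "from-model" }, // stands in for input.model.headers
      ...{ "x-custom": "from-request" }, // stands in for the headers argument
    }
    // precedence["x-custom"] === "from-request"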

    const run = Effect.fn("LLM.run")(function* (input: StreamRequest, prepared: PreparedStream) {
      const l = log
        .clone()
        .tag("providerID", input.model.providerID)
        .tag("modelID", input.model.id)
        .tag("session.id", input.sessionID)
        .tag("small", (input.small ?? false).toString())
        .tag("agent", input.agent.name)
        .tag("mode", input.agent.mode)
      l.info("stream", {
        modelID: input.model.id,
        providerID: input.model.providerID,
      })

      // Wire up toolExecutor for DWS workflow models so that tool calls
      // from the workflow service are executed via opencode's tool system
      // and results sent back over the WebSocket.
      if (language instanceof GitLabWorkflowLanguageModel) {
        const workflowModel = language as GitLabWorkflowLanguageModel & {
      if (prepared.language instanceof GitLabWorkflowLanguageModel) {
        const workflowModel: GitLabWorkflowLanguageModel & {
          sessionID?: string
          sessionPreapprovedTools?: string[]
          approvalHandler?: (approvalTools: { name: string; args: string }[]) => Promise<{ approved: boolean }>
        }
          approvalHandler?: ((approvalTools: { name: string; args: string }[]) => Promise<{ approved: boolean; message?: string }>) | null
        } = prepared.language
        workflowModel.sessionID = input.sessionID
        workflowModel.systemPrompt = system.join("\n")
        workflowModel.systemPrompt = prepared.system.join("\n")
        workflowModel.toolExecutor = async (toolName, argsJson, _requestID) => {
          const t = tools[toolName]
          const t = prepared.tools[toolName]
          if (!t || !t.execute) {
            return { result: "", error: `Unknown tool: ${toolName}` }
          }
          try {
            const result = await t.execute!(JSON.parse(argsJson), {
            const result = await t.execute(JSON.parse(argsJson), {
              toolCallId: _requestID,
              messages: input.messages,
              abortSignal: input.abort,
@@ -260,7 +339,7 @@ const live: Layer.Layer<
        }

        const ruleset = Permission.merge(input.agent.permission ?? [], input.permission ?? [])
        workflowModel.sessionPreapprovedTools = Object.keys(tools).filter((name) => {
        workflowModel.sessionPreapprovedTools = Object.keys(prepared.tools).filter((name) => {
          const match = ruleset.findLast((rule) => Wildcard.match(name, rule.permission))
          return !match || match.action !== "ask"
        })
@@ -283,8 +362,13 @@ const live: Layer.Layer<
          })
          const toolPatterns = approvalTools.map((t: { name: string; args: string }) => {
            try {
              const parsed = JSON.parse(t.args) as Record<string, unknown>
              const title = (parsed?.title ?? parsed?.name ?? "") as string
              const parsed = JSON.parse(t.args) as unknown
              const value = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed) ? parsed : {}
              const title = "title" in value && typeof value.title === "string"
                ? value.title
                : "name" in value && typeof value.name === "string"
                  ? value.name
                  : ""
              return title ? `${t.name}: ${title}` : t.name
            } catch {
              return t.name
@@ -313,7 +397,7 @@ const live: Layer.Layer<
          })
      }

      const tracer = cfg.experimental?.openTelemetry
      const tracer = prepared.cfg.experimental?.openTelemetry
        ? Option.getOrUndefined(yield* Effect.serviceOption(OtelTracer.OtelTracer))
        : undefined
      const telemetryTracer = tracer
@@ -329,10 +413,6 @@ const live: Layer.Layer<
          })
        : undefined

      const opencodeProjectID = input.model.providerID.startsWith("opencode")
        ? (yield* InstanceState.context).project.id
        : undefined

      return streamText({
        onError(error) {
          l.error("stream error", {
@@ -341,7 +421,7 @@ const live: Layer.Layer<
        },
        async experimental_repairToolCall(failed) {
          const lower = failed.toolCall.toolName.toLowerCase()
          if (lower !== failed.toolCall.toolName && tools[lower]) {
          if (lower !== failed.toolCall.toolName && prepared.tools[lower]) {
            l.info("repairing tool call", {
              tool: failed.toolCall.toolName,
              repaired: lower,
@@ -360,43 +440,27 @@ const live: Layer.Layer<
            toolName: "invalid",
          }
        },
        temperature: params.temperature,
        topP: params.topP,
        topK: params.topK,
        providerOptions: ProviderTransform.providerOptions(input.model, params.options),
        activeTools: Object.keys(tools).filter((x) => x !== "invalid"),
        tools,
        temperature: prepared.params.temperature,
        topP: prepared.params.topP,
        topK: prepared.params.topK,
        providerOptions: ProviderTransform.providerOptions(input.model, prepared.params.options),
        activeTools: Object.keys(prepared.tools).filter((x) => x !== "invalid"),
        tools: prepared.tools,
        toolChoice: input.toolChoice,
        maxOutputTokens: params.maxOutputTokens,
        maxOutputTokens: prepared.params.maxOutputTokens,
        abortSignal: input.abort,
        headers: {
          ...(input.model.providerID.startsWith("opencode")
            ? {
                "x-opencode-project": opencodeProjectID,
                "x-opencode-session": input.sessionID,
                "x-opencode-request": input.user.id,
                "x-opencode-client": Flag.OPENCODE_CLIENT,
                "User-Agent": `opencode/${InstallationVersion}`,
              }
            : {
                "x-session-affinity": input.sessionID,
                ...(input.parentSessionID ? { "x-parent-session-id": input.parentSessionID } : {}),
                "User-Agent": `opencode/${InstallationVersion}`,
              }),
          ...input.model.headers,
          ...headers,
        },
        headers: yield* transportHeaders(input, prepared.headers),
        maxRetries: input.retries ?? 0,
        messages,
        messages: prepared.messages,
        model: wrapLanguageModel({
          model: language,
          model: prepared.language,
          middleware: [
            {
              specificationVersion: "v3" as const,
              async transformParams(args) {
                if (args.type === "stream") {
                  // @ts-expect-error
                  args.params.prompt = ProviderTransform.message(args.params.prompt, input.model, options)
                  args.params.prompt = ProviderTransform.message(args.params.prompt, input.model, prepared.options)
                }
                return args.params
              },
@@ -404,17 +468,150 @@ const live: Layer.Layer<
          ],
        }),
        experimental_telemetry: {
          isEnabled: cfg.experimental?.openTelemetry,
          isEnabled: prepared.cfg.experimental?.openTelemetry,
          functionId: "session.llm",
          tracer: telemetryTracer,
          metadata: {
            userId: cfg.username ?? "unknown",
            userId: prepared.cfg.username ?? "unknown",
            sessionId: input.sessionID,
          },
        },
      })
    })

    // ----- LLM-native opt-in path -----
    //
    // `runNative` returns the session-shaped Stream when (and only when) the
    // request matches the narrow opt-in profile we've actively wired:
    //
    // - The flag `OPENCODE_EXPERIMENTAL_LLM_NATIVE` is set.
    // - The caller populated `input.nativeMessages` with `MessageV2.WithParts`
    //   (the AI SDK `messages` array isn't enough — the LLM-native bridge
    //   needs the typed parts).
    // - The bridge can route the model to one of the protocols listed in
    //   `NATIVE_PROTOCOLS` (today: Anthropic only).
    // - If tools are present, the caller supplied a native tool definition
    //   for every AI SDK tool key so the native path can dispatch them.
    //
    // Otherwise it returns `undefined` and the caller falls through to the
    // existing AI SDK path. The return shape is deliberately narrow — we are
    // not yet committed to native-by-default for any provider.
    const NATIVE_PROTOCOLS = new Set<ProtocolID>(["anthropic-messages"])
    const NATIVE_ADAPTERS = [
      AnthropicMessages.adapter,
      OpenAIChat.adapter,
      OpenAIResponses.adapter,
      Gemini.adapter,
      OpenAICompatibleChat.adapter,
      BedrockConverse.adapter,
    ]

    const nativeClient = LLMClient.make({
      adapters: NATIVE_ADAPTERS,
      patches: ProviderPatch.defaults,
    })

    const runNative = Effect.fn("LLM.runNative")(function* (input: StreamRequest, prepared: PreparedStream) {
      if (!Flag.OPENCODE_EXPERIMENTAL_LLM_NATIVE) return undefined
      if (!input.nativeMessages || input.nativeMessages.length === 0) return undefined
      if (input.retries && input.retries > 0) return undefined
      if (prepared.cfg.experimental?.openTelemetry) return undefined
      // The native core does not yet carry AI SDK providerOptions. If request
      // preparation produced any, keep exact behavior by falling back.
      if (Object.keys(prepared.params.options).length > 0) return undefined
      // The native dispatcher needs a `Tool.Def` for every AI SDK tool key
      // the model might call. Two failure modes the gate has to catch:
      //
      // 1. AI SDK tools present but `nativeTools` undefined / empty —
      //    caller didn't (or couldn't) supply native shapes.
      // 2. AI SDK tools include a key that's missing from `nativeTools` —
      //    coverage gap. Today this happens with MCP tools (only AI SDK
      //    shape) and the synthesized `StructuredOutput` tool. The
      //    `prompt.ts:resolveTools` call sets `nativeTools: undefined` in
      //    both cases, but check defensively in case a future caller
      //    passes a partial set.
      //
      // Either way fall through so the session takes the AI SDK path
      // unchanged.
      const aiToolKeys = Object.keys(prepared.tools)
      if (aiToolKeys.length > 0) {
        if (input.nativeTools === undefined || input.nativeTools.length === 0) return undefined
        const nativeIDs = new Set(input.nativeTools.map((tool) => tool.id))
        for (const key of aiToolKeys) {
          if (!nativeIDs.has(key)) return undefined
        }
      }

      // Mirror the AI SDK path's permission/user-disabled filter for both
      // the AI SDK record (used as the dispatch table) and the native tool
      // definitions (sent to the model). Without this, the model would see
      // tools that the session has actively disabled.
      const filteredAITools = prepared.tools
      const allowedIds = new Set(Object.keys(filteredAITools))
      const filteredNativeTools = input.nativeTools?.filter((tool) => allowedIds.has(tool.id))

      const llmRequest = yield* LLMNative.request({
        id: input.user.id,
        provider: prepared.item,
        model: input.model,
        system: prepared.system,
        messages: input.nativeMessages,
        tools: filteredNativeTools,
        toolChoice: input.toolChoice,
        generation: {
          maxTokens: prepared.params.maxOutputTokens,
          temperature: prepared.params.temperature,
          topP: prepared.params.topP,
        },
        headers: yield* transportHeaders(input, prepared.headers),
      }).pipe(
        Effect.catchTag("LLMNative.UnsupportedModelError", () => Effect.void),
        Effect.catchTag("LLMNative.UnsupportedContentError", () => Effect.void),
      )
      if (!llmRequest) return undefined
      if (!NATIVE_PROTOCOLS.has(llmRequest.model.protocol)) return undefined

      log.info("native stream", {
        sessionID: input.sessionID,
        modelID: input.model.id,
        providerID: input.model.providerID,
        protocol: llmRequest.model.protocol,
      })

      // Stateful LLMEvent → SessionEvent translator. `map.map(event)` is called
      // per-element, `map.flush()` emits the remaining `*-end` events for any
      // text/reasoning/tool-input parts left open at stream close. The flush
      // stream is built lazily (`Stream.unwrap(Effect.sync(...))`) so it
      // observes the mapper's final state after `flatMap` has consumed every
      // upstream event.
      //
      // The upstream source is one of two paths:
      //
      // - When `nativeTools` is unset (zero-tool sessions), call the LLM
      //   client directly. One model round, single stream, no dispatch.
      // - When `nativeTools` is set, hand both the request and the matching
      //   AI SDK `tools` record to `LLMNativeTools.runWithTools`, which
      //   drives the multi-round loop with streaming dispatch: each
      //   `tool-call` event forks a tool handler fiber, and the
      //   handler's result is injected back into the same stream as a
      //   synthetic `tool-result` event. Long-running tools don't block
      //   subsequent tool-call streaming.
      const map = LLMNativeEvents.mapper()
      const upstream = filteredNativeTools && filteredNativeTools.length > 0
        ? LLMNativeTools.runWithTools({
            client: nativeClient,
            request: llmRequest,
            tools: filteredAITools,
            abort: input.abort,
          })
        : nativeClient.stream(llmRequest)
      return upstream.pipe(
        Stream.flatMap((event) => Stream.fromIterable(map.map(event))),
        Stream.concat(Stream.unwrap(Effect.sync(() => Stream.fromIterable(map.flush())))),
        Stream.provideService(RequestExecutor.Service, executor),
      )
    })
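    // Illustrative contrast (not part of the diff): why the flush stream must
    // be built lazily. The eager form calls `map.flush()` while the pipeline
    // is assembled — before any event has passed through `map.map` — and
    // would flush an empty mapper:
    //
    //   Stream.concat(Stream.fromIterable(map.flush())) // eager: flushes too early
    //   Stream.concat(Stream.unwrap(Effect.sync(() => Stream.fromIterable(map.flush())))) // lazy: flushes at stream end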

    const stream: Interface["stream"] = (input) =>
      Stream.scoped(
        Stream.unwrap(
@@ -424,7 +621,13 @@ const live: Layer.Layer<
              (ctrl) => Effect.sync(() => ctrl.abort()),
            )

            const result = yield* run({ ...input, abort: ctrl.signal })
            const request = { ...input, abort: ctrl.signal }
            const prepared = yield* prepare(request)

            const native = yield* runNative(request, prepared)
            if (native) return native

            const result = yield* run(request, prepared)

            return Stream.fromAsyncIterable(result.fullStream, (e) => (e instanceof Error ? e : new Error(String(e))))
          }),
@@ -443,6 +646,7 @@ export const defaultLayer = Layer.suspend(() =>
    Layer.provide(Config.defaultLayer),
    Layer.provide(Provider.defaultLayer),
    Layer.provide(Plugin.defaultLayer),
    Layer.provide(RequestExecutor.defaultLayer),
  ),
)


@@ -1,6 +1,5 @@
import path from "path"
import os from "os"
import z from "zod"
import * as EffectZod from "@/util/effect-zod"
import { SessionID, MessageID, PartID } from "./schema"
import { MessageV2 } from "./message-v2"
@@ -369,6 +368,14 @@ NOTE: At any point in time through this workflow you should feel free to ask the
  }) {
    using _ = log.time("resolveTools")
    const tools: Record<string, AITool> = {}
    // Opencode-native `Tool.Def[]` collected alongside the AI SDK record so
    // the LLM-native path can advertise the same tools to the model. We
    // populate this from the registry loop only; if any other tool source
    // contributes (MCP, structured-output), we surface `nativeTools:
    // undefined` so callers fall through to the AI SDK path. Keeps the
    // definitions and dispatch tables strictly in sync.
    const nativeTools: Tool.Def[] = []
    let nativeFeasible = true
    const run = yield* runner()
    const promptOps = yield* ops()

@@ -410,6 +417,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the
      providerID: input.model.providerID,
      agent: input.agent,
    })) {
      nativeTools.push(item)
      const schema = ProviderTransform.schema(input.model, EffectZod.toJsonSchema(item.parameters))
      tools[item.id] = tool({
        description: item.description,
@@ -451,6 +459,11 @@ NOTE: At any point in time through this workflow you should feel free to ask the
    for (const [key, item] of Object.entries(yield* mcp.tools())) {
      const execute = item.execute
      if (!execute) continue
      // MCP tools have AI SDK shape only — no opencode `Tool.Def` to feed
      // the LLM-native path's dispatcher. Disqualify the whole batch so
      // sessions with MCP servers stay on the AI SDK path until MCP
      // tooling lands native support.
      nativeFeasible = false

      const schema = yield* Effect.promise(() => Promise.resolve(asSchema(item.inputSchema).jsonSchema))
      const transformed = ProviderTransform.schema(input.model, schema)
@@ -526,7 +539,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the
      tools[key] = item
    }

    return tools
    return { tools, nativeTools: nativeFeasible ? nativeTools : undefined }
  })

  const handleSubtask = Effect.fn("SessionPrompt.handleSubtask")(function* (input: {
@@ -1277,7 +1290,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the
    function* (sessionID: SessionID) {
      const ctx = yield* InstanceState.context
      const slog = elog.with({ sessionID })
      let structured: unknown | undefined
      let structured: unknown
      let step = 0
      const session = yield* sessions.get(sessionID)

@@ -1399,7 +1412,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the
      const lastUserMsg = msgs.findLast((m) => m.info.role === "user")
      const bypassAgentCheck = lastUserMsg?.parts.some((p) => p.type === "agent") ?? false

      const tools = yield* resolveTools({
      const { tools, nativeTools: resolvedNativeTools } = yield* resolveTools({
        agent,
        session,
        model,
@@ -1409,6 +1422,13 @@ NOTE: At any point in time through this workflow you should feel free to ask the
        messages: msgs,
      })

      // Mutable so the structured-output branch can drop it without
      // reaching into `resolveTools`. `nativeTools` is undefined when
      // any tool source can't feed the LLM-native dispatcher (today:
      // MCP). The structured-output branch joins that list because the
      // synthesized `StructuredOutput` tool has no opencode `Tool.Def`.
      let nativeTools = resolvedNativeTools

      if (lastUser.format?.type === "json_schema") {
        tools["StructuredOutput"] = createStructuredOutputTool({
          schema: lastUser.format.schema,
@@ -1416,6 +1436,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the
            structured = output
          },
        })
        nativeTools = undefined
      }

      if (step === 1)
@@ -1458,7 +1479,12 @@ NOTE: At any point in time through this workflow you should feel free to ask the
        parentSessionID: session.parentID,
        system,
        messages: [...modelMsgs, ...(isLastStep ? [{ role: "assistant" as const, content: MAX_STEPS }] : [])],
        // The native bridge consumes MessageV2 history. The AI SDK path
        // appends a synthetic MAX_STEPS assistant ModelMessage below;
        // until native supports that extra shape, fall back for parity.
        nativeMessages: isLastStep ? undefined : msgs,
        tools,
        nativeTools,
        model,
        toolChoice: format.type === "json_schema" ? "required" : undefined,
      })

217 packages/opencode/test/provider/llm-bridge.test.ts (new file)
@@ -0,0 +1,217 @@
import { describe, expect, test } from "bun:test"
import { ProviderLLMBridge } from "../../src/provider/llm-bridge"
import { ModelID, ProviderID } from "../../src/provider/schema"
import { ProviderTest } from "../fake/provider"
import type { Provider } from "../../src/provider/provider"

const model = (input: {
  readonly id: string
  readonly providerID: string
  readonly npm: string
  readonly apiID?: string
  readonly apiURL?: string
  readonly headers?: Record<string, string>
  readonly options?: Record<string, unknown>
  readonly reasoning?: boolean
  readonly toolcall?: boolean
  readonly variants?: Provider.Model["variants"]
}): Provider.Model => {
  const base = ProviderTest.model()
  return ProviderTest.model({
    id: ModelID.make(input.id),
    providerID: ProviderID.make(input.providerID),
    api: { id: input.apiID ?? input.id, url: input.apiURL ?? "", npm: input.npm },
    capabilities: {
      ...base.capabilities,
      reasoning: input.reasoning ?? false,
      toolcall: input.toolcall ?? true,
    },
    limit: { context: 128_000, output: 32_000 },
    options: input.options ?? {},
    headers: input.headers ?? {},
    variants: input.variants ?? {},
  })
}

const provider = (input: Partial<Provider.Info> & Pick<Provider.Info, "id">) =>
  ProviderTest.info({ ...input, models: input.models ?? {} })

describe("ProviderLLMBridge", () => {
  test("maps OpenAI-style providers to Responses", () => {
    const ref = ProviderLLMBridge.toModelRef({
      provider: provider({ id: ProviderID.openai, key: "openai-key" }),
      model: model({ id: "gpt-5", providerID: "openai", npm: "@ai-sdk/openai", reasoning: true, variants: { high: {} } }),
    })

    expect(ref).toMatchObject({
      id: "gpt-5",
      provider: "openai",
      protocol: "openai-responses",
      apiKey: "openai-key",
      limits: { context: 128_000, output: 32_000 },
    })
    expect(ref?.capabilities.reasoning.efforts).toEqual(["high"])
  })

  test("maps Anthropic headers and cache capability", () => {
    const ref = ProviderLLMBridge.toModelRef({
      provider: provider({
        id: ProviderID.anthropic,
        key: "anthropic-key",
        options: { headers: { "anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } },
      }),
      model: model({ id: "claude-sonnet-4-5", providerID: "anthropic", npm: "@ai-sdk/anthropic" }),
    })

    expect(ref).toMatchObject({
      protocol: "anthropic-messages",
      apiKey: "anthropic-key",
      headers: {
        "anthropic-beta": "fine-grained-tool-streaming-2025-05-14",
      },
    })
    expect(ref?.capabilities.cache).toMatchObject({ prompt: true, contentBlocks: true })
  })

  test("maps Gemini API keys", () => {
    const ref = ProviderLLMBridge.toModelRef({
      provider: provider({ id: ProviderID.make("google"), options: { apiKey: "google-key" } }),
      model: model({ id: "gemini-2.5-flash", providerID: "google", npm: "@ai-sdk/google" }),
    })

    expect(ref).toMatchObject({
      protocol: "gemini",
      apiKey: "google-key",
    })
    expect(ref?.capabilities.tools.streamingInput).toBe(false)
  })

  test("maps known OpenAI-compatible provider families", () => {
    const ref = ProviderLLMBridge.toModelRef({
      provider: provider({ id: ProviderID.make("togetherai"), options: { apiKey: "together-key" } }),
      model: model({
        id: "llama",
        apiID: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        providerID: "togetherai",
        npm: "@ai-sdk/togetherai",
      }),
    })

    expect(ref).toMatchObject({
      id: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
      provider: "togetherai",
      protocol: "openai-compatible-chat",
      baseURL: "https://api.together.xyz/v1",
      apiKey: "together-key",
    })
  })

  test("maps GitHub Copilot through its provider resolver", () => {
    const ref = ProviderLLMBridge.toModelRef({
      provider: provider({ id: ProviderID.make("github-copilot"), key: "copilot-key" }),
      model: model({ id: "gpt-5", providerID: "github-copilot", npm: "@ai-sdk/github-copilot" }),
    })

    expect(ref).toMatchObject({
      provider: "github-copilot",
      protocol: "openai-responses",
      apiKey: "copilot-key",
    })
  })

  test("maps Azure to Responses with resource URL and api-version query", () => {
    const ref = ProviderLLMBridge.toModelRef({
      provider: provider({
        id: ProviderID.make("azure"),
        key: "azure-key",
        options: { resourceName: "opencode-test", apiVersion: "2025-04-01-preview" },
      }),
      model: model({ id: "gpt-5", providerID: "azure", npm: "@ai-sdk/azure" }),
    })

    expect(ref).toMatchObject({
      provider: "azure",
      protocol: "openai-responses",
      baseURL: "https://opencode-test.openai.azure.com/openai/v1",
      apiKey: "azure-key",
      queryParams: { "api-version": "2025-04-01-preview" },
    })
  })

  test("maps Azure completion URL opt-in to Chat Completions", () => {
    const ref = ProviderLLMBridge.toModelRef({
      provider: provider({ id: ProviderID.make("azure"), key: "azure-key", options: { resourceName: "opencode-test" } }),
      model: model({ id: "gpt-4.1", providerID: "azure", npm: "@ai-sdk/azure", options: { useCompletionUrls: true } }),
    })

    expect(ref).toMatchObject({
      provider: "azure",
      protocol: "openai-chat",
      baseURL: "https://opencode-test.openai.azure.com/openai/v1",
      queryParams: { "api-version": "v1" },
    })
  })

  test("keeps provider and model overrides ahead of defaults", () => {
    const ref = ProviderLLMBridge.toModelRef({
      provider: provider({
        id: ProviderID.make("cerebras"),
        key: "cerebras-key",
        options: {
          baseURL: "https://custom.cerebras.test/v1",
          headers: { "X-Cerebras-3rd-Party-Integration": "opencode" },
        },
      }),
      model: model({
        id: "cerebras-model",
        providerID: "cerebras",
        npm: "@ai-sdk/cerebras",
        headers: { "x-model-header": "1" },
      }),
    })

    expect(ref).toMatchObject({
      protocol: "openai-compatible-chat",
      baseURL: "https://custom.cerebras.test/v1",
      apiKey: "cerebras-key",
      headers: {
        "X-Cerebras-3rd-Party-Integration": "opencode",
        "x-model-header": "1",
      },
    })
  })

  test("maps Amazon Bedrock to Converse with bearer auth and content-block cache", () => {
    const ref = ProviderLLMBridge.toModelRef({
      provider: provider({ id: ProviderID.make("amazon-bedrock"), key: "bedrock-bearer-key" }),
      model: model({
        id: "anthropic.claude-3-5-sonnet-20240620-v1:0",
        providerID: "amazon-bedrock",
        npm: "@ai-sdk/amazon-bedrock",
      }),
    })

    expect(ref).toMatchObject({
      protocol: "bedrock-converse",
      apiKey: "bedrock-bearer-key",
    })
    // Bedrock Converse supports both prompt-level and positional content-block
    // cache markers (cachePoint blocks landed in 9d7d518ac).
    expect(ref?.capabilities.cache).toMatchObject({ prompt: true, contentBlocks: true })
  })

  test("leaves undecided provider packages unmapped", () => {
    const unsupported = [
      ["mistral", "mistral-large", "@ai-sdk/mistral"],
    ] as const

    expect(
      unsupported.map(([providerID, modelID, npm]) =>
        ProviderLLMBridge.toModelRef({
          provider: provider({ id: ProviderID.make(providerID), key: `${providerID}-key` }),
          model: model({ id: modelID, providerID, npm }),
        }),
      ),
    ).toEqual([undefined])
  })
})
84 packages/opencode/test/session/llm-native-events.test.ts (new file)
@@ -0,0 +1,84 @@
import { describe, expect, test } from "bun:test"
import { LLM, type LLMEvent } from "@opencode-ai/llm"
import { LLMNativeEvents } from "../../src/session/llm-native-events"

const types = (events: ReadonlyArray<{ readonly type: string }>) => events.map((event) => event.type)

describe("LLMNativeEvents", () => {
  test("synthesizes text and reasoning boundaries around native deltas", () => {
    const events = LLMNativeEvents.toSessionEvents([
      { type: "request-start", id: "req_1", model: LLM.model({ id: "gpt-5", provider: "openai", protocol: "openai-responses" }) },
      { type: "step-start", index: 0 },
      { type: "text-delta", text: "Hello" },
      { type: "text-delta", text: "!" },
      { type: "reasoning-delta", text: "Thinking" },
      { type: "request-finish", reason: "stop", usage: { inputTokens: 5, outputTokens: 2, totalTokens: 7 } },
    ] satisfies ReadonlyArray<LLMEvent>)

    expect(types(events)).toEqual([
      "start",
      "start-step",
      "text-start",
      "text-delta",
      "text-delta",
      "reasoning-start",
      "reasoning-delta",
      "text-end",
      "reasoning-end",
      "finish-step",
      "finish",
    ])
    expect(events.filter((event) => event.type === "text-delta").map((event) => event.text)).toEqual(["Hello", "!"])
    expect(events.find((event) => event.type === "finish-step")).toMatchObject({
      finishReason: "stop",
      usage: { inputTokens: 5, outputTokens: 2, totalTokens: 7 },
    })
  })

  test("creates pending tool state before native tool-call events", () => {
    const events = LLMNativeEvents.toSessionEvents([
      { type: "tool-input-delta", id: "call_1", name: "lookup", text: '{"query"' },
      { type: "tool-input-delta", id: "call_1", name: "lookup", text: ':"weather"}' },
      { type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } },
    ] satisfies ReadonlyArray<LLMEvent>)

    expect(types(events)).toEqual([
      "tool-input-start",
      "tool-input-delta",
      "tool-input-delta",
      "tool-input-end",
      "tool-call",
    ])
    expect(events.find((event) => event.type === "tool-call")).toMatchObject({
      toolCallId: "call_1",
      toolName: "lookup",
      input: { query: "weather" },
    })
  })

  test("maps native tool results and errors into processor events", () => {
    const events = LLMNativeEvents.toSessionEvents([
      { type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } },
      { type: "tool-result", id: "call_1", name: "lookup", result: { type: "json", value: { forecast: "sunny" } } },
      { type: "tool-error", id: "call_2", name: "lookup", message: "bad input" },
      { type: "tool-result", id: "call_3", name: "lookup", result: { type: "error", value: "provider failed" } },
    ] satisfies ReadonlyArray<LLMEvent>)

    expect(events.find((event) => event.type === "tool-result")).toMatchObject({
      toolCallId: "call_1",
      output: { title: "", metadata: {}, output: '{"forecast":"sunny"}' },
    })
    expect(events.filter((event) => event.type === "tool-error")).toEqual([
      { type: "tool-error", toolCallId: "call_2", toolName: "lookup", input: {}, error: "bad input" },
      { type: "tool-error", toolCallId: "call_3", toolName: "lookup", input: {}, error: "provider failed" },
    ])
  })

  test("maps provider errors into fatal processor errors", () => {
    const events = LLMNativeEvents.toSessionEvents([{ type: "provider-error", message: "rate limited", retryable: true }])

    expect(events).toHaveLength(1)
    expect(events[0].type).toBe("error")
    if (events[0].type === "error") expect(events[0].error).toEqual(new Error("rate limited"))
  })
})
341 packages/opencode/test/session/llm-native-stream.test.ts (new file)
@@ -0,0 +1,341 @@
import { describe, expect } from "bun:test"
import {
  AnthropicMessages,
  BedrockConverse,
  Gemini,
  LLMClient,
  OpenAIChat,
  OpenAICompatibleChat,
  OpenAIResponses,
  ProviderPatch,
  RequestExecutor,
} from "@opencode-ai/llm"
import { Effect, Layer, Ref, Schema, Stream } from "effect"
import { HttpClient, HttpClientResponse } from "effect/unstable/http"
import { tool, jsonSchema } from "ai"
import { ModelID, ProviderID } from "../../src/provider/schema"
import { MessageID, PartID, SessionID } from "../../src/session/schema"
import { LLMNative } from "../../src/session/llm-native"
import { LLMNativeEvents } from "../../src/session/llm-native-events"
import { LLMNativeTools } from "../../src/session/llm-native-tools"
import { ProviderTest } from "../fake/provider"
import { testEffect } from "../lib/effect"
import type { MessageV2 } from "../../src/session/message-v2"
import type { Provider } from "../../src/provider/provider"
import type { Tool } from "../../src/tool/tool"

// Inline HTTP layer that returns a single fixed body. Mirrors the
// `fixedResponse` helper in `packages/llm/test/lib/http.ts` — duplicated here
// rather than imported across packages so this test stays self-contained.
const fixedResponse = (body: BodyInit, init: ResponseInit = { headers: { "content-type": "text/event-stream" } }) =>
  RequestExecutor.layer.pipe(
    Layer.provide(
      Layer.succeed(
        HttpClient.HttpClient,
        HttpClient.make((request) =>
          Effect.succeed(HttpClientResponse.fromWeb(request, new Response(body, init))),
        ),
      ),
    ),
  )

// Scripted multi-response HTTP layer. Each request consumes the next body in
// order; the final body repeats if more requests arrive. Mirrors the
// `scriptedResponses` helper in `packages/llm/test/lib/http.ts`.
const scriptedResponses = (bodies: ReadonlyArray<BodyInit>, init: ResponseInit = { headers: { "content-type": "text/event-stream" } }) =>
  RequestExecutor.layer.pipe(
    Layer.provide(
      Layer.unwrap(
        Effect.gen(function* () {
          const cursor = yield* Ref.make(0)
          return Layer.succeed(
            HttpClient.HttpClient,
            HttpClient.make((request) =>
              Effect.gen(function* () {
                const index = yield* Ref.getAndUpdate(cursor, (n) => n + 1)
                const body = bodies[index] ?? bodies[bodies.length - 1]
                return HttpClientResponse.fromWeb(request, new Response(body, init))
              }),
            ),
          )
        }),
      ),
    ),
  )

// Encode an Anthropic SSE body. Each event becomes a `data:` line; the codec
// also expects `event:` lines but the package's SSE framing only reads the
// data field.
const sseBody = (events: ReadonlyArray<unknown>) =>
  events.map((event) => `data: ${JSON.stringify(event)}\n\n`).join("") + "data: [DONE]\n\n"
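// Illustrative only (not part of the diff): a single event encodes to two
// SSE frames, the JSON payload and the terminator —
//   sseBody([{ type: "message_stop" }])
//   // => 'data: {"type":"message_stop"}\n\ndata: [DONE]\n\n'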

const sessionID = SessionID.descending()

const anthropicModel = (override: Partial<Provider.Model> = {}): Provider.Model =>
  ProviderTest.model({
    id: ModelID.make("claude-sonnet-4-5"),
    providerID: ProviderID.make("anthropic"),
    api: { id: "claude-sonnet-4-5", url: "https://api.anthropic.com/v1", npm: "@ai-sdk/anthropic" },
    ...override,
  })

const userPart = (messageID: MessageID, text: string): MessageV2.TextPart => ({
  id: PartID.ascending(),
  sessionID,
  messageID,
  type: "text",
  text,
})

const userMessage = (mdl: Provider.Model, id: MessageID, parts: MessageV2.Part[]): MessageV2.WithParts => ({
  info: {
    id,
    sessionID,
    role: "user",
    time: { created: 1 },
    agent: "build",
    model: { providerID: mdl.providerID, modelID: mdl.id },
  },
  parts,
})

// What `runNative` builds. Kept in sync with `session/llm.ts`'s
// NATIVE_ADAPTERS list — if a protocol is added there, add it here.
const adapters = [
  AnthropicMessages.adapter,
  OpenAIChat.adapter,
  OpenAIResponses.adapter,
  Gemini.adapter,
  OpenAICompatibleChat.adapter,
  BedrockConverse.adapter,
]

const it = testEffect(Layer.empty)

describe("LLMNative stream wire-up (audit gap #4 phase 1)", () => {
  it.effect("converts an Anthropic SSE response into session events via the LLMNative path", () =>
    Effect.gen(function* () {
      const mdl = anthropicModel()
      const provider = ProviderTest.info({ id: ProviderID.make("anthropic"), key: "anthropic-key" }, mdl)
      const userID = MessageID.ascending()

      const llmRequest = yield* LLMNative.request({
        id: "smoke-test",
        provider,
        model: mdl,
        system: ["You are concise."],
        messages: [userMessage(mdl, userID, [userPart(userID, "Say hello.")])],
      })

      const client = LLMClient.make({ adapters, patches: ProviderPatch.defaults })
      const map = LLMNativeEvents.mapper()

      const body = sseBody([
        { type: "message_start", message: { usage: { input_tokens: 5 } } },
        { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } },
        { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Hello" } },
        { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "!" } },
        { type: "content_block_stop", index: 0 },
        { type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 2 } },
        { type: "message_stop" },
      ])

      const events = yield* client.stream(llmRequest).pipe(
        Stream.flatMap((event) => Stream.fromIterable(map.map(event))),
        Stream.concat(Stream.unwrap(Effect.sync(() => Stream.fromIterable(map.flush())))),
        Stream.runCollect,
        Effect.provide(fixedResponse(body)),
      )

      const collected = Array.from(events)

      // The mapper synthesizes text-start on first text-delta, then closes
      // open parts at finish. Assert key milestones rather than the full
      // shape (the AI SDK event vocabulary has a lot of boilerplate fields
      // populated by `LLMNativeEvents` that we don't want to over-constrain).
      const textDelta = collected.find((event) => event.type === "text-delta")
      expect(textDelta).toMatchObject({ type: "text-delta", text: "Hello" })

      const textStart = collected.findIndex((event) => event.type === "text-start")
      const firstDelta = collected.findIndex((event) => event.type === "text-delta")
      expect(textStart).toBeGreaterThanOrEqual(0)
      expect(textStart).toBeLessThan(firstDelta)

      const finishStep = collected.find((event) => event.type === "finish-step")
      expect(finishStep).toMatchObject({ finishReason: "stop" })

      const finish = collected.find((event) => event.type === "finish")
      expect(finish).toMatchObject({
        finishReason: "stop",
        totalUsage: { inputTokens: 5, outputTokens: 2, totalTokens: 7 },
      })

      // No tool events on a text-only happy path.
      expect(collected.some((event) => event.type === "tool-call")).toBe(false)
      expect(collected.some((event) => event.type === "error")).toBe(false)
    }),
  )

  // Phase 2 step 2b: drives the streaming-dispatch loop end-to-end. The
  // scripted Anthropic backend replies in two rounds — round 1 is a tool
  // call, round 2 is text after the tool result feeds back. Asserts that
  // `runWithTools` (a) forks the AI SDK execute when the `tool-call` event
  // arrives, (b) injects a synthetic `tool-result` event into the same
  // stream, (c) issues a continuation request with the tool result in
  // history, and (d) the stream concludes with the second-round text.
  it.effect("dispatches a tool call mid-stream and continues the conversation", () =>
    Effect.gen(function* () {
      const mdl = anthropicModel()
      const lookupParameters = Schema.Struct({
        query: Schema.String.annotate({ description: "Search query" }),
      })
      const lookupTool: Tool.Def<typeof lookupParameters> = {
        id: "lookup",
        description: "Lookup project data",
        parameters: lookupParameters,
        execute: () => Effect.succeed({ title: "Weather lookup", metadata: {}, output: '{"forecast":"sunny"}' }),
      }

      // AI SDK side: the same tool wrapped so `tool.execute(args, opts)`
      // resolves with the same opencode `ExecuteResult` shape the live
      // `prompt.ts:resolveTools` would produce. The dispatcher inside
      // `runWithTools` calls this; the synthetic `tool-result` LLM event
      // carries the result back into the stream.
      const aiTool = tool({
        description: "Lookup project data",
        inputSchema: jsonSchema({
          type: "object",
          properties: { query: { type: "string", description: "Search query" } },
          required: ["query"],
        }),
        execute: async () => ({
          title: "Weather lookup",
          metadata: {},
          output: '{"forecast":"sunny"}',
        }),
      })

      const userID = MessageID.ascending()
      const llmRequest = yield* LLMNative.request({
        id: "smoke-tool-loop",
        provider: ProviderTest.info({ id: ProviderID.make("anthropic"), key: "anthropic-key" }, mdl),
        model: mdl,
        system: ["Be concise."],
        messages: [userMessage(mdl, userID, [userPart(userID, "What is the weather?")])],
        tools: [lookupTool],
      })

      // Round 1: model issues `lookup` tool call.
      const round1 = sseBody([
        { type: "message_start", message: { usage: { input_tokens: 5 } } },
        { type: "content_block_start", index: 0, content_block: { type: "tool_use", id: "call_1", name: "lookup" } },
        { type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"query"' } },
        { type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: ':"weather"}' } },
        { type: "content_block_stop", index: 0 },
        { type: "message_delta", delta: { stop_reason: "tool_use" }, usage: { output_tokens: 1 } },
        { type: "message_stop" },
      ])
      // Round 2: model replies with text after seeing the tool result.
      const round2 = sseBody([
        { type: "message_start", message: { usage: { input_tokens: 12 } } },
        { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } },
        { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "It is sunny." } },
        { type: "content_block_stop", index: 0 },
        { type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 4 } },
        { type: "message_stop" },
      ])

      const client = LLMClient.make({ adapters, patches: ProviderPatch.defaults })
      const map = LLMNativeEvents.mapper()

      const events = yield* LLMNativeTools.runWithTools({
        client,
        request: llmRequest,
        tools: { lookup: aiTool },
        abort: new AbortController().signal,
      }).pipe(
        Stream.flatMap((event) => Stream.fromIterable(map.map(event))),
        Stream.concat(Stream.unwrap(Effect.sync(() => Stream.fromIterable(map.flush())))),
        Stream.runCollect,
        Effect.provide(scriptedResponses([round1, round2])),
      )

      const collected = Array.from(events)

      // Round 1: tool call streams, dispatcher fires, synthetic tool-result lands.
      const toolCall = collected.find((event) => event.type === "tool-call")
      expect(toolCall).toMatchObject({
        type: "tool-call",
        toolCallId: "call_1",
        toolName: "lookup",
        input: { query: "weather" },
      })

      const toolResult = collected.find((event) => event.type === "tool-result")
      expect(toolResult).toMatchObject({
        type: "tool-result",
        toolCallId: "call_1",
        toolName: "lookup",
        output: { title: "Weather lookup", output: '{"forecast":"sunny"}' },
      })

      // Round 2: text-delta arrives after the tool result.
      const round2Text = collected.find((event) => event.type === "text-delta")
      expect(round2Text).toMatchObject({ type: "text-delta", text: "It is sunny." })

      // Final finish should be `stop`, not `tool-calls` (tool loop terminated).
      const finalFinish = [...collected].reverse().find((event) => event.type === "finish")
      expect(finalFinish).toMatchObject({ finishReason: "stop" })

      // No errors leaked through.
      expect(collected.some((event) => event.type === "error")).toBe(false)
    }),
  )

  // Phase 2 step 2a: verifies a tool-bearing `nativeTools` array reaches the
  // wire as Anthropic `tools[]` blocks. The model in this fixture answers with
  // plain text instead of issuing a tool call (we don't yet have dispatch).
  // This proves tool definitions plumb through `LLMNative.request` →
  // `LLMRequest` → adapter `prepare` → wire body.
  it.effect("forwards nativeTools to the wire as Anthropic tools when the gate is open", () =>
    Effect.gen(function* () {
      const mdl = anthropicModel()
      const provider = ProviderTest.info({ id: ProviderID.make("anthropic"), key: "anthropic-key" }, mdl)
      const userID = MessageID.ascending()

      const lookupParameters = Schema.Struct({
        query: Schema.String.annotate({ description: "Search query" }),
      })
      const lookupTool: Tool.Def<typeof lookupParameters> = {
        id: "lookup",
        description: "Lookup project data",
        parameters: lookupParameters,
        execute: () => Effect.succeed({ title: "", metadata: {}, output: "" }),
      }

      const llmRequest = yield* LLMNative.request({
        id: "smoke-tools",
        provider,
        model: mdl,
        system: ["You are concise."],
        messages: [userMessage(mdl, userID, [userPart(userID, "Look something up.")])],
        tools: [lookupTool],
      })

      const prepared = yield* LLMClient.make({ adapters, patches: ProviderPatch.defaults }).prepare(llmRequest)
      expect(prepared.target).toMatchObject({
        tools: [
          {
            name: "lookup",
            description: "Lookup project data",
            input_schema: {
              type: "object",
              properties: { query: { type: "string", description: "Search query" } },
              required: ["query"],
            },
          },
        ],
      })
    }),
  )
})

1173 packages/opencode/test/session/llm-native.test.ts (new file)
File diff suppressed because it is too large.
@@ -14,8 +14,9 @@ import { Filesystem } from "@/util/filesystem"
import { tmpdir } from "../fixture/fixture"
import type { Agent } from "../../src/agent/agent"
import { MessageV2 } from "../../src/session/message-v2"
import { SessionID, MessageID } from "../../src/session/schema"
import { SessionID, MessageID, PartID } from "../../src/session/schema"
import { AppRuntime } from "../../src/effect/app-runtime"
import { Flag } from "@opencode-ai/core/flag/flag"

async function getModel(providerID: ProviderID, modelID: ModelID) {
  return AppRuntime.runPromise(
@@ -909,6 +910,140 @@ describe("session.llm.stream", () => {
    })
  })

  test("falls back to AI SDK when native message conversion is unsupported", async () => {
    const server = state.server
    if (!server) {
      throw new Error("Server not initialized")
    }

    const source = await loadFixture("anthropic", "claude-opus-4-6")
    const model = source.model
    const chunks = [
      {
        type: "message_start",
        message: {
          id: "msg-native-fallback",
          model: model.id,
          usage: {
            input_tokens: 3,
            cache_creation_input_tokens: null,
            cache_read_input_tokens: null,
          },
        },
      },
      { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } },
      { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Hello" } },
      { type: "content_block_stop", index: 0 },
      {
        type: "message_delta",
        delta: { stop_reason: "end_turn", stop_sequence: null, container: null },
        usage: {
          input_tokens: 3,
          output_tokens: 2,
          cache_creation_input_tokens: null,
          cache_read_input_tokens: null,
        },
      },
      { type: "message_stop" },
    ]
    const request = waitRequest("/messages", createEventResponse(chunks))
    const originalNative = Flag.OPENCODE_EXPERIMENTAL_LLM_NATIVE
    Flag.OPENCODE_EXPERIMENTAL_LLM_NATIVE = true

    try {
      await using tmp = await tmpdir({
        init: async (dir) => {
          await Bun.write(
            path.join(dir, "opencode.json"),
            JSON.stringify({
              $schema: "https://opencode.ai/config.json",
              enabled_providers: ["anthropic"],
              provider: {
                anthropic: {
                  name: "Anthropic",
                  env: ["ANTHROPIC_API_KEY"],
                  npm: "@ai-sdk/anthropic",
                  api: "https://api.anthropic.com/v1",
                  models: {
                    [model.id]: model,
                  },
                  options: {
                    apiKey: "test-anthropic-key",
                    baseURL: `${server.url.origin}/v1`,
                  },
                },
              },
            }),
          )
        },
      })

      await Instance.provide({
        directory: tmp.path,
        fn: async () => {
          const resolved = await getModel(ProviderID.make("anthropic"), ModelID.make(model.id))
          const sessionID = SessionID.make("session-test-native-fallback")
          const agent = {
            name: "test",
            mode: "primary",
            options: {},
            permission: [{ permission: "*", pattern: "*", action: "allow" }],
          } satisfies Agent.Info
          const user = {
            id: MessageID.make("user-native-fallback"),
            sessionID,
            role: "user",
            time: { created: Date.now() },
            agent: agent.name,
            model: { providerID: ProviderID.make("anthropic"), modelID: resolved.id },
          } satisfies MessageV2.User
          const nativeMessageID = MessageID.ascending()

          await drain({
            user,
            sessionID,
            model: resolved,
            agent,
            system: ["You are a helpful assistant."],
            messages: [{ role: "user", content: "Hello" }],
            nativeMessages: [
              {
                info: {
                  id: nativeMessageID,
                  sessionID,
                  role: "user",
                  time: { created: 1 },
                  agent: agent.name,
                  model: { providerID: ProviderID.make("anthropic"), modelID: resolved.id },
                },
                parts: [
                  {
                    id: PartID.ascending(),
                    sessionID,
                    messageID: nativeMessageID,
                    type: "step-start",
                  },
                ],
              },
            ],
            tools: {},
          })

          const capture = await request
          expect(capture.url.pathname.endsWith("/messages")).toBe(true)
          expect(capture.body.messages).toEqual([
            {
              role: "user",
              content: [{ type: "text", text: "Hello", cache_control: { type: "ephemeral" } }],
            },
          ])
        },
      })
    } finally {
      Flag.OPENCODE_EXPERIMENTAL_LLM_NATIVE = originalNative
    }
  })

  test("sends anthropic tool_use blocks with tool_result immediately after them", async () => {
    const server = state.server
    if (!server) {