Compare commits

..

183 Commits

Author SHA1 Message Date
Kit Langton
6e23e56a49 simplify(llm): trim native runtime overhead 2026-05-03 10:13:09 -04:00
Kit Langton
b7930c75f4 simplify(llm): share tagged schema helper 2026-05-03 09:30:58 -04:00
Kit Langton
195e0bb0c3 simplify(llm): define events with schema tags 2026-05-03 09:14:37 -04:00
Kit Langton
de79fd7306 fix(test): use instance helper in native fallback test 2026-05-03 08:50:17 -04:00
Kit Langton
1caeaf70bb Merge remote-tracking branch 'origin/dev' into HEAD 2026-05-03 08:48:44 -04:00
Shoubhit Dash
d1f597b5b5 fix(vcs): avoid unbounded diff memory usage (#25581) 2026-05-03 17:49:46 +05:30
Dax Raad
8299fb3e2b ignore: remove triage-unassigned.ts script
This script was used to batch-triage open GitHub issues without assignees.
Removing as the triage workflow has evolved and this batch approach is no longer needed.
2026-05-03 01:59:03 -04:00
Dax Raad
4f7f90133d ci: stop sending daily community recap notifications 2026-05-03 01:54:32 -04:00
Dax Raad
b205e104f6 ci: remove vouch-based contributor filtering workflows
Removes the automated vouch system that filtered issues and PRs from non-vouched users. This simplifies the contribution process by removing the requirement for maintainers to manually vouch contributors before they can participate.
2026-05-03 01:54:32 -04:00
Dax Raad
252e2f98e6 ci: remove automatic labels from GitHub issue templates to allow manual triage 2026-05-03 01:54:32 -04:00
opencode-agent[bot]
e2afdc1202 chore: generate 2026-05-03 05:22:22 +00:00
Dax Raad
a08e4c9651 core: simplify triage workflow to focus on issue ownership
Switch triage agent to gpt-5.4-nano for faster issue assignment. Remove label
management from the triage tool so it only assigns owners based on team
ownership rules. This reduces noise in the issue tracker and ensures issues
get to the right team member immediately without unnecessary labels.

Update team structures to reflect current ownership and add script for
processing unassigned issues.
2026-05-03 01:21:17 -04:00
Dax Raad
7ccab8d272 core: update triage agent to use qwen3.6-plus model for improved response quality 2026-05-03 01:10:14 -04:00
Dax Raad
fc57eb3b8e ci 2026-05-03 01:05:36 -04:00
Dax
9179bafd54 Add debug info command (#25550) 2026-05-03 05:04:52 +00:00
Kit Langton
2df8eda8a3 fix(cli): bridge Instance.current ALS in effectCmd handlers (regression from #25522) (#25546) 2026-05-03 04:24:33 +00:00
Kit Langton
9065d79a9a fix(llm): preserve native protocol state 2026-05-03 00:20:50 -04:00
Kit Langton
bd32252a7e refactor(cli/providers): Stage 4 — drop inline AppRuntime.runPromise calls (#25532) 2026-05-02 23:42:40 -04:00
Kit Langton
1717d636a2 refactor(cli/mcp+agent): Stage 4 — drop AppRuntime.runPromise bridges (#25530) 2026-05-02 23:40:59 -04:00
Aiden Cline
8e016b4703 fix: regression w/ auth login where stderr was ignored instead of inherited (#25529) 2026-05-02 22:36:02 -05:00
opencode-agent[bot]
b89d48a2a4 chore: update nix node_modules hashes 2026-05-03 03:25:46 +00:00
Dax
33312bfd1b fix(session): encode v2 session responses (#25528) 2026-05-03 03:24:46 +00:00
opencode-agent[bot]
3f1ce36418 chore: generate 2026-05-03 03:23:47 +00:00
Kit Langton
0e13279545 refactor(cli): convert agent / providers / mcp to effectCmd (#25525) 2026-05-02 23:22:44 -04:00
Kit Langton
5f03d892c0 fix(httpapi): pagination Link header echoes request host (#25527) 2026-05-02 23:19:33 -04:00
Kit Langton
bdabb102fe refactor(cli/stats): Stage 4 — fully Effect-native body (#25523) 2026-05-02 23:08:26 -04:00
Kit Langton
a79a6594b0 chore: bump Effect beta (#25524) 2026-05-02 23:08:13 -04:00
opencode-agent[bot]
a3d282a4c2 chore: generate 2026-05-03 03:04:40 +00:00
Kit Langton
db24f89313 refactor(cli): convert mcp list, auth, auth list, logout to effectCmd (#25521) 2026-05-03 03:03:32 +00:00
opencode-agent[bot]
31cb0bfa4f chore: generate 2026-05-03 02:54:20 +00:00
Kit Langton
af9fdf0a1c refactor(cli): convert github subcommands to effectCmd (#25522) 2026-05-02 22:53:20 -04:00
Youssef Achy
be88cd5cb9 chore(opencode): exclude .map files from CLI binary build (#25500) 2026-05-02 22:52:32 -04:00
Luke Parker
b4cc7d13b6 fix(desktop): limit zoom handler to zoom keys (#25516) 2026-05-03 02:44:52 +00:00
Aiden Cline
0ba013f8de chore: rm log statement (#25470) 2026-05-02 21:43:48 -05:00
Kit Langton
0956b15c52 refactor(acp): drop async from synchronous ACP.init (#25520) 2026-05-02 22:38:44 -04:00
opencode-agent[bot]
61150f6391 chore: generate 2026-05-03 02:36:41 +00:00
Kit Langton
7409dcc6bd refactor(cli): convert run command to effectCmd (#25519) 2026-05-02 22:35:20 -04:00
Kit Langton
2829943ad1 refactor(cli): convert debug wait, agent list, acp to effectCmd (#25518) 2026-05-02 22:31:20 -04:00
Kit Langton
c4311dda31 feat(cli): allow effectCmd instance to be a function of args (#25517) 2026-05-03 02:27:41 +00:00
Kit Langton
ad05a46d74 refactor(lifecycle): bootstrap as pure orchestration (#25510) 2026-05-02 22:26:54 -04:00
opencode-agent[bot]
a6cadba814 chore: generate 2026-05-03 02:10:52 +00:00
Dax
a3bc5d35b0 Refactor v2 session events as schemas (#24512) 2026-05-02 22:09:48 -04:00
Kit Langton
1409a0715c refactor(cli): convert web + account to effectCmd (instance: false) (#25512) 2026-05-02 21:59:35 -04:00
Kit Langton
e98c291866 feat(cli): add instance: false opt-out to effectCmd (#25507) 2026-05-03 01:44:06 +00:00
Kit Langton
e709dc34fb feat: default HTTP API backend to on for dev/beta channels 2026-05-02 20:43:23 -04:00
opencode-agent[bot]
9293cddb3a chore: generate 2026-05-03 00:43:16 +00:00
Kit Langton
68b3448b09 refactor(cli): drop redundant explicit Effect.ensuring(store.dispose) (#25503) 2026-05-02 20:42:09 -04:00
opencode-agent[bot]
80f2b13a55 chore: generate 2026-05-03 00:40:21 +00:00
Kit Langton
7d91d3b1ed Normalize instance lifecycle wiring (#25501) 2026-05-02 20:39:20 -04:00
opencode-agent[bot]
a6464062b7 chore: generate 2026-05-03 00:32:24 +00:00
Kit Langton
fd01dc9c89 test(httpapi): add route exerciser 2026-05-02 20:31:21 -04:00
opencode-agent[bot]
d10fb88b66 chore: generate 2026-05-03 00:10:53 +00:00
Luke Parker
6b68b1020e docs: clarify LSP and formatter opt-in config (#25502) 2026-05-03 00:09:50 +00:00
Kit Langton
85bb9007ba feat(cli): auto-dispose InstanceContext after effectCmd handlers (#25481) 2026-05-02 19:54:13 -04:00
opencode-agent[bot]
9bef88e3b0 chore: generate 2026-05-02 23:34:40 +00:00
Kit Langton
f98053c34e fix(instance): run bootstrap from instance store (#25475) 2026-05-02 19:33:38 -04:00
opencode-agent[bot]
36007aecf4 chore: generate 2026-05-02 23:23:53 +00:00
Kit Langton
4de44bbbef refactor(cli): convert debug subcommands to effectCmd (#25479) 2026-05-02 19:22:51 -04:00
opencode-agent[bot]
9d03d4419e chore: generate 2026-05-02 23:20:15 +00:00
Kit Langton
7ab1c1c74a refactor(cli): convert debug agent command to effectCmd (#25485) 2026-05-02 19:19:06 -04:00
Luke Parker
3f459819ba feat: refactor bash tool with shell-aware prompts for bash, pwsh+powershell, and cmd (#20039) 2026-05-03 09:18:48 +10:00
Kit Langton
1986a6e817 refactor(cli): convert session subcommands to effectCmd (#25483) 2026-05-02 18:15:28 -04:00
opencode-agent[bot]
dfe1325fca chore: generate 2026-05-02 22:02:14 +00:00
Kit Langton
c1686c6ddc refactor(cli): convert stats command to effectCmd (#25474) 2026-05-02 18:01:06 -04:00
Kit Langton
79b6ce5db4 refactor(cli): convert import command to effectCmd (#25467) 2026-05-02 21:56:32 +00:00
Kit Langton
0c816eb4b1 refactor(cli): convert plugin command to effectCmd (#25473) 2026-05-02 17:55:13 -04:00
Kit Langton
e318e173d8 refactor(cli): convert export command to effectCmd (#25471) 2026-05-02 17:45:41 -04:00
opencode-agent[bot]
b314781a1a chore: generate 2026-05-02 21:02:46 +00:00
Kit Langton
8396d6b016 refactor(cli): convert pr command to effectCmd (#25465) 2026-05-02 17:01:46 -04:00
opencode
43e20874f4 sync release versions for v1.14.33 2026-05-02 19:53:06 +00:00
opencode-agent[bot]
c444e971b0 chore: generate 2026-05-02 19:27:24 +00:00
HyeokjaeLee
430bde9e9b fix(instance): restore InstanceBootstrap init parameter for non-Effec… (#25449)
Co-authored-by: Dax Raad <d@ironbay.co>
2026-05-02 15:26:30 -04:00
Kit Langton
05b82a6a30 refactor(cli): drop ModelsDev Promise compat shim (#25460) 2026-05-02 15:11:01 -04:00
Kit Langton
6cd02c05c2 fix(telemetry): emit Tool.execute span for MCP and plugin tools (#25452) 2026-05-02 14:49:56 -04:00
opencode-agent[bot]
b3a7513765 chore: generate 2026-05-02 18:00:11 +00:00
Kit Langton
f8738c9002 feat(models): effectify ModelsDev as Service (#25434) 2026-05-02 13:59:08 -04:00
Aiden Cline
b460db15d7 tweak: allow read tool to accept offset of 0 (#25431) 2026-05-02 11:12:07 -05:00
opencode-agent[bot]
ff4779ca11 chore: generate 2026-05-02 16:09:04 +00:00
Kit Langton
146ff8ad85 feat(cli): add effectCmd wrapper + convert models command (#25429) 2026-05-02 12:08:04 -04:00
OpeOginni
0d0ec7dc46 docs: CLI docs for current commands and flags (#25399) 2026-05-02 11:07:22 -05:00
Jérôme Benoit
1ea6e6cd4b fix(nix): remove stale packages/shared filter (#24930) 2026-05-02 10:49:51 -05:00
opencode-agent[bot]
96061222d2 chore: generate 2026-05-02 15:45:21 +00:00
Kit Langton
3b9155714d Delete Instance.dispose and Instance.reload (#25427) 2026-05-02 11:44:16 -04:00
opencode
7371db5cc6 sync release versions for v1.14.32 2026-05-02 15:34:12 +00:00
Kit Langton
046e459d65 fix(llm): map Responses tool calls finish reason 2026-05-01 17:47:46 -04:00
Kit Langton
652ef9c09a fix(llm): use Azure api-key auth for OpenAI adapters 2026-05-01 17:11:44 -04:00
Kit Langton
e9d84c6db7 fix(llm): preserve native stream fallback parity 2026-05-01 08:54:05 -04:00
Kit Langton
116a5c2e74 docs(llm): document prepare<Target>, PreparedRequestOf, and LLMEvent.is.* in AGENTS.md 2026-05-01 08:12:38 -04:00
Kit Langton
8f338ef6dc simplify(llm): inline single-use llmEventIs const and drop redundant as const 2026-05-01 08:12:37 -04:00
Kit Langton
75f467bae3 feat(llm): expose PreparedRequestOf<Target> on LLMClient.prepare
LLMClient.prepare(request) returned a PreparedRequest with target: unknown.
Callers building debug UIs / request previews / plan rendering had to cast
target to the adapter's native shape at every read.

Adds PreparedRequestOf<Target> in schema and a generic Target = unknown
parameter on LLMClient.prepare so callers can opt in to a typed view:

  const prepared = yield* client.prepare<OpenAIChatTarget>(request)
  prepared.target.model           // typed
  prepared.target.messages         // typed

The runtime payload is unchanged — the adapter still emits target: unknown
and the consumer asserts the shape they expect from the configured adapter.
The cast lives at the public boundary in adapter.ts; everything else stays
honest about runtime types.

Existing callers without the type argument still get target: unknown and
nothing breaks. Test in openai-chat.test.ts proves the narrowing at the
type level.
2026-05-01 08:12:37 -04:00
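
A minimal sketch of what a typed-view alias like the PreparedRequestOf<Target> described above can look like; the shapes below are illustrative stand-ins, not the package's actual schema:

    // Illustrative stand-in for the runtime shape (target stays unknown at runtime).
    interface PreparedRequest {
      readonly adapterId: string
      readonly target: unknown
    }

    // Typed view: same object, target narrowed only at the type level.
    type PreparedRequestOf<Target> = Omit<PreparedRequest, "target"> & {
      readonly target: Target
    }

    // Hypothetical prepare with a Target = unknown default, so untyped callers are unchanged.
    declare function prepare<Target = unknown>(request: object): PreparedRequestOf<Target>

    interface OpenAIChatTarget {
      readonly model: string
      readonly messages: ReadonlyArray<{ role: string; content: string }>
    }

    const prepared = prepare<OpenAIChatTarget>({})
    const model: string = prepared.target.model // typed view; the runtime payload is unchanged
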
Kit Langton
f4de3e801e feat(llm): add LLMEvent.is.* camelCase narrowing helpers
Schema.toTaggedUnion('type') already provides LLMEvent.guards but uses
kebab-case bracket access (LLMEvent.guards['tool-call']). Adds an LLMEvent.is
namespace with camelCase aliases that delegate to the same guards, so
consumers can write events.filter(LLMEvent.is.toolCall) instead of
events.filter(LLMEvent.guards['tool-call']).

Migrated all callsites in src/llm.ts and the two test files for consistency.
LLMEvent.guards / .match / .cases / .isAnyOf remain available for callers
who want the Effect-canonical API.
2026-05-01 08:12:37 -04:00
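
A minimal sketch of the camelCase-alias pattern described above, using hypothetical event and guard shapes rather than the real LLMEvent definitions:

    type LLMEvent =
      | { type: "text-delta"; text: string }
      | { type: "tool-call"; name: string }

    // What the tagged-union helper provides: kebab-case guards accessed by bracket.
    const guards = {
      "text-delta": (e: LLMEvent): e is Extract<LLMEvent, { type: "text-delta" }> =>
        e.type === "text-delta",
      "tool-call": (e: LLMEvent): e is Extract<LLMEvent, { type: "tool-call" }> =>
        e.type === "tool-call",
    }

    // camelCase aliases that delegate to the same guards.
    const is = {
      textDelta: guards["text-delta"],
      toolCall: guards["tool-call"],
    }

    declare const events: ReadonlyArray<LLMEvent>
    const calls = events.filter(is.toolCall) // instead of events.filter(guards["tool-call"])
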
Kit Langton
a0165b2ae8 docs(llm): fix stale field names in ProtocolID comment and AGENTS.md code example 2026-05-01 08:12:37 -04:00
Kit Langton
9363c70acd simplify(llm): drop redundant auth: "key" from resolvers (it is the default) 2026-05-01 08:12:37 -04:00
Kit Langton
5ec2673af2 simplify(llm): inline resolveAdapter into compile 2026-05-01 08:12:37 -04:00
Kit Langton
8b414cdb5a refactor(llm): collapse ProviderAuth to 'key' | 'none'
After the auth-axis migration, the OpenCode bridge consults this enum
solely to decide whether to read `provider.key` and stamp it on
`model.apiKey`. The bearer / anthropic-api-key / google-api-key
distinctions used to control which header the bridge wrote; that is now
the job of the adapter's Auth axis.

Three of four variants were write-only after the migration. Collapse to:

- 'key'  — provider needs an API key
- 'none' — provider does not (e.g. local)

Updated all six provider resolvers and the resolver test fixtures.
2026-05-01 08:12:37 -04:00
Kit Langton
49913ff041 refactor(llm): rename Adapter.define -> Adapter.unsafe; drop Adapter.compose
Two cleanups to make the adapter constructor surface honest about what is
canonical and what is an escape hatch:

- Adapter.compose existed to override pieces of an existing adapter, used
  by OpenAI-compatible Chat before the four-axis migration. After the
  migration nothing references it; OpenAI-compatible Chat composes via
  fromProtocol({ protocol: OpenAIChat.protocol, ... }) instead. Delete
  the function and its ComposeInput type.

- Adapter.define is the lower-level escape hatch for adapters whose
  behavior genuinely cannot fit the Protocol/Endpoint/Auth/Framing model.
  Its name implied it was the canonical entry point. Renamed to
  Adapter.unsafe so the four-axis Adapter.fromProtocol(...) reads as the
  obvious primary path and the escape hatch carries its escape semantics
  in its name.

Updated test fixtures in adapter.test.ts and the AGENTS.md guidance.
2026-05-01 08:12:37 -04:00
Kit Langton
bb7f52b24d refactor(llm): remove ambiguous Adapter provider scoping field
The optional 'provider' field on Adapter / AdapterInput / FromProtocolInput
existed as a registry filter: requests with a different model.provider could
not find adapters that set it. After the four-axis migration no adapter
needed it (and an earlier pass removed it from the five migrated providers
because setting it broke session/llm-native tests).

Drop the field entirely and collapse the registry to a single-tier protocol
lookup. If a future deployment genuinely needs to be scoped (e.g. an
Azure-only OpenAI Responses adapter), reintroduce as 'scopedTo' with an
explicit name. Solve when needed, not before.

Also drops the test that exercised the now-removed two-tier lookup
('prefers provider-specific adapters over protocol fallbacks').
2026-05-01 08:12:37 -04:00
Kit Langton
e7ff19bb5f simplify(llm): stringify endpoint URL once in Adapter.fromProtocol
url.toString() was called twice on the same URL object — once for auth
and once for jsonPost. Convert to string immediately and reuse.
2026-05-01 08:12:37 -04:00
Kit Langton
bb859e2e2c simplify(llm): remove redundant queryParams from OpenAICompatibleChatModelInput
queryParams is now inherited from ModelInput (via ModelRef) after the
typed-field promotion. The explicit re-declaration was dead weight.
2026-05-01 08:12:37 -04:00
Kit Langton
61a18bdbd0 simplify(llm): fix stale model.native.queryParams references in docs
The commit that promoted queryParams to a typed ModelRef field updated
the implementation but left two JSDoc/doc references pointing at the old
model.native.queryParams path.
2026-05-01 08:12:37 -04:00
Kit Langton
f86a6790a2 refactor(llm): move queryParams off model.native to typed field
Promotes queryParams to a first-class ModelRef field used by Endpoint.baseURL,
so deployment-level URL query params (Azure api-version, OpenAI-compatible
provider knobs) live in a typed home instead of an opaque `native` bag.

Also removes write-only dead fields from `native`:

- openaiCompatibleProvider (set by family helper, never read)
- opencodeProviderID, opencodeModelID (set by opencode bridge + native session
  builder, never read)
- npm (set by opencode bridge, never read)

After this commit `model.native` only carries genuinely provider-specific
opaque options that no other adapter cares about (Bedrock's aws_credentials
+ aws_region for SigV4). Drops the now-dead ProviderShared.queryParams
helper. Updates AGENTS.md: the documentation of `native` is now implicit
through the new schema JSDoc.
2026-05-01 08:12:37 -04:00
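
A sketch of how a typed queryParams field can feed endpoint URL construction, assuming simplified ModelRef and URL-building shapes rather than the package's actual Endpoint API:

    interface ModelRef {
      readonly baseURL: string
      // Deployment-level query params, e.g. Azure's api-version knob.
      readonly queryParams?: Readonly<Record<string, string>>
    }

    const endpointURL = (model: ModelRef, path: string): string => {
      const url = new URL(path, model.baseURL)
      for (const [key, value] of Object.entries(model.queryParams ?? {})) {
        url.searchParams.set(key, value)
      }
      return url.toString()
    }

    // endpointURL({ baseURL: "https://example.openai.azure.com/",
    //   queryParams: { "api-version": "2024-06-01" } }, "chat/completions")
    // -> "https://example.openai.azure.com/chat/completions?api-version=2024-06-01"
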
Kit Langton
d8b9672234 simplify(llm): split Bedrock auth into bearer fast path + sigv4 gen
The two paths are independent: `model.apiKey` produces a synchronous
Bearer auth, while AWS credentials need an effectful sigv4 sign.
Hoist the bearer path out of `Effect.gen` and reuse `Auth.bearer`
directly, keeping the SigV4 path as a focused `Effect.gen` that owns
the credential lookup, signing, and header merge.

Inlines the now single-use `headersForSigning` and `signed` setup.
2026-05-01 08:12:37 -04:00
Kit Langton
042bf6c822 simplify(llm): default Adapter.fromProtocol auth to Auth.bearer
After the apiKey migration, every adapter explicitly specified `auth`,
and three of them (OpenAI Chat, OpenAI Responses, OpenAI-compatible Chat)
all wrote `auth: Auth.bearer`. `Auth.bearer` is a no-op when
`model.apiKey` is unset, so making it the default is strictly safer than
the previous `Auth.passthrough` default — bearer-style adapters drop
their explicit `auth` line, and adapters that need a different scheme
opt out via `Auth.apiKeyHeader(...)` (Anthropic, Gemini) or a custom
`Auth` (Bedrock SigV4 + Bearer).

Update doc comments on `fromProtocol.auth`, `Auth` type, and
`packages/llm/AGENTS.md` to reflect the new default.
2026-05-01 08:12:37 -04:00
Kit Langton
5d08e28cd9 refactor(llm): move auth secret from headers onto ModelRef.apiKey
Add an optional `apiKey` field to `ModelRef` so authentication is no
longer baked into `model.headers` at construction time. Each provider
adapter now passes an `Auth` to `Adapter.fromProtocol` that reads
`request.model.apiKey` per request:

- OpenAI Chat / Responses / OpenAI-compatible Chat: `Auth.bearer`
- Anthropic Messages:  `Auth.apiKeyHeader("x-api-key")`
- Gemini:              `Auth.apiKeyHeader("x-goog-api-key")`
- Bedrock Converse:    custom auth that uses `apiKey` for Bearer auth
                       and falls back to SigV4 with AWS credentials

The `model()` constructors no longer fold the API key into
`model.headers`. The OpenCode bridge sets `apiKey` directly instead of
building auth headers via the now-deleted `authHeader` helper. Test
assertions move from `headers: { authorization: "Bearer ..." }` to
`apiKey: "..."`.
2026-05-01 08:12:37 -04:00
Kit Langton
4f294852a6 simplify(llm): share core between Auth.bearer and Auth.apiKeyHeader
Both helpers had the same shape: read `request.model.apiKey`, no-op if
absent, otherwise merge a one-key header object. Lift that into a tiny
`fromApiKey(from)` helper and define both in terms of it.

The public surface (`Auth.bearer`, `Auth.apiKeyHeader`) is unchanged.
2026-05-01 08:12:36 -04:00
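
A sketch of the shared core both helpers are described as reducing to: read model.apiKey, no-op when absent, otherwise merge a one-key header. Signatures here are simplified stand-ins, not the package's:

    interface AuthRequest {
      readonly model: { readonly apiKey?: string }
    }
    type Auth = (request: AuthRequest) => Readonly<Record<string, string>>

    // Shared core: turn an apiKey into a single header object, or into nothing.
    const fromApiKey =
      (from: (apiKey: string) => Record<string, string>): Auth =>
      (request) =>
        request.model.apiKey === undefined ? {} : from(request.model.apiKey)

    const bearer: Auth = fromApiKey((key) => ({ authorization: `Bearer ${key}` }))
    const apiKeyHeader = (name: string): Auth => fromApiKey((key) => ({ [name]: key }))

    // e.g. apiKeyHeader("x-api-key") for Anthropic, apiKeyHeader("x-goog-api-key") for Gemini;
    // bearer is a safe default because it is a no-op when model.apiKey is unset.
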
Kit Langton
a676b12b7b fix(llm): keep adapters provider-less by default
Removes the provider field from the five migrated Adapter.fromProtocol
calls. Setting provider scopes the adapter in the registry so requests
must use the same provider id, which broke session/llm-native tests
that build models with provider 'amazon-bedrock' against the
bedrock-converse adapter.

Adapters should stay protocol-only by default and only set provider
when the deployment is genuinely scoped (e.g. an Azure-only adapter
that does not work for native OpenAI). Restoring the original
protocol-only registration.
2026-05-01 08:12:36 -04:00
Kit Langton
31740c1d36 simplify(llm): inline single-use DEFAULT_BASE_URL / defaultBaseURL constants
Per style guide, single-use values should be inlined. Each adapter had
a module-private constant used exactly once in its Adapter.fromProtocol
call. Inlining removes 5 named constants (4 DEFAULT_BASE_URL + 1
defaultBaseURL + ANTHROPIC_VERSION) without loss of clarity — the
string literal appears at the point of use.
2026-05-01 08:12:36 -04:00
Kit Langton
9928917899 simplify(llm): remove dead ProviderShared.sse and withQuery helpers
After migration to Adapter.fromProtocol, the sse() convenience wrapper
and withQuery() URL builder are no longer called anywhere — Framing.sse
and Endpoint.baseURL handle their responsibilities directly. Also
inlines two exported-but-unused test constants (helloPrompt,
weatherPrompt) per style guide.
2026-05-01 08:12:36 -04:00
Kit Langton
98cb886faf docs(llm): document Protocol/Endpoint/Auth/Framing architecture
Updates the AGENTS.md adapter section to describe the four orthogonal
axes that make up an adapter today (Protocol + Endpoint + Auth + Framing)
and the canonical Adapter.fromProtocol composition. Adds a folder layout
overview so the dependency direction (provider/* imports protocol/auth/
endpoint/framing, never the other way) is visible.
2026-05-01 08:12:36 -04:00
Kit Langton
bdd01cad33 refactor(llm): migrate remaining adapters to fromProtocol
Extracts a Protocol implementation per provider and wires the adapter
through Adapter.fromProtocol with explicit Endpoint, Auth, and Framing:

- OpenAI Responses — Endpoint.baseURL with /responses path.
- Anthropic Messages — adds anthropic-version header via the headers slot.
- Gemini — endpoint embeds the model id and pins ?alt=sse at the URL level.
- Bedrock Converse — keeps SigV4-or-Bearer auth as a typed Auth function;
  AWS event-stream framing is a typed Framing value alongside the protocol;
  Endpoint.baseURL gains a function-typed default so the URL host can carry
  the per-request region.

Recorded replay byte-identical across all six adapters; full provider
suite 83 pass, full llm suite 122 pass, opencode typecheck clean.
2026-05-01 08:12:36 -04:00
Kit Langton
6ed160ae02 refactor(llm): migrate OpenAI Chat adapters to fromProtocol
Extracts OpenAIChat.protocol so that:

- openai-chat is now a four-line Adapter.fromProtocol composition over
  the protocol, the OpenAI base URL, default passthrough auth, and SSE
  framing.
- openai-compatible-chat reuses OpenAIChat.protocol verbatim. The whole
  adapter is one Adapter.fromProtocol call that pins protocolId to
  openai-compatible-chat and requires a caller-supplied baseURL.

Bug fixes in OpenAIChat.protocol now propagate to DeepSeek, TogetherAI,
Cerebras, Baseten, Fireworks, DeepInfra, and any future OpenAI-compatible
deployment without touching their files. Recorded replay byte-identical.
2026-05-01 08:12:36 -04:00
Kit Langton
7505da95d3 feat(llm): add Protocol, Endpoint, Auth, Framing primitives
Introduces the four orthogonal axes that an LLM adapter is composed of:

- Protocol — semantic API contract (lowering, validation, encoding,
  parsing). Examples: OpenAI Chat, Anthropic Messages, Bedrock Converse.
- Endpoint — URL construction (baseURL + path + query params).
- Auth — per-request transport authentication. Defaults to passthrough
  for adapters whose auth header is baked into model.headers.
- Framing — byte stream to frames (SSE today; AWS event stream next).

Adds Adapter.fromProtocol(...) which composes these into the existing
AdapterDefinition shape so LLMClient.make(...) and the runtime registry
do not change. Existing adapters keep working through Adapter.define
until they migrate one at a time.
2026-05-01 08:12:36 -04:00
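
A rough sketch of the four axes as plain types and a fromProtocol-style composition. These are simplified, non-Effect stand-ins; the real primitives compose into the existing AdapterDefinition shape with Effect-based signatures:

    interface Model { readonly baseURL: string; readonly apiKey?: string }

    interface Protocol<Target> {
      readonly lower: (request: object) => Target                   // IR -> native request body
      readonly parseFrame: (frame: string) => ReadonlyArray<object> // frames -> LLM events
    }
    type Endpoint = (model: Model) => string                        // URL construction
    type Auth = (model: Model) => Record<string, string>            // per-request auth headers
    type Framing = (bytes: AsyncIterable<Uint8Array>) => AsyncIterable<string> // bytes -> frames

    // Compose the axes into a single prepare step, roughly the shape an adapter needs.
    const fromProtocol = <Target>(axes: {
      protocol: Protocol<Target>
      endpoint: Endpoint
      auth: Auth
      framing: Framing
    }) => ({
      prepare: (model: Model, request: object) => ({
        url: axes.endpoint(model),
        headers: axes.auth(model),
        body: axes.protocol.lower(request),
      }),
      framing: axes.framing,
      parseFrame: axes.protocol.parseFrame,
    })
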
Kit Langton
6099b3dfe9 refactor(llm): rename Protocol type to ProtocolID
Frees up the Protocol name for the upcoming Protocol implementation type
that owns request lowering, target validation, and stream parsing as a
single composable unit. Field names on ModelRef and Adapter stay as
'protocol' since they carry the string discriminator value.
2026-05-01 08:12:36 -04:00
Kit Langton
20bab34b01 test(llm): share recorded provider scenarios 2026-05-01 08:12:36 -04:00
Kit Langton
cd7487a73b test(llm): add focused recorded test filters 2026-05-01 08:12:36 -04:00
Kit Langton
a921eb88e6 test(opencode): cover Azure native request mapping 2026-05-01 08:12:36 -04:00
Kit Langton
f2f7a338de feat(llm): resolve Azure provider natively 2026-05-01 08:12:36 -04:00
Kit Langton
7141036ec4 refactor(llm): simplify provider resolver defaults 2026-05-01 08:12:36 -04:00
Kit Langton
b0be03facd refactor(llm): clarify provider resolution 2026-05-01 08:12:35 -04:00
Kit Langton
1cd53b27ec chore(llm): clean up PR docs 2026-05-01 08:12:35 -04:00
Kit Langton
59f39a922f chore(opencode): drop local LLM adapter spec from branch 2026-05-01 08:12:35 -04:00
Kit Langton
7fba0efbd9 fix(opencode): update native LLM imports after rebase 2026-05-01 08:12:35 -04:00
Kit Langton
0e558e13c7 feat(opencode): populate nativeTools from prompt.ts so production sessions can route through the native path (audit gap #4 phase 2 step 3)
Wires the prompt-side tool resolver to also surface opencode-native
`Tool.Def[]` alongside the AI SDK record it already builds. With
`OPENCODE_EXPERIMENTAL_LLM_NATIVE=1` set, real production sessions
that satisfy the gate now stream through `LLMNativeTools.runWithTools`
instead of `streamText` — the LLM-native path goes from
"plumbing-only" to "actually used."

Changes:

- `prompt.ts:resolveTools` collects `Tool.Def[]` from the registry
  loop and tracks a feasibility flag. MCP tools (which only have AI
  SDK shape) flip the flag off; the synthesized `StructuredOutput`
  tool that the json_schema branch injects also flips it. The return
  shape becomes `{ tools, nativeTools }` where `nativeTools` is
  `undefined` whenever any non-registry tool source contributes —
  callers fall through to the AI SDK path automatically. The
  registry path stays in sync because every `tools[item.id] =
  tool({...})` is paired with a `nativeTools.push(item)` at the same
  loop iteration.

- The single caller (`prompt.ts:1396`) destructures the new shape
  and passes `nativeTools` through to `handle.process(...)`. The
  json_schema branch sets `nativeTools = undefined` after injecting
  `StructuredOutput` so the gate falls through for structured-output
  sessions.

- `runNative` (in `session/llm.ts`) gains two safety nets that work
  regardless of caller behavior:

    1. Coverage check: if AI SDK tools are non-empty, every key must
       have a matching `Tool.Def` in `nativeTools`. A partial set
       falls through. Defends against future callers that might
       emit a partial native list.

    2. Filter parity: `runNative` now calls the existing
       `resolveTools(input)` (the in-file permission/user-disabled
       filter) and intersects its keys with `nativeTools`, then
       feeds the filtered AI SDK record to the dispatcher and the
       filtered native list to `LLMNative.request`. Without this,
       sessions could see permission-disabled tools advertised on
       one path but not the other.

- The dispatch path uses the filtered AI SDK tools record as the
  execute table: `LLMNativeTools.runWithTools({ tools:
  filteredAITools, ... })`. Tool definitions sent to the model are
  the filtered native list. Every tool the model sees can dispatch.

What this enables: a session opted into the experimental flag, with
a clean toolset (registry-only, no MCP, no structured output),
running an Anthropic model, now exercises the streaming-dispatch
loop end-to-end. Tool calls fire as soon as the model finishes
streaming each tool's input; results land in the stream the moment
each handler resolves. Multi-round behavior matches phase 2 step 2b.

What this still does NOT do (deferred to step 4):

- Parity test harness comparing native vs AI SDK event sequences for
  the same scripted session. Until that lands, broader confidence
  comes from running real sessions with the flag set.
- MCP support on the native path. Sessions with MCP servers
  configured stay on AI SDK indefinitely.
- Native support for the synthesized `StructuredOutput` tool.

Verification: opencode typecheck clean for `src/session/*` (the
TUI-side errors visible in the working tree are Kit's parallel
work, untouched here); bridge area tests 36/0/0 across
`llm-native.test.ts` + `llm-native-stream.test.ts` +
`llm-bridge.test.ts`; `prompt.test.ts` still 47/0/0 (no regression
from the resolveTools shape change).
2026-05-01 08:12:35 -04:00
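
A sketch of the coverage check described above (safety net 1): the native list is used only when it covers every AI SDK tool key, otherwise the session falls through. Shapes are simplified stand-ins for the real Tool.Def and tools record:

    interface ToolDef { readonly id: string }

    const coveredNativeTools = (
      aiSdkTools: Record<string, unknown>,
      nativeTools: ReadonlyArray<ToolDef> | undefined,
    ): ReadonlyArray<ToolDef> | undefined => {
      if (nativeTools === undefined) return undefined
      const native = new Set(nativeTools.map((tool) => tool.id))
      const covered = Object.keys(aiSdkTools).every((key) => native.has(key))
      // A partial native set means "fall through to the AI SDK path".
      return covered ? nativeTools : undefined
    }
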
Kit Langton
afa57acfda refactor(llm): extract HTTP recorder package 2026-05-01 08:12:35 -04:00
Kit Langton
189161ed62 feat(opencode): streaming tool dispatch and multi-round loop on the native path (audit gap #4 phase 2 step 2b)
Lands the streaming-dispatch tool loop for the LLM-native path. When
the gate-passing session has `nativeTools` populated, the native
runner forks an AI SDK `tool.execute(...)` the moment a `tool-call`
event arrives mid-stream and injects a synthetic `tool-result` event
back into the same stream when the handler resolves. Long-running
tools no longer block subsequent tool-call streaming; the user sees
each result land as soon as that specific handler completes.

The driver loops across rounds: when a round ends with `reason:
"tool-calls"` AND the dispatchers produced at least one result, the
runner builds a continuation `LLMRequest` (assistant message echoing
text/reasoning/tool-call content + tool messages with results) and
recurses. Stops on a non-`tool-calls` finish, when `maxSteps`
(default 10, mirrors `ToolRuntime.run`) is reached, or when the
underlying scope is interrupted.

New file `session/llm-native-tools.ts`:

- `runWithTools({ client, request, tools, abort, maxSteps? })` is the
  public entry point. Returns a `Stream<LLMEvent, LLMError,
  RequestExecutor.Service>` of merged model events + synthetic tool
  results, ready to flow through `LLMNativeEvents.mapper` for
  consumption by the existing session processor.
- `runOneRound` is the internal building block. It opens an unbounded
  `Queue<LLMEvent, LLMError | Cause.Done>`, forks a producer that
  streams the model and pushes each event to the queue, and forks a
  dispatcher (via a scope-bound `FiberSet`) for every
  non-provider-executed `tool-call`. Each dispatcher's result is
  pushed back into the same queue. After the model stream completes,
  the producer awaits `FiberSet.awaitEmpty` and ends the queue;
  consumers see end-of-stream. A `Deferred<RoundState>` resolves
  alongside so the multi-round driver can decide whether to recurse.
- `dispatchTool` wraps the AI SDK `tool.execute(input, { toolCallId,
  messages, abortSignal })` call. Unknown-tool and execute-throws
  paths produce `tool-error` events instead of failing the stream
  (mirrors `ToolRuntime.run`'s defect-vs-recoverable boundary), so
  the model can self-correct on the next round.

Wired into `runNative` (`session/llm.ts`): when `input.nativeTools`
is non-empty, the upstream becomes `LLMNativeTools.runWithTools(...)`
instead of `nativeClient.stream(...)`; the AI SDK `tools` record
flows in as the dispatch table. Zero-tool sessions still take the
direct-stream path (one round, no dispatch overhead).

Mapper update (`session/llm-native-events.ts`): `tool-result` events
whose `result.value` matches the opencode `Tool.ExecuteResult` shape
(`{ output: string, title?: string, metadata?: object }`) now flow
through to the AI-SDK-shaped session event with their `title` and
`metadata` preserved. Provider-executed and synthetic results that
don't match still fall back to `stringifyResult`. Without this, the
session processor would see every native tool result as
`{ title: "", metadata: {}, output: <JSON of the whole record> }`.

Smoke test (`test/session/llm-native-stream.test.ts`): scripts a
two-round Anthropic SSE backend — round 1 issues a `lookup` tool
call, round 2 replies with text after the tool result feeds back.
Asserts the full event sequence threads through `runWithTools`,
the dispatcher, and the mapper:

- `tool-call` event has the streamed JSON input parsed.
- `tool-result` event carries the `ExecuteResult` shape with
  `title` + `output` populated (proving the mapper update works).
- Round 2 text-delta arrives after the synthetic tool-result.
- Final `finish` event has `finishReason: "stop"` (loop terminated).

What this still does NOT do (deferred to step 3):

- No production caller populates `nativeTools` yet; that's the
  `prompt.ts:resolveTools` change. Until that lands, the gate keeps
  every real session on the AI SDK path.
- No parity harness comparing native + AI SDK event sequences for
  the same scripted session. That's step 4.

Verification: opencode typecheck clean; 36/0/0 across the three
bridge-area tests; 125/0/0 across the LLM package.
2026-05-01 08:12:35 -04:00
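
A simplified, Promise-based sketch of the dispatchTool boundary described above: unknown tools and throwing handlers become tool-error events instead of failing the stream. The real implementation is Effect-based, and the shapes here are illustrative:

    interface AISDKTool {
      execute: (input: unknown, ctx: { toolCallId: string; abortSignal: AbortSignal }) => Promise<unknown>
    }
    type ToolEvent =
      | { type: "tool-result"; toolCallId: string; value: unknown }
      | { type: "tool-error"; toolCallId: string; message: string }

    const dispatchTool = async (
      tools: Record<string, AISDKTool>,
      call: { toolCallId: string; name: string; input: unknown },
      abortSignal: AbortSignal,
    ): Promise<ToolEvent> => {
      const tool: AISDKTool | undefined = tools[call.name]
      if (tool === undefined) {
        return { type: "tool-error", toolCallId: call.toolCallId, message: `unknown tool: ${call.name}` }
      }
      try {
        const value = await tool.execute(call.input, { toolCallId: call.toolCallId, abortSignal })
        return { type: "tool-result", toolCallId: call.toolCallId, value }
      } catch (error) {
        // Surfaced as an event so the model can self-correct on the next round.
        return { type: "tool-error", toolCallId: call.toolCallId, message: String(error) }
      }
    }
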
Kit Langton
fa8f7a1dca feat(opencode): plumb nativeTools through StreamInput (audit gap #4 phase 2 step 2a)
Adds opt-in `nativeTools?: ReadonlyArray<Tool.Def>` to `LLM.StreamInput`
so callers that route through the native path can attach typed
opencode tool definitions alongside the AI SDK `tools` record. The
gate in `runNative` widens accordingly: a session can use the native
path when it has zero tools (existing behavior) OR when it explicitly
provides `nativeTools` matching its AI SDK `tools` (new opt-in). When
`nativeTools` reaches `LLMNative.request`, the existing
`toolDefinition` converter folds each `Tool.Def` into the request's
`tools` array and the LLM core lowers it onto the wire.

This commit deliberately does NOT include the dispatch loop. A
session that opts in by setting `nativeTools` and that triggers a
`tool-call` from the model will see the call event but no
`tool-result` because the native path has no execute handler yet.
That's why no production caller populates `nativeTools`: phase 2
step 2b will land the dispatch loop and only then will real
production sessions route through here.

What this lays in place:

- `StreamInput.nativeTools` typed against `Tool.Def[]` from `@/tool`.
  Aliased to `OpenCodeTool` at the import to dodge a clash with the
  AI SDK `Tool` type that the same file already imports.
- The `runNative` gate flips from "no tools allowed" to "either no
  tools, or `nativeTools` is supplied". An AI SDK tool count > 0
  with `nativeTools` undefined still falls through, so existing
  production sessions are unaffected.
- `LLMNative.request` already accepted `tools: ReadonlyArray<Tool.Def>`
  and converts via `toolDefinition`. We just forward the input
  through; no LLM-bridge change.

Smoke coverage: a new test in `llm-native-stream.test.ts` builds a
typed `Tool.Def` (Effect Schema parameters), routes it through
`LLMNative.request` + `LLMClient.prepare`, and asserts the prepared
Anthropic target carries the tool as an `input_schema` block with
the expected JSON Schema shape. This validates the conversion path
that phase 2 step 2b will exercise from inside `runNative`.

Verification: opencode typecheck clean; 35/0/0 across the three
bridge-area tests (`llm-native.test.ts`, `llm-native-stream.test.ts`,
`llm-bridge.test.ts`).
2026-05-01 08:12:35 -04:00
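
A sketch of the widened gate described above, with simplified stand-ins for StreamInput and Tool.Def:

    interface StreamInput {
      readonly tools: Record<string, unknown>               // AI SDK tools record
      readonly nativeTools?: ReadonlyArray<{ id: string }>  // opt-in typed definitions
    }

    const nativePathEligible = (input: StreamInput): boolean => {
      // Existing behavior: zero tools always qualifies.
      if (Object.keys(input.tools).length === 0) return true
      // New opt-in: the caller explicitly supplied native definitions.
      return input.nativeTools !== undefined && input.nativeTools.length > 0
    }
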
Kit Langton
afba37d330 test(opencode): smoke test for LLM-native stream wire-up (audit gap #4 phase 2)
Adds `test/session/llm-native-stream.test.ts` — one focused test that
proves the end-to-end wire-up `runNative` relies on actually produces
session events from a scripted Anthropic SSE response.

The test stays self-contained:

- Builds a fake Anthropic `Provider.Info` + `Provider.Model` via
  `ProviderTest`.
- Builds an `LLMRequest` via `LLMNative.request(...)` from a
  `MessageV2.WithParts` user message — the same call shape `runNative`
  uses inside `session/llm.ts`.
- Creates an `LLMClient` with the same adapters list + `ProviderPatch.defaults`
  list as `runNative`. The adapters are imported directly from
  `@opencode-ai/llm`; if `runNative`'s `NATIVE_ADAPTERS` array changes,
  this test's `adapters` constant has to follow (commented).
- Provides a single fixed-response HTTP layer that returns a scripted
  Anthropic SSE body. The layer helper is inlined (12 lines) rather
  than imported from `packages/llm/test/lib/http.ts` so the test
  doesn't reach across package boundaries.
- Pipes the LLM stream through `LLMNativeEvents.mapper()` exactly as
  `runNative` does (`Stream.flatMap` + lazy `Stream.concat` for
  flush), runs it to completion, and asserts the key session events:
  `text-start` precedes `text-delta`, `finish-step` carries
  `finishReason: "stop"`, and `finish` carries the merged usage totals.

This does NOT test the dispatch gate inside `session/llm.ts`
(`!Flag.OPENCODE_EXPERIMENTAL_LLM_NATIVE`, missing `nativeMessages`,
tools present, non-Anthropic protocol). Those are simple boolean
conditions and don't need separate coverage. It also does not exercise
the production `Service` layer — that's deferred to Phase 2 step 2
(tool support) and Phase 2 step 3 (production caller wiring).

What the test buys: confidence that the conversion pipeline works and
catches regressions in `LLMNative.request`, the LLM adapter set, or
`LLMNativeEvents.mapper` before they would surface in a real session.

Verification: 34/0/0 across the three bridge-area tests
(`llm-native.test.ts` + `llm-native-stream.test.ts` +
`llm-bridge.test.ts`); opencode typecheck clean.
2026-05-01 08:12:35 -04:00
Kit Langton
fc3a1bfd34 feat(opencode): wire LLM-native stream path behind opt-in flag (audit gap #4 phase 1)
Adds the parallel `runNative()` path inside `session/llm.ts` so a narrow
slice of sessions can flow through `@opencode-ai/llm` instead of the AI
SDK `streamText`. Behavior is gated and shipped off by default; only
callers that opt in see any difference.

The full migration plan (audit gap #4) is parallel-path-with-flag,
prove parity test-by-test, flip default last. This commit is phase 1:
get the wire-up in place behind a flag with one protocol so we can see
whether the design holds before committing to the full migration.

Wire-up summary:

- New flag `OPENCODE_EXPERIMENTAL_LLM_NATIVE` (also enabled by the
  umbrella `OPENCODE_EXPERIMENTAL`). Off by default.
- The session-LLM `live` layer now consumes `RequestExecutor.Service`,
  and the `defaultLayer` provides `RequestExecutor.defaultLayer` so a
  Node fetch HTTP client backs every native stream.
- `runNative(input)` returns `Stream<Event> | undefined`. `undefined`
  means "fall through to AI SDK." It returns a real stream only when
  every gate passes: the flag is set, the caller populated
  `input.nativeMessages` (the bridge needs typed `MessageV2.WithParts`,
  not the AI SDK `messages` array), the session has zero tools (Phase
  2 will lift this), and the bridge routes the model to a protocol in
  `NATIVE_PROTOCOLS`.
- `NATIVE_PROTOCOLS` is a single-entry set today: `anthropic-messages`.
  Other adapters are imported and registered with the client so the
  Phase 2 expansion is a one-line edit, not an architecture change.
- Stream wiring: client.stream(req) -> Stream.flatMap(event ->
  fromIterable(map.map(event))) -> Stream.concat(suspended
  fromIterable(map.flush())) -> Stream.provideService(
  RequestExecutor.Service, executor). The flush stream is built lazily
  with `Stream.unwrap(Effect.sync(...))` so it observes the mapper
  final state after every upstream event has been mapped.
- The mapper (`LLMNativeEvents.mapper`) emits AI-SDK-shaped session
  events from `LLMEvent` so downstream consumers see one shape.

What this does NOT do (deferred to later phases):

- No tool support on the native path (skipped, falls through).
- No parity harness yet; Phase 2 builds it.
- No production traffic; flag is off by default and no production
  caller populates `nativeMessages`.
- No reasoning/cache/multi-modal coverage. Anthropic supports reasoning
  and cache via existing patches, so those start working as soon as a
  caller routes a real session through.

Verification: opencode typecheck clean, bridge tests still green
(33/0/0 across llm-native.test.ts + llm-bridge.test.ts); LLM package
tests green (123/0/0).
2026-05-01 08:12:35 -04:00
Kit Langton
0ba8ca63b6 refactor(llm): Bedrock JSON-codec compliance, signing-headers cleanup, and small dedup
Five review findings; all small, all independent.

H2: Bedrock used raw `JSON.parse` and `JSON.stringify` despite the
package rule against ad-hoc JSON encoders. The in-loop parse on each
event-stream frame goes through `ProviderShared.parseJson` (yielded
inside `Effect.gen`); the `decodeChunk` error fallback uses
`ProviderShared.encodeJson` instead of `JSON.stringify` for the raw
field on `ProviderChunkError`. No behavior change — just channels
JSON through the shared Schema-driven codec.

H3: `BedrockConverse.toHttp` built a `baseHeaders` record with
`content-type: application/json` and passed it through both auth
paths. The bearer path called `jsonPost` with the raw model headers
(no manual content-type), the SigV4 path used `baseHeaders` plus the
signed result. Two paths produced subtly different header sets and
both relied on `jsonPost` overwriting/adding the same content-type
key. Simplify: drop the unused bearer-side construction; rename the
SigV4 input to `headersForSigning` and document why content-type
must be present at signing time (signature covers it).

M4: Lift `isRecord` from `gemini.ts` into `ProviderShared.isRecord`
so adapters share one definition. The duplicates in `llm.ts` (LLM IR
layer) and `llm-native.ts` (OpenCode bridge) stay where they are —
those are at different layers and importing from `provider/` would
invert the dependency direction. Net effect: the provider layer
goes from 2 copies to 1.

L8: `TransportError` lost everything but the message string.
Surface the originating reason tag (`Timeout` / `TransportError` /
`ResponseError` / `RequestError`) and the request URL when
available, both as optional Schema fields. Consumers that don't
care keep getting the same `message` rendering; consumers that do
can finally render "timed out connecting to https://..." instead
of "HTTP transport failed".

M9 + L3: Two dead branches. Anthropic's `processChunk` had
`?? ""` fallbacks for `partial_json` after an early-return guard
already proved it non-empty. OpenAI Chat's `mapFinishReason` had
`if (reason === undefined || reason === null) return "unknown"`
followed by `return "unknown"` — both branches went to the same
place. Drop the unreachable code.

120 LLM-package tests + 33 OpenCode bridge tests still green.
2026-05-01 08:12:35 -04:00
Kit Langton
38af0dc6f8 refactor(llm): centralize codec scaffolding, ToolAccumulator, and totalTokens policy
Three review findings collapsed into one ProviderShared pass.

M1: Five adapters duplicated the same six-line block:

    const ChunkJson = Schema.fromJsonString(Chunk)
    const TargetJson = Schema.fromJsonString(Target)
    const decodeChunkSync = Schema.decodeUnknownSync(ChunkJson)
    const encodeTarget = Schema.encodeSync(TargetJson)
    const decodeTarget = Schema.decodeUnknownEffect(Draft.pipe(Schema.decodeTo(Target)))
    const decodeChunk = (data) => Effect.try({...chunkError(...)})

Lift it into `ProviderShared.codecs({ adapter, draft, target, chunk,
chunkErrorMessage })` returning `{ encodeTarget, decodeTarget,
decodeChunk }`. The result drops directly into `Adapter.define`'s
`validate` field (uses `validateWith` internally to map parse errors
to InvalidRequestError). Adopted in OpenAI Chat, OpenAI Responses,
Anthropic Messages, and Gemini. Bedrock has a custom event-stream
`decodeChunk` that takes `unknown` (not `string`) so it keeps its
inline codecs.

M2: Four adapters defined an identical `ToolAccumulator` interface
(`{ readonly id: string; readonly name: string; readonly input:
string }`). Lift to `ProviderShared.ToolAccumulator`. Anthropic
extends it locally with `providerExecuted` for hosted tools.

M3: The five `mapUsage` implementations had subtly different
`totalTokens` policies — OpenAI Chat passed through whatever the
provider sent, OpenAI Responses unconditionally summed inputs and
output (publishing `totalTokens: 0` when both were `undefined`),
Anthropic and Gemini guarded with conditionals, Bedrock used a
`(...) || undefined` falsy fallback. Add `ProviderShared.totalTokens`
with one rule: prefer provider-supplied total, else sum inputs and
outputs only when at least one is defined, else `undefined`. Fixes
the OpenAI Responses `totalTokens: 0` bug.

M6: Anthropic's `mergeUsage` recomputed `totalTokens` from the merged
input/output via two nested ?? chains and a conditional sum.
Simplified to use the same totalTokens helper, with `inputTokens` and
`outputTokens` extracted as locals so the merge is one ?? per field
and the comment explains why merging exists (Anthropic emits usage
on `message_start` and `message_delta`).

No behavior changes other than the OpenAI Responses fix; existing
tests pass unchanged. 120 LLM-package tests + 33 OpenCode bridge
tests green.
2026-05-01 08:12:35 -04:00
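
A sketch of the single totalTokens rule stated above: prefer the provider-supplied total, else sum inputs and outputs only when at least one is defined, else undefined. The helper name and field names follow the commit; the body is illustrative:

    const totalTokens = (usage: {
      inputTokens?: number
      outputTokens?: number
      totalTokens?: number
    }): number | undefined => {
      if (usage.totalTokens !== undefined) return usage.totalTokens
      if (usage.inputTokens === undefined && usage.outputTokens === undefined) return undefined
      return (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0)
    }

    // totalTokens({}) === undefined   (no more spurious totalTokens: 0)
    // totalTokens({ inputTokens: 10 }) === 10
    // totalTokens({ inputTokens: 10, outputTokens: 5, totalTokens: 16 }) === 16
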
Kit Langton
8bbbceef92 fix(llm): unify apiKey precedence and consolidate Gemini schema conversion
Two issues from the review of the LLM package's six adapters.

H1: Inconsistent apiKey precedence. Five of six adapters spread the
caller's headers first then set the auth header (apiKey wins), but
`OpenAICompatibleChat.model` did the opposite (caller headers won).
That meant a user passing both `apiKey` and `headers.authorization`
would get auth from a different source depending on which adapter
they routed through. Flip the OpenAI-compatible adapter to match the
rest, and add a comment documenting the rule: apiKey wins, callers
who want their own auth header should omit `apiKey` entirely.

H4: Gemini tool-schema sanitization was split across two functions
that both ran on every Gemini request — `convertJsonSchema` in the
adapter (lossy projection: drop empty objects, derive nullable from
type-array, allowlist of preserved keys, recursive properties/items)
and `sanitizeGeminiSchemaNode` registered as a default `tool-schema`
patch (fix-up: integer enums to strings, dangling required filtering,
untyped array typing, scalar property stripping). Both passes only
ran on Gemini models; debugging a tool schema rejection meant
checking both files.

Fold the patch's rules into the adapter as `sanitizeToolSchemaNode`,
running before the existing projection step (renamed
`projectToolSchemaNode`). Compose them in `convertToolSchema` and use
that in `lowerTool`. Delete the patch from `provider/patch.ts` and
`ProviderPatch.defaults`. The behavior is unchanged — same input,
same output — but the rules now live in one file with a header
comment explaining the two concerns.

The matching test in `gemini.test.ts` no longer needs to opt into a
patch list; it now asserts the adapter alone produces the sanitized
shape.
2026-05-01 08:12:35 -04:00
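
A sketch of the precedence rule stated above: spread the caller's headers first, then set the auth header, so apiKey wins when both are supplied. Illustrative only:

    const requestHeaders = (input: {
      apiKey?: string
      headers?: Record<string, string>
    }): Record<string, string> => ({
      // Caller headers first...
      ...input.headers,
      // ...auth header last, so an explicit apiKey always wins.
      ...(input.apiKey !== undefined ? { authorization: `Bearer ${input.apiKey}` } : {}),
    })

    // Callers who want to control the auth header themselves omit apiKey entirely.
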
Kit Langton
d00db17902 feat(opencode): add native LLM event bridge 2026-05-01 08:12:35 -04:00
Kit Langton
f59996362e feat(opencode): round-trip encrypted reasoning content through the bridge
Closes audit gap #3. The bridge now extracts the encrypted reasoning
blob from `MessageV2.ReasoningPart.metadata` and surfaces it on
`LLM.ReasoningPart.encrypted`, where the Anthropic and Bedrock
adapters lower it to the wire — Anthropic emits `thinking.signature`,
Bedrock emits `reasoningContent.reasoningText.signature`. Without
this, multi-turn sessions with reasoning models would lose the
encrypted state on every step and break the chain.

The encrypted blob originates in three different places depending on
how the session was started:

1. AI-SDK Anthropic sessions store it as
   `metadata.anthropic.signature` (per AI SDK provider-keyed
   convention).
2. AI-SDK OpenAI sessions store it as
   `metadata.openai.reasoningEncryptedContent`.
3. Future LLM-native sessions will store it as a top-level
   `metadata.encrypted` string (cleanest shape — provider-agnostic,
   matches the LLM IR field name).

The new `encryptedReasoning` helper probes all three locations in
order, so existing OpenCode sessions can be served by the LLM-native
path without re-recording reasoning content. The full `metadata`
record continues to flow through to `LLM.ReasoningPart.metadata`
unchanged, preserving any provider-specific fields adapters might
read in the future.

OpenAI Responses encrypted reasoning round-trip is intentionally out
of scope: the LLM-package adapter doesn't yet model reasoning items
in the request body. That's a separate adapter feature requiring new
input-item schema variants and is deferred until needed.

Tests (5 new in llm-native.test.ts):
- AI-SDK Anthropic signature extracted into LLM.ReasoningPart.encrypted.
- End-to-end Anthropic lowering: bridge → client.prepare → target with
  `thinking.signature` populated correctly.
- AI-SDK OpenAI reasoningEncryptedContent extracted (forward
  compatibility — useful when the OpenAI Responses adapter gains
  reasoning-item lowering).
- Top-level metadata.encrypted extracted (LLM-native session shape).
- No known key in metadata leaves `encrypted` undefined.

Verified: 33/0/0 across native + bridge tests (was 28; +5 from the
new reasoning extraction tests).
2026-05-01 08:12:35 -04:00
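
A sketch of the three-location probe described above, against a hypothetical plain-record metadata shape (the real helper reads MessageV2.ReasoningPart.metadata):

    const encryptedReasoning = (metadata: Record<string, unknown> | undefined): string | undefined => {
      if (metadata === undefined) return undefined
      const anthropic = metadata["anthropic"] as { signature?: string } | undefined
      const openai = metadata["openai"] as { reasoningEncryptedContent?: string } | undefined
      return (
        // 1. AI-SDK Anthropic sessions
        anthropic?.signature ??
        // 2. AI-SDK OpenAI sessions
        openai?.reasoningEncryptedContent ??
        // 3. LLM-native sessions: top-level, provider-agnostic
        (typeof metadata["encrypted"] === "string" ? metadata["encrypted"] : undefined)
      )
    }
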
Kit Langton
b653261772 feat(opencode): bridge user FilePart to LLM MediaPart for vision input
Closes audit gap #2 (FilePart → MediaPart not implemented).

The bridge now lowers `MessageV2.FilePart` on user messages into
`LLM.MediaPart`, unblocking image and document inputs. The first
pass supports `data:` URLs only — the inline base64 form most
commonly produced by the OpenCode UI for pasted screenshots and
attached files. `http(s):` and `file:` URLs are explicitly
rejected with a clear error so a future fetch / filesystem-read
path can plug in cleanly without regressing safety.

Implementation:
- New `lowerFilePart` helper extracts the base64 payload from a
  data URL via a single regex; failure yields a typed
  `UnsupportedContentError` carrying both the partType and a
  `reason` that includes the offending URL for debuggability.
- New `lowerUserPart` dispatches user-side parts: text →
  `LLM.text`, file → `MediaPart`. Returns identity-empty
  for any unsupported part type the static gate would have caught.
- `userMessage` is now `Effect.fnUntraced` so file conversion can
  yield typed errors. `lowerMessage` (the per-message dispatcher,
  renamed from `messages` to free the local name) cascades the
  Effect through the request flow via `Effect.forEach`.
- `supportsPart` static gate now allows `file` parts on user
  messages. Assistant messages still reject file parts (the LLM
  IR's MediaPart isn't valid in assistant content for any
  adapter we ship today).
- `UnsupportedContentError` gains an optional `reason` field that
  appends to the canonical message as `<base>: <reason>`. Existing
  static-gate failures keep the same shape (no reason).

Tests (3 new, 1 rewritten):
- Image data URL with filename round-trips to MediaPart with
  base64-stripped data.
- PDF data URL preserves filename and base64 payload.
- `https:` URL rejected with an error mentioning the file
  partType, the message ID, and the offending URL.
- The pre-existing "fails instead of dropping unsupported native
  parts" test now uses a reasoning part on a user message
  (reasoning is valid for assistants only) since file parts with
  data URLs are no longer rejected by the static gate.

Out of scope, intentional follow-ups:
- HTTP/HTTPS URL fetching (would need HttpClient.HttpClient and a
  decision on caching, retries, size limits).
- File path / file:// URL reading (would need FileSystem.FileSystem
  and a permission check against the session's working directory).
- File parts on assistant messages (LLM IR doesn't model
  assistant-side media; defer until we hit a provider that needs it).
- text/plain and application/x-directory file parts that the
  AI-SDK path converts to text inline at message-v2.ts:791 — for
  the bridge, those should be converted upstream before reaching
  LLMNative.request rather than handled here.

Verified: bun typecheck clean, 28/0/0 across native + bridge
tests (was 21; +7 from the FilePart additions plus the rewritten
unsupported-parts test).
2026-05-01 08:12:34 -04:00
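
A sketch of the data-URL extraction described above: pull the media type and base64 payload out of an inline data: URL and reject other schemes with a clear error. The regex and shapes are illustrative, not the bridge's exact code:

    interface MediaPart {
      readonly mediaType: string
      readonly data: string // base64 payload with the data-URL prefix stripped
      readonly filename?: string
    }

    const lowerFilePart = (part: { url: string; mime: string; filename?: string }): MediaPart => {
      const match = /^data:([^;,]+);base64,(.+)$/.exec(part.url)
      if (match === null) {
        // http(s): and file: URLs are rejected until a fetch / filesystem-read path exists.
        throw new Error(`unsupported file part URL (only data: URLs are handled): ${part.url}`)
      }
      return { mediaType: match[1] ?? part.mime, data: match[2] ?? "", filename: part.filename }
    }
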
Kit Langton
5f08d6cbd6 feat(llm): cachePromptHints patch with first-2 system / last-2 messages policy
Lift the prompt-cache policy out of OpenCode's bridge and into the
LLM package as a typed, gated patch. The policy mirrors the AI-SDK
applyCaching path (packages/opencode/src/provider/transform.ts:229):
mark the first 2 system parts and the last 2 messages with an
ephemeral cache hint, gated on `model.capabilities.cache.prompt`.

Adapters lower the hint structurally — Anthropic emits
`cache_control: { type: "ephemeral" }` on the marked block,
Bedrock emits a positional `cachePoint: { type: "default" }`
after the marked block (added in 9d7d518ac). The capability gate
keeps non-cache adapters (OpenAI Responses, Gemini, OpenAI-compat
Chat) hint-free.

Why a Patch and not bridge code:
- packages/llm/AGENTS.md TODO explicitly calls for cache hint patches
- Other consumers of @opencode-ai/llm get caching for free
- The bridge stays focused on shape conversion (MessageV2 → LLMRequest)
- Patches compose via ProviderPatch.defaults (now includes this one)
- The capability gate is a typed predicate, not provider-name matching

Implementation:
- New `cachePromptHints` patch in provider/patch.ts. The
  `withCacheOnLastText` helper uses Array.findLastIndex (codebase
  idiom) and short-circuits when no text part exists so messages
  with only tool-result content are returned identity-equal.
- `EPHEMERAL_CACHE` is a single shared CacheHint instance — no
  per-request allocation, preserves `instanceof` for any consumer
  that checks class identity.
- Added to `ProviderPatch.defaults` so existing callers that pass
  `defaults` get cache support automatically.

Tests (5 new in patch.test.ts):
- Marks first 2 system parts on cache-capable models.
- Marks last text part of last 2 messages.
- Targets the last text part when a message has trailing
  non-text content (assistant text + tool-call).
- Returns content unchanged (identity-equal) when no text part
  exists, so pure tool-result messages don't allocate.
- No-op when the model does not advertise prompt caching.

Bridge cleanup:
- Removed `applyCachePolicy`, `withCacheOnLastText`,
  `updateMessageContent`, `EPHEMERAL_CACHE` from llm-native.ts
  (-30 lines of bridge-side cache code).
- Dropped now-unused `CacheHint`, `LLMRequest`, `Message` imports.
- The bridge's only responsibility is now MessageV2 lowering;
  callers wire `patches: ProviderPatch.defaults` at client
  construction.

OpenCode tests rewritten:
- Old: assert on `request.system[N].cache` (bridge internals).
- New: assert on `prepared.target` after running through
  `LLMClient.make({ adapters, patches: ProviderPatch.defaults })
  .prepare(request)` — verifies the full lowering end-to-end.
- Anthropic: target.system[0..1] carry `cache_control: ephemeral`,
  target.messages[1..2] carry it on the final text block.
- Bedrock: target has `cachePoint` markers after each cached block.
- Non-cache (OpenAI Responses): JSON.stringify(target) contains
  none of `cache_control` / `cachePoint` / `ephemeral`.

Verified: bun typecheck clean across both packages, 120/0/0 in LLM
package (was 113; +7 from new patch tests counting parameter
variations), 21/0/0 in OpenCode native+bridge tests.
2026-05-01 08:12:34 -04:00
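
A sketch of the policy stated above (mark the first 2 system parts and the last 2 messages with an ephemeral hint, gated on the cache capability), with simplified stand-ins for the LLMRequest schema and the shared CacheHint instance:

    type Part =
      | { readonly type: "text"; readonly text: string; readonly cache?: { readonly type: "ephemeral" } }
      | { readonly type: "tool-result"; readonly output: string }
    interface Request {
      readonly system: ReadonlyArray<Extract<Part, { type: "text" }>>
      readonly messages: ReadonlyArray<{ readonly content: ReadonlyArray<Part> }>
    }

    const EPHEMERAL_CACHE = { type: "ephemeral" } as const // shared instance, no per-request allocation

    const cachePromptHints = (request: Request, cachePrompt: boolean): Request => {
      if (!cachePrompt) return request // capability gate: non-cache models stay hint-free
      const system = request.system.map((part, i) => (i < 2 ? { ...part, cache: EPHEMERAL_CACHE } : part))
      const messages = request.messages.map((message, i) => {
        if (i < request.messages.length - 2) return message // only the last 2 messages
        const last = message.content.findLastIndex((part) => part.type === "text")
        if (last === -1) return message // pure tool-result content: returned identity-equal
        const content = message.content.map((part, j) =>
          j === last && part.type === "text" ? { ...part, cache: EPHEMERAL_CACHE } : part,
        )
        return { ...message, content }
      })
      return { system, messages }
    }
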
Kit Langton
3cd13c87c4 refactor(llm): standardize native request APIs 2026-05-01 08:12:34 -04:00
Kit Langton
653a830cf6 refactor(llm): clarify tool definition API 2026-05-01 08:12:34 -04:00
Kit Langton
33ef3b01f8 test(opencode): cover native Gemini parity 2026-05-01 08:12:34 -04:00
Kit Langton
a26f2c905f test(opencode): cover native OpenAI-compatible parity 2026-05-01 08:12:34 -04:00
Kit Langton
03a97a64a3 chore(llm): fix low-hanging lint warnings 2026-05-01 08:12:34 -04:00
Kit Langton
096c305a55 feat(llm): Bedrock Converse cache hints, image, and document blocks
Close the parity gaps deferred from the original Bedrock pass.

Schema additions on the Converse target:
- BedrockImageBlock for { image: { format, source: { bytes } } }.
  Supported formats per Converse docs: png, jpeg, gif, webp.
- BedrockDocumentBlock for { document: { format, name, source: { bytes } } }.
  Supported formats: pdf, csv, doc, docx, xls, xlsx, html, txt, md.
- BedrockCachePointBlock for the positional { cachePoint: { type } }
  marker. Currently emits the only Bedrock cache type, 'default'. A
  TODO marks where to map ttlSeconds → ttl ('5m' | '1h') once we have
  a recorded cassette to validate the wire shape.

Lowering:
- TextPart and SystemPart cache hints emit a positional cachePoint
  marker right after their text block. Both 'ephemeral' and
  'persistent' CacheHint types map onto Bedrock's 'default' since
  Bedrock does not distinguish — this matches the convention the
  Anthropic adapter uses (cache?.type === 'ephemeral' check).
- MediaPart routes by mediaType: 'image/*' → image block, everything
  else → document block. MIME type → format mapping is via
  IMAGE_FORMATS / DOCUMENT_FORMATS records typed with 'as const
  satisfies' so the keys stay narrow at compile time.
- A small textWithCache helper collapses the 'push text, push
  cachePoint if cache is set' pattern that would otherwise repeat at
  three callsites (system, user-text, assistant-text).
- Bytes are encoded via ProviderShared.mediaBytes — the shared
  helper Kit landed in c3346f7dc.
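A plain-TypeScript sketch of the lowering rules above; block shapes and helper names are simplified assumptions (the document block is analogous to the image block and is omitted here).

```ts
type ConverseBlock =
  | { text: string }
  | { cachePoint: { type: "default" } }
  | { image: { format: string; source: { bytes: string } } }

// Keys stay narrow at compile time, as described above.
const IMAGE_FORMATS = {
  "image/png": "png",
  "image/jpeg": "jpeg",
  "image/jpg": "jpeg",
  "image/gif": "gif",
  "image/webp": "webp",
} as const satisfies Record<string, string>

// Push the text block, then a positional cachePoint marker if a hint is set.
// Both 'ephemeral' and 'persistent' hints collapse onto Bedrock's 'default'.
const textWithCache = (out: ConverseBlock[], text: string, cached: boolean) => {
  out.push({ text })
  if (cached) out.push({ cachePoint: { type: "default" } })
}
```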

Bug fix: lowerSystem was dead code in the previous draft. The
prepare() function still inlined the pre-cache .map(...) that
discarded system cache hints. prepare() now calls lowerSystem so
the cachePoint markers actually flow through.

Tests (7 new fixtures, all green):
- Cache hint on system / user-text / assistant-text emits cachePoint
  after text in each context.
- No cache hint → no cachePoint emitted (regression guard).
- Image lowering covers png / jpeg / jpg-alias / webp.
- Uint8Array image bytes are base64-encoded ([1,2,3,4,5] → AQIDBAU=).
- Document lowering with filename round-trip and missing-filename
  fallback to 'document.<format>'.
- Unsupported image MIME (image/svg+xml) is rejected with a clear
  error message.
- Unsupported document MIME (application/x-tar) is rejected with a
  clear error message.

Recorded cassettes for cache hints, images, and documents are still
TODO — the wire shapes are exercised deterministically here and will
be validated against a live model in a follow-up cassette pass.

Verified: bun typecheck clean, 113 pass / 0 fail / 0 skip (was 106;
+7 from the new fixture tests).
2026-05-01 08:12:34 -04:00
Kit Langton
ecd73f26fc refactor(llm): simplify adapter shared logic 2026-05-01 08:12:34 -04:00
Kit Langton
1a839c6233 refactor(opencode): tighten native LLM bridge boundaries 2026-05-01 08:12:34 -04:00
Kit Langton
c69f2bb15e refactor(llm): centralize InvalidRequestError, validate, and JSON POST
Phase A continuation of the ProviderShared dedupe pass. Three more
patterns lifted into ProviderShared so they're written once:

ProviderShared.invalidRequest(message) — replaces six identical
`const invalid = (message) => new InvalidRequestError({ message })`
one-liners across openai-chat, openai-responses, anthropic-messages,
gemini, openai-compatible-chat, and bedrock-converse. Each adapter
keeps a short `const invalid = ProviderShared.invalidRequest` alias
so the 27 callsite `yield* invalid("...")` patterns are unchanged.
Bedrock's SigV4 catch path and the openai-compatible-chat baseURL
guard both go through the helper now too.

ProviderShared.validateWith(decode) — replaces the identical
`(draft) => decode(draft).pipe(Effect.mapError((e) =>
invalid(e.message)))` lambda body in five adapters. Same line count
but shorter, names the pattern, and keeps the `decode → mapError →
InvalidRequestError` translation in one canonical spot.
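A hedged sketch of these two helpers, assuming Effect's `Data.TaggedError` and a Schema-style decode function; the real definitions in shared.ts may differ.

```ts
import { Data, Effect } from "effect"

class InvalidRequestError extends Data.TaggedError("InvalidRequestError")<{
  message: string
}> {}

// The one-liner each adapter used to re-declare locally.
const invalidRequest = (message: string) => new InvalidRequestError({ message })

// Names the decode → mapError → InvalidRequestError translation once.
const validateWith =
  <A>(decode: (draft: unknown) => Effect.Effect<A, { message: string }>) =>
  (draft: unknown) =>
    decode(draft).pipe(Effect.mapError((e) => invalidRequest(e.message)))
```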

ProviderShared.jsonPost({ url, body, headers }) — replaces the
five-adapter pattern of `HttpClientRequest.post(url).pipe(setHeaders,
bodyText)` for JSON-body POSTs. Sets `content-type: application/json`
last so caller headers can override everything except the
content-type. Bedrock uses it for both the bearer-auth and SigV4-
signed paths; SigV4 still signs against `baseHeaders` (which already
contained content-type) so the signature matches what the helper
ultimately sends.
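A sketch of the POST helper under the assumption that it builds on `@effect/platform`'s `HttpClientRequest`; the real option bag may carry more fields.

```ts
import { HttpClientRequest } from "@effect/platform"

const jsonPost = (opts: { url: string; body: string; headers?: Record<string, string> }) =>
  HttpClientRequest.post(opts.url).pipe(
    HttpClientRequest.setHeaders(opts.headers ?? {}),
    // Content type is set last so caller headers can override everything else.
    HttpClientRequest.bodyText(opts.body, "application/json"),
  )
```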

Net change: -73 / +86 (+13 in shared.ts mostly JSDoc; -86 across the
six adapters). The `HttpClientRequest` and `InvalidRequestError`
imports are dropped from the five SSE adapters and from Bedrock since
they're no longer referenced directly.

Verified: `bun typecheck` clean, 106 pass / 0 fail / 0 skip
(unchanged).
2026-05-01 08:12:34 -04:00
Kit Langton
339db0e885 docs(llm): document ProviderShared helpers and framing seam
Update the adapter authoring guide to reflect the dedupe pass:

- Generalize the `parse` bullet from `ProviderShared.sse` to
  `ProviderShared.framed` and call out the two framing dialects
  in use today (SSE for OpenAI/Anthropic/Gemini/compat, AWS event
  stream for Bedrock).
- Spell out that `framed`'s `framing` parameter is the seam for
  new wire formats; the rest of the pipeline is shared.
- New 'Shared adapter helpers' subsection enumerating the
  `ProviderShared` exports a new adapter author should reach for
  before hand-rolling: `framed`, `sse`, `sseFraming`, `joinText`,
  `parseToolInput`, `parseJson`, `chunkError`.
- Closing nudge: lift 3-5 line repeats into ProviderShared rather
  than copy them between adapters.

Doc-only — no code or test changes.
2026-05-01 08:12:34 -04:00
Kit Langton
fa2a5d1fdb feat(opencode): convert native LLM message history 2026-05-01 08:12:34 -04:00
Kit Langton
3a94622e76 refactor(llm): dedupe adapter scaffolding into ProviderShared
Promote three repeated patterns out of individual adapters into
ProviderShared so a fifth or sixth adapter doesn't write the same
glue code over again.

ProviderShared.joinText(parts) — replaces the per-adapter `text()`
helper that joined an array of parts with newlines. Used by OpenAI
Chat (system content, user text, assistant text), OpenAI Responses
(system content), and Gemini (systemInstruction). The dead copies in
Anthropic Messages and Bedrock are gone.

ProviderShared.parseToolInput(adapter, name, raw) — replaces the
identical `parseJson(adapter, raw || "{}", \`Invalid JSON input
for <adapter> tool call <name>\`)` invocation in finishToolCall
across Anthropic, OpenAI Chat, OpenAI Responses, and Bedrock. Uniform
error message and the empty-string-to-"{}" fallback handled in one
place.
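Minimal sketches of the two helpers; the signatures are assumptions, and `parseJson` is shown as a throwing stub where the package's version returns an Effect.

```ts
const joinText = (parts: ReadonlyArray<{ text: string }>) =>
  parts.map((part) => part.text).join("\n")

const parseJson = (adapter: string, raw: string, message: string): unknown => {
  try {
    return JSON.parse(raw)
  } catch {
    throw new Error(`${adapter}: ${message}`)
  }
}

// Uniform error message and the empty-string-to-"{}" fallback in one place.
const parseToolInput = (adapter: string, name: string, raw: string) =>
  parseJson(adapter, raw || "{}", `Invalid JSON input for ${adapter} tool call ${name}`)
```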

ProviderShared.framed(...) — generalizes the existing `sse()` helper
so the protocol-specific framing layer is pluggable. The shared
shape is bytes → frames → chunk → (state, events) with mapError /
mapEffect / mapAccumEffect / catchCause as the spine; framing is
the only varying step.

ProviderShared.sseFraming — the SSE-specific framing implementation
(decodeText + Sse.decode + filter [DONE]). The existing `sse()`
helper now delegates to `framed` with this framing, keeping the
adapter API surface identical.
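The shared shape is easiest to see outside Effect; a plain async-generator sketch, where framing is the only pluggable step (illustrative only, not the package's implementation):

```ts
type Framing<Frame> = (bytes: AsyncIterable<Uint8Array>) => AsyncIterable<Frame>

async function* framed<Frame, State, Event>(
  bytes: AsyncIterable<Uint8Array>,
  framing: Framing<Frame>, // SSE events, AWS event-stream frames, ...
  initial: State,
  step: (state: State, frame: Frame) => [State, Event[]],
): AsyncIterable<Event> {
  let state = initial
  for await (const frame of framing(bytes)) {
    const [next, events] = step(state, frame)
    state = next
    yield* events
  }
}
```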

Bedrock's parseStream — collapses to a single `ProviderShared.framed`
call with its own `eventStreamFraming` step. The cursor-based byte
buffer + AWS event-stream codec live as inputs to framed; everything
else is shared with the SSE adapters. Bedrock now has the same
`catchCause → streamError` terminal-error normalization that SSE
adapters have (it was missing before this refactor).

Net effect across the llm package: -66 lines / +114 lines, but the
+114 is mostly JSDoc on the new helpers; adapter implementations
shrink. A future protocol (Bedrock InvokeModel, Vertex Gemini binary
streaming, etc.) plugs in by supplying its `framing` step.

Verified: `bun typecheck` clean, 106 pass / 0 fail / 0 skip
(unchanged from before the refactor).
2026-05-01 08:12:34 -04:00
Kit Langton
778b1762b0 feat(opencode): convert native LLM tool definitions 2026-05-01 08:12:34 -04:00
Kit Langton
bab2fbc7f6 refactor(llm): simplify Bedrock Converse adapter after review
Cleanup of the Bedrock adapter (ba1705d) following parallel review
passes for code reuse, code quality, and efficiency.

- Drop dead `text` join helper and unused `TextPart` import.
- Schema-validate `model.native.aws_credentials` instead of seven
  manual `typeof` guards in `credentialsFromInput`. Removes the
  unsafe `as Record<string, unknown>` cast and fixes the dead
  `native?.region` fallback (the `model()` constructor only writes
  `aws_region`).
- Skip the JSON.parse → JSON.stringify → Schema.fromJsonString triple
  round-trip in the frame consumer. The eventstream codec already
  hands us a UTF-8 payload; parse once and feed the wrapped object
  directly to `Schema.decodeUnknownSync(BedrockChunk)`.
- Replace O(n²) buffer concat in `consumeFrames` with a cursor-based
  state `{ buffer, offset }`. Compaction happens once per network
  chunk via `appendChunk` instead of per frame; frame slicing is
  zero-copy via `subarray`. Bounded buffer growth regardless of
  stream length.
- Rename `ParserState.finishReason` → `pendingStopReason` (raw
  string) and defer the `mapFinishReason` call to the single emit
  site, plus the `onHalt` fallback. Tightens the helper's signature
  to `(reason: string)` so the chunk-typed `messageStop.stopReason`
  flows through without the optional widening.
- Restructure `signRequest` to take an object parameter (was four
  positional args), and replace the manual `forEach`-into-record with
  `Object.fromEntries(signed.headers.entries())`.
- Inline single-use `status` and `useTools` variables.
- Widen `fixedResponse` to accept `ConstructorParameters<typeof Response>[0]`
  so binary fixtures (`Uint8Array`, streams) flow without casts. The
  Bedrock test's `fixedBytes` helper now wraps it cleanly.
- Tidy `captureResponseBody` into a ternary returning the union shape
  directly so the call site spreads the captured object without
  reaching for `bodyEncoding` explicitly.
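A sketch of the cursor-based buffer described above, with assumed names: compaction happens once per network chunk, and frame slicing stays zero-copy.

```ts
interface FrameBuffer { buffer: Uint8Array; offset: number }

// Compact once per network chunk: drop consumed bytes, append the new chunk.
const appendChunk = (state: FrameBuffer, chunk: Uint8Array): FrameBuffer => {
  const pending = state.buffer.subarray(state.offset)
  const next = new Uint8Array(pending.length + chunk.length)
  next.set(pending)
  next.set(chunk, pending.length)
  return { buffer: next, offset: 0 }
}

// Zero-copy frame slice: only the cursor moves.
const takeFrame = (state: FrameBuffer, frameLength: number) => ({
  frame: state.buffer.subarray(state.offset, state.offset + frameLength),
  state: { buffer: state.buffer, offset: state.offset + frameLength },
})
```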

Verified: `bun typecheck` clean, 106 pass / 0 fail / 0 skip
(unchanged from before the refactor).
2026-05-01 08:12:34 -04:00
Kit Langton
0da7d8a2a1 feat(opencode): add native LLM request builder 2026-05-01 08:12:33 -04:00
Kit Langton
769d6123d5 feat(llm): add Bedrock Converse adapter
Implements the AWS Bedrock Converse streaming protocol as the 5th
first-class adapter in @opencode-ai/llm. Single `bedrock-converse`
adapter covers all underlying models (Anthropic, Llama, Mistral,
Cohere, Nova, Titan) since Converse is uniform.

Wire format: messages with text / reasoning / toolUse / toolResult
content blocks, system blocks, inferenceConfig, toolConfig with
toolSpec + toolChoice. Image / document / cache-point content types
are still TODO.

Streaming: AWS event stream binary framing via @smithy/eventstream-codec.
Each frame is decoded then dispatched by `:event-type` header into
the chunk schema. Bedrock splits the finish across `messageStop`
(reason) and `metadata` (usage) — the parser stashes the reason and
emits a single consolidated `request-finish` event when metadata
arrives, with an `onHalt` fallback for truncated streams.

Auth: two paths. Bearer API key (newer) when the consumer sets
`model.headers.authorization = 'Bearer <key>'`. SigV4 signing via
aws4fetch otherwise — credentials live on `model.native.aws_credentials`
and are signed at `toHttp` time so STS-vended tokens are picked up
when the consumer rebuilds the model. The adapter rejects requests
with neither auth path with a clear InvalidRequestError.
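A sketch of the auth-path selection using the field names given above; the predicate and error text are assumptions, not the adapter's actual code.

```ts
type BedrockAuth =
  | { kind: "bearer"; authorization: string }
  | { kind: "sigv4"; credentials: unknown }

const resolveAuth = (model: {
  headers?: Record<string, string>
  native?: { aws_credentials?: unknown }
}): BedrockAuth => {
  const authorization = model.headers?.["authorization"]
  if (authorization?.startsWith("Bearer ")) return { kind: "bearer", authorization }
  if (model.native?.aws_credentials)
    return { kind: "sigv4", credentials: model.native.aws_credentials }
  throw new Error("Bedrock request has neither a bearer API key nor aws_credentials")
}
```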

Routing: `@ai-sdk/amazon-bedrock` lowers to `bedrock-converse` via
the new `AmazonBedrock` provider routing module; the OpenCode
`llm-bridge.ts` registers it.

Cassette format: response bodies under
`application/vnd.amazon.eventstream` and `application/octet-stream`
content types are now stored as base64 with `bodyEncoding: 'base64'`
on the response snapshot — text round-tripping mangled the CRC32
fields in event-stream frames. Existing cassettes (SSE/JSON) omit
the field and decode as text unchanged.

Tests: 11 deterministic fixtures (prepare / lower messages / lower
tool config / decode text+usage / decode tool calls / decode
reasoning / decode throttling exception / auth path validation /
SigV4 plumbing) + 2 recorded cassettes against live Bedrock
(`us.amazon.nova-micro-v1:0` in us-east-1) for streaming text and
streaming tool calls.

AGENTS.md: documents the Bedrock auth model, binary cassette format,
and updates the protocol coverage / cassette backlog.

Deps: @smithy/eventstream-codec, @smithy/util-utf8, aws4fetch (~40KB
combined; matches AI SDK's approach).
2026-05-01 08:12:33 -04:00
Kit Langton
6c887b0faa refactor(llm): brand provider and model identifiers 2026-05-01 08:12:33 -04:00
Kit Langton
4e3f678b24 feat(llm): add provider-routed adapter composition 2026-05-01 08:12:33 -04:00
Kit Langton
e1c6bf92fb feat(llm): provider-executed tool pass-through
Add a `providerExecuted: boolean` flag to `tool-call` and `tool-result`
events plus the persisted `ToolResultPart`. When set, the tool runtime
skips client dispatch (the provider already executed the tool) and folds
both events into the assistant message so the next round's history
carries the call + result for context.

Anthropic: decode `server_tool_use` blocks and the three server tool
result block types (`web_search_tool_result`, `code_execution_tool_result`,
`web_fetch_tool_result`) into `tool-call` / `tool-result` events with
`providerExecuted: true`. Round-trip the same parts back into the
provider when the assistant message is replayed in subsequent requests.
Result block error payloads (`*_tool_result_error`) surface as
`result.type === "error"`.

OpenAI Responses: decode hosted tool items emitted via
`response.output_item.done` (`web_search_call`, `file_search_call`,
`code_interpreter_call`, `computer_use_call`, `image_generation_call`,
`mcp_call`, `local_shell_call`) as `tool-call` + `tool-result` pairs
with `providerExecuted: true`. Each tool's input fields are pulled out
explicitly; the full item is passed through as the result payload so
consumers can read outputs / sources / status without re-decoding.

Tool runtime: extend the dispatch decision so provider-executed
tool-calls bypass the handler lookup, and tool-result events with
`providerExecuted: true` are appended to the assistant content for
round-trip rather than being treated as a separate tool message.
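An illustrative sketch of the extended dispatch rule (names assumed):

```ts
interface ToolCall { name: string; providerExecuted?: boolean }

const shouldDispatch = (call: ToolCall, handlers: ReadonlyMap<string, unknown>) =>
  // Provider already ran the tool: keep it in history, skip the handler lookup.
  call.providerExecuted !== true && handlers.has(call.name)
```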

Tests: 7 new deterministic fixtures cover Anthropic decode (success +
error result + round-trip + unknown server tool name), OpenAI Responses
decode (web_search_call, code_interpreter_call), and tool-runtime
skip-dispatch.

AGENTS.md updates the runtime section to describe pass-through behavior
and notes the transport-agnostic design that keeps a future WebSocket
adapter (e.g. OpenAI Codex backend) as a sibling rather than a core
rewrite.
2026-05-01 08:11:29 -04:00
Kit Langton
b5ca62d1ea test(llm): record OpenAI Chat tool-loop cassette
Captures both model rounds of the typed ToolRuntime tool loop into a
single multi-interaction cassette: round 1 carries the user prompt and
returns a get_weather tool call; round 2 carries the assistant tool call
plus tool result and returns a final answer.

Verifies the multi-interaction cassette infrastructure end-to-end against
a real provider.
2026-05-01 08:11:29 -04:00
Kit Langton
ca8d700a14 feat(llm): support multi-interaction cassettes with sequential matcher
The cassette layer already stored interactions in an array, but replay
always used find-first structural matching and cassettes were written
as one minified JSON line. That makes tool-loop and retry recordings
unworkable: identical requests collapse to one response, and large
recordings are unreadable on review.

- Add `sequentialMatcher` for position-based dispatch so identical
  retries map to recorded responses in order via an internal cursor.
- Pretty-print cassette JSON on write and reformat existing fixtures so
  multi-interaction diffs stay reviewable.
- Add deterministic `record-replay.test.ts` covering default vs
  sequential dispatch and cursor exhaustion.
- Add an OpenAI Chat tool-loop recorded test scaffold gated behind
  `OPENAI_API_KEY` so a single `RECORD=true` run captures every
  model round of the loop into one cassette file.
- Update AGENTS.md to document multi-interaction cassettes and the
  matcher options, and mark the cassette ergonomics TODO complete.
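A minimal sketch of the position-based matcher described in the first bullet; the real matcher API and interaction shape are assumptions.

```ts
const sequentialMatcher = <Interaction>(interactions: ReadonlyArray<Interaction>) => {
  let cursor = 0 // internal cursor: identical requests map to recorded responses in order
  return (_request: unknown): Interaction | undefined =>
    cursor < interactions.length ? interactions[cursor++] : undefined
}
```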
2026-05-01 08:11:29 -04:00
Kit Langton
ca198f739e refactor(llm): cache tool codecs and tighten ToolRuntime types
Simplify pass after the typed ToolRuntime initial drop. Findings from a
parallel review (code reuse + quality + perf):

src/tool.ts
- Tool now carries memoized decode/encode codecs and a precomputed
  ToolDefinition, derived once at tool() construction time. The runtime no
  longer rebuilds Schema closures or JSON Schema docs per call/per run.
- Constrains parameters/success to Schema.Codec<T, any, never, never> so
  the codecs have no service requirements. Drops the 'as unknown as' casts
  the runtime needed previously.
- Fixes a latent bug: schemas with $ref now correctly emit $defs on
  ToolDefinition.inputSchema (toJsonSchemaDocument's definitions were
  silently dropped before).

src/tool-runtime.ts
- Uses LLMRequest constructor instead of 'as LLMRequest' casts.
- Default tool dispatch concurrency is 10 (was 'unbounded'); exposed via
  RunOptions.concurrency. Unbounded is still available for handlers that
  do not share a saturable resource.
- Drops dead 'usage' state, the single-use Dispatched interface, and the
  DEFAULT_MAX_STEPS constant per the inline-when-used style rule.
- accumulate() now factors text-delta and reasoning-delta into one helper.

test/lib/openai-chunks.ts (new)
- Shared deltaChunk / usageChunk / toolCallChunk / finishChunk helpers.

test/lib/http.ts
- scriptedResponses moved here from tool-runtime.test.ts so future
  multi-step adapter tests can reuse it. Also picks up parallel work that
  swapped HandlerInput to a 'respond' callback for cleaner Response
  construction.

test/tool-runtime.test.ts
- Uses LLMEvent.guards for typed event filtering instead of cast-and-check.
- Concurrent test now uses sseEvents + deltaChunk instead of a hand-rolled
  body string.

Includes parallel callsite updates in test/adapter.test.ts and
test/provider/openai-compatible-chat.test.ts that adopt the 'respond' API
in lib/http.ts.
2026-05-01 08:11:29 -04:00
Kit Langton
6a7735e14c test(llm): cover OpenAI-compatible Chat parity 2026-05-01 08:11:29 -04:00
Kit Langton
3a2cb7f8ac feat(llm): add typed ToolRuntime
Schema-first, Effect-first tool loop:

- 'tool({ description, parameters, success, execute })' constructs a fully
  typed Tool. parameters and success are Effect Schemas; execute is typed
  against them and returns Effect<Success, ToolFailure>. Handler dependencies
  are closed over at construction time so the runtime never sees per-tool
  services.
- 'ToolRuntime.run(client, { request, tools, maxSteps?, stopWhen? })' streams
  the model, decodes tool-call inputs against parameters, dispatches to the
  matching handler, encodes results against success, emits tool-result events,
  appends assistant + tool messages, and re-streams. Stops when the model
  finishes without tool calls, or when maxSteps or stopWhen triggers.
- Three recoverable error paths emit tool-error events so the model can
  self-correct: unknown tool name, input fails parameters Schema, handler
  returns ToolFailure. Defects fail the stream.
- 'ToolFailure' added to the schema and exported as the single forced error
  channel for handlers.
- Tool definitions on the LLMRequest are derived via toJsonSchemaDocument so
  consumers don't write JSON Schema by hand.
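A hedged usage sketch of the API described above; the import path, the shape of the `tools` option, and the Effect 3-style `Schema` calls are assumptions drawn from this commit message.

```ts
import { Effect, Schema } from "effect"
// Export names assumed; `client` and `request` come from the surrounding test setup.
import { ToolRuntime, tool } from "@opencode-ai/llm"

declare const client: any
declare const request: any

const getWeather = tool({
  description: "Look up current weather",
  parameters: Schema.Struct({ city: Schema.String }),
  success: Schema.Struct({ tempC: Schema.Number }),
  // Handler returns Effect<Success, ToolFailure>; a toy success here.
  execute: ({ city }: { city: string }) => Effect.succeed({ tempC: city.length }),
})

const run = ToolRuntime.run(client, { request, tools: [getWeather], maxSteps: 4 })
```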

8 deterministic fixture tests cover the loop, errors, maxSteps, stopWhen, and
parallel tool calls in one step.
2026-05-01 08:11:29 -04:00
Kit Langton
b4a7cf638f feat(llm): add OpenAI-compatible provider helpers 2026-05-01 08:11:29 -04:00
Kit Langton
0cc992fc7c feat(llm): add OpenAI-compatible Chat adapter 2026-05-01 08:11:29 -04:00
Kit Langton
ca29f8a6ef test(llm): cover provider-error events and HTTP sad paths
Locks down the error contract before OpenCode integration:
- mid-stream provider errors (Anthropic 'event: error', OpenAI Responses
  'type: error') surface as 'provider-error' LLMEvents
- HTTP 4xx responses fail with ProviderRequestError before stream parsing
  begins (the executor contract)

Anthropic already had both. Adds:
- OpenAI Responses: provider-error fixture, code-fallback fixture, HTTP 400
- OpenAI Chat: HTTP 400 sad path
- AGENTS.md TODO refreshed; live recordings of provider errors still pending
2026-05-01 08:11:29 -04:00
Kit Langton
afe3990f27 refactor(llm): convert lowerToolChoice helpers to yieldable form
Per the package style guide, sync if/return functions that need to fail
should yield the error directly via Effect.gen rather than ladder
Effect.fail / Effect.succeed across every branch.

Touches all four adapters' tool-choice lowering. The naming-required
validation now reads as 'guard, then return' rather than embedded in a
chain of monadic returns. Behavior unchanged.
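An illustrative before/after of the style rule, using a hypothetical tool-choice check:

```ts
import { Effect } from "effect"

type ToolChoice = { type: string; name?: string }

// Before: ladder Effect.fail / Effect.succeed across every branch.
const before = (choice: ToolChoice) =>
  choice.type === "tool" && !choice.name
    ? Effect.fail(new Error("tool choice requires a name"))
    : Effect.succeed(choice)

// After: guard, then return, with the error yielded directly.
const after = (choice: ToolChoice) =>
  Effect.gen(function* () {
    if (choice.type === "tool" && !choice.name)
      return yield* Effect.fail(new Error("tool choice requires a name"))
    return choice
  })
```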
2026-05-01 08:11:29 -04:00
Kit Langton
8a4699e8e7 refactor(llm): drop vestigial Chunk type and raise step
Every adapter's parse already produces LLMEvents (via the process callback in
the shared sse helper), and every raise was Stream.make(event). The Chunk type
parameter, the raise field, the RaiseState interface, and the Stream.flatMap
raise step in client.stream were all pure overhead.

- Adapter contract shrinks from <Draft, Target, Chunk> to <Draft, Target>.
- All four adapters drop their raise: (event) => Stream.make(event) line.
- client.stream skips the no-op flatMap.
- AGENTS.md adapter section reflects the simpler contract.
2026-05-01 08:11:29 -04:00
Kit Langton
74b2e5781c refactor(llm): remove unused SSE invalid chunk option 2026-05-01 08:11:28 -04:00
Kit Langton
6573673875 docs(llm): mark Responses/Anthropic/Gemini done and outline OpenCode integration
Updates the AGENTS.md TODO list:
- mark Responses, Anthropic, and Gemini adapter coverage as done
- mark the Gemini schema sanitizer port as done
- add concrete next-step items for OpenCode integration: ModelRef bridge,
  request bridge, provider-quirk patches, request/stream parity tests, and
  a flagged rollout against existing session/llm.test.ts cases
- add OpenAI-compatible Chat, Bedrock Converse, and Vertex routing as
  outstanding adapter/dispatch decisions
2026-05-01 08:11:28 -04:00
Kit Langton
3561938e41 feat(llm): port Gemini tool-schema sanitizer as a patch
Gemini rejects integer enums, dangling required fields, untyped arrays, and
object keywords on scalar schemas. The sanitizer was previously a divergent
copy in OpenCode; this lands it in the package as a tool-schema patch with
deterministic tests and selects it for Gemini-protocol or Gemini-named models.
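A rough sketch of the kinds of rewrites such a sanitizer applies; the concrete fixes below are guesses for illustration only, not the patch's actual rules.

```ts
type JsonSchema = Record<string, any>

const sanitizeForGemini = (schema: JsonSchema): JsonSchema => {
  const out: JsonSchema = { ...schema }
  // Integer enums: one plausible fix is stringifying the enum values.
  if (out.type === "integer" && Array.isArray(out.enum))
    Object.assign(out, { type: "string", enum: out.enum.map(String) })
  // Dangling required entries that have no matching property.
  if (Array.isArray(out.required) && out.properties)
    out.required = out.required.filter((key: string) => key in out.properties)
  // Untyped arrays need an items schema.
  if (out.type === "array" && out.items === undefined) out.items = { type: "string" }
  return out
}
```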

Also tightens the Gemini test suite: covers tool-choice none, drops the
tool-input-delta assertion that Gemini does not actually emit, and confirms
total usage stays undefined when only thoughtsTokenCount arrives.
2026-05-01 08:11:28 -04:00
Kit Langton
e476b63a28 refactor(llm): yieldable parser errors and linear runFold
- shared sse helper now expects Effectful decodeChunk and process callbacks,
  so adapter parsers can be Effect.gen and yield typed ProviderChunkError
  instead of throwing across the sync mapAccum boundary.
- parseJson returns Effect<unknown, ProviderChunkError> via Effect.try,
  matching the package style guide on yieldable errors.
- OpenAI Chat finalizes accumulated tool inputs eagerly when finish_reason
  arrives, surfacing JSON parse failures at the boundary instead of at halt.
  onHalt stays sync and just emits from state.
- generate's runFold reducer now mutates the accumulator instead of
  reallocating the events array on every chunk, dropping O(n^2) growth on
  long streams.
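An illustrative before/after of the reducer change (event type elided):

```ts
type Ev = unknown

// Before: copies the accumulated array per chunk (O(n^2) over a long stream).
const foldCopy = (events: Ev[], event: Ev): Ev[] => [...events, event]

// After: mutates the accumulator that runFold threads through (O(n) total).
const foldMutate = (events: Ev[], event: Ev): Ev[] => {
  events.push(event)
  return events
}
```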
2026-05-01 08:11:28 -04:00
Kit Langton
850eeae24c test(llm): cover Gemini stream edge cases 2026-05-01 08:11:28 -04:00
Kit Langton
8d97b38983 feat(llm): add Gemini adapter 2026-05-01 08:11:28 -04:00
Kit Langton
9a05675200 refactor(llm): share provider stream parsing 2026-05-01 08:11:28 -04:00
Kit Langton
0f4e54d6e8 feat(llm): add Anthropic Messages adapter 2026-05-01 08:11:28 -04:00
Kit Langton
aec6c5983d feat(llm): add OpenAI Responses adapter 2026-05-01 08:11:28 -04:00
Kit Langton
18d618d051 test(llm): harden cassette matching and add streaming edge-case coverage
- Structurally match recorded requests by canonical JSON so non-deterministic
  field ordering doesn't break replay.
- Pluggable header allow-list and body redaction hook on the record/replay
  layer, so adapters with non-default auth (Anthropic, Bedrock) can plug in
  without touching this file.
- Move the cassette-name dedupe set inside recordedTests() so two describe
  files using different prefixes can run in parallel.
- Replace inline SSE template literals and per-file HTTP layers with shared
  test/lib helpers (sseEvents, fixedResponse, dynamicResponse, truncatedStream).
- Tighten recorded-test assertions to exact text and usage so adapter parser
  regressions surface immediately instead of passing fuzzy length>0 checks.
- Add cancellation and mid-stream transport-error tests for the OpenAI Chat
  adapter.
- Add cross-phase patch tests that verify each phase sees an updated
  PatchContext and that same-order patches sort deterministically by id.
2026-05-01 08:11:28 -04:00
Kit Langton
412a1bec44 test(llm): clean Effect test utilities 2026-05-01 08:11:28 -04:00
Kit Langton
04468304e7 refactor(llm): simplify adapter execution API 2026-05-01 08:11:28 -04:00
Kit Langton
ca9e0cfa3c test(llm): record OpenAI tool result flow 2026-05-01 08:11:28 -04:00
Kit Langton
f02652353e test(llm): add provider patch coverage 2026-05-01 08:11:28 -04:00
Kit Langton
1e0f6ee242 feat(llm): add adapter registry ergonomics 2026-05-01 08:11:28 -04:00
Kit Langton
36ab9fa584 docs(llm): add package todo list 2026-05-01 08:11:27 -04:00
Kit Langton
d96bf0d566 feat(llm): add OpenAI Chat adapter 2026-05-01 08:11:27 -04:00
Kit Langton
79683710c0 feat(llm): move core to package 2026-05-01 08:11:27 -04:00
Kit Langton
edd176c490 feat(llm): add initial patch API 2026-05-01 08:11:27 -04:00
348 changed files with 37928 additions and 13302 deletions

View File

@@ -1,6 +1,5 @@
name: Bug report
description: Report an issue that should be fixed
labels: ["bug"]
body:
- type: textarea
id: description

View File

@@ -1,6 +1,5 @@
name: 🚀 Feature Request
description: Suggest an idea, feature, or enhancement
labels: [discussion]
title: "[FEATURE]:"
body:

View File

@@ -1,6 +1,5 @@
name: Question
description: Ask a question
labels: ["question"]
body:
- type: textarea
id: question

View File

@@ -11,6 +11,5 @@ MrMushrooooom
nexxeln
R44VC0RP
rekram1-node
RhysSullivan
thdxr
simonklee

41
.github/VOUCHED.td vendored
View File

@@ -1,41 +0,0 @@
# Vouched contributors for this project.
#
# See https://github.com/mitchellh/vouch for details.
#
# Syntax:
# - One handle per line (without @), sorted alphabetically.
# - Optional platform prefix: platform:username (e.g., github:user).
# - Denounce with minus prefix: -username or -platform:username.
# - Optional details after a space following the handle.
adamdotdevin
-agusbasari29 AI PR slop
ariane-emory
-atharvau AI review spamming literally every PR
-borealbytes
-carycooper777
-danieljoshuanazareth
-danieljoshuanazareth
-davidbernat looks to be a clawdbot that spams team and sends super weird emails, doesnt appear to be a real person
dmtrkovalenko
edemaine
fahreddinozcan
-florianleibert
fwang
iamdavidhill
jayair
kitlangton
kommander
-opencode2026
-opencodeengineer bot that spams issues
r44vc0rp
rekram1-node
-ricardo-m-l
-robinmordasiewicz
rubdos
-saisharan0103 spamming ai prs
shantur
simonklee
-spider-yamet clawdbot/llm psychosis, spam pinging the team
-terisuke
thdxr
-toastythebot

View File

@@ -1,170 +0,0 @@
name: daily-issues-recap
on:
schedule:
# Run at 6 PM EST (23:00 UTC, or 22:00 UTC during daylight saving)
- cron: "0 23 * * *"
workflow_dispatch: # Allow manual trigger for testing
jobs:
daily-recap:
runs-on: blacksmith-4vcpu-ubuntu-2404
permissions:
contents: read
issues: read
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 1
- uses: ./.github/actions/setup-bun
- name: Install opencode
run: curl -fsSL https://opencode.ai/install | bash
- name: Generate daily issues recap
id: recap
env:
OPENCODE_API_KEY: ${{ secrets.OPENCODE_API_KEY }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
OPENCODE_PERMISSION: |
{
"bash": {
"*": "deny",
"gh issue*": "allow",
"gh search*": "allow"
},
"webfetch": "deny",
"edit": "deny",
"write": "deny"
}
run: |
# Get today's date range
TODAY=$(date -u +%Y-%m-%d)
opencode run -m opencode/claude-sonnet-4-5 "Generate a daily issues recap for the OpenCode repository.
TODAY'S DATE: ${TODAY}
STEP 1: Gather today's issues
Search for all OPEN issues created today (${TODAY}) using:
gh issue list --repo ${{ github.repository }} --state open --search \"created:${TODAY}\" --json number,title,body,labels,state,comments,createdAt,author --limit 500
IMPORTANT: EXCLUDE all issues authored by Anomaly team members. Filter out issues where the author login matches ANY of these:
adamdotdevin, Brendonovich, fwang, Hona, iamdavidhill, jayair, kitlangton, kommander, MrMushrooooom, R44VC0RP, rekram1-node, thdxr
This recap is specifically for COMMUNITY (external) issues only.
STEP 2: Analyze and categorize
For each issue created today, categorize it:
**Severity Assessment:**
- CRITICAL: Crashes, data loss, security issues, blocks major functionality
- HIGH: Significant bugs affecting many users, important features broken
- MEDIUM: Bugs with workarounds, minor features broken
- LOW: Minor issues, cosmetic, nice-to-haves
**Activity Assessment:**
- Note issues with high comment counts or engagement
- Note issues from repeat reporters (check if author has filed before)
STEP 3: Cross-reference with existing issues
For issues that seem like feature requests or recurring bugs:
- Search for similar older issues to identify patterns
- Note if this is a frequently requested feature
- Identify any issues that are duplicates of long-standing requests
STEP 4: Generate the recap
Create a structured recap with these sections:
===DISCORD_START===
**Daily Issues Recap - ${TODAY}**
**Summary Stats**
- Total issues opened today: [count]
- By category: [bugs/features/questions]
**Critical/High Priority Issues**
[List any CRITICAL or HIGH severity issues with brief descriptions and issue numbers]
**Most Active/Discussed**
[Issues with significant engagement or from active community members]
**Trending Topics**
[Patterns noticed - e.g., 'Multiple reports about X', 'Continued interest in Y feature']
**Duplicates & Related**
[Issues that relate to existing open issues]
===DISCORD_END===
STEP 5: Format for Discord
Format the recap as a Discord-compatible message:
- Use Discord markdown (**, __, etc.)
- BE EXTREMELY CONCISE - this is an EOD summary, not a detailed report
- Use hyperlinked issue numbers with suppressed embeds: [#1234](<https://github.com/${{ github.repository }}/issues/1234>)
- Group related issues on single lines where possible
- Add emoji sparingly for critical items only
- HARD LIMIT: Keep under 1800 characters total
- Skip sections that have nothing notable (e.g., if no critical issues, omit that section)
- Prioritize signal over completeness - only surface what matters
OUTPUT: Output ONLY the content between ===DISCORD_START=== and ===DISCORD_END=== markers. Include the markers so I can extract it." > /tmp/recap_raw.txt
# Extract only the Discord message between markers
sed -n '/===DISCORD_START===/,/===DISCORD_END===/p' /tmp/recap_raw.txt | grep -v '===DISCORD' > /tmp/recap.txt
echo "recap_file=/tmp/recap.txt" >> $GITHUB_OUTPUT
- name: Post to Discord
env:
DISCORD_WEBHOOK_URL: ${{ secrets.DISCORD_ISSUES_WEBHOOK_URL }}
run: |
if [ -z "$DISCORD_WEBHOOK_URL" ]; then
echo "Warning: DISCORD_ISSUES_WEBHOOK_URL secret not set, skipping Discord post"
cat /tmp/recap.txt
exit 0
fi
# Read the recap
RECAP_RAW=$(cat /tmp/recap.txt)
RECAP_LENGTH=${#RECAP_RAW}
echo "Recap length: ${RECAP_LENGTH} chars"
# Function to post a message to Discord
post_to_discord() {
local msg="$1"
local content=$(echo "$msg" | jq -Rs '.')
curl -s -H "Content-Type: application/json" \
-X POST \
-d "{\"content\": ${content}}" \
"$DISCORD_WEBHOOK_URL"
sleep 1
}
# If under limit, send as single message
if [ "$RECAP_LENGTH" -le 1950 ]; then
post_to_discord "$RECAP_RAW"
else
echo "Splitting into multiple messages..."
remaining="$RECAP_RAW"
while [ ${#remaining} -gt 0 ]; do
if [ ${#remaining} -le 1950 ]; then
post_to_discord "$remaining"
break
else
chunk="${remaining:0:1900}"
last_newline=$(echo "$chunk" | grep -bo $'\n' | tail -1 | cut -d: -f1)
if [ -n "$last_newline" ] && [ "$last_newline" -gt 500 ]; then
chunk="${remaining:0:$last_newline}"
remaining="${remaining:$((last_newline+1))}"
else
chunk="${remaining:0:1900}"
remaining="${remaining:1900}"
fi
post_to_discord "$chunk"
fi
done
fi
echo "Posted daily recap to Discord"

View File

@@ -1,173 +0,0 @@
name: daily-pr-recap
on:
schedule:
# Run at 5pm EST (22:00 UTC, or 21:00 UTC during daylight saving)
- cron: "0 22 * * *"
workflow_dispatch: # Allow manual trigger for testing
jobs:
pr-recap:
runs-on: blacksmith-4vcpu-ubuntu-2404
permissions:
contents: read
pull-requests: read
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 1
- uses: ./.github/actions/setup-bun
- name: Install opencode
run: curl -fsSL https://opencode.ai/install | bash
- name: Generate daily PR recap
id: recap
env:
OPENCODE_API_KEY: ${{ secrets.OPENCODE_API_KEY }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
OPENCODE_PERMISSION: |
{
"bash": {
"*": "deny",
"gh pr*": "allow",
"gh search*": "allow"
},
"webfetch": "deny",
"edit": "deny",
"write": "deny"
}
run: |
TODAY=$(date -u +%Y-%m-%d)
opencode run -m opencode/claude-sonnet-4-5 "Generate a daily PR activity recap for the OpenCode repository.
TODAY'S DATE: ${TODAY}
STEP 1: Gather PR data
Run these commands to gather PR information. ONLY include OPEN PRs created or updated TODAY (${TODAY}):
# Open PRs created today
gh pr list --repo ${{ github.repository }} --state open --search \"created:${TODAY}\" --json number,title,author,labels,createdAt,updatedAt,reviewDecision,isDraft,additions,deletions --limit 100
# Open PRs with activity today (updated today)
gh pr list --repo ${{ github.repository }} --state open --search \"updated:${TODAY}\" --json number,title,author,labels,createdAt,updatedAt,reviewDecision,isDraft,additions,deletions --limit 100
IMPORTANT: EXCLUDE all PRs authored by Anomaly team members. Filter out PRs where the author login matches ANY of these:
adamdotdevin, Brendonovich, fwang, Hona, iamdavidhill, jayair, kitlangton, kommander, MrMushrooooom, R44VC0RP, rekram1-node, thdxr
This recap is specifically for COMMUNITY (external) contributions only.
STEP 2: For high-activity PRs, check comment counts
For promising PRs, run:
gh pr view [NUMBER] --repo ${{ github.repository }} --json comments --jq '[.comments[] | select(.author.login != \"copilot-pull-request-reviewer\" and .author.login != \"github-actions\")] | length'
IMPORTANT: When counting comments/activity, EXCLUDE these bot accounts:
- copilot-pull-request-reviewer
- github-actions
STEP 3: Identify what matters (ONLY from today's PRs)
**Bug Fixes From Today:**
- PRs with 'fix' or 'bug' in title created/updated today
- Small bug fixes (< 100 lines changed) that are easy to review
- Bug fixes from community contributors
**High Activity Today:**
- PRs with significant human comments today (excluding bots listed above)
- PRs with back-and-forth discussion today
**Quick Wins:**
- Small PRs (< 50 lines) that are approved or nearly approved
- PRs that just need a final review
STEP 4: Generate the recap
Create a structured recap:
===DISCORD_START===
**Daily PR Recap - ${TODAY}**
**New PRs Today**
[PRs opened today - group by type: bug fixes, features, etc.]
**Active PRs Today**
[PRs with activity/updates today - significant discussion]
**Quick Wins**
[Small PRs ready to merge]
===DISCORD_END===
STEP 5: Format for Discord
- Use Discord markdown (**, __, etc.)
- BE EXTREMELY CONCISE - surface what we might miss
- Use hyperlinked PR numbers with suppressed embeds: [#1234](<https://github.com/${{ github.repository }}/pull/1234>)
- Include PR author: [#1234](<url>) (@author)
- For bug fixes, add brief description of what it fixes
- Show line count for quick wins: \"(+15/-3 lines)\"
- HARD LIMIT: Keep under 1800 characters total
- Skip empty sections
- Focus on PRs that need human eyes
OUTPUT: Output ONLY the content between ===DISCORD_START=== and ===DISCORD_END=== markers. Include the markers so I can extract it." > /tmp/pr_recap_raw.txt
# Extract only the Discord message between markers
sed -n '/===DISCORD_START===/,/===DISCORD_END===/p' /tmp/pr_recap_raw.txt | grep -v '===DISCORD' > /tmp/pr_recap.txt
echo "recap_file=/tmp/pr_recap.txt" >> $GITHUB_OUTPUT
- name: Post to Discord
env:
DISCORD_WEBHOOK_URL: ${{ secrets.DISCORD_ISSUES_WEBHOOK_URL }}
run: |
if [ -z "$DISCORD_WEBHOOK_URL" ]; then
echo "Warning: DISCORD_ISSUES_WEBHOOK_URL secret not set, skipping Discord post"
cat /tmp/pr_recap.txt
exit 0
fi
# Read the recap
RECAP_RAW=$(cat /tmp/pr_recap.txt)
RECAP_LENGTH=${#RECAP_RAW}
echo "Recap length: ${RECAP_LENGTH} chars"
# Function to post a message to Discord
post_to_discord() {
local msg="$1"
local content=$(echo "$msg" | jq -Rs '.')
curl -s -H "Content-Type: application/json" \
-X POST \
-d "{\"content\": ${content}}" \
"$DISCORD_WEBHOOK_URL"
sleep 1
}
# If under limit, send as single message
if [ "$RECAP_LENGTH" -le 1950 ]; then
post_to_discord "$RECAP_RAW"
else
echo "Splitting into multiple messages..."
remaining="$RECAP_RAW"
while [ ${#remaining} -gt 0 ]; do
if [ ${#remaining} -le 1950 ]; then
post_to_discord "$remaining"
break
else
chunk="${remaining:0:1900}"
last_newline=$(echo "$chunk" | grep -bo $'\n' | tail -1 | cut -d: -f1)
if [ -n "$last_newline" ] && [ "$last_newline" -gt 500 ]; then
chunk="${remaining:0:$last_newline}"
remaining="${remaining:$((last_newline+1))}"
else
chunk="${remaining:0:1900}"
remaining="${remaining:1900}"
fi
post_to_discord "$chunk"
fi
done
fi
echo "Posted daily PR recap to Discord"

View File

@@ -1,116 +0,0 @@
name: vouch-check-issue
on:
issues:
types: [opened]
permissions:
contents: read
issues: write
jobs:
check:
runs-on: ubuntu-latest
steps:
- name: Check if issue author is denounced
uses: actions/github-script@v7
with:
script: |
const author = context.payload.issue.user.login;
const issueNumber = context.payload.issue.number;
// Skip bots
if (author.endsWith('[bot]')) {
core.info(`Skipping bot: ${author}`);
return;
}
// Read the VOUCHED.td file via API (no checkout needed)
let content;
try {
const response = await github.rest.repos.getContent({
owner: context.repo.owner,
repo: context.repo.repo,
path: '.github/VOUCHED.td',
});
content = Buffer.from(response.data.content, 'base64').toString('utf-8');
} catch (error) {
if (error.status === 404) {
core.info('No .github/VOUCHED.td file found, skipping check.');
return;
}
throw error;
}
// Parse the .td file for vouched and denounced users
const vouched = new Set();
const denounced = new Map();
for (const line of content.split('\n')) {
const trimmed = line.trim();
if (!trimmed || trimmed.startsWith('#')) continue;
const isDenounced = trimmed.startsWith('-');
const rest = isDenounced ? trimmed.slice(1).trim() : trimmed;
if (!rest) continue;
const spaceIdx = rest.indexOf(' ');
const handle = spaceIdx === -1 ? rest : rest.slice(0, spaceIdx);
const reason = spaceIdx === -1 ? null : rest.slice(spaceIdx + 1).trim();
// Handle platform:username or bare username
// Only match bare usernames or github: prefix (skip other platforms)
const colonIdx = handle.indexOf(':');
if (colonIdx !== -1) {
const platform = handle.slice(0, colonIdx).toLowerCase();
if (platform !== 'github') continue;
}
const username = colonIdx === -1 ? handle : handle.slice(colonIdx + 1);
if (!username) continue;
if (isDenounced) {
denounced.set(username.toLowerCase(), reason);
continue;
}
vouched.add(username.toLowerCase());
}
// Check if the author is denounced
const reason = denounced.get(author.toLowerCase());
if (reason !== undefined) {
// Author is denounced — close the issue
const body = 'This issue has been automatically closed.';
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
body,
});
await github.rest.issues.update({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
state: 'closed',
state_reason: 'not_planned',
});
core.info(`Closed issue #${issueNumber} from denounced user ${author}`);
return;
}
// Author is positively vouched — add label
if (!vouched.has(author.toLowerCase())) {
core.info(`User ${author} is not denounced or vouched. Allowing issue.`);
return;
}
await github.rest.issues.addLabels({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
labels: ['Vouched'],
});
core.info(`Added vouched label to issue #${issueNumber} from ${author}`);

View File

@@ -1,114 +0,0 @@
name: vouch-check-pr
on:
pull_request_target:
types: [opened]
permissions:
contents: read
issues: write
pull-requests: write
jobs:
check:
runs-on: ubuntu-latest
steps:
- name: Check if PR author is denounced
uses: actions/github-script@v7
with:
script: |
const author = context.payload.pull_request.user.login;
const prNumber = context.payload.pull_request.number;
// Skip bots
if (author.endsWith('[bot]')) {
core.info(`Skipping bot: ${author}`);
return;
}
// Read the VOUCHED.td file via API (no checkout needed)
let content;
try {
const response = await github.rest.repos.getContent({
owner: context.repo.owner,
repo: context.repo.repo,
path: '.github/VOUCHED.td',
});
content = Buffer.from(response.data.content, 'base64').toString('utf-8');
} catch (error) {
if (error.status === 404) {
core.info('No .github/VOUCHED.td file found, skipping check.');
return;
}
throw error;
}
// Parse the .td file for vouched and denounced users
const vouched = new Set();
const denounced = new Map();
for (const line of content.split('\n')) {
const trimmed = line.trim();
if (!trimmed || trimmed.startsWith('#')) continue;
const isDenounced = trimmed.startsWith('-');
const rest = isDenounced ? trimmed.slice(1).trim() : trimmed;
if (!rest) continue;
const spaceIdx = rest.indexOf(' ');
const handle = spaceIdx === -1 ? rest : rest.slice(0, spaceIdx);
const reason = spaceIdx === -1 ? null : rest.slice(spaceIdx + 1).trim();
// Handle platform:username or bare username
// Only match bare usernames or github: prefix (skip other platforms)
const colonIdx = handle.indexOf(':');
if (colonIdx !== -1) {
const platform = handle.slice(0, colonIdx).toLowerCase();
if (platform !== 'github') continue;
}
const username = colonIdx === -1 ? handle : handle.slice(colonIdx + 1);
if (!username) continue;
if (isDenounced) {
denounced.set(username.toLowerCase(), reason);
continue;
}
vouched.add(username.toLowerCase());
}
// Check if the author is denounced
const reason = denounced.get(author.toLowerCase());
if (reason !== undefined) {
// Author is denounced — close the PR
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
body: 'This pull request has been automatically closed.',
});
await github.rest.pulls.update({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: prNumber,
state: 'closed',
});
core.info(`Closed PR #${prNumber} from denounced user ${author}`);
return;
}
// Author is positively vouched — add label
if (!vouched.has(author.toLowerCase())) {
core.info(`User ${author} is not denounced or vouched. Allowing PR.`);
return;
}
await github.rest.issues.addLabels({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: prNumber,
labels: ['Vouched'],
});
core.info(`Added vouched label to PR #${prNumber} from ${author}`);

View File

@@ -1,38 +0,0 @@
name: vouch-manage-by-issue
on:
issue_comment:
types: [created]
concurrency:
group: vouch-manage
cancel-in-progress: false
permissions:
contents: write
issues: write
pull-requests: read
jobs:
manage:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
fetch-depth: 0
- name: Setup git committer
id: committer
uses: ./.github/actions/setup-git-committer
with:
opencode-app-id: ${{ vars.OPENCODE_APP_ID }}
opencode-app-secret: ${{ secrets.OPENCODE_APP_SECRET }}
- uses: mitchellh/vouch/action/manage-by-issue@main
with:
issue-id: ${{ github.event.issue.number }}
comment-id: ${{ github.event.comment.id }}
roles: admin,maintain,write
env:
GITHUB_TOKEN: ${{ steps.committer.outputs.token }}

View File

@@ -1,7 +1,7 @@
---
mode: primary
hidden: true
model: opencode/minimax-m2.5
model: opencode/gpt-5.4-nano
color: "#44BA81"
tools:
"*": false
@@ -14,127 +14,30 @@ Use your github-triage tool to triage issues.
This file is the source of truth for ownership/routing rules.
## Labels
Assign issues by choosing the team with the strongest overlap. The github-triage tool will assign a random member from that team.
### windows
Do not add labels to issues. Only assign an owner.
Use for any issue that mentions Windows (the OS). Be sure they are saying that they are on Windows.
When calling github-triage, pass one of these team values: tui, desktop_web, core, inference, windows.
- Use if they mention WSL too
## Teams
#### perf
### TUI
Performance-related issues:
Terminal UI issues, including rendering, keybindings, scrolling, terminal compatibility, SSH behavior, crashes in the TUI, and low-level TUI performance.
- Slow performance
- High RAM usage
- High CPU usage
### Desktop / Web
**Only** add if it's likely a RAM or CPU issue. **Do not** add for LLM slowness.
Desktop application and browser-based app issues, including `opencode web`, desktop-specific UI behavior, packaging, and web view problems.
#### desktop
### Core
Desktop app issues:
Core opencode server and harness issues, including sqlite, snapshots, memory, API behavior, agent context construction, tool execution, provider integrations, model behavior, documentation, and larger architectural features.
- `opencode web` command
- The desktop app itself
### Inference
**Only** add if it's specifically about the Desktop application or `opencode web` view. **Do not** add for terminal, TUI, or general opencode issues.
OpenCode Zen, OpenCode Go, and billing issues.
#### nix
### Windows
**Only** add if the issue explicitly mentions nix.
If the issue does not mention nix, do not add nix.
If the issue mentions nix, assign to `rekram1-node`.
#### zen
**Only** add if the issue mentions "zen" or "opencode zen" or "opencode black".
If the issue doesn't have "zen" or "opencode black" in it then don't add zen label
#### core
Use for core server issues in `packages/opencode/`, excluding `packages/opencode/src/cli/cmd/tui/`.
Examples:
- LSP server behavior
- Harness behavior (agent + tools)
- Feature requests for server behavior
- Agent context construction
- API endpoints
- Provider integration issues
- New, broken, or poor-quality models
#### acp
If the issue mentions acp support, assign acp label.
#### docs
Add if the issue requests better documentation or docs updates.
#### opentui
TUI issues potentially caused by our underlying TUI library:
- Keybindings not working
- Scroll speed issues (too fast/slow/laggy)
- Screen flickering
- Crashes with opentui in the log
**Do not** add for general TUI bugs.
When assigning to people here are the following rules:
Desktop / Web:
Use for desktop-labeled issues only.
- adamdotdevin
- iamdavidhill
- Brendonovich
- nexxeln
Zen:
ONLY assign if the issue will have the "zen" label.
- fwang
- MrMushrooooom
TUI (`packages/opencode/src/cli/cmd/tui/...`):
- thdxr for TUI UX/UI product decisions and interaction flow
- kommander for OpenTUI engine issues: rendering artifacts, keybind handling, terminal compatibility, SSH behavior, and low-level perf bottlenecks
- rekram1-node for TUI bugs that are not clearly OpenTUI engine issues
Core (`packages/opencode/...`, excluding TUI subtree):
- thdxr for sqlite/snapshot/memory bugs and larger architectural core features
- jlongster for opencode server + API feature work (tool currently remaps jlongster -> thdxr until assignable)
- rekram1-node for harness issues, provider issues, and other bug-squashing
For core bugs that do not clearly map, either thdxr or rekram1-node is acceptable.
Docs:
- R44VC0RP
Windows:
- Hona (assign any issue that mentions Windows or is likely Windows-specific)
Determinism rules:
- If title + body does not contain "zen", do not add the "zen" label
- If "nix" label is added but title + body does not mention nix/nixos, the tool will drop "nix"
- If title + body mentions nix/nixos, assign to `rekram1-node`
- If "desktop" label is added, the tool will override assignee and randomly pick one Desktop / Web owner
In all other cases, choose the team/section with the most overlap with the issue and assign a member from that team at random.
ACP:
- rekram1-node (assign any acp issues to rekram1-node)
Windows-specific issues, including native Windows behavior, WSL interactions, path handling, shell compatibility, and installation or runtime problems that only happen on Windows.

View File

@@ -1,16 +1,14 @@
/// <reference path="../env.d.ts" />
import { tool } from "@opencode-ai/plugin"
const TEAM = {
desktop: ["adamdotdevin", "iamdavidhill", "Brendonovich", "nexxeln"],
zen: ["fwang", "MrMushrooooom"],
tui: ["kommander", "rekram1-node", "simonklee"],
core: ["kitlangton", "rekram1-node", "jlongster"],
docs: ["R44VC0RP"],
tui: ["kommander", "simonklee"],
desktop_web: ["Hona", "Brendonovich"],
core: ["jlongster", "rekram1-node", "nexxeln", "kitlangton"],
inference: ["fwang", "MrMushrooooom"],
windows: ["Hona"],
} as const
const ASSIGNEES = [...new Set(Object.values(TEAM).flat())]
function pick<T>(items: readonly T[]) {
return items[Math.floor(Math.random() * items.length)]!
}
@@ -38,79 +36,25 @@ async function githubFetch(endpoint: string, options: RequestInit = {}) {
}
export default tool({
description: `Use this tool to assign and/or label a GitHub issue.
description: `Use this tool to assign a GitHub issue.
Choose labels and assignee using the current triage policy and ownership rules.
Pick the most fitting labels for the issue and assign one owner.
If unsure, choose the team/section with the most overlap with the issue and assign a member from that team at random.`,
Provide the team that should own the issue. This tool picks a random assignee from that team and does not apply labels.`,
args: {
assignee: tool.schema
.enum(ASSIGNEES as [string, ...string[]])
.describe("The username of the assignee")
.default("rekram1-node"),
labels: tool.schema
.array(tool.schema.enum(["nix", "opentui", "perf", "web", "desktop", "zen", "docs", "windows", "core"]))
.describe("The labels(s) to add to the issue")
.default([]),
team: tool.schema
.enum(Object.keys(TEAM) as [keyof typeof TEAM, ...(keyof typeof TEAM)[]])
.describe("The owning team"),
},
async execute(args) {
const issue = getIssueNumber()
const owner = "anomalyco"
const repo = "opencode"
const results: string[] = []
let labels = [...new Set(args.labels.map((x) => (x === "desktop" ? "web" : x)))]
const web = labels.includes("web")
const text = `${process.env.ISSUE_TITLE ?? ""}\n${process.env.ISSUE_BODY ?? ""}`.toLowerCase()
const zen = /\bzen\b/.test(text) || text.includes("opencode black")
const nix = /\bnix(os)?\b/.test(text)
if (labels.includes("nix") && !nix) {
labels = labels.filter((x) => x !== "nix")
results.push("Dropped label: nix (issue does not mention nix)")
}
const assignee = nix ? "rekram1-node" : web ? pick(TEAM.desktop) : args.assignee
if (labels.includes("zen") && !zen) {
throw new Error("Only add the zen label when issue title/body contains 'zen'")
}
if (web && !nix && !(TEAM.desktop as readonly string[]).includes(assignee)) {
throw new Error("Web issues must be assigned to adamdotdevin, iamdavidhill, Brendonovich, or nexxeln")
}
if ((TEAM.zen as readonly string[]).includes(assignee) && !labels.includes("zen")) {
throw new Error("Only zen issues should be assigned to fwang or MrMushrooooom")
}
if (assignee === "Hona" && !labels.includes("windows")) {
throw new Error("Only windows issues should be assigned to Hona")
}
if (assignee === "R44VC0RP" && !labels.includes("docs")) {
throw new Error("Only docs issues should be assigned to R44VC0RP")
}
if (assignee === "kommander" && !labels.includes("opentui")) {
throw new Error("Only opentui issues should be assigned to kommander")
}
const assignee = pick(TEAM[args.team])
await githubFetch(`/repos/${owner}/${repo}/issues/${issue}/assignees`, {
method: "POST",
body: JSON.stringify({ assignees: [assignee] }),
})
results.push(`Assigned @${assignee} to issue #${issue}`)
if (labels.length > 0) {
await githubFetch(`/repos/${owner}/${repo}/issues/${issue}/labels`, {
method: "POST",
body: JSON.stringify({ labels }),
})
results.push(`Added labels: ${labels.join(", ")}`)
}
return results.join("\n")
return `Assigned @${assignee} from ${args.team} to issue #${issue}`
},
})

View File

@@ -132,7 +132,7 @@ It's very similar to Claude Code in terms of capability. Here are the key differ
- 100% open source
- Not coupled to any provider. Although we recommend the models we provide through [OpenCode Zen](https://opencode.ai/zen), OpenCode can be used with Claude, OpenAI, Google, or even local models. As models evolve, the gaps between them will close and pricing will drop, so being provider-agnostic is important.
- Out-of-the-box LSP support
- Built-in opt-in LSP support
- A focus on TUI. OpenCode is built by neovim users and the creators of [terminal.shop](https://terminal.shop); we are going to push the limits of what's possible in the terminal.
- A client/server architecture. This, for example, can allow OpenCode to run on your computer while you drive it remotely from a mobile app, meaning that the TUI frontend is just one of the possible clients.

View File

@@ -29,7 +29,7 @@
},
"packages/app": {
"name": "@opencode-ai/app",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"@kobalte/core": "catalog:",
"@opencode-ai/core": "workspace:*",
@@ -85,7 +85,7 @@
},
"packages/console/app": {
"name": "@opencode-ai/console-app",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"@cloudflare/vite-plugin": "1.15.2",
"@ibm/plex": "6.4.1",
@@ -119,7 +119,7 @@
},
"packages/console/core": {
"name": "@opencode-ai/console-core",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"@aws-sdk/client-sts": "3.782.0",
"@jsx-email/render": "1.1.1",
@@ -146,7 +146,7 @@
},
"packages/console/function": {
"name": "@opencode-ai/console-function",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"@ai-sdk/anthropic": "3.0.64",
"@ai-sdk/openai": "3.0.48",
@@ -170,7 +170,7 @@
},
"packages/console/mail": {
"name": "@opencode-ai/console-mail",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"@jsx-email/all": "2.2.3",
"@jsx-email/cli": "1.4.3",
@@ -194,7 +194,7 @@
},
"packages/core": {
"name": "@opencode-ai/core",
"version": "1.14.31",
"version": "1.14.33",
"bin": {
"opencode": "./bin/opencode",
},
@@ -228,7 +228,7 @@
},
"packages/desktop": {
"name": "@opencode-ai/desktop",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"@opencode-ai/app": "workspace:*",
"@opencode-ai/ui": "workspace:*",
@@ -263,7 +263,7 @@
},
"packages/desktop-electron": {
"name": "@opencode-ai/desktop-electron",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"drizzle-orm": "catalog:",
"effect": "catalog:",
@@ -309,7 +309,7 @@
},
"packages/enterprise": {
"name": "@opencode-ai/enterprise",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"@opencode-ai/core": "workspace:*",
"@opencode-ai/ui": "workspace:*",
@@ -338,7 +338,7 @@
},
"packages/function": {
"name": "@opencode-ai/function",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"@octokit/auth-app": "8.0.1",
"@octokit/rest": "catalog:",
@@ -352,9 +352,39 @@
"typescript": "catalog:",
},
},
"packages/http-recorder": {
"name": "@opencode-ai/http-recorder",
"version": "0.0.0",
"dependencies": {
"@effect/platform-node": "catalog:",
"effect": "catalog:",
},
"devDependencies": {
"@tsconfig/bun": "catalog:",
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:",
},
},
"packages/llm": {
"name": "@opencode-ai/llm",
"version": "1.14.25",
"dependencies": {
"@smithy/eventstream-codec": "4.2.14",
"@smithy/util-utf8": "4.2.2",
"aws4fetch": "1.0.20",
"effect": "catalog:",
},
"devDependencies": {
"@effect/platform-node": "catalog:",
"@opencode-ai/http-recorder": "workspace:*",
"@tsconfig/bun": "catalog:",
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:",
},
},
"packages/opencode": {
"name": "opencode",
"version": "1.14.31",
"version": "1.14.33",
"bin": {
"opencode": "./bin/opencode",
},
@@ -396,6 +426,7 @@
"@octokit/graphql": "9.0.2",
"@octokit/rest": "catalog:",
"@openauthjs/openauth": "catalog:",
"@opencode-ai/llm": "workspace:*",
"@opencode-ai/plugin": "workspace:*",
"@opencode-ai/script": "workspace:*",
"@opencode-ai/sdk": "workspace:*",
@@ -496,7 +527,7 @@
},
"packages/plugin": {
"name": "@opencode-ai/plugin",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"@opencode-ai/sdk": "workspace:*",
"effect": "catalog:",
@@ -531,7 +562,7 @@
},
"packages/sdk/js": {
"name": "@opencode-ai/sdk",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"cross-spawn": "catalog:",
},
@@ -546,7 +577,7 @@
},
"packages/slack": {
"name": "@opencode-ai/slack",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"@opencode-ai/sdk": "workspace:*",
"@slack/bolt": "^3.17.1",
@@ -581,7 +612,7 @@
},
"packages/ui": {
"name": "@opencode-ai/ui",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"@kobalte/core": "catalog:",
"@opencode-ai/core": "workspace:*",
@@ -630,7 +661,7 @@
},
"packages/web": {
"name": "@opencode-ai/web",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"@astrojs/cloudflare": "12.6.3",
"@astrojs/markdown-remark": "6.3.1",
@@ -715,7 +746,7 @@
"dompurify": "3.3.1",
"drizzle-kit": "1.0.0-beta.19-d95b7a4",
"drizzle-orm": "1.0.0-beta.19-d95b7a4",
"effect": "4.0.0-beta.57",
"effect": "4.0.0-beta.59",
"fuzzysort": "3.1.0",
"hono": "4.10.7",
"hono-openapi": "1.1.2",
@@ -1576,6 +1607,10 @@
"@opencode-ai/function": ["@opencode-ai/function@workspace:packages/function"],
"@opencode-ai/http-recorder": ["@opencode-ai/http-recorder@workspace:packages/http-recorder"],
"@opencode-ai/llm": ["@opencode-ai/llm@workspace:packages/llm"],
"@opencode-ai/plugin": ["@opencode-ai/plugin@workspace:packages/plugin"],
"@opencode-ai/script": ["@opencode-ai/script@workspace:packages/script"],
@@ -3078,7 +3113,7 @@
"ee-first": ["ee-first@1.1.1", "", {}, "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow=="],
"effect": ["effect@4.0.0-beta.57", "", { "dependencies": { "@standard-schema/spec": "^1.1.0", "fast-check": "^4.6.0", "find-my-way-ts": "^0.1.6", "ini": "^6.0.0", "kubernetes-types": "^1.30.0", "msgpackr": "^1.11.9", "multipasta": "^0.2.7", "toml": "^4.1.1", "uuid": "^13.0.0", "yaml": "^2.8.3" } }, "sha512-rg32VgXnLKaPRs9tbRDaZ5jxmzNY7ojXt85gSHGUTwdlbWH5Ik+OCUY2q14TXliygPGoHwCAvNWS4bQJOqf00g=="],
"effect": ["effect@4.0.0-beta.59", "", { "dependencies": { "@standard-schema/spec": "^1.1.0", "fast-check": "^4.6.0", "find-my-way-ts": "^0.1.6", "ini": "^6.0.0", "kubernetes-types": "^1.30.0", "msgpackr": "^1.11.9", "multipasta": "^0.2.7", "toml": "^4.1.1", "uuid": "^13.0.0", "yaml": "^2.8.3" } }, "sha512-xyUDLeHSe8d6lWGOvR6Fgn2HL6gYeTZ/S4Jzk9uc4ZUxMPPsNZlNXrvk0C7/utQFzeX7uAWcVnG2BjbA0SRoAA=="],
"ejs": ["ejs@3.1.10", "", { "dependencies": { "jake": "^10.8.5" }, "bin": { "ejs": "bin/cli.js" } }, "sha512-UeJmFfOrAQS8OJWPZ4qtgHyWExa088/MtK5UEyoJGFH67cDEXkZSviOiKRCZ4Xij0zxI3JECgYs3oKx+AizQBA=="],
@@ -5636,6 +5671,10 @@
"@opencode-ai/desktop-electron/typescript": ["typescript@5.6.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw=="],
"@opencode-ai/llm/@smithy/eventstream-codec": ["@smithy/eventstream-codec@4.2.14", "", { "dependencies": { "@aws-crypto/crc32": "5.2.0", "@smithy/types": "^4.14.1", "@smithy/util-hex-encoding": "^4.2.2", "tslib": "^2.6.2" } }, "sha512-erZq0nOIpzfeZdCyzZjdJb4nVSKLUmSkaQUVkRGQTXs30gyUGeKnrYEg+Xe1W5gE3aReS7IgsvANwVPxSzY6Pw=="],
"@opencode-ai/llm/@smithy/util-utf8": ["@smithy/util-utf8@4.2.2", "", { "dependencies": { "@smithy/util-buffer-from": "^4.2.2", "tslib": "^2.6.2" } }, "sha512-75MeYpjdWRe8M5E3AW0O4Cx3UadweS+cwdXjwYGBW5h/gxxnbeZ877sLPX/ZJA9GVTlL/qG0dXP29JWFCD1Ayw=="],
"@opencode-ai/ui/@solid-primitives/resize-observer": ["@solid-primitives/resize-observer@2.1.3", "", { "dependencies": { "@solid-primitives/event-listener": "^2.4.3", "@solid-primitives/rootless": "^1.5.2", "@solid-primitives/static-store": "^0.1.2", "@solid-primitives/utils": "^6.3.2" }, "peerDependencies": { "solid-js": "^1.6.12" } }, "sha512-zBLje5E06TgOg93S7rGPldmhDnouNGhvfZVKOp+oG2XU8snA+GoCSSCz1M+jpNAg5Ek2EakU5UVQqL152WmdXQ=="],
"@opencode-ai/web/@shikijs/transformers": ["@shikijs/transformers@3.20.0", "", { "dependencies": { "@shikijs/core": "3.20.0", "@shikijs/types": "3.20.0" } }, "sha512-PrHHMRr3Q5W1qB/42kJW6laqFyWdhrPF2hNR9qjOm1xcSiAO3hAHo7HaVyHE6pMyevmy3i51O8kuGGXC78uK3g=="],
@@ -6622,6 +6661,8 @@
"@opencode-ai/desktop/@actions/artifact/@actions/http-client": ["@actions/http-client@2.2.3", "", { "dependencies": { "tunnel": "^0.0.6", "undici": "^5.25.4" } }, "sha512-mx8hyJi/hjFvbPokCg4uRd4ZX78t+YyRPtnKWwIl+RzNaVuFpQHfmlGVfsKEJN8LwTCvL+DfVgAM04XaHkm6bA=="],
"@opencode-ai/llm/@smithy/eventstream-codec/@smithy/types": ["@smithy/types@4.14.1", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-59b5HtSVrVR/eYNei3BUj3DCPKD/G7EtDDe7OEJE7i7FtQFugYo6MxbotS8mVJkLNVf8gYaAlEBwwtJ9HzhWSg=="],
"@opencode-ai/web/@shikijs/transformers/@shikijs/core": ["@shikijs/core@3.20.0", "", { "dependencies": { "@shikijs/types": "3.20.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4", "hast-util-to-html": "^9.0.5" } }, "sha512-f2ED7HYV4JEk827mtMDwe/yQ25pRiXZmtHjWF8uzZKuKiEsJR7Ce1nuQ+HhV9FzDcbIo4ObBCD9GPTzNuy9S1g=="],
"@opencode-ai/web/@shikijs/transformers/@shikijs/types": ["@shikijs/types@3.20.0", "", { "dependencies": { "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } }, "sha512-lhYAATn10nkZcBQ0BlzSbJA3wcmL5MXUUF8d2Zzon6saZDlToKaiRX60n2+ZaHJCmXEcZRWNzn+k9vplr8Jhsw=="],

View File

@@ -1,8 +1,8 @@
{
"nodeModules": {
"x86_64-linux": "sha256-SLWRe4uPSRWgU+NPa1BywmrUtNVIC0Oy2mjmxclxk+s=",
"aarch64-linux": "sha256-toHEeIqMzrmThoV0B52juGKm4pa/aJN3gBFFtrSZp2Q=",
"aarch64-darwin": "sha256-lYUsUxq5zR2RXjqZTEdjduOncnlwvTlxDJVKWXJuKPY=",
"x86_64-darwin": "sha256-77XmuEYqGwb1mkEHfnghq1VtukFTneohA0FW6WDOk1U="
"x86_64-linux": "sha256-9wTDLZsuGjkWyVOb6AG2VRYPiaSj/lnXwVkSwNeDcns=",
"aarch64-linux": "sha256-gmKlL2fQxY8bo+//8m9e1TNYJK3RXa4i8xsgtd046bc=",
"aarch64-darwin": "sha256-ENSJK+7rZi3m342mjtGg9N0P6zWEypXMpI7QdFMydbc=",
"x86_64-darwin": "sha256-gkxCxGh5dlwj03vZdz20pbiAwFEDpAlu/5iU8cwZOGI="
}
}

View File

@@ -55,7 +55,6 @@ stdenvNoCC.mkDerivation {
--filter './packages/opencode' \
--filter './packages/desktop' \
--filter './packages/app' \
--filter './packages/shared' \
--frozen-lockfile \
--ignore-scripts \
--no-progress

View File

@@ -53,7 +53,7 @@
"dompurify": "3.3.1",
"drizzle-kit": "1.0.0-beta.19-d95b7a4",
"drizzle-orm": "1.0.0-beta.19-d95b7a4",
"effect": "4.0.0-beta.57",
"effect": "4.0.0-beta.59",
"ai": "6.0.168",
"cross-spawn": "7.0.6",
"hono": "4.10.7",

View File

@@ -1,6 +1,6 @@
{
"name": "@opencode-ai/app",
"version": "1.14.31",
"version": "1.14.33",
"description": "",
"type": "module",
"exports": {

View File

@@ -1,6 +1,6 @@
{
"name": "@opencode-ai/console-app",
"version": "1.14.31",
"version": "1.14.33",
"type": "module",
"license": "MIT",
"scripts": {

View File

@@ -1,7 +1,7 @@
{
"$schema": "https://json.schemastore.org/package.json",
"name": "@opencode-ai/console-core",
"version": "1.14.31",
"version": "1.14.33",
"private": true,
"type": "module",
"license": "MIT",

View File

@@ -1,6 +1,6 @@
{
"name": "@opencode-ai/console-function",
"version": "1.14.31",
"version": "1.14.33",
"$schema": "https://json.schemastore.org/package.json",
"private": true,
"type": "module",

View File

@@ -1,6 +1,6 @@
{
"name": "@opencode-ai/console-mail",
"version": "1.14.31",
"version": "1.14.33",
"dependencies": {
"@jsx-email/all": "2.2.3",
"@jsx-email/cli": "1.4.3",

View File

@@ -1,6 +1,6 @@
{
"$schema": "https://json.schemastore.org/package.json",
"version": "1.14.31",
"version": "1.14.33",
"name": "@opencode-ai/core",
"type": "module",
"license": "MIT",

View File

@@ -1,4 +1,5 @@
import { Config } from "effect"
import { InstallationChannel } from "../installation/version"
function truthy(key: string) {
const value = process.env[key]?.toLowerCase()
@@ -10,6 +11,10 @@ function falsy(key: string) {
return value === "false" || value === "0"
}
// Channels that default to the new effect-httpapi server backend. The legacy
// hono backend remains the default for stable (`prod`/`latest`) installs.
const HTTPAPI_DEFAULT_ON_CHANNELS = new Set(["dev", "beta", "local"])
function number(key: string) {
const value = process.env[key]
if (!value) return undefined
@@ -67,6 +72,13 @@ export const Flag = {
OPENCODE_ENABLE_EXA: truthy("OPENCODE_ENABLE_EXA") || OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_EXA"),
OPENCODE_EXPERIMENTAL_BASH_DEFAULT_TIMEOUT_MS: number("OPENCODE_EXPERIMENTAL_BASH_DEFAULT_TIMEOUT_MS"),
OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX: number("OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX"),
// Opt-in to the LLM-native stream path in `session/llm.ts`. Today this
// routes a narrow slice of sessions (text-only, Anthropic, with explicit
// `nativeMessages` populated by the caller) through the
// `@opencode-ai/llm` core stack instead of `streamText` from the AI SDK.
// Everything else falls through to the existing path. The flag will go
// away once parity is proven across all six protocols.
OPENCODE_EXPERIMENTAL_LLM_NATIVE: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_LLM_NATIVE"),
OPENCODE_EXPERIMENTAL_OXFMT: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_OXFMT"),
OPENCODE_EXPERIMENTAL_LSP_TY: truthy("OPENCODE_EXPERIMENTAL_LSP_TY"),
OPENCODE_EXPERIMENTAL_LSP_TOOL: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_LSP_TOOL"),
@@ -81,8 +93,16 @@ export const Flag = {
OPENCODE_STRICT_CONFIG_DEPS: truthy("OPENCODE_STRICT_CONFIG_DEPS"),
OPENCODE_WORKSPACE_ID: process.env["OPENCODE_WORKSPACE_ID"],
OPENCODE_EXPERIMENTAL_HTTPAPI: truthy("OPENCODE_EXPERIMENTAL_HTTPAPI"),
// Defaults to true on dev/beta/local channels so internal users exercise the
// new effect-httpapi server backend. Stable (`prod`/`latest`) installs stay
// on the legacy hono backend until the rollout is complete. An explicit env
// var ("true"/"1" or "false"/"0") always wins, providing an opt-in for
// stable users and an escape hatch for dev/beta users.
OPENCODE_EXPERIMENTAL_HTTPAPI:
truthy("OPENCODE_EXPERIMENTAL_HTTPAPI") ||
(!falsy("OPENCODE_EXPERIMENTAL_HTTPAPI") && HTTPAPI_DEFAULT_ON_CHANNELS.has(InstallationChannel)),
OPENCODE_EXPERIMENTAL_WORKSPACES: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_WORKSPACES"),
OPENCODE_EXPERIMENTAL_EVENT_SYSTEM: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_EVENT_SYSTEM"),
// Evaluated at access time (not module load) because tests, the CLI, and
// external tooling set these env vars at runtime.

View File

@@ -1,3 +1,5 @@
export * as Log from "./log"
import path from "path"
import fs from "fs/promises"
import { createWriteStream } from "fs"

View File

@@ -1,7 +1,7 @@
{
"name": "@opencode-ai/desktop-electron",
"private": true,
"version": "1.14.31",
"version": "1.14.33",
"type": "module",
"license": "MIT",
"homepage": "https://opencode.ai",

View File

@@ -26,13 +26,20 @@ const applyZoom = (next: number) => {
window.addEventListener("keydown", (event) => {
if (!(OS_NAME === "macos" ? event.metaKey : event.ctrlKey)) return
let newZoom = webviewZoom()
if (event.key === "-") newZoom -= 0.2
if (event.key === "=" || event.key === "+") newZoom += 0.2
if (event.key === "0") newZoom = 1
applyZoom(clamp(newZoom))
if (event.key === "-") {
event.preventDefault()
applyZoom(clamp(webviewZoom() - 0.2))
return
}
if (event.key === "=" || event.key === "+") {
event.preventDefault()
applyZoom(clamp(webviewZoom() + 0.2))
return
}
if (event.key === "0") {
event.preventDefault()
applyZoom(1)
}
})
export { webviewZoom }

View File

@@ -1,7 +1,7 @@
{
"name": "@opencode-ai/desktop",
"private": true,
"version": "1.14.31",
"version": "1.14.33",
"type": "module",
"license": "MIT",
"scripts": {

View File

@@ -1,6 +1,6 @@
{
"name": "@opencode-ai/enterprise",
"version": "1.14.31",
"version": "1.14.33",
"private": true,
"type": "module",
"license": "MIT",

View File

@@ -1,7 +1,7 @@
id = "opencode"
name = "OpenCode"
description = "The open source coding agent."
version = "1.14.31"
version = "1.14.33"
schema_version = 1
authors = ["Anomaly"]
repository = "https://github.com/anomalyco/opencode"
@@ -11,26 +11,26 @@ name = "OpenCode"
icon = "./icons/opencode.svg"
[agent_servers.opencode.targets.darwin-aarch64]
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.31/opencode-darwin-arm64.zip"
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.33/opencode-darwin-arm64.zip"
cmd = "./opencode"
args = ["acp"]
[agent_servers.opencode.targets.darwin-x86_64]
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.31/opencode-darwin-x64.zip"
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.33/opencode-darwin-x64.zip"
cmd = "./opencode"
args = ["acp"]
[agent_servers.opencode.targets.linux-aarch64]
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.31/opencode-linux-arm64.tar.gz"
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.33/opencode-linux-arm64.tar.gz"
cmd = "./opencode"
args = ["acp"]
[agent_servers.opencode.targets.linux-x86_64]
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.31/opencode-linux-x64.tar.gz"
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.33/opencode-linux-x64.tar.gz"
cmd = "./opencode"
args = ["acp"]
[agent_servers.opencode.targets.windows-x86_64]
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.31/opencode-windows-x64.zip"
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.33/opencode-windows-x64.zip"
cmd = "./opencode.exe"
args = ["acp"]

View File

@@ -1,6 +1,6 @@
{
"name": "@opencode-ai/function",
"version": "1.14.31",
"version": "1.14.33",
"$schema": "https://json.schemastore.org/package.json",
"private": true,
"type": "module",

View File

@@ -0,0 +1,26 @@
{
"$schema": "https://json.schemastore.org/package.json",
"version": "0.0.0",
"name": "@opencode-ai/http-recorder",
"type": "module",
"license": "MIT",
"private": true,
"scripts": {
"test": "bun test --timeout 30000",
"test:ci": "mkdir -p .artifacts/unit && bun test --timeout 30000 --reporter=junit --reporter-outfile=.artifacts/unit/junit.xml",
"typecheck": "tsgo --noEmit"
},
"exports": {
".": "./src/index.ts",
"./*": "./src/*.ts"
},
"devDependencies": {
"@tsconfig/bun": "catalog:",
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:"
},
"dependencies": {
"@effect/platform-node": "catalog:",
"effect": "catalog:"
}
}

View File

@@ -0,0 +1,90 @@
import { Option } from "effect"
import { Headers, HttpBody, HttpClientRequest, UrlParams } from "effect/unstable/http"
import { decodeJson } from "./matching"
import { REDACTED, redactUrl, secretFindings } from "./redaction"
import type { Cassette, RequestSnapshot } from "./schema"
const safeText = (value: unknown) => {
if (value === undefined) return "undefined"
if (secretFindings(value).length > 0) return JSON.stringify(REDACTED)
const text = typeof value === "string" ? JSON.stringify(value) : JSON.stringify(value)
if (!text) return String(value)
return text.length > 300 ? `${text.slice(0, 300)}...` : text
}
const jsonBody = (body: string) => Option.getOrUndefined(decodeJson(body))
const valueDiffs = (expected: unknown, received: unknown, base = "$", limit = 8): ReadonlyArray<string> => {
if (Object.is(expected, received)) return []
if (
expected &&
received &&
typeof expected === "object" &&
typeof received === "object" &&
!Array.isArray(expected) &&
!Array.isArray(received)
) {
return [...new Set([...Object.keys(expected), ...Object.keys(received)])]
.toSorted()
.flatMap((key) =>
valueDiffs(
(expected as Record<string, unknown>)[key],
(received as Record<string, unknown>)[key],
`${base}.${key}`,
limit,
),
)
.slice(0, limit)
}
if (Array.isArray(expected) && Array.isArray(received)) {
return Array.from({ length: Math.max(expected.length, received.length) }, (_, index) => index)
.flatMap((index) => valueDiffs(expected[index], received[index], `${base}[${index}]`, limit))
.slice(0, limit)
}
return [`${base} expected ${safeText(expected)}, received ${safeText(received)}`]
}
const headerDiffs = (expected: Record<string, string>, received: Record<string, string>) =>
[...new Set([...Object.keys(expected), ...Object.keys(received)])].toSorted().flatMap((key) => {
if (expected[key] === received[key]) return []
if (expected[key] === undefined) return [` ${key} unexpected ${safeText(received[key])}`]
if (received[key] === undefined) return [` ${key} missing expected ${safeText(expected[key])}`]
return [` ${key} expected ${safeText(expected[key])}, received ${safeText(received[key])}`]
})
export const requestDiff = (expected: RequestSnapshot, received: RequestSnapshot) => {
const lines = []
if (expected.method !== received.method) {
lines.push("method:", ` expected ${expected.method}, received ${received.method}`)
}
if (expected.url !== received.url) {
lines.push("url:", ` expected ${expected.url}`, ` received ${received.url}`)
}
const headers = headerDiffs(expected.headers, received.headers)
if (headers.length > 0) lines.push("headers:", ...headers.slice(0, 8))
const expectedBody = jsonBody(expected.body)
const receivedBody = jsonBody(received.body)
const body = expectedBody !== undefined && receivedBody !== undefined
? valueDiffs(expectedBody, receivedBody).map((line) => ` ${line}`)
: expected.body === received.body
? []
: [` expected ${safeText(expected.body)}, received ${safeText(received.body)}`]
if (body.length > 0) lines.push("body:", ...body)
return lines
}
export const mismatchDetail = (cassette: Cassette, incoming: RequestSnapshot) => {
if (cassette.interactions.length === 0) return "cassette has no recorded interactions"
const ranked = cassette.interactions
.map((interaction, index) => ({ index, lines: requestDiff(interaction.request, incoming) }))
.toSorted((a, b) => a.lines.length - b.lines.length || a.index - b.index)
const best = ranked[0]
return [
"no recorded interaction matched",
`closest interaction: #${best.index + 1}`,
...best.lines,
].join("\n")
}
export const redactedErrorRequest = (request: HttpClientRequest.HttpClientRequest) =>
HttpClientRequest.makeWith(request.method, redactUrl(request.url), UrlParams.empty, Option.none(), Headers.empty, HttpBody.empty)

View File

@@ -0,0 +1,192 @@
import { NodeFileSystem } from "@effect/platform-node"
import { Effect, FileSystem, Layer, Option, Ref } from "effect"
import {
FetchHttpClient,
HttpClient,
HttpClientError,
HttpClientRequest,
HttpClientResponse,
} from "effect/unstable/http"
import * as path from "node:path"
import { redactedErrorRequest, mismatchDetail, requestDiff } from "./diff"
import { defaultMatcher, decodeJson, type RequestMatcher } from "./matching"
import { cassetteSecretFindings, redactHeaders, redactUrl, type SecretFinding } from "./redaction"
import type { Cassette, CassetteMetadata, Interaction, ResponseSnapshot } from "./schema"
import { cassetteFor, cassettePath, formatCassette, parseCassette } from "./storage"
const isRecordMode = process.env.RECORD === "true"
export const DEFAULT_REQUEST_HEADERS: ReadonlyArray<string> = ["content-type", "accept", "openai-beta"]
const DEFAULT_RESPONSE_HEADERS: ReadonlyArray<string> = ["content-type"]
export interface RecordReplayOptions {
readonly directory?: string
readonly metadata?: CassetteMetadata
readonly redact?: {
readonly headers?: ReadonlyArray<string>
readonly query?: ReadonlyArray<string>
}
readonly requestHeaders?: ReadonlyArray<string>
readonly responseHeaders?: ReadonlyArray<string>
readonly redactBody?: (body: unknown) => unknown
readonly dispatch?: "match" | "sequential"
readonly match?: RequestMatcher
}
const responseHeaders = (
response: HttpClientResponse.HttpClientResponse,
allow: ReadonlyArray<string>,
redact: ReadonlyArray<string> | undefined,
) => {
const merged = redactHeaders(response.headers as Record<string, string>, allow, redact)
if (!merged["content-type"]) merged["content-type"] = "text/event-stream"
return merged
}
const BINARY_CONTENT_TYPES: ReadonlyArray<string> = ["vnd.amazon.eventstream", "octet-stream"]
const isBinaryContentType = (contentType: string | undefined) => {
if (!contentType) return false
const lower = contentType.toLowerCase()
return BINARY_CONTENT_TYPES.some((token) => lower.includes(token))
}
const captureResponseBody = (
response: HttpClientResponse.HttpClientResponse,
contentType: string | undefined,
) =>
isBinaryContentType(contentType)
? response.arrayBuffer.pipe(
Effect.map((bytes) => ({ body: Buffer.from(bytes).toString("base64"), bodyEncoding: "base64" as const })),
)
: response.text.pipe(Effect.map((body) => ({ body })))
const decodeResponseBody = (snapshot: ResponseSnapshot) =>
snapshot.bodyEncoding === "base64" ? Buffer.from(snapshot.body, "base64") : snapshot.body
const fixtureMissing = (request: HttpClientRequest.HttpClientRequest, name: string) =>
new HttpClientError.HttpClientError({
reason: new HttpClientError.TransportError({
request: redactedErrorRequest(request),
description: `Fixture "${name}" not found. Run with RECORD=true to create it.`,
}),
})
const fixtureMismatch = (request: HttpClientRequest.HttpClientRequest, name: string, detail: string) =>
new HttpClientError.HttpClientError({
reason: new HttpClientError.TransportError({
request: redactedErrorRequest(request),
description: `Fixture "${name}" does not match the current request: ${detail}. Run with RECORD=true to update it.`,
}),
})
const unsafeCassette = (
request: HttpClientRequest.HttpClientRequest,
name: string,
findings: ReadonlyArray<SecretFinding>,
) =>
new HttpClientError.HttpClientError({
reason: new HttpClientError.TransportError({
request: redactedErrorRequest(request),
description: `Refusing to write cassette "${name}" because it contains possible secrets: ${findings
.map((item) => `${item.path} (${item.reason})`)
.join(", ")}`,
}),
})
export const cassetteLayer = (
name: string,
options: RecordReplayOptions = {},
): Layer.Layer<HttpClient.HttpClient> =>
Layer.effect(
HttpClient.HttpClient,
Effect.gen(function* () {
const upstream = yield* HttpClient.HttpClient
const fileSystem = yield* FileSystem.FileSystem
const file = cassettePath(name, options.directory)
const dir = path.dirname(file)
const requestHeadersAllow = options.requestHeaders ?? DEFAULT_REQUEST_HEADERS
const responseHeadersAllow = options.responseHeaders ?? DEFAULT_RESPONSE_HEADERS
const match = options.match ?? defaultMatcher
const sequential = options.dispatch === "sequential"
const recorded = yield* Ref.make<ReadonlyArray<Interaction>>([])
const replay = yield* Ref.make<Cassette | undefined>(undefined)
const cursor = yield* Ref.make(0)
const snapshotRequest = (request: HttpClientRequest.HttpClientRequest) =>
Effect.gen(function* () {
const web = yield* HttpClientRequest.toWeb(request).pipe(Effect.orDie)
const raw = yield* Effect.promise(() => web.text())
const body = options.redactBody
? Option.match(decodeJson(raw), {
onNone: () => raw,
onSome: (parsed) => JSON.stringify(options.redactBody?.(parsed)),
})
: raw
return {
method: web.method,
url: redactUrl(web.url, options.redact?.query),
headers: redactHeaders(Object.fromEntries(web.headers.entries()), requestHeadersAllow, options.redact?.headers),
body,
}
})
const selectInteraction = (cassette: Cassette, incoming: Interaction["request"]) =>
Effect.gen(function* () {
if (sequential) {
const index = yield* Ref.get(cursor)
const interaction = cassette.interactions[index]
if (!interaction) return { interaction, detail: `interaction ${index + 1} of ${cassette.interactions.length} not recorded` }
if (!match(incoming, interaction.request)) {
return { interaction: undefined, detail: requestDiff(interaction.request, incoming).join("\n") }
}
yield* Ref.update(cursor, (n) => n + 1)
return { interaction, detail: "" }
}
const interaction = cassette.interactions.find((candidate) => match(incoming, candidate.request))
return { interaction, detail: interaction ? "" : mismatchDetail(cassette, incoming) }
})
const loadReplay = (request: HttpClientRequest.HttpClientRequest) =>
Effect.gen(function* () {
const cached = yield* Ref.get(replay)
if (cached) return cached
const cassette = parseCassette(
yield* fileSystem.readFileString(file).pipe(Effect.mapError(() => fixtureMissing(request, name))),
)
yield* Ref.set(replay, cassette)
return cassette
})
return HttpClient.make((request) => {
if (isRecordMode) {
return Effect.gen(function* () {
const currentRequest = yield* snapshotRequest(request)
const response = yield* upstream.execute(request)
const headers = responseHeaders(response, responseHeadersAllow, options.redact?.headers)
const captured = yield* captureResponseBody(response, headers["content-type"])
const interaction: Interaction = {
request: currentRequest,
response: { status: response.status, headers, ...captured },
}
const interactions = yield* Ref.updateAndGet(recorded, (prev) => [...prev, interaction])
const cassette = cassetteFor(name, interactions, options.metadata)
const findings = cassetteSecretFindings(cassette)
if (findings.length > 0) return yield* unsafeCassette(request, name, findings)
yield* fileSystem.makeDirectory(dir, { recursive: true }).pipe(Effect.orDie)
yield* fileSystem.writeFileString(file, formatCassette(cassette)).pipe(Effect.orDie)
return HttpClientResponse.fromWeb(request, new Response(decodeResponseBody(interaction.response), interaction.response))
})
}
return Effect.gen(function* () {
const cassette = yield* loadReplay(request)
const incoming = yield* snapshotRequest(request)
const { interaction, detail } = yield* selectInteraction(cassette, incoming)
if (!interaction) return yield* fixtureMismatch(request, name, detail)
return HttpClientResponse.fromWeb(request, new Response(decodeResponseBody(interaction.response), interaction.response))
})
})
}),
).pipe(Layer.provide(FetchHttpClient.layer), Layer.provide(NodeFileSystem.layer))

View File

@@ -0,0 +1,8 @@
export * from "./schema"
export * from "./redaction"
export * from "./matching"
export * from "./diff"
export * from "./storage"
export * from "./effect"
export * as HttpRecorder from "."

View File

@@ -0,0 +1,33 @@
import { Option, Schema } from "effect"
import type { RequestSnapshot } from "./schema"
const JsonValue = Schema.fromJsonString(Schema.Unknown)
export const decodeJson = Schema.decodeUnknownOption(JsonValue)
const canonicalize = (value: unknown): unknown => {
if (Array.isArray(value)) return value.map(canonicalize)
if (value !== null && typeof value === "object") {
return Object.fromEntries(
Object.keys(value as Record<string, unknown>)
.toSorted()
.map((key) => [key, canonicalize((value as Record<string, unknown>)[key])]),
)
}
return value
}
export type RequestMatcher = (incoming: RequestSnapshot, recorded: RequestSnapshot) => boolean
export const canonicalSnapshot = (snapshot: RequestSnapshot): string =>
JSON.stringify({
method: snapshot.method,
url: snapshot.url,
headers: canonicalize(snapshot.headers),
body: Option.match(decodeJson(snapshot.body), {
onNone: () => snapshot.body,
onSome: canonicalize,
}),
})
export const defaultMatcher: RequestMatcher = (incoming, recorded) =>
canonicalSnapshot(incoming) === canonicalSnapshot(recorded)

View File

@@ -0,0 +1,110 @@
import type { Cassette } from "./schema"
export const REDACTED = "[REDACTED]"
const DEFAULT_REDACT_HEADERS = [
"authorization",
"cookie",
"proxy-authorization",
"set-cookie",
"x-api-key",
"x-amz-security-token",
"x-goog-api-key",
]
const DEFAULT_REDACT_QUERY = [
"access_token",
"api-key",
"api_key",
"apikey",
"code",
"key",
"signature",
"sig",
"token",
"x-amz-credential",
"x-amz-security-token",
"x-amz-signature",
]
const SECRET_PATTERNS: ReadonlyArray<{ readonly label: string; readonly pattern: RegExp }> = [
{ label: "bearer token", pattern: /\bBearer\s+[A-Za-z0-9._~+/=-]{16,}\b/i },
{ label: "API key", pattern: /\bsk-[A-Za-z0-9][A-Za-z0-9_-]{20,}\b/ },
{ label: "Anthropic API key", pattern: /\bsk-ant-[A-Za-z0-9_-]{20,}\b/ },
{ label: "Google API key", pattern: /\bAIza[0-9A-Za-z_-]{20,}\b/ },
{ label: "AWS access key", pattern: /\b(?:AKIA|ASIA)[0-9A-Z]{16}\b/ },
{ label: "GitHub token", pattern: /\bgh[pousr]_[A-Za-z0-9_]{20,}\b/ },
{ label: "private key", pattern: /-----BEGIN [A-Z ]*PRIVATE KEY-----/ },
]
const ENV_SECRET_NAMES = /(?:API|AUTH|BEARER|CREDENTIAL|KEY|PASSWORD|SECRET|TOKEN)/i
const SAFE_ENV_VALUES = new Set(["fixture", "test", "test-key"])
const envSecrets = () =>
Object.entries(process.env).flatMap(([name, value]) => {
if (!value) return []
if (!ENV_SECRET_NAMES.test(name)) return []
if (value.length < 12) return []
if (SAFE_ENV_VALUES.has(value.toLowerCase())) return []
return [{ name, value }]
})
const pathFor = (base: string, key: string) => (base ? `${base}.${key}` : key)
const stringEntries = (value: unknown, base = ""): ReadonlyArray<{ readonly path: string; readonly value: string }> => {
if (typeof value === "string") return [{ path: base, value }]
if (Array.isArray(value)) return value.flatMap((item, index) => stringEntries(item, `${base}[${index}]`))
if (value && typeof value === "object") {
return Object.entries(value).flatMap(([key, child]) => stringEntries(child, pathFor(base, key)))
}
return []
}
const redactionSet = (values: ReadonlyArray<string> | undefined, defaults: ReadonlyArray<string>) =>
new Set([...defaults, ...(values ?? [])].map((value) => value.toLowerCase()))
export const redactUrl = (raw: string, query: ReadonlyArray<string> = DEFAULT_REDACT_QUERY) => {
if (!URL.canParse(raw)) return raw
const url = new URL(raw)
if (url.username) url.username = REDACTED
if (url.password) url.password = REDACTED
const redacted = redactionSet(query, DEFAULT_REDACT_QUERY)
for (const key of [...url.searchParams.keys()]) {
if (redacted.has(key.toLowerCase())) url.searchParams.set(key, REDACTED)
}
return url.toString()
}
export const redactHeaders = (
headers: Record<string, string>,
allow: ReadonlyArray<string>,
redact: ReadonlyArray<string> = DEFAULT_REDACT_HEADERS,
) => {
const allowed = new Set(allow.map((name) => name.toLowerCase()))
const redacted = redactionSet(redact, DEFAULT_REDACT_HEADERS)
return Object.fromEntries(
Object.entries(headers)
.map(([name, value]) => [name.toLowerCase(), value] as const)
.filter(([name]) => allowed.has(name))
.map(([name, value]) => [name, redacted.has(name) ? REDACTED : value] as const)
.toSorted(([a], [b]) => a.localeCompare(b)),
)
}
export type SecretFinding = {
readonly path: string
readonly reason: string
}
export const secretFindings = (value: unknown): ReadonlyArray<SecretFinding> =>
stringEntries(value).flatMap((entry) => [
...SECRET_PATTERNS.filter((item) => item.pattern.test(entry.value)).map((item) => ({
path: entry.path,
reason: item.label,
})),
...envSecrets()
.filter((item) => entry.value.includes(item.value))
.map((item) => ({ path: entry.path, reason: `environment secret ${item.name}` })),
])
export const cassetteSecretFindings = (cassette: Cassette) => secretFindings(cassette)

View File

@@ -0,0 +1,36 @@
import { Schema } from "effect"
export const RequestSnapshotSchema = Schema.Struct({
method: Schema.String,
url: Schema.String,
headers: Schema.Record(Schema.String, Schema.String),
body: Schema.String,
})
export type RequestSnapshot = Schema.Schema.Type<typeof RequestSnapshotSchema>
export const ResponseSnapshotSchema = Schema.Struct({
status: Schema.Number,
headers: Schema.Record(Schema.String, Schema.String),
body: Schema.String,
bodyEncoding: Schema.optional(Schema.Literals(["text", "base64"])),
})
export type ResponseSnapshot = Schema.Schema.Type<typeof ResponseSnapshotSchema>
export const InteractionSchema = Schema.Struct({
request: RequestSnapshotSchema,
response: ResponseSnapshotSchema,
})
export type Interaction = Schema.Schema.Type<typeof InteractionSchema>
export const CassetteMetadataSchema = Schema.Record(Schema.String, Schema.Unknown)
export type CassetteMetadata = Schema.Schema.Type<typeof CassetteMetadataSchema>
export const CassetteSchema = Schema.Struct({
version: Schema.Literal(1),
metadata: Schema.optional(CassetteMetadataSchema),
interactions: Schema.Array(InteractionSchema),
})
export type Cassette = Schema.Schema.Type<typeof CassetteSchema>
export const decodeCassette = Schema.decodeUnknownSync(CassetteSchema)
export const encodeCassette = Schema.encodeSync(CassetteSchema)

View File

@@ -0,0 +1,34 @@
import { Option } from "effect"
import * as fs from "node:fs"
import * as path from "node:path"
import { encodeCassette, decodeCassette, type Cassette, type CassetteMetadata, type Interaction } from "./schema"
export const DEFAULT_RECORDINGS_DIR = path.resolve(process.cwd(), "test", "fixtures", "recordings")
export const cassettePath = (name: string, directory = DEFAULT_RECORDINGS_DIR) => path.join(directory, `${name}.json`)
const metadataFor = (name: string, metadata: CassetteMetadata | undefined): CassetteMetadata => ({
name,
recordedAt: new Date().toISOString(),
...(metadata ?? {}),
})
export const cassetteFor = (
name: string,
interactions: ReadonlyArray<Interaction>,
metadata: CassetteMetadata | undefined,
): Cassette => ({
version: 1,
metadata: metadataFor(name, metadata),
interactions,
})
export const formatCassette = (cassette: Cassette) => `${JSON.stringify(encodeCassette(cassette), null, 2)}\n`
export const parseCassette = (raw: string) => decodeCassette(JSON.parse(raw))
export const hasCassetteSync = (name: string, options: { readonly directory?: string } = {}) => {
const file = cassettePath(name, options.directory)
if (!fs.existsSync(file)) return false
return Option.isSome(Option.liftThrowable(parseCassette)(fs.readFileSync(file, "utf8")))
}

View File

@@ -0,0 +1,39 @@
{
"version": 1,
"interactions": [
{
"request": {
"method": "POST",
"url": "https://example.test/echo",
"headers": {
"content-type": "application/json"
},
"body": "{\"step\":1}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/json"
},
"body": "{\"reply\":\"first\"}"
}
},
{
"request": {
"method": "POST",
"url": "https://example.test/echo",
"headers": {
"content-type": "application/json"
},
"body": "{\"step\":2}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/json"
},
"body": "{\"reply\":\"second\"}"
}
}
]
}

View File

@@ -0,0 +1,39 @@
{
"version": 1,
"interactions": [
{
"request": {
"method": "POST",
"url": "https://example.test/poll",
"headers": {
"content-type": "application/json"
},
"body": "{\"id\":\"job_1\"}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/json"
},
"body": "{\"status\":\"pending\"}"
}
},
{
"request": {
"method": "POST",
"url": "https://example.test/poll",
"headers": {
"content-type": "application/json"
},
"body": "{\"id\":\"job_1\"}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/json"
},
"body": "{\"status\":\"complete\"}"
}
}
]
}

View File

@@ -0,0 +1,194 @@
import { describe, expect, test } from "bun:test"
import { Cause, Effect, Exit } from "effect"
import { HttpBody, HttpClient, HttpClientRequest } from "effect/unstable/http"
import { HttpRecorder } from "../src"
import { redactedErrorRequest } from "../src/diff"
const post = (url: string, body: object) =>
Effect.gen(function* () {
const http = yield* HttpClient.HttpClient
const request = HttpClientRequest.post(url, {
headers: { "content-type": "application/json" },
body: HttpBody.text(JSON.stringify(body), "application/json"),
})
const response = yield* http.execute(request)
return yield* response.text
})
const run = <A, E>(effect: Effect.Effect<A, E, HttpClient.HttpClient>) =>
Effect.runPromise(effect.pipe(Effect.provide(HttpRecorder.cassetteLayer("record-replay/multi-step"))))
const runWith = <A, E>(name: string, options: HttpRecorder.RecordReplayOptions, effect: Effect.Effect<A, E, HttpClient.HttpClient>) =>
Effect.runPromise(effect.pipe(Effect.provide(HttpRecorder.cassetteLayer(name, options))))
const failureText = (exit: Exit.Exit<unknown, unknown>) => {
if (Exit.isSuccess(exit)) return ""
return Cause.prettyErrors(exit.cause).join("\n")
}
describe("http-recorder", () => {
test("redacts sensitive URL query parameters", () => {
expect(
HttpRecorder.redactUrl(
"https://example.test/path?key=secret-google-key&api_key=secret-openai-key&safe=value&X-Amz-Signature=secret-signature",
),
).toBe(
"https://example.test/path?key=%5BREDACTED%5D&api_key=%5BREDACTED%5D&safe=value&X-Amz-Signature=%5BREDACTED%5D",
)
})
test("redacts URL credentials", () => {
expect(HttpRecorder.redactUrl("https://user:password@example.test/path?safe=value")).toBe(
"https://%5BREDACTED%5D:%5BREDACTED%5D@example.test/path?safe=value",
)
})
test("redacts sensitive headers when allow-listed", () => {
expect(
HttpRecorder.redactHeaders(
{
authorization: "Bearer secret-token",
"content-type": "application/json",
"x-custom-token": "custom-secret",
"x-api-key": "secret-key",
"x-goog-api-key": "secret-google-key",
},
["authorization", "content-type", "x-api-key", "x-goog-api-key", "x-custom-token"],
["x-custom-token"],
),
).toEqual({
authorization: "[REDACTED]",
"content-type": "application/json",
"x-api-key": "[REDACTED]",
"x-custom-token": "[REDACTED]",
"x-goog-api-key": "[REDACTED]",
})
})
test("redacts error requests without retaining headers, params, or body", () => {
const request = HttpClientRequest.post("https://example.test/path", {
headers: { authorization: "Bearer super-secret" },
body: HttpBody.text("super-secret-body", "text/plain"),
}).pipe(HttpClientRequest.setUrlParam("api_key", "super-secret-key"))
expect(redactedErrorRequest(request).toJSON()).toMatchObject({
url: "https://example.test/path",
urlParams: { params: [] },
headers: {},
body: { _tag: "Empty" },
})
})
test("detects secret-looking values without returning the secret", () => {
expect(
HttpRecorder.cassetteSecretFindings({
version: 1,
interactions: [
{
request: {
method: "POST",
url: "https://example.test/path?key=sk-123456789012345678901234",
headers: {},
body: JSON.stringify({ nested: "AIzaSyDHibiBRvJZLsFnPYPoiTwxY4ztQ55yqCE" }),
},
response: {
status: 200,
headers: {},
body: "Bearer abcdefghijklmnopqrstuvwxyz",
},
},
],
}),
).toEqual([
{ path: "interactions[0].request.url", reason: "API key" },
{ path: "interactions[0].request.body", reason: "Google API key" },
{ path: "interactions[0].response.body", reason: "bearer token" },
])
})
test("detects secret-looking values inside metadata", () => {
expect(
HttpRecorder.cassetteSecretFindings({
version: 1,
metadata: { token: "sk-123456789012345678901234" },
interactions: [],
}),
).toEqual([{ path: "metadata.token", reason: "API key" }])
})
test("default matcher dispatches multi-interaction cassettes by request shape", async () => {
await run(
Effect.gen(function* () {
expect(yield* post("https://example.test/echo", { step: 2 })).toBe('{"reply":"second"}')
expect(yield* post("https://example.test/echo", { step: 1 })).toBe('{"reply":"first"}')
}),
)
})
test("sequential dispatch returns recorded responses in order for identical requests", async () => {
await runWith(
"record-replay/retry",
{ dispatch: "sequential" },
Effect.gen(function* () {
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"complete"}')
}),
)
})
test("default matcher returns the first match for identical requests", async () => {
await runWith(
"record-replay/retry",
{},
Effect.gen(function* () {
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
}),
)
})
test("sequential dispatch reports cursor exhaustion when more requests are made than recorded", async () => {
await runWith(
"record-replay/multi-step",
{ dispatch: "sequential" },
Effect.gen(function* () {
yield* post("https://example.test/echo", { step: 1 })
yield* post("https://example.test/echo", { step: 2 })
const exit = yield* Effect.exit(post("https://example.test/echo", { step: 3 }))
expect(Exit.isFailure(exit)).toBe(true)
}),
)
})
test("sequential dispatch still validates each recorded request", async () => {
await runWith(
"record-replay/multi-step",
{ dispatch: "sequential" },
Effect.gen(function* () {
yield* post("https://example.test/echo", { step: 1 })
const exit = yield* Effect.exit(post("https://example.test/echo", { step: 3 }))
expect(Exit.isFailure(exit)).toBe(true)
expect(failureText(exit)).toContain("$.step expected 2, received 3")
expect(yield* post("https://example.test/echo", { step: 2 })).toBe('{"reply":"second"}')
}),
)
})
test("mismatch diagnostics show closest redacted request differences", async () => {
await run(
Effect.gen(function* () {
const exit = yield* Effect.exit(
post("https://example.test/echo?api_key=secret-value", { step: 3, token: "sk-123456789012345678901234" }),
)
const message = failureText(exit)
expect(message).toContain("closest interaction: #1")
expect(message).toContain("url:")
expect(message).toContain("https://example.test/echo?api_key=%5BREDACTED%5D")
expect(message).toContain("body:")
expect(message).toContain('$.step expected 1, received 3')
expect(message).toContain('$.token expected undefined, received "[REDACTED]"')
expect(message).not.toContain("sk-123456789012345678901234")
}),
)
})
})

View File

@@ -0,0 +1,14 @@
{
"$schema": "https://json.schemastore.org/tsconfig",
"extends": "@tsconfig/bun/tsconfig.json",
"compilerOptions": {
"noUncheckedIndexedAccess": false,
"plugins": [
{
"name": "@effect/language-service",
"transform": "@effect/language-service/transform",
"namespaceImportPackages": ["effect", "@effect/*"]
}
]
}
}

323
packages/llm/AGENTS.md Normal file
View File

@@ -0,0 +1,323 @@
# LLM Package Guide
## Effect
- Prefer `HttpClient.HttpClient` / `HttpClientResponse.HttpClientResponse` over web `fetch` / `Response` at package boundaries.
- Use `Stream.Stream` for streaming transformations. Avoid ad hoc async generators or manual web reader loops unless an Effect `Stream` API cannot model the behavior.
- Use Effect Schema codecs for JSON encode/decode (`Schema.fromJsonString(...)`) instead of direct `JSON.parse` / `JSON.stringify` in implementation code.
- In `Effect.gen`, yield yieldable errors directly (`return yield* new MyError(...)`) instead of `Effect.fail(new MyError(...))`.
- Use `Effect.void` instead of `Effect.succeed(undefined)` when the successful value is intentionally void.
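A hedged composite of the JSON-codec and yieldable-error guidelines above (not code from this package; `InvalidRequestError`'s import path and constructor fields are assumptions, standing in for the real error classes in `src/schema.ts`):
```ts
import { Effect, Option, Schema } from "effect"
import { InvalidRequestError } from "./schema" // assumed path and constructor shape

const Payload = Schema.fromJsonString(Schema.Struct({ id: Schema.String }))
const decodePayload = Schema.decodeUnknownOption(Payload)

const requireId = (raw: string) =>
  Effect.gen(function* () {
    const payload = decodePayload(raw)
    // Yield the error directly rather than wrapping it in Effect.fail(...).
    if (Option.isNone(payload)) return yield* new InvalidRequestError({ message: "invalid payload" })
    return payload.value.id
  })
```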
## Tests
- Use `testEffect(...)` from `test/lib/effect.ts` for tests requiring Effect layers.
- Keep provider tests fixture-first. Live provider calls must stay behind `RECORD=true` and required API-key checks.
## Architecture
This package is an Effect Schema-first LLM core. The Schema classes in `src/schema.ts` are the canonical runtime data model. Convenience functions in `src/llm.ts` are thin constructors that return those same Schema class instances; they should improve callsites without creating a second model.
### Request Flow
The intended callsite is:
```ts
const request = LLM.request({
model: OpenAIChat.model({ id: "gpt-4o-mini", apiKey }),
system: "You are concise.",
prompt: "Say hello.",
})
const response = yield* LLMClient.make({ adapters: [OpenAIChat.adapter] }).generate(request)
```
`LLM.request(...)` builds an `LLMRequest`. `LLMClient.make(...)` selects an adapter by `request.model.protocol`, applies patches, prepares a typed provider target, asks the adapter for a real `HttpClientRequest.HttpClientRequest`, sends it through `RequestExecutor.Service`, parses the provider stream into common `LLMEvent`s, and finally returns an `LLMResponse`.
Use `LLMClient.make(...).stream(request)` when callers want incremental `LLMEvent`s. Use `LLMClient.make(...).generate(request)` when callers want those same events collected into an `LLMResponse`. Use `LLMClient.make(...).prepare<Target>(request)` to compile a request through the adapter pipeline without sending it — the optional `Target` type argument narrows `.target` to the adapter's native shape (e.g. `prepare<OpenAIChatTarget>(...)` returns a `PreparedRequestOf<OpenAIChatTarget>`). The runtime payload is identical; the generic is a type-level assertion.
Filter or narrow `LLMEvent` streams with `LLMEvent.is.*` (camelCase guards, e.g. `events.filter(LLMEvent.is.toolCall)`). The kebab-case `LLMEvent.guards["tool-call"]` form also works, but prefer `is.*` in new code.
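For example, a hedged sketch (building on the request-flow snippet above, with `Stream` imported from `effect`) of consuming the streaming path and keeping only tool-call events:
```ts
// Sketch only: assumes `request` from the snippet above and an Effect.gen context.
const client = LLMClient.make({ adapters: [OpenAIChat.adapter] })
const toolCalls = yield* client.stream(request).pipe(
  Stream.filter(LLMEvent.is.toolCall),
  Stream.runCollect,
)
```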
### Adapters
An adapter is the registered, runnable composition of four orthogonal pieces:
- **`Protocol`** (`src/protocol.ts`) — semantic API contract. Owns request lowering, target validation, body encoding, and the streaming chunk-to-event state machine. Examples: `OpenAIChat.protocol`, `OpenAIResponses.protocol`, `AnthropicMessages.protocol`, `Gemini.protocol`, `BedrockConverse.protocol`.
- **`Endpoint`** (`src/endpoint.ts`) — URL construction. Receives the request and the validated target so it can read `model.id`, `model.baseURL`, `model.queryParams`, and any target field that influences the URL (e.g. Bedrock's `modelId` segment). Reach for `Endpoint.baseURL({ default, path })` before hand-rolling a URL.
- **`Auth`** (`src/auth.ts`) — per-request transport authentication. Adapters read `model.apiKey` at request time via `Auth.bearer` (the `Adapter.fromProtocol` default; sets `Authorization: Bearer <apiKey>`) or `Auth.apiKeyHeader(name)` for providers that use a custom header (Anthropic `x-api-key`, Gemini `x-goog-api-key`). Adapters that need per-request signing (Bedrock SigV4, future Vertex IAM, Azure AAD) implement `Auth` as a function that signs the body and merges signed headers into the result.
- **`Framing`** (`src/framing.ts`) — bytes → frames. SSE (`Framing.sse`) is shared; Bedrock keeps its AWS event-stream framing as a typed `Framing<object>` value alongside its protocol.
Compose them via `Adapter.fromProtocol(...)`:
```ts
export const adapter = Adapter.fromProtocol({
id: "openai-chat",
protocol: OpenAIChat.protocol,
endpoint: Endpoint.baseURL({ default: "https://api.openai.com/v1", path: "/chat/completions" }),
framing: Framing.sse,
})
```
The four-axis decomposition is the reason DeepSeek, TogetherAI, Cerebras, Baseten, Fireworks, and DeepInfra all reuse `OpenAIChat.protocol` verbatim — each provider deployment is a 5-15 line `Adapter.fromProtocol(...)` call instead of a 300-400 line adapter clone. Bug fixes in one protocol propagate to every consumer of that protocol in a single commit.
Reach for the lower-level `Adapter.unsafe(...)` only when an adapter genuinely cannot fit the four-axis model. The name signals that you're escaping the safe abstraction; new adapters should always start with `Adapter.fromProtocol(...)` and prove that they need something else.
When a provider ships a non-HTTP transport (OpenAI's WebSocket-based Codex backend, hypothetical bidirectional streaming APIs), the seam is `Framing` plus a parallel `Endpoint` / `Auth` interpretation — not a fork of the adapter contract.
### Folder layout
```
packages/llm/src/
schema.ts // LLMRequest, LLMEvent, errors — canonical Schema model
llm.ts // request constructors and convenience helpers
adapter.ts // Adapter.fromProtocol + LLMClient.make
executor.ts // RequestExecutor service + transport error mapping
patch.ts // Patch system (request/prompt/tool-schema/target/stream)
protocol.ts // Protocol type + Protocol.define
endpoint.ts // Endpoint type + Endpoint.baseURL
auth.ts // Auth type + Auth.bearer / Auth.apiKeyHeader / Auth.passthrough
framing.ts // Framing type + Framing.sse
provider/
shared.ts // ProviderShared toolkit used inside protocol impls
patch.ts // ProviderPatch helpers (defaults, capability gates)
openai-chat.ts // protocol + adapter (compose OpenAIChat.protocol)
openai-responses.ts
anthropic-messages.ts
gemini.ts
bedrock-converse.ts
openai-compatible-chat.ts // adapter that reuses OpenAIChat.protocol
openai-compatible-family.ts // family lookups (deepseek, togetherai, ...)
azure.ts / amazon-bedrock.ts / google.ts / ... // ProviderResolver entries
provider-resolver.ts // OpenCode-bridge resolver layer
tool.ts // typed tool() helper
tool-runtime.ts // ToolRuntime.run with full tool-loop type safety
```
The dependency arrow points down: `provider/*.ts` files import `protocol`, `endpoint`, `auth`, `framing` and never the other direction. Lower-level modules know nothing about specific providers.
### Shared adapter helpers
`ProviderShared` exports a small toolkit used inside protocol implementations to keep them focused on provider-native shapes:
- `framed({ adapter, response, readError, framing, decodeChunk, initial, process, onHalt? })` — the canonical streaming pipeline used by `Adapter.fromProtocol(...)`. You rarely call this directly anymore.
- `sseFraming` — the SSE-specific framing step. Already wired through `Framing.sse`; reach for it directly only when wrapping or composing.
- `joinText(parts)` — joins an array of `TextPart` (or anything with a `.text`) with newlines. Use this anywhere a protocol flattens text content into a single string for a provider field.
- `parseToolInput(adapter, name, raw)` — Schema-decodes a tool-call argument string with the canonical "Invalid JSON input for `<adapter>` tool call `<name>`" error message. Treats empty input as `{}`. Use this in `finishToolCall` / `finalizeToolCalls`; do not roll a fresh `parseJson` callsite. See the sketch at the end of this section.
- `parseJson(adapter, raw, message)` — generic JSON-via-Schema decode for non-tool payloads.
- `chunkError(adapter, message, ...)` — typed `ProviderChunkError` constructor for stream-time failures.
- `validateWith(decoder)` — lifts a Schema decode effect into the protocol's `validate` shape, mapping parse errors to `InvalidRequestError`.
- `codecs({ adapter, draft, target, chunk, chunkErrorMessage })` — the encode/decode bundle each protocol needs (request body encode, draft → target validate, chunk decode).
If you find yourself copying a 3-to-5-line snippet between two protocols, lift it into `ProviderShared` next to these helpers rather than duplicating.
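A hedged sketch of the `parseToolInput` shape described above; it assumes the helper is effectful and takes the adapter id as a string, and `toolCallId`, `toolName`, and `rawArguments` are illustrative locals rather than real protocol internals:
```ts
// Sketch only, inside a protocol's finishToolCall-style step (Effect.gen context assumed).
// Empty input decodes to {}; invalid JSON fails with the canonical
// "Invalid JSON input for ... tool call ..." message.
const input = yield* ProviderShared.parseToolInput("openai-chat", toolName, rawArguments)
const call = LLM.toolCall({ id: toolCallId, name: toolName, input })
```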
### Patches
Patches are the forcing function for provider/model quirks. If a behavior is not universal enough for common IR, keep it as a named patch with a trace entry. Good examples:
- OpenAI Chat streaming usage: `target.openai-chat.include-usage` adds `stream_options.include_usage`.
- Anthropic prompt caching: map common cache hints onto selected content/message blocks.
- Mistral/OpenAI-compatible prompt cleanup: normalize empty text content or tool-call IDs only for affected models.
- Reasoning models: map common reasoning intent to provider-specific effort, summary, or encrypted-content fields.
Do not grow common request schemas just to fit one provider. Prefer adapter-local target schemas plus patches selected by provider/model predicates.
### Tools
Tool loops are represented in common messages and events:
```ts
const call = LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })
const result = LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } })
const followUp = LLM.request({
model,
messages: [LLM.user("Weather?"), LLM.assistant([call]), result],
})
```
Adapters lower this into provider-native assistant tool-call messages and tool-result messages. Streaming providers should emit `tool-input-delta` events while arguments arrive, then a final `tool-call` event with parsed input.
### Tool runtime
`ToolRuntime.run(client, options)` orchestrates the tool loop with full type safety:
```ts
const get_weather = tool({
description: "Get current weather for a city",
parameters: Schema.Struct({ city: Schema.String }),
success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }),
execute: ({ city }) =>
Effect.gen(function* () {
// city: string — typed from parameters Schema
const data = yield* WeatherApi.fetch(city)
return { temperature: data.temp, condition: data.cond }
// return type checked against success Schema
}),
})
const events = yield* ToolRuntime.run(client, {
request,
tools: { get_weather, get_time, ... },
maxSteps: 10,
stopWhen: (state) => false,
}).pipe(Stream.runCollect)
```
The runtime:
- Adds tool definitions (derived from each tool's `parameters` Schema via `Schema.toJsonSchemaDocument`) onto `request.tools`.
- Streams the model.
- On `tool-call`: looks up the named tool, decodes input against `parameters` Schema, dispatches to the typed `execute`, encodes the result against `success` Schema, emits `tool-result`.
- Loops when the step finishes with `tool-calls`, appending the assistant + tool messages.
- Stops on a non-`tool-calls` finish, when `maxSteps` is reached, or when `stopWhen` returns `true`.
Handler dependencies (services, permissions, plugin hooks, abort handling) are closed over by the consumer at tool-construction time. The runtime's only environment requirement is `RequestExecutor.Service`. Build the tools record inside an `Effect.gen` once and reuse it across many runs:
```ts
const tools = Effect.gen(function* () {
const fs = yield* FileSystem
const permission = yield* Permission
return {
read_file: tool({
...
execute: ({ path }) =>
Effect.gen(function* () {
yield* permission.ask({ tool: "read_file", path })
return { content: yield* fs.readFile(path) }
}),
}),
}
})
```
Errors must be expressed as `ToolFailure`. The runtime catches it and emits a `tool-error` event, then a `tool-result` of `type: "error"`, so the model can self-correct on the next step. Anything that is not a `ToolFailure` is treated as a defect and fails the stream. Three recoverable error paths produce `tool-error` events:
- The model called an unknown tool name.
- Input failed the `parameters` Schema.
- The handler returned a `ToolFailure`.
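A hedged sketch of the last path above; the `ToolFailure` constructor fields shown are assumed rather than taken from the package, and `WeatherApi` is the same illustrative service as in the earlier example:
```ts
const get_weather = tool({
  description: "Get current weather for a city",
  parameters: Schema.Struct({ city: Schema.String }),
  success: Schema.Struct({ temperature: Schema.Number }),
  execute: ({ city }) =>
    WeatherApi.fetch(city).pipe(
      // Recoverable: the runtime turns this into a tool-error event plus an error
      // tool-result, so the model can self-correct on the next step.
      Effect.mapError((cause) => new ToolFailure({ message: `weather lookup failed: ${String(cause)}` })),
      Effect.map((data) => ({ temperature: data.temp })),
    ),
})
```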
Provider-defined / hosted tools (e.g. Anthropic `web_search` / `code_execution` / `web_fetch`, OpenAI Responses `web_search_call` / `file_search_call` / `code_interpreter_call` / `mcp_call` / `local_shell_call` / `image_generation_call` / `computer_use_call`) pass through the runtime untouched:
- Adapters surface the model's call as a `tool-call` event with `providerExecuted: true`, and the provider's result as a matching `tool-result` event with `providerExecuted: true`.
- The runtime detects `providerExecuted` on `tool-call` and **skips client dispatch** — no handler is invoked and no `tool-error` is raised for "unknown tool". The provider already executed it.
- Both events are appended to the assistant message in `assistantContent` so the next round's history carries the call + result for context. Anthropic encodes them back as `server_tool_use` + `web_search_tool_result` (or `code_execution_tool_result` / `web_fetch_tool_result`) blocks; OpenAI Responses callers typically use `previous_response_id` instead of resending hosted-tool items.
Add provider-defined tools to `request.tools` (no runtime entry needed). The matching adapter must know how to lower the tool definition into the provider-native shape; right now Anthropic accepts `web_search` / `code_execution` / `web_fetch` and OpenAI Responses accepts the hosted tool names listed above.
### Recording Tests
Recorded tests use one cassette file per scenario. A cassette holds an ordered array of `{ request, response }` interactions, so multi-step flows (tool loops, retries, polling) record into a single file. Use `recordedTests({ prefix, requires })` and let the helper derive cassette names from test names:
```ts
const recorded = recordedTests({ prefix: "openai-chat", requires: ["OPENAI_API_KEY"] })
recorded.effect("streams text", () =>
Effect.gen(function* () {
// test body
}),
)
```
Replay is the default. `RECORD=true` records fresh cassettes and requires the listed env vars. Cassettes are written as pretty-printed JSON so multi-interaction diffs stay reviewable.
Pass `provider`, `protocol`, and optional `tags` to `recordedTests(...)` / `recorded.effect.with(...)` so cassettes carry searchable metadata. Use recorded-test filters to replay or record a narrow subset without rewriting a whole file:
- `RECORDED_PROVIDER=openai` matches tests tagged with `provider:openai`; comma-separated values are allowed.
- `RECORDED_TAGS=tool` requires all listed tags to be present, e.g. `RECORDED_TAGS=provider:togetherai,tool`.
- `RECORDED_TEST="streams text"` matches by test name, kebab-case test id, or cassette path.
Filters apply in replay and record mode. Combine them with `RECORD=true` when refreshing only one provider or scenario.
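A hedged sketch of wiring that metadata up so the filters above can select the suite; the exact placement of `tags` on the `recordedTests` options is assumed:
```ts
const recorded = recordedTests({
  prefix: "togetherai",
  requires: ["TOGETHERAI_API_KEY"],
  provider: "togetherai",
  protocol: "openai-chat",
  tags: ["tool"],
})

// RECORDED_PROVIDER=togetherai or RECORDED_TAGS=provider:togetherai,tool now match this suite.
recorded.effect("runs a tool loop", () =>
  Effect.gen(function* () {
    // test body
  }),
)
```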
**Binary response bodies.** Most providers stream text (SSE, JSON). AWS Bedrock streams binary AWS event-stream frames whose CRC32 fields would be mangled by a UTF-8 round-trip — those bodies are stored as base64 with `bodyEncoding: "base64"` on the response snapshot. Detection is by `Content-Type` in `@opencode-ai/http-recorder` (currently `application/vnd.amazon.eventstream` and `application/octet-stream`); cassettes for SSE/JSON adapters omit the field and decode as text.
**Matching strategies.** Replay defaults to structural matching, which finds an interaction by comparing method, URL, allow-listed headers, and the canonical JSON body. This is the right choice for tool loops because each round's request differs (the message history grows). For scenarios where successive requests are byte-identical and expect different responses (retries, polling), pass `dispatch: "sequential"` in `RecordReplayOptions` — replay then walks the cassette in record order via an internal cursor. `scriptedResponses` (in `test/lib/http.ts`) is the deterministic counterpart for tests that don't need a live provider; it scripts response bodies in order without reading from disk.
Do not blanket re-record an entire test file when adding one cassette. `RECORD=true` rewrites every recorded case that runs, and provider streams contain volatile IDs, timestamps, fingerprints, and obfuscation fields. Prefer deleting the one cassette you intend to refresh, or run a focused test pattern that only registers the scenario you want to record. Keep stable existing cassettes unchanged unless their request shape or expected behavior changed.
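A hedged sketch of the sequential path at the recorder layer; `post` is an illustrative helper that issues a JSON POST through `HttpClient`, and the cassette name mirrors the recorder's own test fixtures:
```ts
const polled = Effect.gen(function* () {
  // Identical requests: structural matching would return the first recording twice;
  // sequential dispatch walks the cassette in record order instead.
  const first = yield* post("https://example.test/poll", { id: "job_1" }) // {"status":"pending"}
  const second = yield* post("https://example.test/poll", { id: "job_1" }) // {"status":"complete"}
  return [first, second]
}).pipe(Effect.provide(HttpRecorder.cassetteLayer("record-replay/retry", { dispatch: "sequential" })))
```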
## TODO
### Completed Foundation
- [x] Add an adapter registry so `LLMClient.make(...)` can choose an adapter by provider/protocol instead of requiring a single adapter.
- [x] Add request/response convenience helpers where callsites still expose schema internals, but keep constructors returning canonical Schema class instances.
- [x] Expand OpenAI Chat support for assistant tool-call messages followed by tool-result messages.
- [x] Add OpenAI Chat recorded tests for tool-result follow-up and usage chunks.
- [x] Add deterministic fixture tests for unsupported content paths, including media in user messages and unsupported assistant content.
- [x] Add provider patch examples from real opencode quirks, starting with prompt normalization and target-level provider options.
- [x] Add an OpenAI Responses adapter once the Chat adapter shape feels stable.
- [x] Add Anthropic Messages adapter coverage after Responses, especially content block mapping, tool use/result mapping, and cache hints.
- [x] Add Gemini adapter coverage for text, media input, tool calls, reasoning deltas, finish reasons, usage, and recorded cassettes.
- [x] Extract or port OpenCode's `ProviderTransform.schema` Gemini sanitizer into a tested `packages/llm` tool-schema patch; do not keep a divergent adapter-local copy long term.
### Provider Coverage
- [x] Add a generic OpenAI-compatible Chat adapter for non-OpenAI providers that expose `/chat/completions`.
- [x] Keep OpenAI Responses as a separate first-class protocol for providers that actually implement `/responses`; do not treat generic OpenAI-compatible providers as Responses-capable by default.
- [x] Cover OpenAI-compatible provider families that can share the generic adapter first: DeepSeek, TogetherAI, Cerebras, Baseten, Fireworks, DeepInfra, and similar providers.
- [ ] Decide which providers need thin dedicated wrappers over OpenAI-compatible Chat because they have custom parsing/options: Mistral, Groq, xAI, Perplexity, and Cohere.
- [x] Add Bedrock Converse support: wire format (messages / system / inferenceConfig / toolConfig), AWS event stream binary framing via `@smithy/eventstream-codec`, SigV4 signing via `aws4fetch` (or Bearer API key path), text/reasoning/tool/usage/finish decoding, cache hints, image/document content, deterministic tests, and recorded basic text/tool cassettes. Additional model-specific fields are still TODO.
- [ ] Decide Vertex shape after Bedrock/OpenAI-compatible are stable: Vertex Gemini as Gemini target/http patch vs adapter, and Vertex Anthropic as Anthropic target/http patch vs adapter.
- [ ] Add Gateway/OpenRouter-style routing support only after the generic OpenAI-compatible adapter and provider option patch model are stable.
### OpenCode Parity Patches
- [ ] Port Anthropic tool-use ordering into a prompt patch.
- [ ] Finish Mistral/OpenAI-compatible cleanup patches, including message sequence repair after tool messages.
- [ ] Port DeepSeek reasoning handling and interleaved reasoning field mapping.
- [ ] Add unsupported attachment fallback patches keyed by model capabilities.
- [ ] Add cache hint patches for Anthropic, OpenRouter, Bedrock, OpenAI-compatible, Copilot, and Alibaba-style providers.
- [ ] Add provider option namespacing patches for Gateway, OpenRouter, Azure, OpenAI-compatible wrappers, and other provider-specific option bags.
- [ ] Add model-specific reasoning option patches for providers that need effort, summary, or native reasoning fields.
- [ ] Add provider-specific metadata extraction patches only where OpenCode needs returned reasoning, citations, usage details, or provider-native fields.
### OpenCode Bridge
- [x] Build a `Provider.Model` -> `LLM.ModelRef` bridge for OpenCode, including protocol selection, base URLs, headers, limits, capabilities, native provider metadata, and OpenAI-compatible provider family detection.
- [x] Build a pure `session.llm` -> `LLM.request(...)` bridge for system prompts, message history, tool definitions, tool choice, generation options, reasoning variants, cache hints, and attachments.
- [x] Add a typed `ToolRuntime` that drives the tool loop with Schema-typed parameters/success per tool, single-`ToolFailure` error channel, and `maxSteps`/`stopWhen` controls.
- [x] Provider-defined tool pass-through: `providerExecuted` flag on `tool-call`/`tool-result` events; Anthropic `server_tool_use` / `web_search_tool_result` / `code_execution_tool_result` / `web_fetch_tool_result` round-trip; OpenAI Responses hosted-tool items decoded as `tool-call` + `tool-result` pairs; runtime skips client dispatch when `providerExecuted: true`.
- [ ] Keep auth and deployment concerns in the OpenCode bridge where possible: Bedrock credentials/region/profile, Vertex project/location/token, Azure deployment/API version, and Gateway/OpenRouter routing headers.
- [ ] Keep initial OpenCode integration behind a local flag/path until request payload parity and stream event parity are proven against the existing `session/llm.test.ts` cases.
### Native OpenCode Rollout
- [x] Add a native event bridge that maps `LLMEvent` streams into the existing `SessionProcessor` event contract without creating a second processor.
- [ ] Extract runtime-neutral OpenCode tool resolution from `SessionPrompt.resolveTools`, then build both existing-stream and native `@opencode-ai/llm` tool adapters from the same resolved shape.
- [ ] Map `Permission.RejectedError`, `Permission.CorrectedError`, validation failures, thrown tool failures, and aborts into model-visible native tool error/results.
- [ ] Wire a native stream producer behind an explicit local flag and provider allowlist; the producer should consume `nativeMessages`, call `LLMNative.request(...)`, stream through `LLMClient.make(...)`, and feed `LLMNativeEvents.mapper()` into `SessionProcessor`.
- [ ] Add end-to-end native stream tests through the actual session loop for text, reasoning, tool-call streaming, tool success, rejected permission, corrected permission, thrown tool error, abort, and provider-executed tool history.
- [ ] Dogfood native streaming with the flag enabled for OpenAI first, then Anthropic, Gemini, OpenAI-compatible providers, Bedrock, and Copilot provider-by-provider.
- [ ] Flip native streaming to default only after request parity, stream parity, tool execution, typecheck, focused provider tests, recorded cassettes, and manual dogfood pass for the enabled provider set.
- [ ] Keep the existing stream path as an opt-out fallback during soak; remove it only after native default has proven stable.
### Test And Recording Gaps
- [x] Harden the generic HTTP recorder before adding more live cassettes: secret scanning before writes, sensitive header/query redaction, response/body secret scanning, and clear failure messages that identify the unsafe field without printing the secret.
- [x] Refactor the recorder toward extractable library boundaries: core HTTP cassette schema/matching/redaction/diffing should stay LLM-agnostic; LLM tests should supply metadata and semantic assertions from a thin wrapper.
- [x] Add cassette metadata support: recorder schema version, recorded timestamp, scenario name, tags, and caller-provided subject metadata such as provider/protocol/model/capabilities without making the core recorder depend on LLM concepts.
- [x] Improve replay mismatch diagnostics: show method/URL/header/body diffs and closest recorded interaction while keeping secrets redacted. Unused-interaction reporting is still TODO if a test needs it.
- [ ] Add semantic replay assertions for LLM cassettes: replay raw HTTP, parse provider streams, and compare normalized `LLMEvent[]` or `LLMResponse` snapshots in addition to request matching.
- [ ] Add stream chunk-boundary fuzzing for text/SSE cassettes so parser tests prove correctness independent of provider chunk boundaries.
- [ ] Keep deterministic coverage for malformed chunks and tool arguments that arrive in the first chunk unless a live provider reliably produces those shapes.
- [x] Cover provider-error and HTTP-status sad paths with deterministic fixtures across adapters (Anthropic mid-stream + 4xx; OpenAI Responses mid-stream + 4xx; OpenAI Chat 4xx). Live recordings of provider errors are still TODO when stable cassettes can be captured.
- [x] Improve cassette ergonomics for multi-interaction flows: pretty-printed JSON for diff-friendly cassettes, explicit sequential dispatch, and a recorded tool-loop scaffold (`openai-chat-tool-loop.recorded.test.ts`).
- [x] Mirror OpenCode request-body parity tests through the new LLM path for OpenAI Responses, Anthropic Messages, Gemini, OpenAI-compatible Chat, and Bedrock once supported.
- [x] Add adapter parity fixtures for generic OpenAI-compatible Chat before adding provider-specific wrappers.
### Recorded Cassette Backlog
- [x] DeepSeek OpenAI-compatible Chat basic streaming text.
- [ ] DeepSeek OpenAI-compatible Chat tool call and tool-result follow-up.
- [ ] DeepSeek reasoning output, including any interleaved reasoning fields the live API emits.
- [x] TogetherAI OpenAI-compatible Chat basic streaming text and tool-call flow.
- [ ] Cerebras OpenAI-compatible Chat basic streaming text and tool-call flow.
- [ ] Baseten OpenAI-compatible Chat basic streaming text and deployed-model request shape.
- [ ] Fireworks OpenAI-compatible Chat basic streaming text and tool-call flow.
- [ ] DeepInfra OpenAI-compatible Chat basic streaming text and tool-call flow.
- [ ] Provider-error cassettes for stable, non-secret error bodies where the provider returns deterministic 4xx/5xx payloads.
- [ ] Mistral, Groq, xAI, Perplexity, and Cohere basic/tool cassettes after deciding whether each stays generic OpenAI-compatible or gets a thin wrapper.
- [x] Bedrock Converse basic text and tool-call cassettes (recorded against `us.amazon.nova-micro-v1:0` in us-east-1). Cache-hint cassettes still TODO.
- [ ] Vertex Gemini and Vertex Anthropic basic/tool cassettes after the Vertex adapter/patch shape is decided.
- [ ] Gateway/OpenRouter routing-header cassettes after routing support lands.

30
packages/llm/package.json Normal file
View File

@@ -0,0 +1,30 @@
{
"$schema": "https://json.schemastore.org/package.json",
"version": "1.14.25",
"name": "@opencode-ai/llm",
"type": "module",
"license": "MIT",
"private": true,
"scripts": {
"test": "bun test --timeout 30000",
"test:ci": "mkdir -p .artifacts/unit && bun test --timeout 30000 --reporter=junit --reporter-outfile=.artifacts/unit/junit.xml",
"typecheck": "tsgo --noEmit"
},
"exports": {
".": "./src/index.ts",
"./*": "./src/*.ts"
},
"devDependencies": {
"@effect/platform-node": "catalog:",
"@opencode-ai/http-recorder": "workspace:*",
"@tsconfig/bun": "catalog:",
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:"
},
"dependencies": {
"@smithy/eventstream-codec": "4.2.14",
"@smithy/util-utf8": "4.2.2",
"aws4fetch": "1.0.20",
"effect": "catalog:"
}
}

330
packages/llm/src/adapter.ts Normal file
View File

@@ -0,0 +1,330 @@
import { Effect, Stream } from "effect"
import { HttpClientRequest, type HttpClientResponse } from "effect/unstable/http"
import type { Auth } from "./auth"
import { bearer as authBearer } from "./auth"
import type { Endpoint } from "./endpoint"
import * as LLM from "./llm"
import { RequestExecutor } from "./executor"
import type { AnyPatch, Patch, PatchInput, PatchRegistry } from "./patch"
import { context, emptyRegistry, plan, registry as makePatchRegistry, target as targetPatch } from "./patch"
import type { Framing } from "./framing"
import type { Protocol } from "./protocol"
import { ProviderShared } from "./provider/shared"
import type {
LLMError,
LLMEvent,
LLMRequest,
ModelRef,
PatchTrace,
PreparedRequest,
PreparedRequestOf,
ProtocolID,
} from "./schema"
import { LLMResponse, NoAdapterError, PreparedRequest as PreparedRequestSchema } from "./schema"
interface RuntimeAdapter {
readonly id: string
readonly protocol: ProtocolID
readonly patches: ReadonlyArray<Patch<unknown>>
readonly redact: (target: unknown) => unknown
readonly prepare: (request: LLMRequest) => Effect.Effect<unknown, LLMError>
readonly validate: (draft: unknown) => Effect.Effect<unknown, LLMError>
readonly toHttp: (target: unknown, context: HttpContext) => Effect.Effect<HttpClientRequest.HttpClientRequest, LLMError>
readonly parse: (response: HttpClientResponse.HttpClientResponse) => Stream.Stream<LLMEvent, LLMError>
}
interface RuntimeAdapterSource {
readonly runtime: RuntimeAdapter
}
export interface HttpContext {
readonly request: LLMRequest
readonly patchTrace: ReadonlyArray<PatchTrace>
}
export interface Adapter<Draft, Target> {
readonly id: string
readonly protocol: ProtocolID
readonly patches: ReadonlyArray<Patch<Draft>>
readonly redact: (target: Target) => unknown
readonly prepare: (request: LLMRequest) => Effect.Effect<Draft, LLMError>
readonly validate: (draft: Draft) => Effect.Effect<Target, LLMError>
readonly toHttp: (target: Target, context: HttpContext) => Effect.Effect<HttpClientRequest.HttpClientRequest, LLMError>
readonly parse: (response: HttpClientResponse.HttpClientResponse) => Stream.Stream<LLMEvent, LLMError>
}
export interface AdapterInput<Draft, Target> {
readonly id: string
readonly protocol: ProtocolID
readonly patches?: ReadonlyArray<Patch<Draft>>
readonly redact: (target: Target) => unknown
readonly prepare: (request: LLMRequest) => Effect.Effect<Draft, LLMError>
readonly validate: (draft: Draft) => Effect.Effect<Target, LLMError>
readonly toHttp: (target: Target, context: HttpContext) => Effect.Effect<HttpClientRequest.HttpClientRequest, LLMError>
readonly parse: (response: HttpClientResponse.HttpClientResponse) => Stream.Stream<LLMEvent, LLMError>
}
export interface AdapterDefinition<Draft, Target> extends Adapter<Draft, Target> {
readonly runtime: RuntimeAdapter
readonly patch: (id: string, input: PatchInput<Draft>) => Patch<Draft>
readonly withPatches: (patches: ReadonlyArray<Patch<Draft>>) => AdapterDefinition<Draft, Target>
}
export interface LLMClient {
/**
* Compile a request through the adapter pipeline (patches, prepare, validate,
* toHttp) without sending it. Returns the prepared request including the
* provider-native target.
*
* Pass a `Target` type argument to statically expose the adapter's target
* shape (e.g. `prepare<OpenAIChatTarget>(...)`) — the runtime payload is
* identical, so this is a type-level assertion the caller makes about which
* adapter the request will resolve to.
*/
readonly prepare: <Target = unknown>(
request: LLMRequest,
) => Effect.Effect<PreparedRequestOf<Target>, LLMError>
readonly stream: (request: LLMRequest) => Stream.Stream<LLMEvent, LLMError, RequestExecutor.Service>
readonly generate: (request: LLMRequest) => Effect.Effect<LLMResponse, LLMError, RequestExecutor.Service>
}
export interface ClientOptions {
readonly adapters: ReadonlyArray<RuntimeAdapterSource>
readonly patches?: PatchRegistry | ReadonlyArray<AnyPatch>
}
const noAdapter = (model: ModelRef) =>
new NoAdapterError({ protocol: model.protocol, provider: model.provider, model: model.id })
const normalizeRegistry = (patches: PatchRegistry | ReadonlyArray<AnyPatch> | undefined): PatchRegistry => {
if (!patches) return emptyRegistry
if ("request" in patches) return patches
return makePatchRegistry(patches)
}
/**
* Lower-level adapter constructor. Reach for this only when the adapter
* genuinely cannot fit `fromProtocol`'s four-axis model — for example, an
* adapter that needs hand-rolled `toHttp` / `parse` because no `Protocol`,
* `Endpoint`, `Auth`, or `Framing` value cleanly captures its behavior.
*
* Named `unsafe` to signal that you are escaping the safe abstraction; the
* canonical path is `Adapter.fromProtocol(...)`. New adapters should start
* there and prove they need otherwise before reaching for this.
*/
export function unsafe<Draft, Target>(input: AdapterInput<Draft, Target>): AdapterDefinition<Draft, Target> {
const build = (patches: ReadonlyArray<Patch<Draft>>): AdapterDefinition<Draft, Target> => ({
id: input.id,
protocol: input.protocol,
patches,
get runtime() {
// Runtime registry erases adapter draft/target generics after validation.
// oxlint-disable-next-line typescript-eslint/no-unsafe-type-assertion
return this as unknown as RuntimeAdapter
},
redact: input.redact,
prepare: input.prepare,
validate: input.validate,
toHttp: input.toHttp,
parse: input.parse,
patch: (id, patchInput) => targetPatch(`${input.id}.${id}`, patchInput),
withPatches: (next) => build([...patches, ...next]),
})
return build(input.patches ?? [])
}
export interface FromProtocolInput<Draft, Target, Frame, Chunk, State> {
/** Adapter id used in registry lookup, error messages, and patch namespaces. */
readonly id: string
/** Semantic API contract — owns lowering, validation, encoding, and parsing. */
readonly protocol: Protocol<Draft, Target, Frame, Chunk, State>
/** Where the request is sent. */
readonly endpoint: Endpoint<Target>
/**
* Per-request transport authentication. Defaults to `Auth.bearer`, which
* sets `Authorization: Bearer <model.apiKey>` when `model.apiKey` is set
* and is a no-op otherwise. Override with `Auth.apiKeyHeader(name)` for
* providers that use a custom header (Anthropic, Gemini), or supply a
* custom `Auth` for per-request signing (Bedrock SigV4).
*/
readonly auth?: Auth
/** Stream framing — bytes -> frames before `protocol.decode`. */
readonly framing: Framing<Frame>
/** Static / per-request headers added before `auth` runs. */
readonly headers?: (input: { readonly request: LLMRequest }) => Record<string, string>
/** Provider patches that target this adapter (e.g. include-usage). */
readonly patches?: ReadonlyArray<Patch<Draft>>
/**
* Optional override for the adapter's protocol id. Defaults to
* `protocol.id`. Only set when an adapter intentionally registers under a
* different protocol than the wire it speaks (today: OpenAI-compatible Chat
* uses OpenAI Chat protocol but registers under `openai-compatible-chat`).
*/
readonly protocolId?: ProtocolID
}
/**
* Build an `Adapter` by composing the four orthogonal pieces of a deployment:
*
* - `Protocol` — what is the API I'm speaking?
* - `Endpoint` — where do I send the request?
* - `Auth` — how do I authenticate it?
* - `Framing` — how do I cut the response stream into protocol frames?
*
* Plus optional `headers` and `patches` for cross-cutting deployment concerns
* (provider version pins, per-deployment quirks).
*
* This is the canonical adapter constructor. Reach for `unsafe(...)` only
* when an adapter genuinely cannot fit the four-axis model.
*/
export function fromProtocol<Draft, Target, Frame, Chunk, State>(
input: FromProtocolInput<Draft, Target, Frame, Chunk, State>,
): AdapterDefinition<Draft, Target> {
const auth = input.auth ?? authBearer
const protocol = input.protocol
const buildHeaders = input.headers ?? (() => ({}))
const toHttp = (target: Target, ctx: HttpContext) =>
Effect.gen(function* () {
const url = (yield* input.endpoint({ request: ctx.request, target })).toString()
const body = protocol.encode(target)
const merged = { ...buildHeaders({ request: ctx.request }), ...ctx.request.model.headers }
const headers = yield* auth({
request: ctx.request,
method: "POST",
url,
body,
headers: merged,
})
return ProviderShared.jsonPost({ url, body, headers })
})
const parse = (response: HttpClientResponse.HttpClientResponse) =>
ProviderShared.framed({
adapter: input.id,
response,
readError: protocol.streamReadError,
framing: input.framing.frame,
decodeChunk: protocol.decode,
initial: protocol.initial,
process: protocol.process,
onHalt: protocol.onHalt,
})
return unsafe({
id: input.id,
protocol: input.protocolId ?? protocol.id,
patches: input.patches,
redact: protocol.redact,
prepare: protocol.prepare,
validate: protocol.validate,
toHttp,
parse,
})
}
const makeClient = (options: ClientOptions): LLMClient => {
const registry = normalizeRegistry(options.patches)
const adapters = new Map(
options.adapters.map((source) => [source.runtime.protocol, source.runtime] as const),
)
const compile = Effect.fn("LLM.compile")(function* (request: LLMRequest) {
const adapter = adapters.get(request.model.protocol)
if (!adapter) return yield* noAdapter(request.model)
const requestPlan = plan({
phase: "request",
context: context({ request }),
patches: registry.request,
})
const requestAfterRequestPatches = requestPlan.apply(request)
const promptPlan = plan({
phase: "prompt",
context: context({ request: requestAfterRequestPatches }),
patches: registry.prompt,
})
const requestBeforeToolPatches = promptPlan.apply(requestAfterRequestPatches)
const toolSchemaPlan = plan({
phase: "tool-schema",
context: context({ request: requestBeforeToolPatches }),
patches: registry.toolSchema,
})
const patchedRequest =
requestBeforeToolPatches.tools.length === 0
? requestBeforeToolPatches
: LLM.updateRequest(requestBeforeToolPatches, { tools: requestBeforeToolPatches.tools.map(toolSchemaPlan.apply) })
const patchContext = context({ request: patchedRequest })
const draft = yield* adapter.prepare(patchedRequest)
const targetPlan = plan({
phase: "target",
context: patchContext,
patches: [...adapter.patches, ...registry.target],
})
const target = yield* adapter.validate(targetPlan.apply(draft))
const targetPatchTrace = [
...requestPlan.trace,
...promptPlan.trace,
...(requestBeforeToolPatches.tools.length === 0 ? [] : toolSchemaPlan.trace),
...targetPlan.trace,
]
const http = yield* adapter.toHttp(target, { request: patchedRequest, patchTrace: targetPatchTrace })
return { request: patchedRequest, adapter, target, http, patchTrace: targetPatchTrace }
})
const prepare = Effect.fn("LLM.prepare")(function* (request: LLMRequest) {
const compiled = yield* compile(request)
return new PreparedRequestSchema({
id: compiled.request.id ?? "request",
adapter: compiled.adapter.id,
model: compiled.request.model,
target: compiled.target,
redactedTarget: compiled.adapter.redact(compiled.target),
patchTrace: compiled.patchTrace,
})
})
const stream = (request: LLMRequest) =>
Stream.unwrap(
Effect.gen(function* () {
const compiled = yield* compile(request)
const executor = yield* RequestExecutor.Service
const response = yield* executor.execute(compiled.http)
const streamPlan = plan({
phase: "stream",
context: context({ request: compiled.request }),
patches: registry.stream,
})
const events = compiled.adapter.parse(response)
if (streamPlan.patches.length === 0) return events
return events.pipe(Stream.map(streamPlan.apply))
}),
)
const generate = Effect.fn("LLM.generate")(function* (request: LLMRequest) {
return new LLMResponse(
yield* stream(request).pipe(
Stream.runFold(
() => ({ events: [] as LLMEvent[], usage: undefined as LLMResponse["usage"] }),
(acc, event) => {
acc.events.push(event)
if ("usage" in event && event.usage !== undefined) acc.usage = event.usage
return acc
},
),
),
)
})
// The runtime always emits a `PreparedRequest` (target: unknown). Callers
// who supply a `Target` type argument assert the shape they expect from
// their adapter; the cast hands them a typed view of the same payload.
return { prepare: prepare as LLMClient["prepare"], stream, generate }
}
export const LLMClient = { make: makeClient }
export * as Adapter from "./adapter"

78
packages/llm/src/auth.ts Normal file
View File

@@ -0,0 +1,78 @@
import { Effect } from "effect"
import type { LLMError, LLMRequest } from "./schema"
/**
* Per-request transport authentication.
*
* Receives the unsigned HTTP request shape (URL, method, body, headers) and
* returns the headers to actually send.
*
* Most adapters use the default `Auth.bearer`, which reads
* `request.model.apiKey` and sets `Authorization: Bearer ...`. Providers
* that use a different header pick `Auth.apiKeyHeader(name)` (e.g.
* Anthropic's `x-api-key`, Gemini's `x-goog-api-key`) or a provider-aware
* helper such as `Auth.openAI` for Azure OpenAI's static `api-key` header.
*
* Adapters that need per-request signing (AWS SigV4, future Vertex IAM,
* future Azure AAD) implement `Auth` as a function that hashes the body,
* mints a signature, and merges signed headers into the result.
*/
export type Auth = (input: AuthInput) => Effect.Effect<Record<string, string>, LLMError>
export interface AuthInput {
readonly request: LLMRequest
readonly method: "POST" | "GET"
readonly url: string
readonly body: string
readonly headers: Record<string, string>
}
/**
* Auth that returns the headers untouched. Use when authentication is
* handled outside the LLM core (e.g. caller supplied `headers.authorization`
* directly, or there is genuinely no auth).
*/
export const passthrough: Auth = ({ headers }) => Effect.succeed(headers)
/**
* Builds an `Auth` that reads `request.model.apiKey` and merges the headers
* produced by `from(apiKey)` into the outgoing headers. No-op when
* `model.apiKey` is unset, so callers who pre-set their own auth header keep
* working. The shared core for `bearer` and `apiKeyHeader`.
*/
const fromApiKey = (from: (apiKey: string) => Record<string, string>): Auth => ({ request, headers }) => {
const key = request.model.apiKey
if (!key) return Effect.succeed(headers)
return Effect.succeed({ ...headers, ...from(key) })
}
/**
* `Authorization: Bearer <apiKey>` from `request.model.apiKey`. No-op when
* `model.apiKey` is unset. Used by OpenAI, OpenAI Responses, OpenAI-compatible
* Chat, and (with Bedrock-specific fallback) Bedrock Converse.
*/
export const bearer: Auth = fromApiKey((key) => ({ authorization: `Bearer ${key}` }))
/**
* OpenAI-compatible auth with Azure OpenAI's static API-key exception. Azure
* Entra/OAuth callers can still pre-set `authorization` and omit `apiKey`.
*/
export const openAI: Auth = ({ request, headers }) => {
const key = request.model.apiKey
if (!key) return Effect.succeed(headers)
if (request.model.provider === "azure") {
return Effect.succeed({
...Object.fromEntries(Object.entries(headers).filter(([name]) => name.toLowerCase() !== "authorization")),
"api-key": key,
})
}
return Effect.succeed({ ...headers, authorization: `Bearer ${key}` })
}
/**
* Set a custom header to `request.model.apiKey`. No-op when `model.apiKey`
* is unset. Used by Anthropic (`x-api-key`) and Gemini (`x-goog-api-key`).
*/
export const apiKeyHeader = (name: string): Auth => fromApiKey((key) => ({ [name]: key }))
export * as Auth from "./auth"

50
packages/llm/src/endpoint.ts Normal file
View File

@@ -0,0 +1,50 @@
import { Effect } from "effect"
import { ProviderShared } from "./provider/shared"
import type { LLMError, LLMRequest } from "./schema"
/**
* URL construction for one adapter.
*
* `Endpoint` is the deployment-side answer to "where does this request go?"
* It receives the `LLMRequest` (so it can read `model.id`, `model.baseURL`,
* and `model.queryParams`) and the validated `Target` (so adapters
* whose path depends on a target field — e.g. Bedrock's `modelId` segment —
* can read it safely after target patches).
*
* The result is a `URL` object so query-param composition stays correct
* regardless of caller-provided baseURL trailing slashes.
*/
export type Endpoint<Target> = (input: EndpointInput<Target>) => Effect.Effect<URL, LLMError>
export interface EndpointInput<Target> {
readonly request: LLMRequest
readonly target: Target
}
/**
* Build a URL from the model's `baseURL` (or a default) plus a path. Appends
* `model.queryParams` so adapters that need request-level query params
* (Azure `api-version`, etc.) get them for free.
*
* Both `default` and `path` may be strings or functions of the
* `EndpointInput`, for adapters whose URL embeds the model id, region, or
* another target field.
*/
export const baseURL = <Target>(input: {
readonly default?: string | ((input: EndpointInput<Target>) => string)
readonly path: string | ((input: EndpointInput<Target>) => string)
/** Error message used when neither `model.baseURL` nor `default` is set. */
readonly required?: string
}): Endpoint<Target> => (ctx) =>
Effect.gen(function* () {
const fallback = typeof input.default === "function" ? input.default(ctx) : input.default
const base = ctx.request.model.baseURL ?? fallback
if (!base) return yield* ProviderShared.invalidRequest(input.required ?? "Missing baseURL")
const path = typeof input.path === "string" ? input.path : input.path(ctx)
const url = new URL(`${ProviderShared.trimBaseUrl(base)}${path}`)
const params = ctx.request.model.queryParams
if (params) for (const [key, value] of Object.entries(params)) url.searchParams.set(key, value)
return url
})
export * as Endpoint from "./endpoint"

54
packages/llm/src/executor.ts Normal file
View File

@@ -0,0 +1,54 @@
import { Cause, Context, Effect, Layer } from "effect"
import { FetchHttpClient, HttpClient, HttpClientError, HttpClientRequest, HttpClientResponse } from "effect/unstable/http"
import { ProviderRequestError, TransportError, type LLMError } from "./schema"
export interface Interface {
readonly execute: (
request: HttpClientRequest.HttpClientRequest,
) => Effect.Effect<HttpClientResponse.HttpClientResponse, LLMError>
}
export class Service extends Context.Service<Service, Interface>()("@opencode/LLM/RequestExecutor") {}
const statusError = (response: HttpClientResponse.HttpClientResponse) =>
Effect.gen(function* () {
if (response.status < 400) return response
const body = yield* response.text.pipe(Effect.catch(() => Effect.succeed(undefined)))
return yield* new ProviderRequestError({
status: response.status,
message: `Provider request failed with HTTP ${response.status}`,
body,
})
})
const toHttpError = (error: unknown) => {
if (Cause.isTimeoutError(error)) return new TransportError({ message: error.message, reason: "Timeout" })
if (!HttpClientError.isHttpClientError(error)) return new TransportError({ message: "HTTP transport failed" })
const url = "request" in error ? error.request.url : undefined
if (error.reason._tag === "TransportError") {
return new TransportError({
message: error.reason.description ?? "HTTP transport failed",
reason: error.reason._tag,
url,
})
}
return new TransportError({
message: `HTTP transport failed: ${error.reason._tag}`,
reason: error.reason._tag,
url,
})
}
export const layer: Layer.Layer<Service, never, HttpClient.HttpClient> = Layer.effect(
Service,
Effect.gen(function* () {
const http = yield* HttpClient.HttpClient
return Service.of({
execute: (request) => http.execute(request).pipe(Effect.mapError(toHttpError), Effect.flatMap(statusError)),
})
}),
)
export const defaultLayer = layer.pipe(Layer.provide(FetchHttpClient.layer))
export * as RequestExecutor from "./executor"

29
packages/llm/src/framing.ts Normal file
View File

@@ -0,0 +1,29 @@
import type { Stream } from "effect"
import { ProviderShared } from "./provider/shared"
import type { ProviderChunkError } from "./schema"
/**
* Decode a streaming HTTP response body into provider-protocol frames.
*
* `Framing` is the byte-stream-shaped seam between transport and protocol:
*
* - SSE (`Framing.sse`) — UTF-8 decode the body, run the SSE channel decoder,
* drop empty / `[DONE]` keep-alives. Each emitted frame is the JSON `data:`
* payload of one event.
* - AWS event stream — length-prefixed binary frames with CRC checksums.
* Each emitted frame is one parsed binary event record.
*
* The frame type is opaque to this layer; the protocol's `decode` step turns
* a frame into a typed chunk.
*/
export interface Framing<Frame> {
readonly id: string
readonly frame: (
bytes: Stream.Stream<Uint8Array, ProviderChunkError>,
) => Stream.Stream<Frame, ProviderChunkError>
}
/** Server-Sent Events framing. Used by every JSON-streaming HTTP provider. */
export const sse: Framing<string> = { id: "sse", frame: ProviderShared.sseFraming }
export * as Framing from "./framing"

41
packages/llm/src/index.ts Normal file
View File

@@ -0,0 +1,41 @@
export * from "./adapter"
export * from "./executor"
export * from "./patch"
export * from "./schema"
export * from "./tool"
export * from "./tool-runtime"
export { Auth } from "./auth"
export { Endpoint } from "./endpoint"
export { Framing } from "./framing"
export { Protocol } from "./protocol"
export type { Auth as AuthFn, AuthInput } from "./auth"
export type { Endpoint as EndpointFn, EndpointInput } from "./endpoint"
export type { Framing as FramingDef } from "./framing"
export type { Protocol as ProtocolDef } from "./protocol"
export * as LLM from "./llm"
export * as ProviderPatch from "./provider/patch"
export * as Schema from "./schema"
export type { CapabilitiesInput } from "./llm"
export type {
ProviderAuth,
ProviderResolution,
ProviderResolveInput,
ProviderResolver as ProviderResolverShape,
} from "./provider-resolver"
export { AnthropicMessages } from "./provider/anthropic-messages"
export { AmazonBedrock } from "./provider/amazon-bedrock"
export { Anthropic } from "./provider/anthropic"
export { Azure } from "./provider/azure"
export { BedrockConverse } from "./provider/bedrock-converse"
export { Gemini } from "./provider/gemini"
export { Google } from "./provider/google"
export { GitHubCopilot } from "./provider/github-copilot"
export { OpenAI } from "./provider/openai"
export { OpenAIChat } from "./provider/openai-chat"
export { OpenAICompatibleChat } from "./provider/openai-compatible-chat"
export { OpenAICompatibleFamily } from "./provider/openai-compatible-family"
export { OpenAIResponses } from "./provider/openai-responses"
export { ProviderResolver } from "./provider-resolver"
export { XAI } from "./provider/xai"

213
packages/llm/src/llm.ts Normal file
View File

@@ -0,0 +1,213 @@
import {
GenerationOptions,
LLMEvent,
LLMRequest,
LLMResponse,
Message,
ModelCapabilities,
ModelID,
ModelLimits,
ModelRef,
ProviderID,
ToolChoice,
ToolDefinition,
type ContentPart,
type ModelID as ModelIDType,
type ProviderID as ProviderIDType,
type ReasoningEffort,
type SystemPart,
type ToolCallPart,
type ToolResultPart,
type ToolResultValue,
} from "./schema"
export type CapabilitiesInput = {
readonly input?: Partial<ModelCapabilities["input"]>
readonly output?: Partial<ModelCapabilities["output"]>
readonly tools?: Partial<ModelCapabilities["tools"]>
readonly cache?: Partial<ModelCapabilities["cache"]>
readonly reasoning?: Partial<Omit<ModelCapabilities["reasoning"], "efforts">> & {
readonly efforts?: ReadonlyArray<ReasoningEffort>
}
}
export type ModelInput = Omit<ConstructorParameters<typeof ModelRef>[0], "id" | "provider" | "capabilities" | "limits"> & {
readonly id: string | ModelIDType
readonly provider: string | ProviderIDType
readonly capabilities?: ModelCapabilities | CapabilitiesInput
readonly limits?: ModelLimits | ConstructorParameters<typeof ModelLimits>[0]
}
export type MessageInput = Omit<ConstructorParameters<typeof Message>[0], "content"> & {
readonly content: string | ContentPart | ReadonlyArray<ContentPart>
}
export type ToolChoiceInput =
| ToolChoice
| ConstructorParameters<typeof ToolChoice>[0]
| ToolDefinition
| string
export type ToolChoiceMode = Exclude<ToolChoice["type"], "tool">
export type ToolResultInput = Omit<ToolResultPart, "type" | "result"> & {
readonly result: unknown
readonly resultType?: ToolResultValue["type"]
}
export type RequestInput = Omit<
ConstructorParameters<typeof LLMRequest>[0],
"system" | "messages" | "tools" | "toolChoice" | "generation"
> & {
readonly system?: string | SystemPart | ReadonlyArray<SystemPart>
readonly prompt?: string | ContentPart | ReadonlyArray<ContentPart>
readonly messages?: ReadonlyArray<Message | MessageInput>
readonly tools?: ReadonlyArray<ToolDefinition | ConstructorParameters<typeof ToolDefinition>[0]>
readonly toolChoice?: ToolChoiceInput
readonly generation?: GenerationOptions | ConstructorParameters<typeof GenerationOptions>[0]
}
export const capabilities = (input: CapabilitiesInput = {}) =>
new ModelCapabilities({
input: { text: true, image: false, audio: false, video: false, pdf: false, ...input.input },
output: { text: true, reasoning: false, ...input.output },
tools: { calls: false, streamingInput: false, providerExecuted: false, ...input.tools },
cache: { prompt: false, messageBlocks: false, contentBlocks: false, ...input.cache },
reasoning: { efforts: [], summaries: false, encryptedContent: false, ...input.reasoning },
})
export const limits = (input: ConstructorParameters<typeof ModelLimits>[0] = {}) => new ModelLimits(input)
export const text = (value: string): ContentPart => ({ type: "text", text: value })
export const system = (value: string): SystemPart => ({ type: "text", text: value })
const contentParts = (input: string | ContentPart | ReadonlyArray<ContentPart>) =>
typeof input === "string" ? [text(input)] : Array.isArray(input) ? [...input] : [input]
const systemParts = (input?: string | SystemPart | ReadonlyArray<SystemPart>) => {
if (input === undefined) return []
return typeof input === "string" ? [system(input)] : Array.isArray(input) ? [...input] : [input]
}
export const message = (input: Message | MessageInput) => {
if (input instanceof Message) return input
return new Message({ ...input, content: contentParts(input.content) })
}
export const user = (content: string | ContentPart | ReadonlyArray<ContentPart>) =>
message({ role: "user", content })
export const assistant = (content: string | ContentPart | ReadonlyArray<ContentPart>) =>
message({ role: "assistant", content })
export const model = (input: ModelInput) => {
const { capabilities: modelCapabilities, limits: modelLimits, ...rest } = input
return new ModelRef({
...rest,
id: ModelID.make(input.id),
provider: ProviderID.make(input.provider),
protocol: input.protocol,
capabilities: modelCapabilities instanceof ModelCapabilities ? modelCapabilities : capabilities(modelCapabilities),
limits: modelLimits instanceof ModelLimits ? modelLimits : limits(modelLimits),
})
}
export const toolDefinition = (input: ToolDefinition | ConstructorParameters<typeof ToolDefinition>[0]) => {
if (input instanceof ToolDefinition) return input
return new ToolDefinition(input)
}
export const toolCall = (input: Omit<ToolCallPart, "type">): ToolCallPart => ({ type: "tool-call", ...input })
const isRecord = (value: unknown): value is Record<string, unknown> =>
typeof value === "object" && value !== null && !Array.isArray(value)
const isToolResultValue = (value: unknown): value is ToolResultValue =>
isRecord(value) && (value.type === "text" || value.type === "json" || value.type === "error") && "value" in value
const toolResultValue = (value: unknown, type: ToolResultValue["type"] = "json"): ToolResultValue => {
if (isToolResultValue(value)) return value
return { type, value }
}
export const toolResult = (input: ToolResultInput): ToolResultPart => ({
type: "tool-result",
id: input.id,
name: input.name,
result: toolResultValue(input.result, input.resultType),
providerExecuted: input.providerExecuted,
metadata: input.metadata,
})
export const toolMessage = (input: ToolResultPart | ToolResultInput) =>
message({ role: "tool", content: ["type" in input ? input : toolResult(input)] })
export const toolChoiceName = (name: string) => new ToolChoice({ type: "tool", name })
const isToolChoiceMode = (value: string): value is ToolChoiceMode =>
value === "auto" || value === "none" || value === "required"
export const toolChoice = (input: ToolChoiceInput) => {
if (input instanceof ToolChoice) return input
if (input instanceof ToolDefinition) return new ToolChoice({ type: "tool", name: input.name })
if (typeof input === "string") return isToolChoiceMode(input) ? new ToolChoice({ type: input }) : toolChoiceName(input)
return new ToolChoice(input)
}
export const generation = (input: GenerationOptions | ConstructorParameters<typeof GenerationOptions>[0] = {}) => {
if (input instanceof GenerationOptions) return input
return new GenerationOptions(input)
}
export const requestInput = (input: LLMRequest): RequestInput => ({
id: input.id,
model: input.model,
system: input.system,
messages: input.messages,
tools: input.tools,
toolChoice: input.toolChoice,
generation: input.generation,
reasoning: input.reasoning,
cache: input.cache,
responseFormat: input.responseFormat,
metadata: input.metadata,
native: input.native,
})
export const request = (input: RequestInput) => {
const { system: requestSystem, prompt, messages, tools, toolChoice: requestToolChoice, generation: requestGeneration, ...rest } = input
return new LLMRequest({
...rest,
system: systemParts(requestSystem),
messages: [...(messages?.map(message) ?? []), ...(prompt === undefined ? [] : [user(prompt)])],
tools: tools?.map(toolDefinition) ?? [],
toolChoice: requestToolChoice ? toolChoice(requestToolChoice) : undefined,
generation: generation(requestGeneration),
})
}
export const updateRequest = (input: LLMRequest, patch: Partial<RequestInput>) =>
request({ ...requestInput(input), ...patch })
export const outputText = (response: LLMResponse | { readonly events: ReadonlyArray<LLMEvent> }) =>
response.events
.filter(LLMEvent.is.textDelta)
.map((event) => event.text)
.join("")
export const outputUsage = (response: LLMResponse | { readonly events: ReadonlyArray<LLMEvent> }) => {
if (response instanceof LLMResponse) return response.usage
return response.events.reduce<LLMResponse["usage"]>(
(usage, event) => ("usage" in event && event.usage !== undefined ? event.usage : usage),
undefined,
)
}
export const outputToolCalls = (response: LLMResponse | { readonly events: ReadonlyArray<LLMEvent> }) =>
response.events.filter(LLMEvent.is.toolCall)
export const outputReasoning = (response: LLMResponse | { readonly events: ReadonlyArray<LLMEvent> }) =>
response.events
.filter(LLMEvent.is.reasoningDelta)
.map((event) => event.text)
.join("")

159
packages/llm/src/patch.ts Normal file
View File

@@ -0,0 +1,159 @@
import type { LLMEvent, LLMRequest, ModelRef, PatchPhase, ProtocolID, ToolDefinition } from "./schema"
import { PatchTrace } from "./schema"
export interface PatchContext {
readonly request: LLMRequest
readonly model: ModelRef
readonly protocol: ModelRef["protocol"]
}
export interface Patch<A> {
readonly id: string
readonly phase: PatchPhase
readonly reason: string
readonly order?: number
readonly when: (context: PatchContext) => boolean
readonly apply: (value: A, context: PatchContext) => A
}
export interface AnyPatch {
readonly id: string
readonly phase: PatchPhase
readonly reason: string
readonly order?: number
readonly when: (context: PatchContext) => boolean
readonly apply: (value: never, context: PatchContext) => unknown
}
export interface PatchInput<A> {
readonly reason: string
readonly order?: number
readonly when?: PatchPredicate | ((context: PatchContext) => boolean)
readonly apply: (value: A, context: PatchContext) => A
}
export interface PatchPredicate {
(context: PatchContext): boolean
readonly and: (...predicates: ReadonlyArray<PatchPredicate>) => PatchPredicate
readonly or: (...predicates: ReadonlyArray<PatchPredicate>) => PatchPredicate
readonly not: () => PatchPredicate
}
export interface PatchPlan<A> {
readonly phase: PatchPhase
readonly patches: ReadonlyArray<Patch<A>>
readonly trace: ReadonlyArray<PatchTrace>
readonly apply: (value: A) => A
}
export interface PatchRegistry {
readonly request: ReadonlyArray<Patch<LLMRequest>>
readonly prompt: ReadonlyArray<Patch<LLMRequest>>
readonly toolSchema: ReadonlyArray<Patch<ToolDefinition>>
readonly target: ReadonlyArray<Patch<unknown>>
readonly stream: ReadonlyArray<Patch<LLMEvent>>
}
export const emptyRegistry: PatchRegistry = {
request: [],
prompt: [],
toolSchema: [],
target: [],
stream: [],
}
export const predicate = (run: (context: PatchContext) => boolean): PatchPredicate => {
const self = Object.assign(run, {
and: (...predicates: ReadonlyArray<PatchPredicate>) =>
predicate((context) => self(context) && predicates.every((item) => item(context))),
or: (...predicates: ReadonlyArray<PatchPredicate>) =>
predicate((context) => self(context) || predicates.some((item) => item(context))),
not: () => predicate((context) => !self(context)),
})
return self
}
export const Model = {
provider: (provider: string) => predicate((context) => context.model.provider === provider),
protocol: (protocol: ProtocolID) => predicate((context) => context.protocol === protocol),
id: (id: string) => predicate((context) => context.model.id === id),
idIncludes: (value: string) => predicate((context) => context.model.id.toLowerCase().includes(value.toLowerCase())),
}
export const make = <A>(id: string, phase: PatchPhase, input: PatchInput<A>): Patch<A> => ({
id,
phase,
reason: input.reason,
order: input.order,
when: input.when ?? (() => true),
apply: input.apply,
})
export const request = (id: string, input: PatchInput<LLMRequest>) => make(`request.${id}`, "request", input)
export const prompt = (id: string, input: PatchInput<LLMRequest>) => make(`prompt.${id}`, "prompt", input)
export const toolSchema = (id: string, input: PatchInput<ToolDefinition>) => make(`schema.${id}`, "tool-schema", input)
export const target = <A>(id: string, input: PatchInput<A>) => make(`target.${id}`, "target", input)
export const stream = (id: string, input: PatchInput<LLMEvent>) => make(`stream.${id}`, "stream", input)
export function registry(patches: ReadonlyArray<AnyPatch>): PatchRegistry {
return {
request: patches.filter((patch): patch is Patch<LLMRequest> => patch.phase === "request"),
prompt: patches.filter((patch): patch is Patch<LLMRequest> => patch.phase === "prompt"),
toolSchema: patches.filter((patch): patch is Patch<ToolDefinition> => patch.phase === "tool-schema"),
target: patches.filter((patch) => patch.phase === "target") as unknown as ReadonlyArray<Patch<unknown>>,
stream: patches.filter((patch): patch is Patch<LLMEvent> => patch.phase === "stream"),
}
}
export function context(input: {
readonly request: LLMRequest
}): PatchContext {
return {
request: input.request,
model: input.request.model,
protocol: input.request.model.protocol,
}
}
export function plan<A>(input: {
readonly phase: PatchPhase
readonly context: PatchContext
readonly patches: ReadonlyArray<Patch<A>>
}): PatchPlan<A> {
const patches = input.patches
.filter((patch) => patch.phase === input.phase && patch.when(input.context))
.toSorted((left, right) => (left.order ?? 0) - (right.order ?? 0) || left.id.localeCompare(right.id))
return {
phase: input.phase,
patches,
trace: patches.map(
(patch) =>
new PatchTrace({
id: patch.id,
phase: patch.phase,
reason: patch.reason,
}),
),
apply: (value) => patches.reduce((next, patch) => patch.apply(next, input.context), value),
}
}
export function mergeRegistries(registries: ReadonlyArray<PatchRegistry>): PatchRegistry {
return registries.reduce(
(merged, registry) => ({
request: [...merged.request, ...registry.request],
prompt: [...merged.prompt, ...registry.prompt],
toolSchema: [...merged.toolSchema, ...registry.toolSchema],
target: [...merged.target, ...registry.target],
stream: [...merged.stream, ...registry.stream],
}),
emptyRegistry,
)
}
export * as Patch from "./patch"

72
packages/llm/src/protocol.ts Normal file
View File

@@ -0,0 +1,72 @@
import type { Effect } from "effect"
import type { LLMError, LLMEvent, LLMRequest, ProtocolID, ProviderChunkError } from "./schema"
/**
* The semantic API contract of one model server family.
*
* A `Protocol` owns the parts of an adapter that are intrinsic to "what does
* this API look like": how a common `LLMRequest` lowers into a provider-native
* shape, how that shape validates and encodes onto the wire, and how the
* streaming response decodes back into common `LLMEvent`s.
*
* Examples:
*
* - `OpenAIChat.protocol` — chat completions style
* - `OpenAIResponses.protocol` — responses API
* - `AnthropicMessages.protocol` — messages API with content blocks
* - `Gemini.protocol` — generateContent
* - `BedrockConverse.protocol` — Converse with binary event-stream framing
*
* A `Protocol` is **not** a deployment. It does not know which URL, which
* headers, or which auth scheme to use. Those are deployment concerns owned
* by `Adapter.fromProtocol(...)` along with the chosen `Endpoint`, `Auth`,
* and `Framing`. This separation is what lets DeepSeek, TogetherAI, Cerebras,
* etc. all reuse `OpenAIChat.protocol` without forking 300 lines per provider.
*
* The five type parameters reflect the pipeline:
*
* - `Draft` — provider-native shape *before* target patches.
* - `Target` — provider-native shape *after* target patches and Schema
* validation. The body sent to the provider is `encode(target)`.
* - `Frame` — one unit of the framed response stream. SSE: a JSON data
* string. AWS event stream: a parsed binary frame.
* - `Chunk` — schema-decoded provider chunk produced from one frame.
* - `State` — accumulator threaded through `process` to translate chunk
* sequences into `LLMEvent` sequences.
*/
export interface Protocol<Draft, Target, Frame, Chunk, State> {
/** Stable id matching `ModelRef.protocol` for adapter registry lookup. */
readonly id: ProtocolID
/** Lower a common request into this protocol's draft shape. */
readonly prepare: (request: LLMRequest) => Effect.Effect<Draft, LLMError>
/** Validate the post-patch draft against the protocol's target schema. */
readonly validate: (draft: Draft) => Effect.Effect<Target, LLMError>
/** Serialize the validated target into a request body. */
readonly encode: (target: Target) => string
/** Produce a redacted copy for `PreparedRequest.redactedTarget`. */
readonly redact: (target: Target) => unknown
/** Decode one framed response unit into a typed provider chunk. */
readonly decode: (frame: Frame) => Effect.Effect<Chunk, ProviderChunkError>
/** Initial parser state. Called once per response. */
readonly initial: () => State
/** Translate one chunk into emitted events plus the next state. */
readonly process: (
state: State,
chunk: Chunk,
) => Effect.Effect<readonly [State, ReadonlyArray<LLMEvent>], ProviderChunkError>
/** Optional flush emitted when the framed stream ends. */
readonly onHalt?: (state: State) => ReadonlyArray<LLMEvent>
/** Error message used when the underlying transport fails mid-stream. */
readonly streamReadError: string
}
/**
* Construct a `Protocol` from its parts. Currently a typed identity, but kept
* as the public constructor so future cross-cutting concerns (tracing spans,
* default redaction, instrumentation) can be added in one place.
*/
export const define = <Draft, Target, Frame, Chunk, State>(
input: Protocol<Draft, Target, Frame, Chunk, State>,
): Protocol<Draft, Target, Frame, Chunk, State> => input
export * as Protocol from "./protocol"

65
packages/llm/src/provider-resolver.ts Normal file
View File

@@ -0,0 +1,65 @@
import { ModelID, ProviderID, type ProtocolID } from "./schema"
import type { ModelID as ModelIDType, ProviderID as ProviderIDType } from "./schema"
import type { CapabilitiesInput } from "./llm"
/**
* Whether a provider needs an API key at request time. The OpenCode bridge
* consults this to decide whether to read `provider.key` and stamp it onto
* `model.apiKey`; the adapter's `Auth` axis owns header placement so this
* field does not need to distinguish bearer / x-api-key / x-goog-api-key.
*/
export type ProviderAuth = "key" | "none"
export interface ProviderResolution {
readonly provider: ProviderIDType
readonly protocol: ProtocolID
readonly baseURL?: string
readonly auth: ProviderAuth
readonly queryParams?: Record<string, string>
readonly capabilities?: CapabilitiesInput
}
export interface ProviderResolveInput {
readonly modelID: ModelIDType
readonly providerID: ProviderIDType
readonly options: Record<string, unknown>
}
export interface ProviderResolver {
readonly id: ProviderIDType
readonly resolve: (input: ProviderResolveInput) => ProviderResolution | undefined
}
export const make = (
provider: string | ProviderIDType,
protocol: ProtocolID,
options: Partial<Omit<ProviderResolution, "provider" | "protocol">> = {},
): ProviderResolution => ({
provider: ProviderID.make(provider),
protocol,
...options,
auth: options.auth ?? "key",
})
export const define = (input: ProviderResolver): ProviderResolver => input
export const fixed = (
provider: string | ProviderIDType,
protocol: ProtocolID,
options: Partial<Omit<ProviderResolution, "provider" | "protocol">> = {},
): ProviderResolver => {
const resolution = make(provider, protocol, options)
return define({ id: resolution.provider, resolve: () => resolution })
}
export const input = (
modelID: string | ModelIDType,
providerID: string | ProviderIDType,
options: Record<string, unknown>,
): ProviderResolveInput => ({
modelID: ModelID.make(modelID),
providerID: ProviderID.make(providerID),
options,
})
export * as ProviderResolver from "./provider-resolver"

5
packages/llm/src/provider/amazon-bedrock.ts Normal file
View File

@@ -0,0 +1,5 @@
import { ProviderResolver } from "../provider-resolver"
export const resolver = ProviderResolver.fixed("amazon-bedrock", "bedrock-converse")
export * as AmazonBedrock from "./amazon-bedrock"

546
packages/llm/src/provider/anthropic-messages.ts Normal file
View File

@@ -0,0 +1,546 @@
import { Effect, Schema } from "effect"
import { Adapter } from "../adapter"
import { Auth } from "../auth"
import { Endpoint } from "../endpoint"
import { Framing } from "../framing"
import { capabilities, model as llmModel, type ModelInput } from "../llm"
import { Protocol } from "../protocol"
import {
Usage,
type CacheHint,
type FinishReason,
type LLMEvent,
type LLMRequest,
type ToolCallPart,
type ToolDefinition,
type ToolResultPart,
} from "../schema"
import { ProviderShared } from "./shared"
const ADAPTER = "anthropic-messages"
export type AnthropicMessagesModelInput = Omit<ModelInput, "provider" | "protocol" | "headers"> & {
readonly apiKey?: string
readonly headers?: Record<string, string>
}
const AnthropicCacheControl = Schema.Struct({ type: Schema.Literal("ephemeral") })
const AnthropicTextBlock = Schema.Struct({
type: Schema.Literal("text"),
text: Schema.String,
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicTextBlock = Schema.Schema.Type<typeof AnthropicTextBlock>
const AnthropicThinkingBlock = Schema.Struct({
type: Schema.Literal("thinking"),
thinking: Schema.String,
signature: Schema.optional(Schema.String),
cache_control: Schema.optional(AnthropicCacheControl),
})
const AnthropicToolUseBlock = Schema.Struct({
type: Schema.Literal("tool_use"),
id: Schema.String,
name: Schema.String,
input: Schema.Unknown,
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicToolUseBlock = Schema.Schema.Type<typeof AnthropicToolUseBlock>
const AnthropicServerToolUseBlock = Schema.Struct({
type: Schema.Literal("server_tool_use"),
id: Schema.String,
name: Schema.String,
input: Schema.Unknown,
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicServerToolUseBlock = Schema.Schema.Type<typeof AnthropicServerToolUseBlock>
// Server tool result blocks: web_search_tool_result, code_execution_tool_result,
// and web_fetch_tool_result. The provider executes the tool and inlines the
// structured result into the assistant turn — there is no client tool_result
// round-trip. We round-trip the structured `content` payload as opaque JSON so
// the next request can echo it back when continuing the conversation.
const AnthropicServerToolResultType = Schema.Literals([
"web_search_tool_result",
"code_execution_tool_result",
"web_fetch_tool_result",
])
type AnthropicServerToolResultType = Schema.Schema.Type<typeof AnthropicServerToolResultType>
const AnthropicServerToolResultBlock = Schema.Struct({
type: AnthropicServerToolResultType,
tool_use_id: Schema.String,
content: Schema.Unknown,
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicServerToolResultBlock = Schema.Schema.Type<typeof AnthropicServerToolResultBlock>
const AnthropicToolResultBlock = Schema.Struct({
type: Schema.Literal("tool_result"),
tool_use_id: Schema.String,
content: Schema.String,
is_error: Schema.optional(Schema.Boolean),
cache_control: Schema.optional(AnthropicCacheControl),
})
const AnthropicUserBlock = Schema.Union([AnthropicTextBlock, AnthropicToolResultBlock])
const AnthropicAssistantBlock = Schema.Union([
AnthropicTextBlock,
AnthropicThinkingBlock,
AnthropicToolUseBlock,
AnthropicServerToolUseBlock,
AnthropicServerToolResultBlock,
])
type AnthropicAssistantBlock = Schema.Schema.Type<typeof AnthropicAssistantBlock>
type AnthropicToolResultBlock = Schema.Schema.Type<typeof AnthropicToolResultBlock>
const AnthropicMessage = Schema.Union([
Schema.Struct({ role: Schema.Literal("user"), content: Schema.Array(AnthropicUserBlock) }),
Schema.Struct({ role: Schema.Literal("assistant"), content: Schema.Array(AnthropicAssistantBlock) }),
])
type AnthropicMessage = Schema.Schema.Type<typeof AnthropicMessage>
const AnthropicTool = Schema.Struct({
name: Schema.String,
description: Schema.String,
input_schema: Schema.Record(Schema.String, Schema.Unknown),
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicTool = Schema.Schema.Type<typeof AnthropicTool>
const AnthropicToolChoice = Schema.Union([
Schema.Struct({ type: Schema.Literals(["auto", "any"]) }),
Schema.Struct({ type: Schema.Literal("tool"), name: Schema.String }),
])
const AnthropicThinking = Schema.Struct({
type: Schema.Literal("enabled"),
budget_tokens: Schema.Number,
})
const AnthropicTargetFields = {
model: Schema.String,
system: Schema.optional(Schema.Array(AnthropicTextBlock)),
messages: Schema.Array(AnthropicMessage),
tools: Schema.optional(Schema.Array(AnthropicTool)),
tool_choice: Schema.optional(AnthropicToolChoice),
stream: Schema.Literal(true),
max_tokens: Schema.Number,
temperature: Schema.optional(Schema.Number),
top_p: Schema.optional(Schema.Number),
stop_sequences: Schema.optional(Schema.Array(Schema.String)),
thinking: Schema.optional(AnthropicThinking),
}
const AnthropicMessagesDraft = Schema.Struct(AnthropicTargetFields)
type AnthropicMessagesDraft = Schema.Schema.Type<typeof AnthropicMessagesDraft>
const AnthropicMessagesTarget = Schema.Struct(AnthropicTargetFields)
export type AnthropicMessagesTarget = Schema.Schema.Type<typeof AnthropicMessagesTarget>
const AnthropicUsage = Schema.Struct({
input_tokens: Schema.optional(Schema.Number),
output_tokens: Schema.optional(Schema.Number),
cache_creation_input_tokens: Schema.optional(Schema.NullOr(Schema.Number)),
cache_read_input_tokens: Schema.optional(Schema.NullOr(Schema.Number)),
})
type AnthropicUsage = Schema.Schema.Type<typeof AnthropicUsage>
const AnthropicStreamBlock = Schema.Struct({
type: Schema.String,
id: Schema.optional(Schema.String),
name: Schema.optional(Schema.String),
text: Schema.optional(Schema.String),
thinking: Schema.optional(Schema.String),
input: Schema.optional(Schema.Unknown),
// *_tool_result blocks arrive whole as content_block_start (no streaming
// delta) with the structured payload in `content` and the originating
// server_tool_use id in `tool_use_id`.
tool_use_id: Schema.optional(Schema.String),
content: Schema.optional(Schema.Unknown),
})
const AnthropicStreamDelta = Schema.Struct({
type: Schema.optional(Schema.String),
text: Schema.optional(Schema.String),
thinking: Schema.optional(Schema.String),
partial_json: Schema.optional(Schema.String),
signature: Schema.optional(Schema.String),
stop_reason: Schema.optional(Schema.NullOr(Schema.String)),
stop_sequence: Schema.optional(Schema.NullOr(Schema.String)),
})
const AnthropicChunk = Schema.Struct({
type: Schema.String,
index: Schema.optional(Schema.Number),
message: Schema.optional(Schema.Struct({ usage: Schema.optional(AnthropicUsage) })),
content_block: Schema.optional(AnthropicStreamBlock),
delta: Schema.optional(AnthropicStreamDelta),
usage: Schema.optional(AnthropicUsage),
error: Schema.optional(Schema.Struct({ type: Schema.String, message: Schema.String })),
})
type AnthropicChunk = Schema.Schema.Type<typeof AnthropicChunk>
interface ToolAccumulator extends ProviderShared.ToolAccumulator {
readonly providerExecuted: boolean
}
interface ParserState {
readonly tools: Record<number, ToolAccumulator>
readonly usage?: Usage
}
const { encodeTarget, decodeTarget, decodeChunk } = ProviderShared.codecs({
adapter: ADAPTER,
draft: AnthropicMessagesDraft,
target: AnthropicMessagesTarget,
chunk: AnthropicChunk,
chunkErrorMessage: "Invalid Anthropic Messages stream chunk",
})
const invalid = ProviderShared.invalidRequest
const cacheControl = (cache: CacheHint | undefined) => cache?.type === "ephemeral" ? { type: "ephemeral" as const } : undefined
const lowerTool = (tool: ToolDefinition): AnthropicTool => ({
name: tool.name,
description: tool.description,
input_schema: tool.inputSchema,
})
const lowerToolChoice = Effect.fn("AnthropicMessages.lowerToolChoice")(function* (
toolChoice: NonNullable<LLMRequest["toolChoice"]>,
) {
if (toolChoice.type === "none") return undefined
if (toolChoice.type === "required") return { type: "any" as const }
if (toolChoice.type !== "tool") return { type: "auto" as const }
if (!toolChoice.name) return yield* invalid("Anthropic Messages tool choice requires a tool name")
return { type: "tool" as const, name: toolChoice.name }
})
const lowerToolCall = (part: ToolCallPart): AnthropicToolUseBlock => ({
type: "tool_use",
id: part.id,
name: part.name,
input: part.input,
})
const lowerServerToolCall = (part: ToolCallPart): AnthropicServerToolUseBlock => ({
type: "server_tool_use",
id: part.id,
name: part.name,
input: part.input,
})
// Server tool result blocks are typed by name. Anthropic ships three today;
// extend this list when new server tools land. The block content is the
// structured payload returned by the provider, which we round-trip as-is.
const serverToolResultType = (name: string): AnthropicServerToolResultType | undefined => {
if (name === "web_search") return "web_search_tool_result"
if (name === "code_execution") return "code_execution_tool_result"
if (name === "web_fetch") return "web_fetch_tool_result"
return undefined
}
const lowerServerToolResult = Effect.fn("AnthropicMessages.lowerServerToolResult")(function* (part: ToolResultPart) {
const wireType = serverToolResultType(part.name)
if (!wireType) return yield* invalid(`Anthropic Messages does not know how to round-trip server tool result for ${part.name}`)
return { type: wireType, tool_use_id: part.id, content: part.result.value } satisfies AnthropicServerToolResultBlock
})
const lowerMessages = Effect.fn("AnthropicMessages.lowerMessages")(function* (request: LLMRequest) {
const messages: AnthropicMessage[] = []
for (const message of request.messages) {
if (message.role === "user") {
const content: AnthropicTextBlock[] = []
for (const part of message.content) {
if (part.type !== "text") return yield* invalid(`Anthropic Messages user messages only support text content for now`)
content.push({ type: "text", text: part.text, cache_control: cacheControl(part.cache) })
}
messages.push({ role: "user", content })
continue
}
if (message.role === "assistant") {
const content: AnthropicAssistantBlock[] = []
for (const part of message.content) {
if (part.type === "text") {
content.push({ type: "text", text: part.text, cache_control: cacheControl(part.cache) })
continue
}
if (part.type === "reasoning") {
content.push({ type: "thinking", thinking: part.text, signature: part.encrypted })
continue
}
if (part.type === "tool-call") {
content.push(part.providerExecuted ? lowerServerToolCall(part) : lowerToolCall(part))
continue
}
if (part.type === "tool-result" && part.providerExecuted) {
content.push(yield* lowerServerToolResult(part))
continue
}
return yield* invalid(`Anthropic Messages assistant messages only support text, reasoning, and tool-call content for now`)
}
messages.push({ role: "assistant", content })
continue
}
const content: AnthropicToolResultBlock[] = []
for (const part of message.content) {
if (part.type !== "tool-result") return yield* invalid(`Anthropic Messages tool messages only support tool-result content`)
content.push({
type: "tool_result",
tool_use_id: part.id,
content: ProviderShared.toolResultText(part),
is_error: part.result.type === "error" ? true : undefined,
})
}
messages.push({ role: "user", content })
}
return messages
})
const thinkingBudget = (request: LLMRequest) => {
if (!request.reasoning?.enabled) return undefined
if (request.reasoning.effort === "minimal" || request.reasoning.effort === "low") return 1024
if (request.reasoning.effort === "high") return 16000
if (request.reasoning.effort === "xhigh") return 24576
if (request.reasoning.effort === "max") return 32000
return 8000
}
const prepare = Effect.fn("AnthropicMessages.prepare")(function* (request: LLMRequest) {
const toolChoice = request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined
const budget = thinkingBudget(request)
return {
model: request.model.id,
system: request.system.length === 0
? undefined
: request.system.map((part) => ({ type: "text" as const, text: part.text, cache_control: cacheControl(part.cache) })),
messages: yield* lowerMessages(request),
tools: request.tools.length === 0 || request.toolChoice?.type === "none" ? undefined : request.tools.map(lowerTool),
tool_choice: toolChoice,
stream: true as const,
max_tokens: request.generation.maxTokens ?? request.model.limits.output ?? 4096,
temperature: request.generation.temperature,
top_p: request.generation.topP,
stop_sequences: request.generation.stop,
thinking: budget ? { type: "enabled" as const, budget_tokens: budget } : undefined,
}
})
const mapFinishReason = (reason: string | null | undefined): FinishReason => {
if (reason === "end_turn" || reason === "stop_sequence" || reason === "pause_turn") return "stop"
if (reason === "max_tokens") return "length"
if (reason === "tool_use") return "tool-calls"
if (reason === "refusal") return "content-filter"
return "unknown"
}
const mapUsage = (usage: AnthropicUsage | undefined): Usage | undefined => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.input_tokens,
outputTokens: usage.output_tokens,
cacheReadInputTokens: usage.cache_read_input_tokens ?? undefined,
cacheWriteInputTokens: usage.cache_creation_input_tokens ?? undefined,
totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, undefined),
native: usage,
})
}
// Anthropic emits usage on `message_start` and again on `message_delta` — the
// final delta carries the authoritative totals. Right-biased merge: each
// field prefers `right` when defined, falls back to `left`. `totalTokens` is
// recomputed from the merged input/output to stay consistent.
const mergeUsage = (left: Usage | undefined, right: Usage | undefined) => {
if (!left) return right
if (!right) return left
const inputTokens = right.inputTokens ?? left.inputTokens
const outputTokens = right.outputTokens ?? left.outputTokens
return new Usage({
inputTokens,
outputTokens,
cacheReadInputTokens: right.cacheReadInputTokens ?? left.cacheReadInputTokens,
cacheWriteInputTokens: right.cacheWriteInputTokens ?? left.cacheWriteInputTokens,
totalTokens: ProviderShared.totalTokens(inputTokens, outputTokens, undefined),
native: { ...left.native, ...right.native },
})
}
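// Illustrative merge with made-up token counts: message_start reports
// { input_tokens: 1200, output_tokens: 1 } and the final message_delta reports
// { output_tokens: 345 }. The merged Usage keeps inputTokens 1200, takes
// outputTokens 345 from the right side, and recomputes totalTokens from the
// merged pair (1545, assuming `ProviderShared.totalTokens` sums input and output).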
const finishToolCall = (tool: ToolAccumulator | undefined) =>
Effect.gen(function* () {
if (!tool) return [] as ReadonlyArray<LLMEvent>
const input = yield* ProviderShared.parseToolInput(ADAPTER, tool.name, tool.input)
const event: LLMEvent = tool.providerExecuted
? { type: "tool-call", id: tool.id, name: tool.name, input, providerExecuted: true }
: { type: "tool-call", id: tool.id, name: tool.name, input }
return [event]
})
// Server tool result blocks come whole in `content_block_start` (no streaming
// delta sequence). We convert the payload to a `tool-result` event with
// `providerExecuted: true`. The runtime appends it to the assistant message
// for round-trip; downstream consumers can inspect `result.value` for the
// structured payload.
const SERVER_TOOL_RESULT_NAMES: Record<AnthropicServerToolResultType, string> = {
web_search_tool_result: "web_search",
code_execution_tool_result: "code_execution",
web_fetch_tool_result: "web_fetch",
}
const isServerToolResultType = (type: string): type is AnthropicServerToolResultType =>
type in SERVER_TOOL_RESULT_NAMES
const serverToolResultEvent = (block: NonNullable<AnthropicChunk["content_block"]>): LLMEvent | undefined => {
if (!block.type || !isServerToolResultType(block.type)) return undefined
const errorPayload =
typeof block.content === "object" && block.content !== null && "type" in block.content
? String((block.content as Record<string, unknown>).type)
: ""
const isError = errorPayload.endsWith("_tool_result_error")
return {
type: "tool-result",
id: block.tool_use_id ?? "",
name: SERVER_TOOL_RESULT_NAMES[block.type],
result: isError
? { type: "error", value: block.content }
: { type: "json", value: block.content },
providerExecuted: true,
}
}
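// Illustrative mapping with hypothetical values: a content_block_start carrying
//   { type: "web_search_tool_result", tool_use_id: "srvtoolu_abc123", content: [{ url: "https://example.com" }] }
// becomes the event
//   { type: "tool-result", id: "srvtoolu_abc123", name: "web_search",
//     result: { type: "json", value: [{ url: "https://example.com" }] }, providerExecuted: true }
// A content payload whose `type` ends in "_tool_result_error" would instead
// produce a result of type "error" with the same value.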
const processChunk = (state: ParserState, chunk: AnthropicChunk) =>
Effect.gen(function* () {
if (chunk.type === "message_start") {
const usage = mapUsage(chunk.message?.usage)
return [usage ? { ...state, usage: mergeUsage(state.usage, usage) } : state, []] as const
}
if (
chunk.type === "content_block_start" &&
chunk.index !== undefined &&
(chunk.content_block?.type === "tool_use" || chunk.content_block?.type === "server_tool_use")
) {
return [{
...state,
tools: {
...state.tools,
[chunk.index]: {
id: chunk.content_block.id ?? String(chunk.index),
name: chunk.content_block.name ?? "",
input: "",
providerExecuted: chunk.content_block.type === "server_tool_use",
},
},
}, []] as const
}
if (chunk.type === "content_block_start" && chunk.content_block?.type === "text" && chunk.content_block.text) {
return [state, [{ type: "text-delta", text: chunk.content_block.text }]] as const
}
if (chunk.type === "content_block_start" && chunk.content_block?.type === "thinking" && chunk.content_block.thinking) {
return [state, [{ type: "reasoning-delta", text: chunk.content_block.thinking }]] as const
}
if (chunk.type === "content_block_start" && chunk.content_block) {
const event = serverToolResultEvent(chunk.content_block)
if (event) return [state, [event]] as const
}
if (chunk.type === "content_block_delta" && chunk.delta?.type === "text_delta" && chunk.delta.text) {
return [state, [{ type: "text-delta", text: chunk.delta.text }]] as const
}
if (chunk.type === "content_block_delta" && chunk.delta?.type === "thinking_delta" && chunk.delta.thinking) {
return [state, [{ type: "reasoning-delta", text: chunk.delta.thinking }]] as const
}
if (chunk.type === "content_block_delta" && chunk.delta?.type === "signature_delta" && chunk.delta.signature) {
return [state, [{ type: "reasoning-delta", text: "", encrypted: chunk.delta.signature }]] as const
}
if (chunk.type === "content_block_delta" && chunk.delta?.type === "input_json_delta" && chunk.index !== undefined) {
if (!chunk.delta.partial_json) return [state, []] as const
const current = state.tools[chunk.index]
if (!current) {
return yield* ProviderShared.chunkError(ADAPTER, "Anthropic Messages tool argument delta is missing its tool call")
}
const next = { ...current, input: `${current.input}${chunk.delta.partial_json}` }
return [{ ...state, tools: { ...state.tools, [chunk.index]: next } }, [
{ type: "tool-input-delta" as const, id: next.id, name: next.name, text: chunk.delta.partial_json },
]] as const
}
if (chunk.type === "content_block_stop" && chunk.index !== undefined) {
const events = yield* finishToolCall(state.tools[chunk.index])
const { [chunk.index]: _, ...tools } = state.tools
return [{ ...state, tools }, events] as const
}
if (chunk.type === "message_delta") {
const usage = mergeUsage(state.usage, mapUsage(chunk.usage))
return [{ ...state, usage }, [{ type: "request-finish" as const, reason: mapFinishReason(chunk.delta?.stop_reason), usage }]] as const
}
if (chunk.type === "error") {
return [state, [{ type: "provider-error" as const, message: chunk.error?.message ?? "Anthropic Messages stream error" }]] as const
}
return [state, []] as const
})
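// Sketch of a typical tool-use stream (chunk types only, values hypothetical):
//   message_start                              -> usage captured into state
//   content_block_start (tool_use)             -> accumulator created for that index
//   content_block_delta (input_json_delta) x N -> tool-input-delta events, JSON accumulated
//   content_block_stop                         -> accumulated JSON parsed, tool-call emitted
//   message_delta (stop_reason: tool_use)      -> request-finish with merged usage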
/**
* The Anthropic Messages protocol — request lowering, target validation,
* body encoding, and the streaming-chunk state machine. Used by native
* Anthropic Cloud and (once registered) Vertex Anthropic / Bedrock-hosted
* Anthropic passthrough.
*/
export const protocol = Protocol.define<
AnthropicMessagesDraft,
AnthropicMessagesTarget,
string,
AnthropicChunk,
ParserState
>({
id: "anthropic-messages",
prepare,
validate: ProviderShared.validateWith(decodeTarget),
encode: encodeTarget,
redact: (target) => target,
decode: decodeChunk,
initial: () => ({ tools: {} }),
process: processChunk,
streamReadError: "Failed to read Anthropic Messages stream",
})
export const adapter = Adapter.fromProtocol({
id: ADAPTER,
protocol,
endpoint: Endpoint.baseURL({ default: "https://api.anthropic.com/v1", path: "/messages" }),
auth: Auth.apiKeyHeader("x-api-key"),
framing: Framing.sse,
headers: () => ({ "anthropic-version": "2023-06-01" }),
})
export const model = (input: AnthropicMessagesModelInput) =>
llmModel({
...input,
provider: "anthropic",
protocol: "anthropic-messages",
capabilities: input.capabilities ?? capabilities({
output: { reasoning: true },
tools: { calls: true, streamingInput: true },
cache: { prompt: true, contentBlocks: true },
reasoning: { efforts: ["low", "medium", "high", "xhigh", "max"], summaries: false, encryptedContent: true },
}),
})
export * as AnthropicMessages from "./anthropic-messages"

View File

@@ -0,0 +1,5 @@
import { ProviderResolver } from "../provider-resolver"
export const resolver = ProviderResolver.fixed("anthropic", "anthropic-messages")
export * as Anthropic from "./anthropic"

View File

@@ -0,0 +1,27 @@
import { ProviderResolver } from "../provider-resolver"
import { ProviderID } from "../schema"
export const id = ProviderID.make("azure")
const stringOption = (options: Record<string, unknown>, key: string) => {
const value = options[key]
if (typeof value === "string" && value.trim() !== "") return value
return undefined
}
const baseURL = (options: Record<string, unknown>) => {
const resource = stringOption(options, "resourceName")
if (!resource) return undefined
return `https://${resource}.openai.azure.com/openai/v1`
}
export const resolver = ProviderResolver.define({
id,
resolve: (input) =>
ProviderResolver.make(id, input.options.useCompletionUrls === true ? "openai-chat" : "openai-responses", {
baseURL: baseURL(input.options),
queryParams: { "api-version": stringOption(input.options, "apiVersion") ?? "v1" },
}),
})
export * as Azure from "./azure"

View File

@@ -0,0 +1,855 @@
import { EventStreamCodec } from "@smithy/eventstream-codec"
import { fromUtf8, toUtf8 } from "@smithy/util-utf8"
import { AwsV4Signer } from "aws4fetch"
import { Effect, Option, Schema, Stream } from "effect"
import { Adapter } from "../adapter"
import { Auth } from "../auth"
import { Endpoint } from "../endpoint"
import type { Framing } from "../framing"
import { capabilities, model as llmModel, type ModelInput } from "../llm"
import { Protocol } from "../protocol"
import {
Usage,
type CacheHint,
type FinishReason,
type LLMEvent,
type LLMRequest,
type MediaPart,
type ProviderChunkError,
type ToolCallPart,
type ToolDefinition,
type ToolResultPart,
} from "../schema"
import { ProviderShared } from "./shared"
const ADAPTER = "bedrock-converse"
/**
* AWS credentials for SigV4 signing. Bedrock also supports Bearer API key auth
* — pass the key as `model.headers.authorization = "Bearer <key>"` to take that
* path instead. STS-vended credentials should be refreshed by the consumer
* (rebuild the model) before they expire; the adapter does not refresh.
*/
export interface BedrockCredentials {
readonly region: string
readonly accessKeyId: string
readonly secretAccessKey: string
readonly sessionToken?: string
}
export type BedrockConverseModelInput = Omit<ModelInput, "provider" | "protocol" | "headers"> & {
/**
* Bearer API key (Bedrock's newer API key auth). Sets the `Authorization`
* header and bypasses SigV4 signing. Mutually exclusive with `credentials`.
*/
readonly apiKey?: string
/**
* AWS credentials for SigV4 signing. The adapter signs each request at
* `toHttp` time using `aws4fetch`. Mutually exclusive with `apiKey`.
*/
readonly credentials?: BedrockCredentials
readonly headers?: Record<string, string>
}
const BedrockTextBlock = Schema.Struct({
text: Schema.String,
})
type BedrockTextBlock = Schema.Schema.Type<typeof BedrockTextBlock>
const BedrockToolUseBlock = Schema.Struct({
toolUse: Schema.Struct({
toolUseId: Schema.String,
name: Schema.String,
input: Schema.Unknown,
}),
})
type BedrockToolUseBlock = Schema.Schema.Type<typeof BedrockToolUseBlock>
const BedrockToolResultContentItem = Schema.Union([
Schema.Struct({ text: Schema.String }),
Schema.Struct({ json: Schema.Unknown }),
])
const BedrockToolResultBlock = Schema.Struct({
toolResult: Schema.Struct({
toolUseId: Schema.String,
content: Schema.Array(BedrockToolResultContentItem),
status: Schema.optional(Schema.Literals(["success", "error"])),
}),
})
type BedrockToolResultBlock = Schema.Schema.Type<typeof BedrockToolResultBlock>
const BedrockReasoningBlock = Schema.Struct({
reasoningContent: Schema.Struct({
reasoningText: Schema.optional(
Schema.Struct({
text: Schema.String,
signature: Schema.optional(Schema.String),
}),
),
}),
})
// Image block. Bedrock Converse accepts `format` as the file extension and
// `source.bytes` as a base64 string (binary upload via base64 in the JSON
// wire format). Supported formats per the Converse docs: png, jpeg, gif, webp.
const BedrockImageFormat = Schema.Literals(["png", "jpeg", "gif", "webp"])
type BedrockImageFormat = Schema.Schema.Type<typeof BedrockImageFormat>
const BedrockImageBlock = Schema.Struct({
image: Schema.Struct({
format: BedrockImageFormat,
source: Schema.Struct({ bytes: Schema.String }),
}),
})
type BedrockImageBlock = Schema.Schema.Type<typeof BedrockImageBlock>
// Document block. Required `name` is the user-facing filename so the model
// can reference it. Supported formats per the Converse docs: pdf, csv, doc,
// docx, xls, xlsx, html, txt, md.
const BedrockDocumentFormat = Schema.Literals([
"pdf",
"csv",
"doc",
"docx",
"xls",
"xlsx",
"html",
"txt",
"md",
])
type BedrockDocumentFormat = Schema.Schema.Type<typeof BedrockDocumentFormat>
const BedrockDocumentBlock = Schema.Struct({
document: Schema.Struct({
format: BedrockDocumentFormat,
name: Schema.String,
source: Schema.Struct({ bytes: Schema.String }),
}),
})
type BedrockDocumentBlock = Schema.Schema.Type<typeof BedrockDocumentBlock>
// Cache breakpoint marker. Inserted positionally between content blocks (or
// after a system text / tool spec) to mark the prefix as cacheable. Bedrock
// Converse currently exposes `default` as the only cache-point type.
const BedrockCachePointBlock = Schema.Struct({
cachePoint: Schema.Struct({ type: Schema.Literal("default") }),
})
type BedrockCachePointBlock = Schema.Schema.Type<typeof BedrockCachePointBlock>
const BedrockUserBlock = Schema.Union([
BedrockTextBlock,
BedrockImageBlock,
BedrockDocumentBlock,
BedrockToolResultBlock,
BedrockCachePointBlock,
])
type BedrockUserBlock = Schema.Schema.Type<typeof BedrockUserBlock>
const BedrockAssistantBlock = Schema.Union([
BedrockTextBlock,
BedrockReasoningBlock,
BedrockToolUseBlock,
BedrockCachePointBlock,
])
type BedrockAssistantBlock = Schema.Schema.Type<typeof BedrockAssistantBlock>
const BedrockMessage = Schema.Union([
Schema.Struct({ role: Schema.Literal("user"), content: Schema.Array(BedrockUserBlock) }),
Schema.Struct({ role: Schema.Literal("assistant"), content: Schema.Array(BedrockAssistantBlock) }),
])
type BedrockMessage = Schema.Schema.Type<typeof BedrockMessage>
const BedrockSystemBlock = Schema.Union([BedrockTextBlock, BedrockCachePointBlock])
type BedrockSystemBlock = Schema.Schema.Type<typeof BedrockSystemBlock>
const BedrockTool = Schema.Struct({
toolSpec: Schema.Struct({
name: Schema.String,
description: Schema.String,
inputSchema: Schema.Struct({
json: Schema.Record(Schema.String, Schema.Unknown),
}),
}),
})
type BedrockTool = Schema.Schema.Type<typeof BedrockTool>
const BedrockToolChoice = Schema.Union([
Schema.Struct({ auto: Schema.Struct({}) }),
Schema.Struct({ any: Schema.Struct({}) }),
Schema.Struct({ tool: Schema.Struct({ name: Schema.String }) }),
])
const BedrockTargetFields = {
modelId: Schema.String,
messages: Schema.Array(BedrockMessage),
system: Schema.optional(Schema.Array(BedrockSystemBlock)),
inferenceConfig: Schema.optional(
Schema.Struct({
maxTokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
topP: Schema.optional(Schema.Number),
stopSequences: Schema.optional(Schema.Array(Schema.String)),
}),
),
toolConfig: Schema.optional(
Schema.Struct({
tools: Schema.Array(BedrockTool),
toolChoice: Schema.optional(BedrockToolChoice),
}),
),
additionalModelRequestFields: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}
const BedrockConverseDraft = Schema.Struct(BedrockTargetFields)
type BedrockConverseDraft = Schema.Schema.Type<typeof BedrockConverseDraft>
const BedrockConverseTarget = Schema.Struct(BedrockTargetFields)
export type BedrockConverseTarget = Schema.Schema.Type<typeof BedrockConverseTarget>
const BedrockUsageSchema = Schema.Struct({
inputTokens: Schema.optional(Schema.Number),
outputTokens: Schema.optional(Schema.Number),
totalTokens: Schema.optional(Schema.Number),
cacheReadInputTokens: Schema.optional(Schema.Number),
cacheWriteInputTokens: Schema.optional(Schema.Number),
})
type BedrockUsageSchema = Schema.Schema.Type<typeof BedrockUsageSchema>
// Streaming chunk shape — the AWS event stream wraps each JSON payload by its
// `:event-type` header (e.g. `messageStart`, `contentBlockDelta`). We
// reconstruct that wrapping in `consumeFrames` below so the chunk schema can
// stay a plain discriminated record.
const BedrockChunk = Schema.Struct({
messageStart: Schema.optional(Schema.Struct({ role: Schema.String })),
contentBlockStart: Schema.optional(
Schema.Struct({
contentBlockIndex: Schema.Number,
start: Schema.optional(
Schema.Struct({
toolUse: Schema.optional(
Schema.Struct({ toolUseId: Schema.String, name: Schema.String }),
),
}),
),
}),
),
contentBlockDelta: Schema.optional(
Schema.Struct({
contentBlockIndex: Schema.Number,
delta: Schema.optional(
Schema.Struct({
text: Schema.optional(Schema.String),
toolUse: Schema.optional(Schema.Struct({ input: Schema.String })),
reasoningContent: Schema.optional(
Schema.Struct({
text: Schema.optional(Schema.String),
signature: Schema.optional(Schema.String),
}),
),
}),
),
}),
),
contentBlockStop: Schema.optional(Schema.Struct({ contentBlockIndex: Schema.Number })),
messageStop: Schema.optional(
Schema.Struct({
stopReason: Schema.String,
additionalModelResponseFields: Schema.optional(Schema.Unknown),
}),
),
metadata: Schema.optional(
Schema.Struct({
usage: Schema.optional(BedrockUsageSchema),
metrics: Schema.optional(Schema.Unknown),
}),
),
internalServerException: Schema.optional(Schema.Struct({ message: Schema.String })),
modelStreamErrorException: Schema.optional(Schema.Struct({ message: Schema.String })),
validationException: Schema.optional(Schema.Struct({ message: Schema.String })),
throttlingException: Schema.optional(Schema.Struct({ message: Schema.String })),
serviceUnavailableException: Schema.optional(Schema.Struct({ message: Schema.String })),
})
type BedrockChunk = Schema.Schema.Type<typeof BedrockChunk>
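// Illustrative rewrapped chunk (values hypothetical): after `consumeFrames`
// below keys the JSON payload by its `:event-type` header, a streamed text
// delta decodes as
//   { contentBlockDelta: { contentBlockIndex: 0, delta: { text: "Hello" } } }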
// The eventstream codec already gives us a UTF-8 payload that we parse once
// per frame; we then wrap it under the `:event-type` key and hand the parsed
// object to `decodeChunkSync`. This keeps a single JSON parse per frame —
// avoid `Schema.fromJsonString` here which would add an extra decode/encode
// roundtrip.
const decodeChunkSync = Schema.decodeUnknownSync(BedrockChunk)
const decodeChunk = (data: unknown) =>
Effect.try({
try: () => decodeChunkSync(data),
catch: () =>
ProviderShared.chunkError(
ADAPTER,
"Invalid Bedrock Converse stream chunk",
typeof data === "string" ? data : ProviderShared.encodeJson(data),
),
})
const encodeTarget = Schema.encodeSync(Schema.fromJsonString(BedrockConverseTarget))
const decodeTarget = Schema.decodeUnknownEffect(BedrockConverseDraft.pipe(Schema.decodeTo(BedrockConverseTarget)))
const invalid = ProviderShared.invalidRequest
const region = (request: LLMRequest) => {
const fromNative = request.model.native?.aws_region
if (typeof fromNative === "string" && fromNative !== "") return fromNative
return "us-east-1"
}
const lowerTool = (tool: ToolDefinition): BedrockTool => ({
toolSpec: {
name: tool.name,
description: tool.description,
inputSchema: { json: tool.inputSchema },
},
})
// Bedrock cache markers are positional — emit a `cachePoint` block right after
// the content the caller wants treated as a cacheable prefix. Bedrock currently
// exposes one cache-point type (`default`); both `ephemeral` and `persistent`
// hints from the common `CacheHint` shape map onto it. Other cache-hint types
// (none today) would need explicit handling.
//
// TODO: Bedrock recently added optional `ttl: "5m" | "1h"` on cachePoint —
// once we have a recorded cassette to validate the wire shape, map
// `CacheHint.ttlSeconds` here.
const CACHE_POINT_DEFAULT: BedrockCachePointBlock = { cachePoint: { type: "default" } }
const cachePointBlock = (cache: CacheHint | undefined): BedrockCachePointBlock | undefined => {
if (cache?.type !== "ephemeral" && cache?.type !== "persistent") return undefined
return CACHE_POINT_DEFAULT
}
// Emit a text block followed by an optional positional cache marker. Used by
// system, user-text, and assistant-text lowering — all three share the same
// "push text, push cachePoint if cache hint is present" shape. The return type
// is the lowest common denominator (text | cachePoint) so callers can spread
// it into any of the three block-union arrays.
const textWithCache = (
text: string,
cache: CacheHint | undefined,
): Array<BedrockTextBlock | BedrockCachePointBlock> => {
const cachePoint = cachePointBlock(cache)
return cachePoint ? [{ text }, cachePoint] : [{ text }]
}
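// Illustrative lowering (text is hypothetical): a user text part carrying an
// ephemeral cache hint
//   { type: "text", text: "You are a helpful assistant", cache: { type: "ephemeral" } }
// lowers to two positional blocks:
//   [{ text: "You are a helpful assistant" }, { cachePoint: { type: "default" } }]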
// MIME type → Bedrock format mapping. Bedrock distinguishes image vs document
// by the top-level block type, not the mediaType, so `lowerMedia` routes by
// the `image/` prefix and the leaf functions look up the format. `image/jpg`
// is included as a non-standard alias commonly seen in user-supplied data.
const IMAGE_FORMATS = {
"image/png": "png",
"image/jpeg": "jpeg",
"image/jpg": "jpeg",
"image/gif": "gif",
"image/webp": "webp",
} as const satisfies Record<string, BedrockImageFormat>
const DOCUMENT_FORMATS = {
"application/pdf": "pdf",
"text/csv": "csv",
"application/msword": "doc",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
"application/vnd.ms-excel": "xls",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
"text/html": "html",
"text/plain": "txt",
"text/markdown": "md",
} as const satisfies Record<string, BedrockDocumentFormat>
// Bedrock document blocks require a name; default to the filename if the
// caller supplied one, otherwise generate a stable placeholder so the model
// still sees a valid block.
const lowerImage = (part: MediaPart, mime: string) => {
const format = IMAGE_FORMATS[mime as keyof typeof IMAGE_FORMATS]
if (!format) return invalid(`Bedrock Converse does not support image media type ${part.mediaType}`)
return Effect.succeed<BedrockImageBlock>({
image: { format, source: { bytes: ProviderShared.mediaBytes(part) } },
})
}
const lowerDocument = (part: MediaPart, mime: string) => {
const format = DOCUMENT_FORMATS[mime as keyof typeof DOCUMENT_FORMATS]
if (!format) return invalid(`Bedrock Converse does not support document media type ${part.mediaType}`)
return Effect.succeed<BedrockDocumentBlock>({
document: {
format,
name: part.filename ?? `document.${format}`,
source: { bytes: ProviderShared.mediaBytes(part) },
},
})
}
const lowerMedia = (part: MediaPart) => {
const mime = part.mediaType.toLowerCase()
return mime.startsWith("image/") ? lowerImage(part, mime) : lowerDocument(part, mime)
}
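// Routing sketch (media parts are hypothetical): a part with mediaType
// "image/png" becomes { image: { format: "png", source: { bytes: <base64> } } },
// while "application/pdf" with no filename becomes
// { document: { format: "pdf", name: "document.pdf", source: { bytes: <base64> } } }.
// Unrecognized media types fail the request via `invalid`.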
const lowerToolChoice = Effect.fn("BedrockConverse.lowerToolChoice")(function* (
toolChoice: NonNullable<LLMRequest["toolChoice"]>,
) {
if (toolChoice.type === "none") return undefined
if (toolChoice.type === "required") return { any: {} } as const
if (toolChoice.type !== "tool") return { auto: {} } as const
if (!toolChoice.name) return yield* invalid("Bedrock Converse tool choice requires a tool name")
return { tool: { name: toolChoice.name } } as const
})
const lowerToolCall = (part: ToolCallPart): BedrockToolUseBlock => ({
toolUse: {
toolUseId: part.id,
name: part.name,
input: part.input,
},
})
const lowerToolResult = (part: ToolResultPart): BedrockToolResultBlock => ({
toolResult: {
toolUseId: part.id,
content:
part.result.type === "text" || part.result.type === "error"
? [{ text: String(part.result.value) }]
: [{ json: part.result.value }],
status: part.result.type === "error" ? "error" : "success",
},
})
const lowerMessages = Effect.fn("BedrockConverse.lowerMessages")(function* (request: LLMRequest) {
const messages: BedrockMessage[] = []
for (const message of request.messages) {
if (message.role === "user") {
const content: BedrockUserBlock[] = []
for (const part of message.content) {
if (part.type === "text") {
content.push(...textWithCache(part.text, part.cache))
continue
}
if (part.type === "media") {
content.push(yield* lowerMedia(part))
continue
}
return yield* invalid("Bedrock Converse user messages only support text and media content for now")
}
messages.push({ role: "user", content })
continue
}
if (message.role === "assistant") {
const content: BedrockAssistantBlock[] = []
for (const part of message.content) {
if (part.type === "text") {
content.push(...textWithCache(part.text, part.cache))
continue
}
if (part.type === "reasoning") {
content.push({
reasoningContent: {
reasoningText: { text: part.text, signature: part.encrypted },
},
})
continue
}
if (part.type === "tool-call") {
content.push(lowerToolCall(part))
continue
}
return yield* invalid("Bedrock Converse assistant messages only support text, reasoning, and tool-call content for now")
}
messages.push({ role: "assistant", content })
continue
}
const content: BedrockToolResultBlock[] = []
for (const part of message.content) {
if (part.type !== "tool-result")
return yield* invalid("Bedrock Converse tool messages only support tool-result content")
content.push(lowerToolResult(part))
}
messages.push({ role: "user", content })
}
return messages
})
// System prompts share the cache-point convention: emit the text block, then
// optionally a positional `cachePoint` marker.
const lowerSystem = (system: ReadonlyArray<LLMRequest["system"][number]>): BedrockSystemBlock[] =>
system.flatMap((part) => textWithCache(part.text, part.cache))
const prepare = Effect.fn("BedrockConverse.prepare")(function* (request: LLMRequest) {
const toolChoice = request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined
return {
modelId: request.model.id,
messages: yield* lowerMessages(request),
system: request.system.length === 0 ? undefined : lowerSystem(request.system),
inferenceConfig:
request.generation.maxTokens === undefined &&
request.generation.temperature === undefined &&
request.generation.topP === undefined &&
(request.generation.stop === undefined || request.generation.stop.length === 0)
? undefined
: {
maxTokens: request.generation.maxTokens,
temperature: request.generation.temperature,
topP: request.generation.topP,
stopSequences: request.generation.stop,
},
toolConfig:
request.tools.length > 0 && request.toolChoice?.type !== "none"
? { tools: request.tools.map(lowerTool), toolChoice }
: undefined,
}
})
// Credentials live on `model.native.aws_credentials` so the OpenCode bridge
// can resolve them via `@aws-sdk/credential-providers` and stuff them in
// without exposing the auth machinery to the rest of the LLM core. Schema
// decode keeps this boundary honest — anything that doesn't match the shape
// is treated as "no credentials".
const NativeCredentials = Schema.Struct({
accessKeyId: Schema.String,
secretAccessKey: Schema.String,
region: Schema.optional(Schema.String),
sessionToken: Schema.optional(Schema.String),
})
const decodeNativeCredentials = Schema.decodeUnknownOption(NativeCredentials)
const credentialsFromInput = (request: LLMRequest): BedrockCredentials | undefined =>
decodeNativeCredentials(request.model.native?.aws_credentials).pipe(
Option.map((creds) => ({ ...creds, region: creds.region ?? region(request) })),
Option.getOrUndefined,
)
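// Illustrative native shape (values are placeholders, not real credentials):
//   model.native = {
//     aws_credentials: { accessKeyId: "AKIAEXAMPLE", secretAccessKey: "EXAMPLEKEY", region: "us-west-2" },
//     aws_region: "us-west-2",
//   }
// Anything that fails the NativeCredentials decode is treated as "no
// credentials"; if model.apiKey is also absent, `auth` fails the request with
// an invalid-request error.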
const signRequest = (input: {
readonly url: string
readonly body: string
readonly headers: Record<string, string>
readonly credentials: BedrockCredentials
}) =>
Effect.tryPromise({
try: async () => {
const signed = await new AwsV4Signer({
url: input.url,
method: "POST",
headers: Object.entries(input.headers),
body: input.body,
region: input.credentials.region,
accessKeyId: input.credentials.accessKeyId,
secretAccessKey: input.credentials.secretAccessKey,
sessionToken: input.credentials.sessionToken,
service: "bedrock",
}).sign()
return Object.fromEntries(signed.headers.entries())
},
catch: (error) =>
invalid(`Bedrock Converse SigV4 signing failed: ${error instanceof Error ? error.message : String(error)}`),
})
/**
* Bedrock auth. `model.apiKey` (Bedrock's newer Bearer API key auth) wins if
* set; otherwise we sign the request with SigV4 using AWS credentials from
* `model.native.aws_credentials`. SigV4 must sign the exact bytes that get
* sent, so the `content-type: application/json` header is included in the
* signing input — `jsonPost` then sets the same value below and the signature
* stays valid.
*/
const auth: Auth = (input) => {
if (input.request.model.apiKey) return Auth.bearer(input)
return Effect.gen(function* () {
const credentials = credentialsFromInput(input.request)
if (!credentials) {
return yield* invalid(
"Bedrock Converse requires either model.apiKey or AWS credentials in model.native.aws_credentials",
)
}
const headersForSigning = { ...input.headers, "content-type": "application/json" }
const signed = yield* signRequest({ url: input.url, body: input.body, headers: headersForSigning, credentials })
return { ...headersForSigning, ...signed }
})
}
const mapFinishReason = (reason: string): FinishReason => {
if (reason === "end_turn" || reason === "stop_sequence") return "stop"
if (reason === "max_tokens") return "length"
if (reason === "tool_use") return "tool-calls"
if (reason === "content_filtered" || reason === "guardrail_intervened") return "content-filter"
return "unknown"
}
const mapUsage = (usage: BedrockUsageSchema | undefined): Usage | undefined => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens,
totalTokens: ProviderShared.totalTokens(usage.inputTokens, usage.outputTokens, usage.totalTokens),
cacheReadInputTokens: usage.cacheReadInputTokens,
cacheWriteInputTokens: usage.cacheWriteInputTokens,
native: usage,
})
}
interface ParserState {
readonly tools: Record<number, ProviderShared.ToolAccumulator>
// Bedrock splits the finish into `messageStop` (carries `stopReason`) and
// `metadata` (carries usage). The raw stop reason is held here until
// `metadata` arrives, then mapped + emitted together as a single terminal
// `request-finish` event so consumers see one event with both.
readonly pendingStopReason: string | undefined
}
const finishToolCall = (tool: ProviderShared.ToolAccumulator | undefined) =>
Effect.gen(function* () {
if (!tool) return [] as ReadonlyArray<LLMEvent>
const input = yield* ProviderShared.parseToolInput(ADAPTER, tool.name, tool.input)
return [{ type: "tool-call" as const, id: tool.id, name: tool.name, input }]
})
const processChunk = (state: ParserState, chunk: BedrockChunk) =>
Effect.gen(function* () {
if (chunk.contentBlockStart?.start?.toolUse) {
const index = chunk.contentBlockStart.contentBlockIndex
return [
{
...state,
tools: {
...state.tools,
[index]: {
id: chunk.contentBlockStart.start.toolUse.toolUseId,
name: chunk.contentBlockStart.start.toolUse.name,
input: "",
},
},
},
[],
] as const
}
if (chunk.contentBlockDelta?.delta?.text) {
return [state, [{ type: "text-delta" as const, text: chunk.contentBlockDelta.delta.text }]] as const
}
if (chunk.contentBlockDelta?.delta?.reasoningContent?.text) {
return [
state,
[{ type: "reasoning-delta" as const, text: chunk.contentBlockDelta.delta.reasoningContent.text }],
] as const
}
if (chunk.contentBlockDelta?.delta?.toolUse) {
const index = chunk.contentBlockDelta.contentBlockIndex
const current = state.tools[index]
if (!current) {
return yield* ProviderShared.chunkError(ADAPTER, "Bedrock Converse tool delta is missing its tool call")
}
const next = { ...current, input: `${current.input}${chunk.contentBlockDelta.delta.toolUse.input}` }
return [
{ ...state, tools: { ...state.tools, [index]: next } },
[
{
type: "tool-input-delta" as const,
id: next.id,
name: next.name,
text: chunk.contentBlockDelta.delta.toolUse.input,
},
],
] as const
}
if (chunk.contentBlockStop) {
const events = yield* finishToolCall(state.tools[chunk.contentBlockStop.contentBlockIndex])
const { [chunk.contentBlockStop.contentBlockIndex]: _, ...tools } = state.tools
return [{ ...state, tools }, events] as const
}
if (chunk.messageStop) {
// Stash the reason — emit `request-finish` once `metadata` arrives with
// usage, so consumers see one terminal event carrying both. If metadata
// never arrives the `onHalt` fallback emits a usage-less finish.
return [{ ...state, pendingStopReason: chunk.messageStop.stopReason }, []] as const
}
if (chunk.metadata) {
const reason = state.pendingStopReason ? mapFinishReason(state.pendingStopReason) : "stop"
const usage = mapUsage(chunk.metadata.usage)
return [
{ ...state, pendingStopReason: undefined },
[{ type: "request-finish" as const, reason, usage }],
] as const
}
if (chunk.internalServerException || chunk.modelStreamErrorException || chunk.serviceUnavailableException) {
const message =
chunk.internalServerException?.message ??
chunk.modelStreamErrorException?.message ??
chunk.serviceUnavailableException?.message ??
"Bedrock Converse stream error"
return [state, [{ type: "provider-error" as const, message, retryable: true }]] as const
}
if (chunk.validationException || chunk.throttlingException) {
const message =
chunk.validationException?.message ?? chunk.throttlingException?.message ?? "Bedrock Converse error"
return [
state,
[{ type: "provider-error" as const, message, retryable: chunk.throttlingException !== undefined }],
] as const
}
return [state, []] as const
})
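// Finish sequencing sketch (hypothetical values): a stream ending with
//   { messageStop: { stopReason: "tool_use" } }
//   { metadata: { usage: { inputTokens: 900, outputTokens: 120 } } }
// emits no event for messageStop (the raw reason is stashed in
// pendingStopReason), then a single request-finish with reason "tool-calls"
// and the mapped usage once metadata arrives.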
// Bedrock streams responses using the AWS event stream binary protocol — each
// frame is `[length:4][headers-length:4][prelude-crc:4][headers][payload][crc:4]`.
// We use `@smithy/eventstream-codec` to validate framing and CRCs, then
// reconstruct the JSON wrapping by `:event-type` so the chunk schema can match.
const eventCodec = new EventStreamCodec(toUtf8, fromUtf8)
const utf8 = new TextDecoder()
// Cursor-tracking buffer state. Bytes accumulate in `buffer`; `offset` is the
// read position. Reading by `subarray` is zero-copy. A fresh buffer is only
// allocated when a new network chunk arrives while unconsumed bytes remain:
// the consumed prefix is dropped and the chunk appended in one alloc
// (compaction). If the previous buffer was fully consumed, the incoming chunk
// is adopted directly without copying.
interface FrameBufferState {
readonly buffer: Uint8Array
readonly offset: number
}
const initialFrameBuffer: FrameBufferState = { buffer: new Uint8Array(0), offset: 0 }
const appendChunk = (state: FrameBufferState, chunk: Uint8Array): FrameBufferState => {
const remaining = state.buffer.length - state.offset
if (remaining === 0) return { buffer: chunk, offset: 0 }
// Compact: drop the consumed prefix and append the new chunk in one alloc.
// This bounds buffer growth to at most one network chunk past the live
// window, regardless of stream length.
const next = new Uint8Array(remaining + chunk.length)
next.set(state.buffer.subarray(state.offset), 0)
next.set(chunk, remaining)
return { buffer: next, offset: 0 }
}
const consumeFrames = (state: FrameBufferState, chunk: Uint8Array) =>
Effect.gen(function* () {
let cursor = appendChunk(state, chunk)
const out: object[] = []
while (cursor.buffer.length - cursor.offset >= 4) {
const view = cursor.buffer.subarray(cursor.offset)
const totalLength = new DataView(view.buffer, view.byteOffset, view.byteLength).getUint32(0, false)
if (view.length < totalLength) break
const decoded = yield* Effect.try({
try: () => eventCodec.decode(view.subarray(0, totalLength)),
catch: (error) =>
ProviderShared.chunkError(
ADAPTER,
`Failed to decode Bedrock Converse event-stream frame: ${
error instanceof Error ? error.message : String(error)
}`,
),
})
cursor = { buffer: cursor.buffer, offset: cursor.offset + totalLength }
if (decoded.headers[":message-type"]?.value !== "event") continue
const eventType = decoded.headers[":event-type"]?.value
if (typeof eventType !== "string") continue
const payload = utf8.decode(decoded.body)
if (!payload) continue
// The AWS event stream pads short payloads with a `p` field. Drop it
// before handing the object to the chunk schema. JSON decode goes
// through the shared Schema-driven codec to satisfy the package rule
// against ad-hoc `JSON.parse` calls.
const parsed = (yield* ProviderShared.parseJson(
ADAPTER,
payload,
"Failed to parse Bedrock Converse event-stream payload",
)) as Record<string, unknown>
delete parsed.p
out.push({ [eventType]: parsed })
}
return [cursor, out] as const
})
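// Worked frame example (hypothetical): a decoded frame whose headers carry
//   ":message-type" = "event" and ":event-type" = "contentBlockDelta"
// and whose UTF-8 payload is
//   {"contentBlockIndex":0,"delta":{"text":"Hi"},"p":"abc"}
// is rewrapped (after dropping the padding field `p`) as
//   { contentBlockDelta: { contentBlockIndex: 0, delta: { text: "Hi" } } }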
/**
* AWS event-stream framing for Bedrock Converse. Each frame is decoded by
* `@smithy/eventstream-codec` (length + header + payload + CRC) and rewrapped
* under its `:event-type` header so the chunk schema can match the JSON
* payload directly. Reusable for any AWS service that wraps JSON payloads in
* event-stream frames keyed by `:event-type`.
*/
const framing: Framing<object> = {
id: "aws-event-stream",
frame: (bytes) => bytes.pipe(Stream.mapAccumEffect(() => initialFrameBuffer, consumeFrames)),
}
// If a stream ends after `messageStop` but before `metadata` (rare but
// possible on truncated transports), still surface a terminal finish.
const onHalt = (state: ParserState): ReadonlyArray<LLMEvent> =>
state.pendingStopReason
? [{ type: "request-finish", reason: mapFinishReason(state.pendingStopReason) }]
: []
/**
* The Bedrock Converse protocol — request lowering, target validation,
* body encoding, and the streaming-chunk state machine.
*/
export const protocol = Protocol.define<
BedrockConverseDraft,
BedrockConverseTarget,
object,
BedrockChunk,
ParserState
>({
id: "bedrock-converse",
prepare,
validate: ProviderShared.validateWith(decodeTarget),
encode: encodeTarget,
redact: (target) => target,
decode: decodeChunk,
initial: () => ({ tools: {}, pendingStopReason: undefined }),
process: processChunk,
onHalt,
streamReadError: "Failed to read Bedrock Converse stream",
})
export const adapter = Adapter.fromProtocol({
id: ADAPTER,
protocol,
endpoint: Endpoint.baseURL({
// Bedrock's URL embeds the region in the host and the validated modelId
// in the path. We reach into the target after target patches so the URL
// matches the body that gets signed.
default: ({ request }) => `https://bedrock-runtime.${region(request)}.amazonaws.com`,
path: ({ target }) => `/model/${encodeURIComponent(target.modelId)}/converse-stream`,
}),
auth,
framing,
})
export const model = (input: BedrockConverseModelInput) => {
const { credentials, ...rest } = input
return llmModel({
...rest,
provider: "bedrock",
protocol: "bedrock-converse",
capabilities:
input.capabilities ??
capabilities({
output: { reasoning: true },
tools: { calls: true, streamingInput: true },
cache: { prompt: true, contentBlocks: true },
}),
native: credentials
? {
...input.native,
aws_credentials: credentials,
aws_region: credentials.region,
}
: input.native,
})
}
export * as BedrockConverse from "./bedrock-converse"

View File

@@ -0,0 +1,521 @@
import { Effect, Schema } from "effect"
import { Adapter } from "../adapter"
import { Auth } from "../auth"
import { Endpoint } from "../endpoint"
import { Framing } from "../framing"
import { capabilities, model as llmModel, type ModelInput } from "../llm"
import { Protocol } from "../protocol"
import {
Usage,
type FinishReason,
type LLMEvent,
type LLMRequest,
type MediaPart,
type ReasoningEffort,
type TextPart,
type ToolCallPart,
type ToolDefinition,
} from "../schema"
import { ProviderShared } from "./shared"
const ADAPTER = "gemini"
export type GeminiModelInput = Omit<ModelInput, "provider" | "protocol" | "headers"> & {
readonly apiKey?: string
readonly headers?: Record<string, string>
}
const GeminiTextPart = Schema.Struct({
text: Schema.String,
thought: Schema.optional(Schema.Boolean),
thoughtSignature: Schema.optional(Schema.String),
})
const GeminiInlineDataPart = Schema.Struct({
inlineData: Schema.Struct({
mimeType: Schema.String,
data: Schema.String,
}),
})
const GeminiFunctionCallPart = Schema.Struct({
functionCall: Schema.Struct({
id: Schema.optional(Schema.String),
name: Schema.String,
args: Schema.Unknown,
}),
thoughtSignature: Schema.optional(Schema.String),
})
const GeminiFunctionResponsePart = Schema.Struct({
functionResponse: Schema.Struct({
id: Schema.optional(Schema.String),
name: Schema.String,
response: Schema.Unknown,
}),
})
const GeminiContentPart = Schema.Union([
GeminiTextPart,
GeminiInlineDataPart,
GeminiFunctionCallPart,
GeminiFunctionResponsePart,
])
const GeminiContent = Schema.Struct({
role: Schema.Literals(["user", "model"]),
parts: Schema.Array(GeminiContentPart),
})
type GeminiContent = Schema.Schema.Type<typeof GeminiContent>
const GeminiSystemInstruction = Schema.Struct({
parts: Schema.Array(Schema.Struct({ text: Schema.String })),
})
const GeminiFunctionDeclaration = Schema.Struct({
name: Schema.String,
description: Schema.String,
parameters: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
})
const GeminiTool = Schema.Struct({
functionDeclarations: Schema.Array(GeminiFunctionDeclaration),
})
const GeminiToolConfig = Schema.Struct({
functionCallingConfig: Schema.Struct({
mode: Schema.Literals(["AUTO", "NONE", "ANY"]),
allowedFunctionNames: Schema.optional(Schema.Array(Schema.String)),
}),
})
const GeminiThinkingConfig = Schema.Struct({
thinkingBudget: Schema.optional(Schema.Number),
includeThoughts: Schema.optional(Schema.Boolean),
})
const GeminiGenerationConfig = Schema.Struct({
maxOutputTokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
topP: Schema.optional(Schema.Number),
stopSequences: Schema.optional(Schema.Array(Schema.String)),
thinkingConfig: Schema.optional(GeminiThinkingConfig),
})
const GeminiTargetFields = {
contents: Schema.Array(GeminiContent),
systemInstruction: Schema.optional(GeminiSystemInstruction),
tools: Schema.optional(Schema.Array(GeminiTool)),
toolConfig: Schema.optional(GeminiToolConfig),
generationConfig: Schema.optional(GeminiGenerationConfig),
}
const GeminiDraft = Schema.Struct(GeminiTargetFields)
type GeminiDraft = Schema.Schema.Type<typeof GeminiDraft>
const GeminiTarget = Schema.Struct(GeminiTargetFields)
export type GeminiTarget = Schema.Schema.Type<typeof GeminiTarget>
const GeminiUsage = Schema.Struct({
cachedContentTokenCount: Schema.optional(Schema.Number),
thoughtsTokenCount: Schema.optional(Schema.Number),
promptTokenCount: Schema.optional(Schema.Number),
candidatesTokenCount: Schema.optional(Schema.Number),
totalTokenCount: Schema.optional(Schema.Number),
})
type GeminiUsage = Schema.Schema.Type<typeof GeminiUsage>
const GeminiCandidate = Schema.Struct({
content: Schema.optional(GeminiContent),
finishReason: Schema.optional(Schema.String),
})
const GeminiChunk = Schema.Struct({
candidates: Schema.optional(Schema.Array(GeminiCandidate)),
usageMetadata: Schema.optional(GeminiUsage),
})
type GeminiChunk = Schema.Schema.Type<typeof GeminiChunk>
interface ParserState {
readonly finishReason?: string
readonly hasToolCalls: boolean
readonly nextToolCallId: number
readonly usage?: Usage
}
const { encodeTarget, decodeTarget, decodeChunk } = ProviderShared.codecs({
adapter: ADAPTER,
draft: GeminiDraft,
target: GeminiTarget,
chunk: GeminiChunk,
chunkErrorMessage: "Invalid Gemini stream chunk",
})
const invalid = ProviderShared.invalidRequest
const mediaData = ProviderShared.mediaBytes
const isRecord = ProviderShared.isRecord
// Tool-schema conversion has two distinct concerns:
//
// 1. Sanitize — fix common authoring mistakes Gemini rejects: integer/number
// enums (must be strings), `required` entries that don't match a property,
// untyped arrays (`items` must be present), and `properties`/`required`
// keys on non-object scalars. Mirrors OpenCode's historical
// `ProviderTransform.schema` Gemini rules.
//
// 2. Project — lossy mapping from JSON Schema to Gemini's schema dialect:
// drop empty objects, derive `nullable: true` from `type: [..., "null"]`,
// coerce `const` to `[const]` enum, recurse properties/items, propagate
// only an allowlisted set of keys (description, required, format, type,
// properties, items, allOf, anyOf, oneOf, minLength). Anything outside the
// allowlist (e.g. `additionalProperties`, `$ref`) is silently dropped.
//
// Sanitize runs first, then project. Both passes live here so the adapter
// owns the full transformation; consumers don't need to register a patch.
const SCHEMA_INTENT_KEYS = [
"type",
"properties",
"items",
"prefixItems",
"enum",
"const",
"$ref",
"additionalProperties",
"patternProperties",
"required",
"not",
"if",
"then",
"else",
]
const hasCombiner = (schema: unknown) =>
isRecord(schema) && (Array.isArray(schema.anyOf) || Array.isArray(schema.oneOf) || Array.isArray(schema.allOf))
const hasSchemaIntent = (schema: unknown) =>
isRecord(schema) && (hasCombiner(schema) || SCHEMA_INTENT_KEYS.some((key) => key in schema))
const sanitizeToolSchemaNode = (schema: unknown): unknown => {
if (!isRecord(schema)) return Array.isArray(schema) ? schema.map(sanitizeToolSchemaNode) : schema
const result: Record<string, unknown> = Object.fromEntries(
Object.entries(schema).map(([key, value]) =>
[key, key === "enum" && Array.isArray(value) ? value.map(String) : sanitizeToolSchemaNode(value)],
),
)
// Integer/number enums become string enums on the wire — Gemini rejects
// numeric enum values. The `enum` map above already coerced the values;
// this rewrites the type to match.
if (Array.isArray(result.enum) && (result.type === "integer" || result.type === "number")) result.type = "string"
// Filter `required` entries that don't appear in `properties` — Gemini
// rejects dangling required field references.
const properties = result.properties
if (result.type === "object" && isRecord(properties) && Array.isArray(result.required)) {
result.required = result.required.filter((field) => typeof field === "string" && field in properties)
}
// Default untyped arrays to string-typed items so Gemini has a concrete
// schema to validate against.
if (result.type === "array" && !hasCombiner(result)) {
result.items = result.items ?? {}
if (isRecord(result.items) && !hasSchemaIntent(result.items)) result.items = { ...result.items, type: "string" }
}
// Scalar schemas can't carry object-shaped keys.
if (typeof result.type === "string" && result.type !== "object" && !hasCombiner(result)) {
delete result.properties
delete result.required
}
return result
}
const emptyObjectSchema = (schema: Record<string, unknown>) =>
schema.type === "object" && (!isRecord(schema.properties) || Object.keys(schema.properties).length === 0) &&
!schema.additionalProperties
const projectToolSchemaNode = (schema: unknown): Record<string, unknown> | undefined => {
if (!isRecord(schema)) return undefined
if (emptyObjectSchema(schema)) return undefined
return Object.fromEntries(
[
["description", schema.description],
["required", schema.required],
["format", schema.format],
["type", Array.isArray(schema.type) ? schema.type.filter((type) => type !== "null")[0] : schema.type],
["nullable", Array.isArray(schema.type) && schema.type.includes("null") ? true : undefined],
["enum", schema.const !== undefined ? [schema.const] : schema.enum],
["properties", isRecord(schema.properties)
? Object.fromEntries(
Object.entries(schema.properties).map(([key, value]) => [key, projectToolSchemaNode(value)]),
)
: undefined],
["items", Array.isArray(schema.items)
? schema.items.map(projectToolSchemaNode)
: schema.items === undefined
? undefined
: projectToolSchemaNode(schema.items)],
["allOf", Array.isArray(schema.allOf) ? schema.allOf.map(projectToolSchemaNode) : undefined],
["anyOf", Array.isArray(schema.anyOf) ? schema.anyOf.map(projectToolSchemaNode) : undefined],
["oneOf", Array.isArray(schema.oneOf) ? schema.oneOf.map(projectToolSchemaNode) : undefined],
["minLength", schema.minLength],
].filter((entry) => entry[1] !== undefined),
)
}
const convertToolSchema = (schema: unknown) => projectToolSchemaNode(sanitizeToolSchemaNode(schema))
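// Illustrative conversion (schema is hypothetical). Input:
//   { type: "object",
//     properties: { level: { type: "integer", enum: [1, 2, 3] }, tags: { type: "array" } },
//     required: ["level", "missing"] }
// Sanitize coerces the enum values to ["1", "2", "3"] (and the enum's type to
// "string"), drops the dangling "missing" entry from required, and gives the
// untyped array string-typed items. Project then keeps only allowlisted keys,
// yielding roughly:
//   { type: "object",
//     required: ["level"],
//     properties: { level: { type: "string", enum: ["1", "2", "3"] },
//                   tags: { type: "array", items: { type: "string" } } } }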
const lowerTool = (tool: ToolDefinition) => ({
name: tool.name,
description: tool.description,
parameters: convertToolSchema(tool.inputSchema),
})
const lowerToolConfig = Effect.fn("Gemini.lowerToolConfig")(function* (
toolChoice: NonNullable<LLMRequest["toolChoice"]>,
) {
if (toolChoice.type === "required") return { functionCallingConfig: { mode: "ANY" as const } }
if (toolChoice.type === "none") return { functionCallingConfig: { mode: "NONE" as const } }
if (toolChoice.type !== "tool") return { functionCallingConfig: { mode: "AUTO" as const } }
if (!toolChoice.name) return yield* invalid("Gemini tool choice requires a tool name")
return {
functionCallingConfig: { mode: "ANY" as const, allowedFunctionNames: [toolChoice.name] },
}
})
const lowerUserPart = (part: TextPart | MediaPart) =>
part.type === "text"
? { text: part.text }
: { inlineData: { mimeType: part.mediaType, data: mediaData(part) } }
const thoughtSignature = (metadata: Record<string, unknown> | undefined) =>
isRecord(metadata?.google) && typeof metadata.google.thoughtSignature === "string"
? metadata.google.thoughtSignature
: undefined
const withThoughtSignature = (signature: string | undefined) => signature ? { thoughtSignature: signature } : {}
const lowerToolCall = (part: ToolCallPart) => ({
functionCall: { id: part.id, name: part.name, args: part.input },
...withThoughtSignature(thoughtSignature(part.metadata)),
})
const lowerMessages = Effect.fn("Gemini.lowerMessages")(function* (request: LLMRequest) {
const contents: GeminiContent[] = []
for (const message of request.messages) {
if (message.role === "user") {
const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
for (const part of message.content) {
if (part.type !== "text" && part.type !== "media")
return yield* invalid("Gemini user messages only support text and media content for now")
parts.push(lowerUserPart(part))
}
contents.push({ role: "user", parts })
continue
}
if (message.role === "assistant") {
const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
for (const part of message.content) {
if (part.type === "text") {
parts.push({ text: part.text, ...withThoughtSignature(thoughtSignature(part.metadata)) })
continue
}
if (part.type === "reasoning") {
parts.push({ text: part.text, thought: true, ...withThoughtSignature(thoughtSignature(part.metadata)) })
continue
}
if (part.type === "tool-call") {
parts.push(lowerToolCall(part))
continue
}
return yield* invalid("Gemini assistant messages only support text, reasoning, and tool-call content for now")
}
contents.push({ role: "model", parts })
continue
}
const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
for (const part of message.content) {
if (part.type !== "tool-result") return yield* invalid("Gemini tool messages only support tool-result content")
parts.push({
functionResponse: {
id: part.id,
name: part.name,
response: {
name: part.name,
content: ProviderShared.toolResultText(part),
},
},
})
}
contents.push({ role: "user", parts })
}
return contents
})
const thinkingBudget = (effort: ReasoningEffort | undefined) => {
if (effort === "minimal" || effort === "low") return 1024
if (effort === "high") return 16000
if (effort === "xhigh") return 24576
if (effort === "max") return 32768
return 8192
}
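// Illustrative budgets (each value mirrors a branch above):
//   thinkingBudget("low")     // => 1024
//   thinkingBudget("max")     // => 32768
//   thinkingBudget("medium")  // => 8192 (default branch)
//   thinkingBudget(undefined) // => 8192 (default branch)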
const prepare = Effect.fn("Gemini.prepare")(function* (request: LLMRequest) {
const toolsEnabled = request.tools.length > 0 && request.toolChoice?.type !== "none"
const generationConfig = {
maxOutputTokens: request.generation.maxTokens,
temperature: request.generation.temperature,
topP: request.generation.topP,
stopSequences: request.generation.stop,
thinkingConfig: request.reasoning?.enabled
? {
includeThoughts: true,
thinkingBudget: thinkingBudget(request.reasoning.effort),
}
: undefined,
}
return {
contents: yield* lowerMessages(request),
systemInstruction: request.system.length === 0 ? undefined : { parts: [{ text: ProviderShared.joinText(request.system) }] },
tools: toolsEnabled ? [{ functionDeclarations: request.tools.map(lowerTool) }] : undefined,
toolConfig: toolsEnabled && request.toolChoice ? yield* lowerToolConfig(request.toolChoice) : undefined,
generationConfig: Object.values(generationConfig).some((value) => value !== undefined) ? generationConfig : undefined,
}
})
const mapUsage = (usage: GeminiUsage | undefined) => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.promptTokenCount,
outputTokens: usage.candidatesTokenCount,
reasoningTokens: usage.thoughtsTokenCount,
cacheReadInputTokens: usage.cachedContentTokenCount,
totalTokens: ProviderShared.totalTokens(usage.promptTokenCount, usage.candidatesTokenCount, usage.totalTokenCount),
native: usage,
})
}
const mapFinishReason = (finishReason: string | undefined, hasToolCalls: boolean): FinishReason => {
if (finishReason === "STOP") return hasToolCalls ? "tool-calls" : "stop"
if (finishReason === "MAX_TOKENS") return "length"
if (
finishReason === "IMAGE_SAFETY" ||
finishReason === "RECITATION" ||
finishReason === "SAFETY" ||
finishReason === "BLOCKLIST" ||
finishReason === "PROHIBITED_CONTENT" ||
finishReason === "SPII"
)
return "content-filter"
if (finishReason === "MALFORMED_FUNCTION_CALL") return "error"
return "unknown"
}
const finish = (state: ParserState): ReadonlyArray<LLMEvent> =>
state.finishReason || state.usage
? [{ type: "request-finish", reason: mapFinishReason(state.finishReason, state.hasToolCalls), usage: state.usage }]
: []
const processChunk = (state: ParserState, chunk: GeminiChunk) => {
const nextState = {
...state,
usage: chunk.usageMetadata ? mapUsage(chunk.usageMetadata) ?? state.usage : state.usage,
}
const candidate = chunk.candidates?.[0]
if (!candidate?.content) {
return Effect.succeed([{ ...nextState, finishReason: candidate?.finishReason ?? nextState.finishReason }, []] as const)
}
const events: LLMEvent[] = []
let hasToolCalls = nextState.hasToolCalls
let nextToolCallId = nextState.nextToolCallId
for (const part of candidate.content.parts) {
if ("text" in part && part.text.length > 0) {
events.push({
type: part.thought ? "reasoning-delta" : "text-delta",
text: part.text,
...(part.thoughtSignature ? { metadata: { google: { thoughtSignature: part.thoughtSignature } } } : {}),
})
continue
}
if ("functionCall" in part) {
const input = part.functionCall.args
const id = part.functionCall.id ?? `tool_${nextToolCallId}`
events.push({
type: "tool-call",
id,
name: part.functionCall.name,
input,
...(part.thoughtSignature || part.functionCall.id
? { metadata: { google: { ...(part.thoughtSignature ? { thoughtSignature: part.thoughtSignature } : {}), ...(part.functionCall.id ? { functionCallId: part.functionCall.id } : {}) } } }
: {}),
})
if (!part.functionCall.id) nextToolCallId++
hasToolCalls = true
}
}
return Effect.succeed([{
...nextState,
hasToolCalls,
nextToolCallId,
finishReason: candidate.finishReason ?? nextState.finishReason,
}, events] as const)
}
/**
* The Gemini protocol — request lowering, target validation, body encoding,
* and the streaming-chunk state machine. Used by Google AI Studio Gemini and
* (once registered) Vertex Gemini.
*/
export const protocol = Protocol.define<GeminiDraft, GeminiTarget, string, GeminiChunk, ParserState>({
id: "gemini",
prepare,
validate: ProviderShared.validateWith(decodeTarget),
encode: encodeTarget,
redact: (target) => target,
decode: decodeChunk,
initial: () => ({ hasToolCalls: false, nextToolCallId: 0 }),
process: processChunk,
onHalt: finish,
streamReadError: "Failed to read Gemini stream",
})
export const adapter = Adapter.fromProtocol({
id: ADAPTER,
protocol,
endpoint: Endpoint.baseURL({
default: "https://generativelanguage.googleapis.com/v1beta",
// Gemini's path embeds the model id and pins SSE framing at the URL level.
path: ({ request }) => `/models/${request.model.id}:streamGenerateContent?alt=sse`,
}),
auth: Auth.apiKeyHeader("x-goog-api-key"),
framing: Framing.sse,
})
export const model = (input: GeminiModelInput) =>
llmModel({
...input,
provider: "google",
protocol: "gemini",
capabilities: input.capabilities ?? capabilities({
input: { image: true, audio: true, video: true, pdf: true },
output: { reasoning: true },
tools: { calls: true },
reasoning: { efforts: ["minimal", "low", "medium", "high", "xhigh", "max"] },
}),
})
export * as Gemini from "./gemini"


@@ -0,0 +1,18 @@
import { ProviderResolver } from "../provider-resolver"
import { ProviderID } from "../schema"
export const id = ProviderID.make("github-copilot")
export const shouldUseResponsesApi = (modelID: string) => {
const match = /^gpt-(\d+)/.exec(modelID)
if (!match) return false
return Number(match[1]) >= 5 && !modelID.startsWith("gpt-5-mini")
}
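// Illustrative routing (model ids below are examples, not an exhaustive list):
//   shouldUseResponsesApi("gpt-5")      // => true  (major version >= 5)
//   shouldUseResponsesApi("gpt-5-mini") // => false (explicitly excluded)
//   shouldUseResponsesApi("gpt-4o")     // => false (major version < 5)
//   shouldUseResponsesApi("claude-3")   // => false (no gpt-<n> prefix)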
export const resolver = ProviderResolver.define({
id,
resolve: (input) =>
ProviderResolver.make(id, shouldUseResponsesApi(input.modelID) ? "openai-responses" : "openai-chat"),
})
export * as GitHubCopilot from "./github-copilot"


@@ -0,0 +1,5 @@
import { ProviderResolver } from "../provider-resolver"
export const resolver = ProviderResolver.fixed("google", "gemini")
export * as Google from "./google"


@@ -0,0 +1,379 @@
import { Effect, Schema } from "effect"
import { Adapter } from "../adapter"
import { Auth } from "../auth"
import { Endpoint } from "../endpoint"
import { Framing } from "../framing"
import { capabilities, model as llmModel, type ModelInput } from "../llm"
import { Protocol } from "../protocol"
import {
Usage,
type FinishReason,
type LLMEvent,
type LLMRequest,
type TextPart,
type ToolCallPart,
type ToolDefinition,
} from "../schema"
import { ProviderShared } from "./shared"
const ADAPTER = "openai-chat"
export type OpenAIChatModelInput = Omit<ModelInput, "provider" | "protocol" | "headers"> & {
readonly apiKey?: string
readonly headers?: Record<string, string>
}
const OpenAIChatFunction = Schema.Struct({
name: Schema.String,
description: Schema.String,
parameters: Schema.Record(Schema.String, Schema.Unknown),
})
const OpenAIChatTool = Schema.Struct({
type: Schema.Literal("function"),
function: OpenAIChatFunction,
})
type OpenAIChatTool = Schema.Schema.Type<typeof OpenAIChatTool>
const OpenAIChatAssistantToolCall = Schema.Struct({
id: Schema.String,
type: Schema.Literal("function"),
function: Schema.Struct({
name: Schema.String,
arguments: Schema.String,
}),
})
type OpenAIChatAssistantToolCall = Schema.Schema.Type<typeof OpenAIChatAssistantToolCall>
const OpenAIChatMessage = Schema.Union([
Schema.Struct({ role: Schema.Literal("system"), content: Schema.String }),
Schema.Struct({ role: Schema.Literal("user"), content: Schema.String }),
Schema.Struct({
role: Schema.Literal("assistant"),
content: Schema.NullOr(Schema.String),
tool_calls: Schema.optional(Schema.Array(OpenAIChatAssistantToolCall)),
}),
Schema.Struct({ role: Schema.Literal("tool"), tool_call_id: Schema.String, content: Schema.String }),
])
type OpenAIChatMessage = Schema.Schema.Type<typeof OpenAIChatMessage>
const OpenAIChatToolChoiceFunction = Schema.Struct({ name: Schema.String })
const OpenAIChatToolChoice = Schema.Union([
Schema.Literals(["auto", "none", "required"]),
Schema.Struct({
type: Schema.Literal("function"),
function: OpenAIChatToolChoiceFunction,
}),
])
const OpenAIChatTargetFields = {
model: Schema.String,
messages: Schema.Array(OpenAIChatMessage),
tools: Schema.optional(Schema.Array(OpenAIChatTool)),
tool_choice: Schema.optional(OpenAIChatToolChoice),
stream: Schema.Literal(true),
stream_options: Schema.optional(Schema.Struct({ include_usage: Schema.Boolean })),
max_tokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
top_p: Schema.optional(Schema.Number),
stop: Schema.optional(Schema.Array(Schema.String)),
}
const OpenAIChatDraft = Schema.Struct(OpenAIChatTargetFields)
type OpenAIChatDraft = Schema.Schema.Type<typeof OpenAIChatDraft>
const OpenAIChatTarget = Schema.Struct(OpenAIChatTargetFields)
export type OpenAIChatTarget = Schema.Schema.Type<typeof OpenAIChatTarget>
const OpenAIChatUsage = Schema.Struct({
prompt_tokens: Schema.optional(Schema.Number),
completion_tokens: Schema.optional(Schema.Number),
total_tokens: Schema.optional(Schema.Number),
prompt_tokens_details: Schema.optional(
Schema.NullOr(
Schema.Struct({
cached_tokens: Schema.optional(Schema.Number),
}),
),
),
completion_tokens_details: Schema.optional(
Schema.NullOr(
Schema.Struct({
reasoning_tokens: Schema.optional(Schema.Number),
}),
),
),
})
const OpenAIChatToolCallDeltaFunction = Schema.Struct({
name: Schema.optional(Schema.NullOr(Schema.String)),
arguments: Schema.optional(Schema.NullOr(Schema.String)),
})
const OpenAIChatToolCallDelta = Schema.Struct({
index: Schema.Number,
id: Schema.optional(Schema.NullOr(Schema.String)),
function: Schema.optional(Schema.NullOr(OpenAIChatToolCallDeltaFunction)),
})
type OpenAIChatToolCallDelta = Schema.Schema.Type<typeof OpenAIChatToolCallDelta>
const OpenAIChatDelta = Schema.Struct({
content: Schema.optional(Schema.NullOr(Schema.String)),
tool_calls: Schema.optional(Schema.NullOr(Schema.Array(OpenAIChatToolCallDelta))),
})
const OpenAIChatChoice = Schema.Struct({
delta: Schema.optional(Schema.NullOr(OpenAIChatDelta)),
finish_reason: Schema.optional(Schema.NullOr(Schema.String)),
})
const OpenAIChatChunk = Schema.Struct({
choices: Schema.Array(OpenAIChatChoice),
usage: Schema.optional(Schema.NullOr(OpenAIChatUsage)),
})
type OpenAIChatChunk = Schema.Schema.Type<typeof OpenAIChatChunk>
const { encodeTarget, decodeTarget, decodeChunk } = ProviderShared.codecs({
adapter: ADAPTER,
draft: OpenAIChatDraft,
target: OpenAIChatTarget,
chunk: OpenAIChatChunk,
chunkErrorMessage: "Invalid OpenAI Chat stream chunk",
})
interface ParsedToolCall {
readonly id: string
readonly name: string
readonly input: unknown
}
interface ParserState {
readonly tools: Record<number, ProviderShared.ToolAccumulator>
readonly toolCalls: ReadonlyArray<ParsedToolCall>
readonly usage?: Usage
readonly finishReason?: FinishReason
}
const invalid = ProviderShared.invalidRequest
const lowerTool = (tool: ToolDefinition): OpenAIChatTool => ({
type: "function",
function: {
name: tool.name,
description: tool.description,
parameters: tool.inputSchema,
},
})
const lowerToolChoice = Effect.fn("OpenAIChat.lowerToolChoice")(function* (
toolChoice: NonNullable<LLMRequest["toolChoice"]>,
) {
if (toolChoice.type !== "tool") return toolChoice.type
if (!toolChoice.name) return yield* invalid("OpenAI Chat tool choice requires a tool name")
return { type: "function" as const, function: { name: toolChoice.name } }
})
const lowerToolCall = (part: ToolCallPart): OpenAIChatAssistantToolCall => ({
id: part.id,
type: "function",
function: {
name: part.name,
arguments: ProviderShared.encodeJson(part.input),
},
})
const lowerMessages = Effect.fn("OpenAIChat.lowerMessages")(function* (request: LLMRequest) {
const system: OpenAIChatMessage[] =
request.system.length === 0 ? [] : [{ role: "system", content: ProviderShared.joinText(request.system) }]
const messages: OpenAIChatMessage[] = [...system]
for (const message of request.messages) {
if (message.role === "user") {
const content: TextPart[] = []
for (const part of message.content) {
if (part.type !== "text") return yield* invalid(`OpenAI Chat user messages only support text content for now`)
content.push(part)
}
messages.push({ role: "user", content: ProviderShared.joinText(content) })
continue
}
if (message.role === "assistant") {
const content: TextPart[] = []
const toolCalls: OpenAIChatAssistantToolCall[] = []
for (const part of message.content) {
if (part.type === "text") {
content.push(part)
continue
}
if (part.type === "tool-call") {
toolCalls.push(lowerToolCall(part))
continue
}
return yield* invalid(`OpenAI Chat assistant messages only support text and tool-call content for now`)
}
messages.push({
role: "assistant",
content: content.length === 0 ? null : ProviderShared.joinText(content),
tool_calls: toolCalls.length === 0 ? undefined : toolCalls,
})
continue
}
for (const part of message.content) {
if (part.type !== "tool-result")
return yield* invalid(`OpenAI Chat tool messages only support tool-result content`)
messages.push({ role: "tool", tool_call_id: part.id, content: ProviderShared.toolResultText(part) })
}
}
return messages
})
const prepare = Effect.fn("OpenAIChat.prepare")(function* (request: LLMRequest) {
return {
model: request.model.id,
messages: yield* lowerMessages(request),
tools: request.tools.length === 0 ? undefined : request.tools.map(lowerTool),
tool_choice: request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined,
stream: true as const,
max_tokens: request.generation.maxTokens,
temperature: request.generation.temperature,
top_p: request.generation.topP,
stop: request.generation.stop,
}
})
const mapFinishReason = (reason: string | null | undefined): FinishReason => {
if (reason === "stop") return "stop"
if (reason === "length") return "length"
if (reason === "content_filter") return "content-filter"
if (reason === "function_call" || reason === "tool_calls") return "tool-calls"
return "unknown"
}
const mapUsage = (usage: OpenAIChatChunk["usage"]): Usage | undefined => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.prompt_tokens,
outputTokens: usage.completion_tokens,
reasoningTokens: usage.completion_tokens_details?.reasoning_tokens,
cacheReadInputTokens: usage.prompt_tokens_details?.cached_tokens,
totalTokens: ProviderShared.totalTokens(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens),
native: usage,
})
}
const pushToolDelta = (tools: Record<number, ProviderShared.ToolAccumulator>, delta: OpenAIChatToolCallDelta) =>
Effect.gen(function* () {
const current = tools[delta.index]
const id = delta.id ?? current?.id
const name = delta.function?.name ?? current?.name
if (!id || !name) {
return yield* ProviderShared.chunkError(ADAPTER, "OpenAI Chat tool call delta is missing id or name")
}
return {
id,
name,
input: `${current?.input ?? ""}${delta.function?.arguments ?? ""}`,
}
})
const finalizeToolCalls = (tools: Record<number, ProviderShared.ToolAccumulator>) =>
Effect.forEach(Object.values(tools), (tool) =>
Effect.gen(function* () {
const input = yield* ProviderShared.parseToolInput(ADAPTER, tool.name, tool.input)
return { id: tool.id, name: tool.name, input } satisfies ParsedToolCall
}),
)
const processChunk = (state: ParserState, chunk: OpenAIChatChunk) =>
Effect.gen(function* () {
const events: LLMEvent[] = []
const usage = mapUsage(chunk.usage) ?? state.usage
const choice = chunk.choices[0]
const finishReason = choice?.finish_reason ? mapFinishReason(choice.finish_reason) : state.finishReason
const delta = choice?.delta
const toolDeltas = delta?.tool_calls ?? []
const tools = toolDeltas.length === 0 ? state.tools : { ...state.tools }
if (delta?.content) events.push({ type: "text-delta", text: delta.content })
for (const tool of toolDeltas) {
const current = yield* pushToolDelta(tools, tool)
tools[tool.index] = current
if (tool.function?.arguments) {
events.push({ type: "tool-input-delta", id: current.id, name: current.name, text: tool.function.arguments })
}
}
// Finalize accumulated tool inputs eagerly when finish_reason arrives so
// JSON parse failures fail the stream at the boundary rather than at halt.
const toolCalls =
finishReason !== undefined && state.finishReason === undefined && Object.keys(tools).length > 0
? yield* finalizeToolCalls(tools)
: state.toolCalls
return [{ tools, toolCalls, usage, finishReason }, events] as const
})
const finishEvents = (state: ParserState): ReadonlyArray<LLMEvent> => {
const hasToolCalls = state.toolCalls.length > 0
const reason = state.finishReason === "stop" && hasToolCalls ? "tool-calls" : state.finishReason
return [
...state.toolCalls.map((call) => ({ type: "tool-call" as const, ...call })),
...(reason ? ([{ type: "request-finish", reason, usage: state.usage }] satisfies ReadonlyArray<LLMEvent>) : []),
]
}
/**
* The OpenAI Chat protocol — request lowering, target validation, body
* encoding, and the streaming-chunk state machine. Reused by every adapter
* that speaks OpenAI Chat over HTTP+SSE: native OpenAI, DeepSeek, TogetherAI,
* Cerebras, Baseten, Fireworks, DeepInfra, and (once added) Azure OpenAI Chat.
*/
export const protocol = Protocol.define<
OpenAIChatDraft,
OpenAIChatTarget,
string,
OpenAIChatChunk,
ParserState
>({
id: "openai-chat",
prepare,
validate: ProviderShared.validateWith(decodeTarget),
encode: encodeTarget,
redact: (target) => target,
decode: decodeChunk,
initial: () => ({ tools: {}, toolCalls: [] }),
process: processChunk,
onHalt: finishEvents,
streamReadError: "Failed to read OpenAI Chat stream",
})
export const adapter = Adapter.fromProtocol({
id: ADAPTER,
protocol,
endpoint: Endpoint.baseURL({ default: "https://api.openai.com/v1", path: "/chat/completions" }),
auth: Auth.openAI,
framing: Framing.sse,
})
export const model = (input: OpenAIChatModelInput) =>
llmModel({
...input,
provider: "openai",
protocol: "openai-chat",
capabilities: input.capabilities ?? capabilities({ tools: { calls: true, streamingInput: true } }),
})
export const includeUsage = adapter.patch("include-usage", {
reason: "request final usage chunk from OpenAI Chat streaming responses",
apply: (target) => ({
...target,
stream_options: { ...target.stream_options, include_usage: true },
}),
})
export * as OpenAIChat from "./openai-chat"


@@ -0,0 +1,74 @@
import { Adapter } from "../adapter"
import { Endpoint } from "../endpoint"
import { Framing } from "../framing"
import { capabilities, model as llmModel, type ModelInput } from "../llm"
import { OpenAIChat } from "./openai-chat"
import { families, type ProviderFamily } from "./openai-compatible-family"
const ADAPTER = "openai-compatible-chat"
export type OpenAICompatibleChatModelInput = Omit<ModelInput, "protocol" | "headers" | "baseURL"> & {
readonly baseURL: string
readonly apiKey?: string
readonly headers?: Record<string, string>
}
export type ProviderFamilyModelInput = Omit<OpenAICompatibleChatModelInput, "provider" | "baseURL"> & {
readonly baseURL?: string
}
/**
* Adapter for non-OpenAI providers that expose an OpenAI Chat-compatible
* `/chat/completions` endpoint. Reuses `OpenAIChat.protocol` end-to-end and
* only overrides:
*
* - the registered protocol id (`openai-compatible-chat`) so providers can be
* resolved per-family without colliding with native OpenAI;
* - the endpoint, which requires `model.baseURL` (no provider default).
*/
export const adapter = Adapter.fromProtocol({
id: ADAPTER,
protocol: OpenAIChat.protocol,
protocolId: "openai-compatible-chat",
endpoint: Endpoint.baseURL({
path: "/chat/completions",
required: "OpenAI-compatible Chat requires a baseURL",
}),
framing: Framing.sse,
})
export const model = (input: OpenAICompatibleChatModelInput) =>
llmModel({
...input,
protocol: "openai-compatible-chat",
capabilities: input.capabilities ?? capabilities({ tools: { calls: true, streamingInput: true } }),
})
const familyModel = (family: ProviderFamily, input: ProviderFamilyModelInput) =>
model({
...input,
provider: family.provider,
baseURL: input.baseURL ?? family.baseURL,
})
export const baseten = (input: ProviderFamilyModelInput) => familyModel(families.baseten, input)
export const cerebras = (input: ProviderFamilyModelInput) => familyModel(families.cerebras, input)
export const deepinfra = (input: ProviderFamilyModelInput) => familyModel(families.deepinfra, input)
export const deepseek = (input: ProviderFamilyModelInput) => familyModel(families.deepseek, input)
export const fireworks = (input: ProviderFamilyModelInput) => familyModel(families.fireworks, input)
export const togetherai = (input: ProviderFamilyModelInput) => familyModel(families.togetherai, input)
export const includeUsage = adapter.patch("include-usage", {
reason: "request final usage chunk from OpenAI-compatible Chat streaming responses",
apply: (target) => ({
...target,
stream_options: { ...target.stream_options, include_usage: true },
}),
})
export * as OpenAICompatibleChat from "./openai-compatible-chat"


@@ -0,0 +1,36 @@
import { ProviderResolver } from "../provider-resolver"
export interface ProviderFamily {
readonly provider: string
readonly baseURL: string
}
export const families = {
baseten: { provider: "baseten", baseURL: "https://inference.baseten.co/v1" },
cerebras: { provider: "cerebras", baseURL: "https://api.cerebras.ai/v1" },
deepinfra: { provider: "deepinfra", baseURL: "https://api.deepinfra.com/v1/openai" },
deepseek: { provider: "deepseek", baseURL: "https://api.deepseek.com/v1" },
fireworks: { provider: "fireworks", baseURL: "https://api.fireworks.ai/inference/v1" },
togetherai: { provider: "togetherai", baseURL: "https://api.together.xyz/v1" },
} as const satisfies Record<string, ProviderFamily>
export const byProvider: Record<string, ProviderFamily> = Object.fromEntries(
Object.values(families).map((family) => [family.provider, family]),
)
const resolutions = Object.fromEntries(
Object.values(families).map((family) => [
family.provider,
ProviderResolver.make(family.provider, "openai-compatible-chat", { baseURL: family.baseURL }),
]),
)
export const resolve = (provider: string) =>
resolutions[provider] ?? ProviderResolver.make(provider, "openai-compatible-chat")
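// Illustrative resolutions (the second provider id is made up for this example):
//   resolve("deepseek")
//   // => ProviderResolver.make("deepseek", "openai-compatible-chat",
//   //      { baseURL: "https://api.deepseek.com/v1" })
//   resolve("my-self-hosted")
//   // => ProviderResolver.make("my-self-hosted", "openai-compatible-chat") (no baseURL default)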
export const resolver = ProviderResolver.define({
id: ProviderResolver.make("openai-compatible", "openai-compatible-chat").provider,
resolve: (input) => resolve(input.providerID),
})
export * as OpenAICompatibleFamily from "./openai-compatible-family"


@@ -0,0 +1,407 @@
import { Effect, Schema } from "effect"
import { Adapter } from "../adapter"
import { Auth } from "../auth"
import { Endpoint } from "../endpoint"
import { Framing } from "../framing"
import { capabilities, model as llmModel, type ModelInput } from "../llm"
import { Protocol } from "../protocol"
import {
Usage,
type FinishReason,
type LLMEvent,
type LLMRequest,
type TextPart,
type ToolCallPart,
type ToolDefinition,
} from "../schema"
import { ProviderShared } from "./shared"
const ADAPTER = "openai-responses"
export type OpenAIResponsesModelInput = Omit<ModelInput, "provider" | "protocol" | "headers"> & {
readonly apiKey?: string
readonly headers?: Record<string, string>
}
const OpenAIResponsesInputText = Schema.Struct({
type: Schema.Literal("input_text"),
text: Schema.String,
})
const OpenAIResponsesOutputText = Schema.Struct({
type: Schema.Literal("output_text"),
text: Schema.String,
})
const OpenAIResponsesInputItem = Schema.Union([
Schema.Struct({ role: Schema.Literal("system"), content: Schema.String }),
Schema.Struct({ role: Schema.Literal("user"), content: Schema.Array(OpenAIResponsesInputText) }),
Schema.Struct({ role: Schema.Literal("assistant"), content: Schema.Array(OpenAIResponsesOutputText) }),
Schema.Struct({
type: Schema.Literal("function_call"),
call_id: Schema.String,
name: Schema.String,
arguments: Schema.String,
}),
Schema.Struct({
type: Schema.Literal("function_call_output"),
call_id: Schema.String,
output: Schema.String,
}),
])
type OpenAIResponsesInputItem = Schema.Schema.Type<typeof OpenAIResponsesInputItem>
const OpenAIResponsesTool = Schema.Struct({
type: Schema.Literal("function"),
name: Schema.String,
description: Schema.String,
parameters: Schema.Record(Schema.String, Schema.Unknown),
strict: Schema.optional(Schema.Boolean),
})
type OpenAIResponsesTool = Schema.Schema.Type<typeof OpenAIResponsesTool>
const OpenAIResponsesToolChoice = Schema.Union([
Schema.Literals(["auto", "none", "required"]),
Schema.Struct({ type: Schema.Literal("function"), name: Schema.String }),
])
const OpenAIResponsesTargetFields = {
model: Schema.String,
input: Schema.Array(OpenAIResponsesInputItem),
tools: Schema.optional(Schema.Array(OpenAIResponsesTool)),
tool_choice: Schema.optional(OpenAIResponsesToolChoice),
stream: Schema.Literal(true),
max_output_tokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
top_p: Schema.optional(Schema.Number),
}
const OpenAIResponsesDraft = Schema.Struct(OpenAIResponsesTargetFields)
type OpenAIResponsesDraft = Schema.Schema.Type<typeof OpenAIResponsesDraft>
const OpenAIResponsesTarget = Schema.Struct(OpenAIResponsesTargetFields)
export type OpenAIResponsesTarget = Schema.Schema.Type<typeof OpenAIResponsesTarget>
const OpenAIResponsesUsage = Schema.Struct({
input_tokens: Schema.optional(Schema.Number),
input_tokens_details: Schema.optional(Schema.NullOr(Schema.Struct({ cached_tokens: Schema.optional(Schema.Number) }))),
output_tokens: Schema.optional(Schema.Number),
output_tokens_details: Schema.optional(Schema.NullOr(Schema.Struct({ reasoning_tokens: Schema.optional(Schema.Number) }))),
total_tokens: Schema.optional(Schema.Number),
})
type OpenAIResponsesUsage = Schema.Schema.Type<typeof OpenAIResponsesUsage>
const OpenAIResponsesStreamItem = Schema.Struct({
type: Schema.String,
id: Schema.optional(Schema.String),
call_id: Schema.optional(Schema.String),
name: Schema.optional(Schema.String),
arguments: Schema.optional(Schema.String),
// Hosted (provider-executed) tool fields. Each hosted tool item carries its
// own subset of these — we capture them generically so we can surface the
// call's typed input portion and round-trip the full result payload without
// hand-rolling a per-tool schema.
status: Schema.optional(Schema.String),
action: Schema.optional(Schema.Unknown),
queries: Schema.optional(Schema.Unknown),
results: Schema.optional(Schema.Unknown),
code: Schema.optional(Schema.String),
container_id: Schema.optional(Schema.String),
outputs: Schema.optional(Schema.Unknown),
server_label: Schema.optional(Schema.String),
output: Schema.optional(Schema.Unknown),
error: Schema.optional(Schema.Unknown),
})
type OpenAIResponsesStreamItem = Schema.Schema.Type<typeof OpenAIResponsesStreamItem>
const OpenAIResponsesChunk = Schema.Struct({
type: Schema.String,
delta: Schema.optional(Schema.String),
item_id: Schema.optional(Schema.String),
item: Schema.optional(OpenAIResponsesStreamItem),
response: Schema.optional(
Schema.Struct({
incomplete_details: Schema.optional(Schema.NullOr(Schema.Struct({ reason: Schema.String }))),
usage: Schema.optional(OpenAIResponsesUsage),
}),
),
code: Schema.optional(Schema.String),
message: Schema.optional(Schema.String),
})
type OpenAIResponsesChunk = Schema.Schema.Type<typeof OpenAIResponsesChunk>
const { encodeTarget, decodeTarget, decodeChunk } = ProviderShared.codecs({
adapter: ADAPTER,
draft: OpenAIResponsesDraft,
target: OpenAIResponsesTarget,
chunk: OpenAIResponsesChunk,
chunkErrorMessage: "Invalid OpenAI Responses stream chunk",
})
interface ParserState {
readonly tools: Record<string, ProviderShared.ToolAccumulator>
readonly hasFunctionCall: boolean
}
const invalid = ProviderShared.invalidRequest
const lowerTool = (tool: ToolDefinition): OpenAIResponsesTool => ({
type: "function",
name: tool.name,
description: tool.description,
parameters: tool.inputSchema,
})
const lowerToolChoice = Effect.fn("OpenAIResponses.lowerToolChoice")(function* (
toolChoice: NonNullable<LLMRequest["toolChoice"]>,
) {
if (toolChoice.type !== "tool") return toolChoice.type
if (!toolChoice.name) return yield* invalid("OpenAI Responses tool choice requires a tool name")
return { type: "function" as const, name: toolChoice.name }
})
const lowerToolCall = (part: ToolCallPart): OpenAIResponsesInputItem => ({
type: "function_call",
call_id: part.id,
name: part.name,
arguments: ProviderShared.encodeJson(part.input),
})
const lowerMessages = Effect.fn("OpenAIResponses.lowerMessages")(function* (request: LLMRequest) {
const system: OpenAIResponsesInputItem[] =
request.system.length === 0 ? [] : [{ role: "system", content: ProviderShared.joinText(request.system) }]
const input: OpenAIResponsesInputItem[] = [...system]
for (const message of request.messages) {
if (message.role === "user") {
const content: TextPart[] = []
for (const part of message.content) {
if (part.type !== "text") return yield* invalid(`OpenAI Responses user messages only support text content for now`)
content.push(part)
}
input.push({ role: "user", content: content.map((part) => ({ type: "input_text", text: part.text })) })
continue
}
if (message.role === "assistant") {
const content: TextPart[] = []
for (const part of message.content) {
if (part.type === "text") {
content.push(part)
continue
}
if (part.type === "tool-call") {
input.push(lowerToolCall(part))
continue
}
return yield* invalid(`OpenAI Responses assistant messages only support text and tool-call content for now`)
}
if (content.length > 0)
input.push({ role: "assistant", content: content.map((part) => ({ type: "output_text", text: part.text })) })
continue
}
for (const part of message.content) {
if (part.type !== "tool-result")
return yield* invalid(`OpenAI Responses tool messages only support tool-result content`)
input.push({ type: "function_call_output", call_id: part.id, output: ProviderShared.toolResultText(part) })
}
}
return input
})
const prepare = Effect.fn("OpenAIResponses.prepare")(function* (request: LLMRequest) {
return {
model: request.model.id,
input: yield* lowerMessages(request),
tools: request.tools.length === 0 ? undefined : request.tools.map(lowerTool),
tool_choice: request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined,
stream: true as const,
max_output_tokens: request.generation.maxTokens,
temperature: request.generation.temperature,
top_p: request.generation.topP,
}
})
const mapUsage = (usage: OpenAIResponsesUsage | undefined) => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.input_tokens,
outputTokens: usage.output_tokens,
reasoningTokens: usage.output_tokens_details?.reasoning_tokens,
cacheReadInputTokens: usage.input_tokens_details?.cached_tokens,
totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, usage.total_tokens),
native: usage,
})
}
const mapFinishReason = (chunk: OpenAIResponsesChunk, hasFunctionCall: boolean): FinishReason => {
const reason = chunk.response?.incomplete_details?.reason
if (reason === undefined || reason === null) return hasFunctionCall ? "tool-calls" : "stop"
if (reason === "max_output_tokens") return "length"
if (reason === "content_filter") return "content-filter"
return hasFunctionCall ? "tool-calls" : "unknown"
}
const pushToolDelta = (tools: Record<string, ProviderShared.ToolAccumulator>, itemId: string, delta: string) =>
Effect.gen(function* () {
const current = tools[itemId]
if (!current) {
return yield* ProviderShared.chunkError(ADAPTER, "OpenAI Responses tool argument delta is missing its tool call")
}
return { ...current, input: `${current.input}${delta}` }
})
const finishToolCall = (tools: Record<string, ProviderShared.ToolAccumulator>, item: NonNullable<OpenAIResponsesChunk["item"]>) =>
Effect.gen(function* () {
if (item.type !== "function_call" || !item.id || !item.call_id || !item.name) return [] as ReadonlyArray<LLMEvent>
const raw = item.arguments ?? tools[item.id]?.input ?? ""
const input = yield* ProviderShared.parseToolInput(ADAPTER, item.name, raw)
return [{ type: "tool-call" as const, id: item.call_id, name: item.name, input }]
})
const withoutTool = (tools: Record<string, ProviderShared.ToolAccumulator>, id: string | undefined) =>
id === undefined ? tools : Object.fromEntries(Object.entries(tools).filter(([key]) => key !== id))
// Hosted tool items (provider-executed) ship their typed input + status + result
// fields all in one item. We expose them as a `tool-call` + `tool-result` pair
// so consumers can treat them uniformly with client tools, only differentiated
// by `providerExecuted: true`.
//
// item.type → tool name. Each entry is the OpenAI Responses item type that
// represents a hosted (provider-executed) tool call.
const HOSTED_TOOL_NAMES: Record<string, string> = {
web_search_call: "web_search",
web_search_preview_call: "web_search_preview",
file_search_call: "file_search",
code_interpreter_call: "code_interpreter",
computer_use_call: "computer_use",
image_generation_call: "image_generation",
mcp_call: "mcp",
local_shell_call: "local_shell",
}
const isHostedToolItem = (item: OpenAIResponsesStreamItem): item is OpenAIResponsesStreamItem & { id: string } =>
item.type in HOSTED_TOOL_NAMES && typeof item.id === "string" && item.id.length > 0
// Pick the input fields the model actually populated when invoking the tool.
// The shape is tool-specific. Keep this list explicit so each tool's input is
// reviewable at a glance — fall back to `{}` for tools we haven't typed yet.
const hostedToolInput = (item: OpenAIResponsesStreamItem): unknown => {
if (item.type === "web_search_call" || item.type === "web_search_preview_call") return item.action ?? {}
if (item.type === "file_search_call") return { queries: item.queries ?? [] }
if (item.type === "code_interpreter_call") return { code: item.code, container_id: item.container_id }
if (item.type === "computer_use_call") return item.action ?? {}
if (item.type === "local_shell_call") return item.action ?? {}
if (item.type === "mcp_call") return { server_label: item.server_label, name: item.name, arguments: item.arguments }
return {}
}
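// Illustrative extraction (stream items sketched for this example, not captured
// from a real response):
//   hostedToolInput({ type: "file_search_call", id: "fs_1", queries: ["usage docs"] })
//   // => { queries: ["usage docs"] }
//   hostedToolInput({ type: "image_generation_call", id: "ig_1" })
//   // => {} (not explicitly typed yet, so it falls back to the empty object)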
// Round-trip the full item as the structured result so consumers can extract
// outputs / sources / status without re-decoding.
const hostedToolResult = (item: OpenAIResponsesStreamItem) => {
const isError = typeof item.error !== "undefined" && item.error !== null
return isError
? ({ type: "error" as const, value: item.error })
: ({ type: "json" as const, value: item })
}
const hostedToolEvents = (item: OpenAIResponsesStreamItem & { id: string }): ReadonlyArray<LLMEvent> => {
const name = HOSTED_TOOL_NAMES[item.type]
return [
{ type: "tool-call", id: item.id, name, input: hostedToolInput(item), providerExecuted: true },
{ type: "tool-result", id: item.id, name, result: hostedToolResult(item), providerExecuted: true },
]
}
const processChunk = (state: ParserState, chunk: OpenAIResponsesChunk) =>
Effect.gen(function* () {
if (chunk.type === "response.output_text.delta" && chunk.delta) {
return [state, [{ type: "text-delta", id: chunk.item_id, text: chunk.delta }]] as const
}
if (chunk.type === "response.output_item.added" && chunk.item?.type === "function_call" && chunk.item.id) {
return [{
hasFunctionCall: state.hasFunctionCall,
tools: {
...state.tools,
[chunk.item.id]: {
id: chunk.item.call_id ?? chunk.item.id,
name: chunk.item.name ?? "",
input: chunk.item.arguments ?? "",
},
},
}, []] as const
}
if (chunk.type === "response.function_call_arguments.delta" && chunk.item_id && chunk.delta) {
const current = yield* pushToolDelta(state.tools, chunk.item_id, chunk.delta)
return [{ hasFunctionCall: state.hasFunctionCall, tools: { ...state.tools, [chunk.item_id]: current } }, [
{ type: "tool-input-delta" as const, id: current.id, name: current.name, text: chunk.delta },
]] as const
}
if (chunk.type === "response.output_item.done" && chunk.item?.type === "function_call") {
const events = yield* finishToolCall(state.tools, chunk.item)
return [{
hasFunctionCall: events.length > 0 ? true : state.hasFunctionCall,
tools: withoutTool(state.tools, chunk.item.id),
}, events] as const
}
if (chunk.type === "response.output_item.done" && chunk.item && isHostedToolItem(chunk.item)) {
return [state, hostedToolEvents(chunk.item)] as const
}
if (chunk.type === "response.completed" || chunk.type === "response.incomplete") {
return [state, [{ type: "request-finish" as const, reason: mapFinishReason(chunk, state.hasFunctionCall), usage: mapUsage(chunk.response?.usage) }]] as const
}
if (chunk.type === "error") {
return [state, [{ type: "provider-error" as const, message: chunk.message ?? chunk.code ?? "OpenAI Responses stream error" }]] as const
}
return [state, []] as const
})
/**
* The OpenAI Responses protocol — request lowering, target validation, body
* encoding, and the streaming-chunk state machine. Used by native OpenAI and
* (once registered) Azure OpenAI Responses.
*/
export const protocol = Protocol.define<
OpenAIResponsesDraft,
OpenAIResponsesTarget,
string,
OpenAIResponsesChunk,
ParserState
>({
id: "openai-responses",
prepare,
validate: ProviderShared.validateWith(decodeTarget),
encode: encodeTarget,
redact: (target) => target,
decode: decodeChunk,
initial: () => ({ hasFunctionCall: false, tools: {} }),
process: processChunk,
streamReadError: "Failed to read OpenAI Responses stream",
})
export const adapter = Adapter.fromProtocol({
id: ADAPTER,
protocol,
endpoint: Endpoint.baseURL({ default: "https://api.openai.com/v1", path: "/responses" }),
auth: Auth.openAI,
framing: Framing.sse,
})
export const model = (input: OpenAIResponsesModelInput) =>
llmModel({
...input,
provider: "openai",
protocol: "openai-responses",
capabilities: input.capabilities ?? capabilities({ tools: { calls: true, streamingInput: true } }),
})
export * as OpenAIResponses from "./openai-responses"


@@ -0,0 +1,5 @@
import { ProviderResolver } from "../provider-resolver"
export const resolver = ProviderResolver.fixed("openai", "openai-responses")
export * as OpenAI from "./openai"


@@ -0,0 +1,91 @@
import { Model, Patch, predicate } from "../patch"
import { CacheHint } from "../schema"
import type { ContentPart, LLMRequest } from "../schema"
const removeEmptyParts = (content: ReadonlyArray<ContentPart>) =>
content.filter((part) => (part.type === "text" || part.type === "reasoning" ? part.text !== "" : true))
const rewriteToolIds = (request: LLMRequest, scrub: (id: string) => string): LLMRequest => ({
...request,
messages: request.messages.map((message) => {
if (message.role !== "assistant" && message.role !== "tool") return message
return {
...message,
content: message.content.map((part) => {
if (part.type === "tool-call" || part.type === "tool-result") return { ...part, id: scrub(part.id) }
return part
}),
}
}),
})
export const removeEmptyAnthropicContent = Patch.prompt("anthropic.remove-empty-content", {
reason: "remove empty text/reasoning blocks for providers that reject empty content",
when: Model.provider("anthropic").or(Model.provider("bedrock"), Model.provider("amazon-bedrock")),
apply: (request) => ({
...request,
system: request.system.filter((part) => part.text !== ""),
messages: request.messages
.map((message) => ({ ...message, content: removeEmptyParts(message.content) }))
.filter((message) => message.content.length > 0),
}),
})
export const scrubClaudeToolIds = Patch.prompt("anthropic.scrub-tool-call-ids", {
reason: "Claude tool_use ids only accept alphanumeric, underscore, and dash characters",
when: Model.idIncludes("claude"),
apply: (request) => rewriteToolIds(request, (id) => id.replace(/[^a-zA-Z0-9_-]/g, "_")),
})
export const scrubMistralToolIds = Patch.prompt("mistral.scrub-tool-call-ids", {
reason: "Mistral tool call ids must be short alphanumeric identifiers",
when: Model.provider("mistral").or(Model.idIncludes("mistral"), Model.idIncludes("devstral")),
apply: (request) => rewriteToolIds(request, (id) => id.replace(/[^a-zA-Z0-9]/g, "").slice(0, 9).padEnd(9, "0")),
})
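// Illustrative rewrites (ids below are made up for this example):
//   Claude:  "toolu|01:AB"   => "toolu_01_AB" (invalid characters replaced with "_")
//   Mistral: "call_ABC-123!" => "callABC12"   (non-alphanumerics stripped, clamped/padded to 9 chars)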
// Single shared CacheHint instance — the cache patch reuses this one object
// across every marked part. Adapters lower CacheHint structurally
// (`cache?.type === "ephemeral"`) so reference equality is incidental, but
// keeping a class instance preserves any consumer that checks
// `instanceof CacheHint`.
const EPHEMERAL_CACHE = new CacheHint({ type: "ephemeral" })
const withCacheOnLastText = (content: ReadonlyArray<ContentPart>): ReadonlyArray<ContentPart> => {
const last = content.findLastIndex((part) => part.type === "text")
if (last === -1) return content
return content.map((part, index) =>
index === last && part.type === "text" ? { ...part, cache: EPHEMERAL_CACHE } : part,
)
}
// Anthropic and Bedrock both honor up to four positional cache breakpoints.
// We mark the first 2 system parts and the last 2 messages — the same policy
// OpenCode uses on the AI-SDK path (`session.applyCaching` in
// packages/opencode/src/provider/transform.ts). The capability gate makes
// this a no-op for adapters that don't advertise prompt-level caching, so
// non-cache providers (OpenAI Responses, Gemini, OpenAI-compatible Chat)
// are unaffected.
export const cachePromptHints = Patch.prompt("cache.prompt-hints", {
reason: "mark first 2 system parts and last 2 messages with ephemeral cache hints on cache-capable adapters",
when: predicate((context) => context.model.capabilities.cache?.prompt === true),
apply: (request) => ({
...request,
system: request.system.map((part, index) =>
index < 2 ? { ...part, cache: EPHEMERAL_CACHE } : part,
),
messages: request.messages.map((message, index) =>
index < request.messages.length - 2
? message
: { ...message, content: withCacheOnLastText(message.content) },
),
}),
})
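// Illustrative marking (counts chosen for this example): with 3 system parts and
// 5 messages, system parts 0 and 1 gain the ephemeral cache hint, and only the
// last text part of messages 3 and 4 (the final two) is marked; everything else
// passes through untouched.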
export const defaults = [
removeEmptyAnthropicContent,
scrubClaudeToolIds,
scrubMistralToolIds,
cachePromptHints,
]
export * as ProviderPatch from "./patch"


@@ -0,0 +1,235 @@
import { Buffer } from "node:buffer"
import { Cause, Effect, Schema, Stream } from "effect"
import * as Sse from "effect/unstable/encoding/Sse"
import { HttpClientRequest, type HttpClientResponse } from "effect/unstable/http"
import { InvalidRequestError, ProviderChunkError, type MediaPart, type ToolResultPart } from "../schema"
export const Json = Schema.fromJsonString(Schema.Unknown)
export const decodeJson = Schema.decodeUnknownSync(Json)
export const encodeJson = Schema.encodeSync(Json)
/**
* Plain-record narrowing. Excludes arrays so adapters checking nested JSON
* Schema fragments don't accidentally treat a tuple as a key/value bag.
*/
export const isRecord = (value: unknown): value is Record<string, unknown> =>
typeof value === "object" && value !== null && !Array.isArray(value)
/**
* Streaming tool-call accumulator. Adapters that build a tool call across
* multiple `tool-input-delta` chunks store the partial JSON input string here
* and finalize it with `parseToolInput` once the call completes. Anthropic
* extends this with a `providerExecuted` flag for hosted (server-side) tools;
* it should be the only adapter to do so.
*/
export interface ToolAccumulator {
readonly id: string
readonly name: string
readonly input: string
}
/**
* Codec bundle for a streaming JSON adapter:
*
* - `encodeTarget(target)` produces the JSON string body for `jsonPost`.
* - `decodeTarget(draft)` runs the Schema-driven `Draft → Target` decode
* inside an Effect, mapping parse errors to `InvalidRequestError` via
* `validateWith` so the result drops directly into a protocol's `validate`
* field.
 * - `decodeChunk(data)` decodes one streaming JSON chunk against the chunk
 *   schema; it expects a `string` (the SSE data field), so adapters whose
 *   framing already produces a parsed object (e.g. Bedrock's event-stream
 *   payloads) need their own chunk decoder.
*
* Adapters that need a totally different decode shape should still hand-roll
* those pieces — the helper covers the common SSE-JSON case used by 4 of 6
* adapters today.
*/
export const codecs = <Draft, Target, Chunk>(input: {
readonly adapter: string
readonly draft: Schema.Codec<Draft, unknown>
readonly target: Schema.Codec<Target, unknown>
readonly chunk: Schema.Codec<Chunk, unknown>
readonly chunkErrorMessage: string
}) => {
const encodeTarget = Schema.encodeSync(Schema.fromJsonString(input.target))
const decodeTarget = validateWith(
Schema.decodeUnknownEffect(input.draft.pipe(Schema.decodeTo(input.target))),
)
const decodeChunkSync = Schema.decodeUnknownSync(Schema.fromJsonString(input.chunk))
const decodeChunk = (data: string) =>
Effect.try({
try: () => decodeChunkSync(data),
catch: () => chunkError(input.adapter, input.chunkErrorMessage, data),
})
return { encodeTarget, decodeTarget, decodeChunk }
}
/**
* `Usage.totalTokens` policy shared by every adapter. Honors a provider-
* supplied total; otherwise falls back to `inputTokens + outputTokens` only
* when at least one is defined. Returns `undefined` when neither input nor
* output is known so adapters don't publish a misleading `0`.
*/
export const totalTokens = (
inputTokens: number | undefined,
outputTokens: number | undefined,
total: number | undefined,
) => {
if (total !== undefined) return total
if (inputTokens === undefined && outputTokens === undefined) return undefined
return (inputTokens ?? 0) + (outputTokens ?? 0)
}
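// Illustrative totals (numbers chosen for this example):
//   totalTokens(12, 34, undefined)               // => 46 (sum fallback)
//   totalTokens(12, 34, 99)                      // => 99 (provider total wins)
//   totalTokens(undefined, undefined, undefined) // => undefined (never a misleading 0)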
export const chunkError = (adapter: string, message: string, raw?: string) =>
new ProviderChunkError({ adapter, message, raw })
export const parseJson = (adapter: string, input: string, message: string) =>
Effect.try({
try: () => decodeJson(input),
catch: () => chunkError(adapter, message, input),
})
/**
* Join the `text` field of a list of parts with newlines. Used by adapters
* that flatten system / message content arrays into a single provider string
* (OpenAI Chat `system` content, OpenAI Responses `system` content, Gemini
* `systemInstruction.parts[].text`).
*/
export const joinText = (parts: ReadonlyArray<{ readonly text: string }>) =>
parts.map((part) => part.text).join("\n")
/**
* Parse the streamed JSON input of a tool call. Treats an empty string as
* `"{}"` — providers occasionally finish a tool call without ever emitting
* input deltas (e.g. zero-arg tools). The error message is uniform across
* adapters: `Invalid JSON input for <adapter> tool call <name>`.
*/
export const parseToolInput = (adapter: string, name: string, raw: string) =>
parseJson(adapter, raw || "{}", `Invalid JSON input for ${adapter} tool call ${name}`)
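// Illustrative behavior (adapter and tool names below are examples):
//   parseToolInput("openai-chat", "read_file", '{"path":"a.ts"}') // succeeds with { path: "a.ts" }
//   parseToolInput("openai-chat", "noop", "")                     // succeeds with {} (zero-arg tool)
//   parseToolInput("openai-chat", "read_file", "{oops")
//   // fails with ProviderChunkError: "Invalid JSON input for openai-chat tool call read_file"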
/**
* Encode a `MediaPart`'s raw bytes for inclusion in a JSON request body.
* `data: string` is assumed to already be base64 (matches caller convention
* across Gemini / Bedrock); `data: Uint8Array` is base64-encoded here. Used
* by every adapter that supports image / document inputs.
*/
export const mediaBytes = (part: MediaPart) =>
typeof part.data === "string" ? part.data : Buffer.from(part.data).toString("base64")
export const trimBaseUrl = (value: string) => value.replace(/\/+$/, "")
export const toolResultText = (part: ToolResultPart) => {
if (part.result.type === "text" || part.result.type === "error") return String(part.result.value)
return encodeJson(part.result.value)
}
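// Illustrative flattening (result values chosen for this example):
//   { type: "text",  value: "done" }       => "done"
//   { type: "error", value: "boom" }       => "boom"
//   { type: "json",  value: { ok: true } } => '{"ok":true}'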
const errorText = (error: unknown) => {
if (error instanceof Error) return error.message
if (typeof error === "string") return error
if (typeof error === "number" || typeof error === "boolean" || typeof error === "bigint") return String(error)
if (error === null) return "null"
if (error === undefined) return "undefined"
return "Unknown stream error"
}
const streamError = (adapter: string, message: string, cause: Cause.Cause<unknown>) => {
const failed = cause.reasons.find(Cause.isFailReason)?.error
if (failed instanceof ProviderChunkError) return failed
return chunkError(adapter, message, Cause.pretty(cause))
}
/**
* Generic streaming-response decoder used by `Adapter.fromProtocol`. Splits
* the response stream into:
*
* bytes → frames (caller-supplied) → chunk → (state, events)
*
* The `framing` step is the protocol-specific part — `Framing.sse` uses
* `sseFraming` below; binary protocols (Bedrock event-stream) supply their
* own byte-level decoder. Everything else (transport-error normalization,
* schema decoding per chunk, stateful chunk → event mapping, `onHalt` flush,
* terminal-error normalization) is shared.
*/
export const framed = <Frame, Chunk, State, Event>(input: {
readonly adapter: string
readonly response: HttpClientResponse.HttpClientResponse
readonly readError: string
readonly framing: (
bytes: Stream.Stream<Uint8Array, ProviderChunkError>,
) => Stream.Stream<Frame, ProviderChunkError>
readonly decodeChunk: (frame: Frame) => Effect.Effect<Chunk, ProviderChunkError>
readonly initial: () => State
readonly process: (
state: State,
chunk: Chunk,
) => Effect.Effect<readonly [State, ReadonlyArray<Event>], ProviderChunkError>
readonly onHalt?: (state: State) => ReadonlyArray<Event>
}): Stream.Stream<Event, ProviderChunkError> => {
const bytes = input.response.stream.pipe(
Stream.mapError((error) => chunkError(input.adapter, input.readError, errorText(error))),
)
return input.framing(bytes).pipe(
Stream.mapEffect(input.decodeChunk),
Stream.mapAccumEffect(input.initial, input.process, input.onHalt ? { onHalt: input.onHalt } : undefined),
Stream.catchCause((cause) => Stream.fail(streamError(input.adapter, input.readError, cause))),
)
}
/**
* `framing` step for Server-Sent Events. Decodes UTF-8, runs the SSE channel
* decoder, and drops empty / `[DONE]` keep-alive events so the downstream
* `decodeChunk` sees one JSON string per element. The SSE channel emits a
* `Retry` control event on its error channel; we drop it here (we don't
* implement client-driven retries) so the public error channel stays
* `ProviderChunkError`.
*/
export const sseFraming = (
bytes: Stream.Stream<Uint8Array, ProviderChunkError>,
): Stream.Stream<string, ProviderChunkError> =>
bytes.pipe(
Stream.decodeText(),
Stream.pipeThroughChannel(Sse.decode()),
Stream.catchTag("Retry", () => Stream.empty),
Stream.filter((event) => event.data.length > 0 && event.data !== "[DONE]"),
Stream.map((event) => event.data),
)
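// Illustrative framing (raw SSE events sketched for this example):
//   data: {"type":"ping"}   => emits the string '{"type":"ping"}'
//   data: [DONE]            => dropped (terminal keep-alive)
//   data:                   => dropped (empty data field)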
/**
 * Canonical `InvalidRequestError` constructor. Lifts the one-line `const invalid =
 * (message) => new InvalidRequestError({ message })` alias out of every
* adapter so the error constructor lives in one place. If we ever extend
* `InvalidRequestError` with adapter context or trace metadata, the change
* lands here.
*/
export const invalidRequest = (message: string) => new InvalidRequestError({ message })
/**
* Build a `validate` step from a Schema decoder. Replaces the per-adapter
* lambda body `(draft) => decode(draft).pipe(Effect.mapError((e) =>
* invalid(e.message)))`. Any decode error is translated into
* `InvalidRequestError` carrying the original parse-error message.
*/
export const validateWith =
<A, I, E extends { readonly message: string }>(decode: (input: I) => Effect.Effect<A, E>) =>
(draft: I) =>
decode(draft).pipe(Effect.mapError((error) => invalidRequest(error.message)))
/**
 * Build an HTTP POST with a JSON body. Merges caller-supplied headers, then
 * sets `content-type: application/json` last so callers can't override the
 * content type but their headers win on everything else. The body is passed
 * pre-encoded so adapters can choose between
* `Schema.encodeSync(target)` and `ProviderShared.encodeJson(target)`.
*/
export const jsonPost = (input: {
readonly url: string
readonly body: string
readonly headers?: Record<string, string>
}) =>
HttpClientRequest.post(input.url).pipe(
HttpClientRequest.setHeaders({ ...input.headers, "content-type": "application/json" }),
HttpClientRequest.bodyText(input.body, "application/json"),
)
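// Illustrative call (URL and extra header are example values, not defaults of
// this helper):
//   jsonPost({
//     url: "https://api.openai.com/v1/chat/completions",
//     body: encodeTarget(target), // encodeTarget/target come from an adapter's codecs
//     headers: { "x-example-trace": "abc123" },
//   })
//   // => POST with the caller header merged in and content-type: application/json pinned last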
export * as ProviderShared from "./shared"


@@ -0,0 +1,5 @@
import { ProviderResolver } from "../provider-resolver"
export const resolver = ProviderResolver.fixed("xai", "openai-responses")
export * as XAI from "./xai"

packages/llm/src/schema.ts

@@ -0,0 +1,465 @@
import { Schema } from "effect"
/**
* Stable string identifier for a protocol implementation. The discriminator
* value lives on `ModelRef.protocol` and on the `Adapter.protocol` field;
* the runtime registry keys lookups by it. The implementation type itself is
* `Protocol` (see `protocol.ts`).
*/
export const ProtocolID = Schema.Literals([
"openai-chat",
"openai-compatible-chat",
"openai-responses",
"anthropic-messages",
"gemini",
"bedrock-converse",
])
export type ProtocolID = Schema.Schema.Type<typeof ProtocolID>
export const ModelID = Schema.String.pipe(Schema.brand("LLM.ModelID"))
export type ModelID = typeof ModelID.Type
export const ProviderID = Schema.String.pipe(Schema.brand("LLM.ProviderID"))
export type ProviderID = typeof ProviderID.Type
export const ReasoningEfforts = ["none", "minimal", "low", "medium", "high", "xhigh", "max"] as const
export const ReasoningEffort = Schema.Literals(ReasoningEfforts)
export type ReasoningEffort = Schema.Schema.Type<typeof ReasoningEffort>
export const PatchPhase = Schema.Literals(["request", "prompt", "tool-schema", "target", "stream"])
export type PatchPhase = Schema.Schema.Type<typeof PatchPhase>
export const MessageRole = Schema.Literals(["user", "assistant", "tool"])
export type MessageRole = Schema.Schema.Type<typeof MessageRole>
export const FinishReason = Schema.Literals(["stop", "length", "tool-calls", "content-filter", "error", "unknown"])
export type FinishReason = Schema.Schema.Type<typeof FinishReason>
export const JsonSchema = Schema.Record(Schema.String, Schema.Unknown)
export type JsonSchema = Schema.Schema.Type<typeof JsonSchema>
export class ModelCapabilities extends Schema.Class<ModelCapabilities>("LLM.ModelCapabilities")({
input: Schema.Struct({
text: Schema.Boolean,
image: Schema.Boolean,
audio: Schema.Boolean,
video: Schema.Boolean,
pdf: Schema.Boolean,
}),
output: Schema.Struct({
text: Schema.Boolean,
reasoning: Schema.Boolean,
}),
tools: Schema.Struct({
calls: Schema.Boolean,
streamingInput: Schema.Boolean,
providerExecuted: Schema.Boolean,
}),
cache: Schema.Struct({
prompt: Schema.Boolean,
messageBlocks: Schema.Boolean,
contentBlocks: Schema.Boolean,
}),
reasoning: Schema.Struct({
efforts: Schema.Array(ReasoningEffort),
summaries: Schema.Boolean,
encryptedContent: Schema.Boolean,
}),
}) {}
export class ModelLimits extends Schema.Class<ModelLimits>("LLM.ModelLimits")({
context: Schema.optional(Schema.Number),
output: Schema.optional(Schema.Number),
}) {}
export class ModelRef extends Schema.Class<ModelRef>("LLM.ModelRef")({
id: ModelID,
provider: ProviderID,
protocol: ProtocolID,
baseURL: Schema.optional(Schema.String),
/**
* Auth secret read by `Auth.bearer` / `Auth.apiKeyHeader` at request time.
* Lives here so authentication is not baked into `headers` at construction
* time and the `Auth` axis can actually do its job per request.
*/
apiKey: Schema.optional(Schema.String),
headers: Schema.optional(Schema.Record(Schema.String, Schema.String)),
/**
* Query params appended to the request URL by `Endpoint.baseURL`. Used for
* deployment-level URL-scoped settings such as Azure's `api-version` or any
* provider that requires a per-request key in the URL. Generic concern, so
* lives as a typed first-class field instead of `native`.
*/
queryParams: Schema.optional(Schema.Record(Schema.String, Schema.String)),
capabilities: ModelCapabilities,
limits: ModelLimits,
/**
* Provider-specific opaque options. Reach for this only when the value is
* genuinely provider-private and does not fit a typed axis (e.g. Bedrock's
* `aws_credentials` / `aws_region` for SigV4). Anything used by more than
* one adapter should grow into a typed field instead.
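*
* A hedged field-level sketch for Bedrock SigV4 (`resolveAwsCredentials` is a
* hypothetical helper; only the Bedrock adapter reads these keys):
* ```ts
* native: { aws_region: "us-east-1", aws_credentials: resolveAwsCredentials() },
* ```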
*/
native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export class CacheHint extends Schema.Class<CacheHint>("LLM.CacheHint")({
type: Schema.Literals(["ephemeral", "persistent"]),
ttlSeconds: Schema.optional(Schema.Number),
}) {}
const TypeStruct = <const Type extends string, const Fields extends Schema.Struct.Fields>(
type: Type,
identifier: string,
fields: Fields,
) => Schema.Struct({
type: Schema.tag(type),
...fields,
}).annotate({ identifier })
export const SystemPart = TypeStruct("text", "LLM.SystemPart", {
text: Schema.String,
cache: Schema.optional(CacheHint),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
})
export type SystemPart = Schema.Schema.Type<typeof SystemPart>
export const TextPart = TypeStruct("text", "LLM.Content.Text", {
text: Schema.String,
cache: Schema.optional(CacheHint),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
})
export type TextPart = Schema.Schema.Type<typeof TextPart>
export const MediaPart = TypeStruct("media", "LLM.Content.Media", {
mediaType: Schema.String,
data: Schema.Union([Schema.String, Schema.Uint8Array]),
filename: Schema.optional(Schema.String),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
})
export type MediaPart = Schema.Schema.Type<typeof MediaPart>
export const ToolResultValue = Schema.Struct({
type: Schema.Literals(["json", "text", "error"]),
value: Schema.Unknown,
}).annotate({ identifier: "LLM.ToolResult" })
export type ToolResultValue = Schema.Schema.Type<typeof ToolResultValue>
export const ToolCallPart = TypeStruct("tool-call", "LLM.Content.ToolCall", {
id: Schema.String,
name: Schema.String,
input: Schema.Unknown,
providerExecuted: Schema.optional(Schema.Boolean),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
})
export type ToolCallPart = Schema.Schema.Type<typeof ToolCallPart>
export const ToolResultPart = TypeStruct("tool-result", "LLM.Content.ToolResult", {
id: Schema.String,
name: Schema.String,
result: ToolResultValue,
providerExecuted: Schema.optional(Schema.Boolean),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
})
export type ToolResultPart = Schema.Schema.Type<typeof ToolResultPart>
export const ReasoningPart = TypeStruct("reasoning", "LLM.Content.Reasoning", {
text: Schema.String,
encrypted: Schema.optional(Schema.String),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
})
export type ReasoningPart = Schema.Schema.Type<typeof ReasoningPart>
export const ContentPart = Schema.Union([TextPart, MediaPart, ToolCallPart, ToolResultPart, ReasoningPart]).pipe(
Schema.toTaggedUnion("type"),
)
export type ContentPart = Schema.Schema.Type<typeof ContentPart>
export class Message extends Schema.Class<Message>("LLM.Message")({
id: Schema.optional(Schema.String),
role: MessageRole,
content: Schema.Array(ContentPart),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export class ToolDefinition extends Schema.Class<ToolDefinition>("LLM.ToolDefinition")({
name: Schema.String,
description: Schema.String,
inputSchema: JsonSchema,
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export class ToolChoice extends Schema.Class<ToolChoice>("LLM.ToolChoice")({
type: Schema.Literals(["auto", "none", "required", "tool"]),
name: Schema.optional(Schema.String),
}) {}
export class GenerationOptions extends Schema.Class<GenerationOptions>("LLM.GenerationOptions")({
maxTokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
topP: Schema.optional(Schema.Number),
stop: Schema.optional(Schema.Array(Schema.String)),
}) {}
export class ReasoningIntent extends Schema.Class<ReasoningIntent>("LLM.ReasoningIntent")({
enabled: Schema.Boolean,
effort: Schema.optional(ReasoningEffort),
summary: Schema.optional(Schema.Boolean),
encryptedContent: Schema.optional(Schema.Boolean),
}) {}
export class CacheIntent extends Schema.Class<CacheIntent>("LLM.CacheIntent")({
enabled: Schema.Boolean,
key: Schema.optional(Schema.String),
}) {}
export const ResponseFormat = Schema.Union([
TypeStruct("text", "LLM.ResponseFormat.Text", {}),
TypeStruct("json", "LLM.ResponseFormat.Json", { schema: JsonSchema }),
TypeStruct("tool", "LLM.ResponseFormat.Tool", { tool: ToolDefinition }),
]).pipe(Schema.toTaggedUnion("type"))
export type ResponseFormat = Schema.Schema.Type<typeof ResponseFormat>
export class LLMRequest extends Schema.Class<LLMRequest>("LLM.Request")({
id: Schema.optional(Schema.String),
model: ModelRef,
system: Schema.Array(SystemPart),
messages: Schema.Array(Message),
tools: Schema.Array(ToolDefinition),
toolChoice: Schema.optional(ToolChoice),
generation: GenerationOptions,
reasoning: Schema.optional(ReasoningIntent),
cache: Schema.optional(CacheIntent),
responseFormat: Schema.optional(ResponseFormat),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export class Usage extends Schema.Class<Usage>("LLM.Usage")({
inputTokens: Schema.optional(Schema.Number),
outputTokens: Schema.optional(Schema.Number),
reasoningTokens: Schema.optional(Schema.Number),
cacheReadInputTokens: Schema.optional(Schema.Number),
cacheWriteInputTokens: Schema.optional(Schema.Number),
totalTokens: Schema.optional(Schema.Number),
native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export const RequestStart = TypeStruct("request-start", "LLM.Event.RequestStart", {
id: Schema.String,
model: ModelRef,
})
export type RequestStart = Schema.Schema.Type<typeof RequestStart>
export const StepStart = TypeStruct("step-start", "LLM.Event.StepStart", {
index: Schema.Number,
})
export type StepStart = Schema.Schema.Type<typeof StepStart>
export const TextStart = TypeStruct("text-start", "LLM.Event.TextStart", {
id: Schema.String,
})
export type TextStart = Schema.Schema.Type<typeof TextStart>
export const TextDelta = TypeStruct("text-delta", "LLM.Event.TextDelta", {
id: Schema.optional(Schema.String),
text: Schema.String,
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
})
export type TextDelta = Schema.Schema.Type<typeof TextDelta>
export const TextEnd = TypeStruct("text-end", "LLM.Event.TextEnd", {
id: Schema.String,
})
export type TextEnd = Schema.Schema.Type<typeof TextEnd>
export const ReasoningDelta = TypeStruct("reasoning-delta", "LLM.Event.ReasoningDelta", {
id: Schema.optional(Schema.String),
text: Schema.String,
encrypted: Schema.optional(Schema.String),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
})
export type ReasoningDelta = Schema.Schema.Type<typeof ReasoningDelta>
export const ToolInputDelta = TypeStruct("tool-input-delta", "LLM.Event.ToolInputDelta", {
id: Schema.String,
name: Schema.String,
text: Schema.String,
})
export type ToolInputDelta = Schema.Schema.Type<typeof ToolInputDelta>
export const ToolCall = TypeStruct("tool-call", "LLM.Event.ToolCall", {
id: Schema.String,
name: Schema.String,
input: Schema.Unknown,
providerExecuted: Schema.optional(Schema.Boolean),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
})
export type ToolCall = Schema.Schema.Type<typeof ToolCall>
export const ToolResult = TypeStruct("tool-result", "LLM.Event.ToolResult", {
id: Schema.String,
name: Schema.String,
result: ToolResultValue,
providerExecuted: Schema.optional(Schema.Boolean),
})
export type ToolResult = Schema.Schema.Type<typeof ToolResult>
export const ToolError = TypeStruct("tool-error", "LLM.Event.ToolError", {
id: Schema.String,
name: Schema.String,
message: Schema.String,
})
export type ToolError = Schema.Schema.Type<typeof ToolError>
export const StepFinish = TypeStruct("step-finish", "LLM.Event.StepFinish", {
index: Schema.Number,
reason: FinishReason,
usage: Schema.optional(Usage),
})
export type StepFinish = Schema.Schema.Type<typeof StepFinish>
export const RequestFinish = TypeStruct("request-finish", "LLM.Event.RequestFinish", {
reason: FinishReason,
usage: Schema.optional(Usage),
})
export type RequestFinish = Schema.Schema.Type<typeof RequestFinish>
export const ProviderErrorEvent = TypeStruct("provider-error", "LLM.Event.ProviderError", {
message: Schema.String,
retryable: Schema.optional(Schema.Boolean),
})
export type ProviderErrorEvent = Schema.Schema.Type<typeof ProviderErrorEvent>
const llmEventTagged = Schema.Union([
RequestStart,
StepStart,
TextStart,
TextDelta,
TextEnd,
ReasoningDelta,
ToolInputDelta,
ToolCall,
ToolResult,
ToolError,
StepFinish,
RequestFinish,
ProviderErrorEvent,
]).pipe(Schema.toTaggedUnion("type"))
/**
* camelCase aliases for `LLMEvent.guards` (provided by `Schema.toTaggedUnion`).
* Lets consumers write `events.filter(LLMEvent.is.toolCall)` instead of
* `events.filter(LLMEvent.guards["tool-call"])`.
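*
* A hedged usage sketch (assumes a `response: LLMResponse` from `generate`):
* ```ts
* const toolCalls = response.events.filter(LLMEvent.is.toolCall)
* const finished = response.events.find(LLMEvent.is.requestFinish)
* ```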
*/
export const LLMEvent = Object.assign(llmEventTagged, {
is: {
requestStart: llmEventTagged.guards["request-start"],
stepStart: llmEventTagged.guards["step-start"],
textStart: llmEventTagged.guards["text-start"],
textDelta: llmEventTagged.guards["text-delta"],
textEnd: llmEventTagged.guards["text-end"],
reasoningDelta: llmEventTagged.guards["reasoning-delta"],
toolInputDelta: llmEventTagged.guards["tool-input-delta"],
toolCall: llmEventTagged.guards["tool-call"],
toolResult: llmEventTagged.guards["tool-result"],
toolError: llmEventTagged.guards["tool-error"],
stepFinish: llmEventTagged.guards["step-finish"],
requestFinish: llmEventTagged.guards["request-finish"],
providerError: llmEventTagged.guards["provider-error"],
},
})
export type LLMEvent = Schema.Schema.Type<typeof llmEventTagged>
export class PatchTrace extends Schema.Class<PatchTrace>("LLM.PatchTrace")({
id: Schema.String,
phase: PatchPhase,
reason: Schema.String,
}) {}
export class PreparedRequest extends Schema.Class<PreparedRequest>("LLM.PreparedRequest")({
id: Schema.String,
adapter: Schema.String,
model: ModelRef,
target: Schema.Unknown,
redactedTarget: Schema.Unknown,
patchTrace: Schema.Array(PatchTrace),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
/**
* A `PreparedRequest` whose `target` is typed as `Target`. Use with the
* generic on `LLMClient.prepare<Target>(...)` when the caller knows which
* adapter their request will resolve to and wants its native shape statically
* exposed (debug UIs, request previews, plan rendering).
*
* The runtime payload is identical — the adapter still emits `target: unknown`
* — so this is a type-level assertion the caller makes about what they expect
* to find. The prepare runtime does not validate the assertion.
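*
* A hedged sketch (the `OpenAIChatBody` target type is an assumption standing
* in for whatever the matching adapter's native request shape is):
* ```ts
* const prepared = yield* client.prepare<OpenAIChatBody>(request)
* // prepared.target is now statically an OpenAIChatBody; nothing extra was validated at runtime
* ```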
*/
export type PreparedRequestOf<Target> = Omit<PreparedRequest, "target"> & {
readonly target: Target
}
export class LLMResponse extends Schema.Class<LLMResponse>("LLM.Response")({
events: Schema.Array(LLMEvent),
usage: Schema.optional(Usage),
}) {}
export class InvalidRequestError extends Schema.TaggedErrorClass<InvalidRequestError>()("LLM.InvalidRequestError", {
message: Schema.String,
}) {}
export class NoAdapterError extends Schema.TaggedErrorClass<NoAdapterError>()("LLM.NoAdapterError", {
protocol: ProtocolID,
provider: ProviderID,
model: ModelID,
}) {
override get message() {
return `No LLM adapter for ${this.provider}/${this.model} using ${this.protocol}`
}
}
export class ProviderChunkError extends Schema.TaggedErrorClass<ProviderChunkError>()("LLM.ProviderChunkError", {
adapter: Schema.String,
message: Schema.String,
raw: Schema.optional(Schema.String),
}) {}
export class ProviderRequestError extends Schema.TaggedErrorClass<ProviderRequestError>()("LLM.ProviderRequestError", {
status: Schema.Number,
message: Schema.String,
body: Schema.optional(Schema.String),
}) {}
export class TransportError extends Schema.TaggedErrorClass<TransportError>()("LLM.TransportError", {
message: Schema.String,
// Optional originating reason — populated for structured HTTP transport
// failures (e.g. `RequestError`, `ResponseError`, `IsTimeoutError`) so
// consumers can render the underlying cause without parsing the message.
reason: Schema.optional(Schema.String),
// Optional URL of the failing request when the transport layer surfaces it.
url: Schema.optional(Schema.String),
}) {}
/**
* Failure type for tool execute handlers. Handlers must map their internal
* errors to this shape; the runtime catches `ToolFailure`s and surfaces them
* as `tool-error` events plus a `tool-result` of `type: "error"` so the model
* can self-correct.
*
* Anything thrown or yielded by a handler that is not a `ToolFailure` is
* treated as a defect and fails the stream.
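*
* A hedged sketch of a handler mapping its own failure (the `fetchWeather`
* effect is hypothetical):
* ```ts
* execute: ({ city }) =>
*   fetchWeather(city).pipe(
*     Effect.mapError((cause) => new ToolFailure({ message: `weather lookup failed: ${String(cause)}` })),
*   ),
* ```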
*/
export class ToolFailure extends Schema.TaggedErrorClass<ToolFailure>()("LLM.ToolFailure", {
message: Schema.String,
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export type LLMError =
| InvalidRequestError
| NoAdapterError
| ProviderChunkError
| ProviderRequestError
| TransportError

View File

@@ -0,0 +1,223 @@
import { Effect, Stream } from "effect"
import type { Concurrency } from "effect/Types"
import type { LLMClient } from "./adapter"
import type { RequestExecutor } from "./executor"
import * as LLM from "./llm"
import {
type ContentPart,
type FinishReason,
type LLMError,
type LLMEvent,
type LLMRequest,
type ToolCallPart,
type ToolResultValue,
} from "./schema"
import { ToolFailure } from "./schema"
import { type AnyTool, type Tools, toDefinitions } from "./tool"
export interface RuntimeState {
readonly step: number
readonly request: LLMRequest
}
export interface RunOptions<T extends Tools> {
readonly request: LLMRequest
readonly tools: T
/**
* Maximum number of model round-trips before the runtime stops emitting new
* requests. Defaults to 10. Reaching this limit is not an error — the loop
* simply stops and the last `request-finish` event is the terminal signal.
*/
readonly maxSteps?: number
/**
* How many tool handlers to dispatch in parallel within a single step.
* Defaults to 10. Use `"unbounded"` only when handlers do not share an
* external dependency that can be saturated (rate-limited APIs, single
* connections, etc.).
*/
readonly concurrency?: Concurrency
/**
* Optional predicate evaluated after each step's `request-finish` event. If
* it returns `true`, the loop stops even if the model wanted to continue.
*/
readonly stopWhen?: (state: RuntimeState) => boolean
}
/**
* Run a model with a typed tool record. The runtime streams the model; on
* each `tool-call` event it decodes the input against the tool's `parameters`
* Schema, dispatches to the matching handler, encodes the handler's result
* against the tool's `success` Schema, and emits a `tool-result` event. When
* the model finishes with `tool-calls`, the runtime appends the assistant +
* tool messages and re-streams. Stops on a non-`tool-calls` finish, when
* `maxSteps` is reached, or when `stopWhen` returns `true`.
*
* Tool handler dependencies are closed over at tool definition time, so the
* runtime's only environment requirement is the `RequestExecutor.Service`.
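*
* A hedged usage sketch (`client`, `request`, and `getWeather` are assumed to
* come from `LLMClient.make`, `LLM.request`, and `tool(...)` elsewhere in this
* package):
* ```ts
* const events = yield* run(client, {
*   request,
*   tools: { get_weather: getWeather },
*   maxSteps: 4,
* }).pipe(Stream.runCollect)
* ```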
*/
export const run = <T extends Tools>(
client: LLMClient,
options: RunOptions<T>,
): Stream.Stream<LLMEvent, LLMError, RequestExecutor.Service> => {
const maxSteps = options.maxSteps ?? 10
const concurrency = options.concurrency ?? 10
const tools = options.tools as Tools
const runtimeTools = toDefinitions(tools)
const initialRequest = LLM.updateRequest(options.request, {
tools: [
...options.request.tools.filter((tool) => !runtimeTools.some((runtimeTool) => runtimeTool.name === tool.name)),
...runtimeTools,
],
})
const loop = (request: LLMRequest, step: number): Stream.Stream<LLMEvent, LLMError, RequestExecutor.Service> =>
Stream.unwrap(
Effect.gen(function* () {
const state: StepState = { assistantContent: [], toolCalls: [], finishReason: undefined }
const modelStream = client.stream(request).pipe(
Stream.tap((event) => Effect.sync(() => accumulate(state, event))),
)
const continuation = Stream.unwrap(
Effect.gen(function* () {
if (state.finishReason !== "tool-calls" || state.toolCalls.length === 0) return Stream.empty
if (options.stopWhen?.({ step, request })) return Stream.empty
if (step + 1 >= maxSteps) return Stream.empty
const dispatched = yield* Effect.forEach(
state.toolCalls,
(call) => dispatch(tools, call).pipe(Effect.map((result) => [call, result] as const)),
{ concurrency },
)
const followUp = LLM.updateRequest(request, {
messages: [
...request.messages,
LLM.assistant(state.assistantContent),
...dispatched.map(([call, result]) =>
LLM.toolMessage({ id: call.id, name: call.name, result }),
),
],
})
return Stream.fromIterable(dispatched.flatMap(([call, result]) => emitEvents(call, result))).pipe(
Stream.concat(loop(followUp, step + 1)),
)
}),
)
return modelStream.pipe(Stream.concat(continuation))
}),
)
return loop(initialRequest, 0)
}
interface StepState {
assistantContent: ContentPart[]
toolCalls: ToolCallPart[]
finishReason: FinishReason | undefined
}
const accumulate = (state: StepState, event: LLMEvent) => {
if (event.type === "text-delta") {
appendStreamingText(state, "text", event.text, { metadata: event.metadata })
return
}
if (event.type === "reasoning-delta") {
appendStreamingText(state, "reasoning", event.text, { encrypted: event.encrypted, metadata: event.metadata })
return
}
if (event.type === "tool-call") {
const part = LLM.toolCall({
id: event.id,
name: event.name,
input: event.input,
providerExecuted: event.providerExecuted,
metadata: event.metadata,
})
state.assistantContent.push(part)
// Provider-executed tools are dispatched by the provider; the runtime must
// not invoke a client handler. The matching `tool-result` event arrives
// later in the same stream and is folded into `assistantContent` so the
// next round's message history carries it.
if (!event.providerExecuted) state.toolCalls.push(part)
return
}
if (event.type === "tool-result" && event.providerExecuted) {
state.assistantContent.push(LLM.toolResult({
id: event.id,
name: event.name,
result: event.result,
providerExecuted: true,
}))
return
}
if (event.type === "request-finish") {
state.finishReason = event.reason
}
}
const appendStreamingText = (
state: StepState,
type: "text" | "reasoning",
text: string,
options: { readonly encrypted?: string; readonly metadata?: Record<string, unknown> } = {},
) => {
const last = state.assistantContent.at(-1)
const canMergeSignedReasoning = type === "reasoning" && text === "" && options.encrypted && last?.type === "reasoning"
const canMergeText = last?.type === type && !options.metadata && !last.metadata && !options.encrypted
if (canMergeSignedReasoning || canMergeText) {
state.assistantContent[state.assistantContent.length - 1] = {
...last,
text: `${last.text}${text}`,
...(type === "reasoning" && options.encrypted ? { encrypted: options.encrypted } : {}),
metadata: options.metadata ? { ...(last.metadata ?? {}), ...options.metadata } : last.metadata,
}
return
}
state.assistantContent.push({
type,
text,
...(type === "reasoning" && options.encrypted ? { encrypted: options.encrypted } : {}),
...(options.metadata ? { metadata: options.metadata } : {}),
})
}
const dispatch = (tools: Tools, call: ToolCallPart): Effect.Effect<ToolResultValue> => {
const tool = tools[call.name]
if (!tool) return Effect.succeed({ type: "error" as const, value: `Unknown tool: ${call.name}` })
return decodeAndExecute(tool, call.input).pipe(
Effect.catchTag("LLM.ToolFailure", (failure) =>
Effect.succeed({ type: "error" as const, value: failure.message } satisfies ToolResultValue),
),
)
}
const decodeAndExecute = (tool: AnyTool, input: unknown): Effect.Effect<ToolResultValue, ToolFailure> =>
tool._decode(input).pipe(
Effect.mapError((error) => new ToolFailure({ message: `Invalid tool input: ${error.message}` })),
Effect.flatMap((decoded) => tool.execute(decoded)),
Effect.flatMap((value) =>
tool._encode(value).pipe(
Effect.mapError(
(error) =>
new ToolFailure({
message: `Tool returned an invalid value for its success schema: ${error.message}`,
}),
),
),
),
Effect.map((encoded): ToolResultValue => ({ type: "json", value: encoded })),
)
const emitEvents = (call: ToolCallPart, result: ToolResultValue): ReadonlyArray<LLMEvent> =>
result.type === "error"
? [
{ type: "tool-error", id: call.id, name: call.name, message: String(result.value) },
{ type: "tool-result", id: call.id, name: call.name, result },
]
: [{ type: "tool-result", id: call.id, name: call.name, result }]
export * as ToolRuntime from "./tool-runtime"

107
packages/llm/src/tool.ts Normal file
View File

@@ -0,0 +1,107 @@
import { Effect, Schema } from "effect"
import type { ToolDefinition as ToolDefinitionClass } from "./schema"
import { ToolDefinition, ToolFailure } from "./schema"
/**
* Schema constraint for tool parameters / success values: no decoding or
* encoding services are allowed. Tools should be self-contained — anything
* beyond pure data transformation belongs in the handler closure.
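*
* A minimal sketch of a schema that satisfies the constraint (pure data, no
* services required to decode or encode):
* ```ts
* const CityParams = Schema.Struct({ city: Schema.String })
* ```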
*/
export type ToolSchema<T> = Schema.Codec<T, any, never, never>
/**
* A type-safe LLM tool. Each tool bundles its own description, parameter
* Schema, success Schema, and execute handler. The handler closes over any
* services it needs at construction time, so the runtime never sees per-tool
* dependencies.
*
* Errors must be expressed as `ToolFailure`. Unmapped errors and defects fail
* the stream.
*
* Internally each tool also carries memoized codecs and a precomputed
* `ToolDefinition` so the runtime doesn't rebuild them per invocation.
*/
export interface Tool<Parameters extends ToolSchema<any>, Success extends ToolSchema<any>> {
readonly description: string
readonly parameters: Parameters
readonly success: Success
readonly execute: (
params: Schema.Schema.Type<Parameters>,
) => Effect.Effect<Schema.Schema.Type<Success>, ToolFailure>
/** @internal */
readonly _decode: (input: unknown) => Effect.Effect<Schema.Schema.Type<Parameters>, Schema.SchemaError>
/** @internal */
readonly _encode: (value: Schema.Schema.Type<Success>) => Effect.Effect<unknown, Schema.SchemaError>
/** @internal */
readonly _definition: ToolDefinitionClass
}
export type AnyTool = Tool<ToolSchema<any>, ToolSchema<any>>
/**
* Constructs a typed tool. The Schema codecs and JSON-schema-shaped
* `ToolDefinition` are derived once at this call site so the runtime can
* reuse them across every invocation without recomputing.
*
* ```ts
* const getWeather = tool({
* description: "Get current weather",
* parameters: Schema.Struct({ city: Schema.String }),
* success: Schema.Struct({ temperature: Schema.Number }),
* execute: ({ city }) => Effect.succeed({ temperature: 22 }),
* })
* ```
*/
export const tool = <Parameters extends ToolSchema<any>, Success extends ToolSchema<any>>(config: {
readonly description: string
readonly parameters: Parameters
readonly success: Success
readonly execute: (
params: Schema.Schema.Type<Parameters>,
) => Effect.Effect<Schema.Schema.Type<Success>, ToolFailure>
}): Tool<Parameters, Success> => ({
description: config.description,
parameters: config.parameters,
success: config.success,
execute: config.execute,
_decode: Schema.decodeUnknownEffect(config.parameters),
_encode: Schema.encodeEffect(config.success),
_definition: new ToolDefinition({
name: "",
description: config.description,
inputSchema: toJsonSchema(config.parameters),
}),
})
/**
* A record of named tools. The record key becomes the tool name on the wire.
*/
export type Tools = Record<string, AnyTool>
/**
* Convert a tools record into the `ToolDefinition[]` shape that
* `LLMRequest.tools` expects. The runtime calls this internally; consumers
* that build `LLMRequest` themselves can use it too.
*
* Tool names come from the record keys, so the per-tool cached
* `_definition` is rebuilt with the correct name here. The JSON Schema body
* is reused.
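*
* A hedged sketch (`getWeather` is the tool from the `tool(...)` example above):
* ```ts
* const definitions = toDefinitions({ get_weather: getWeather })
* // definitions[0].name === "get_weather"
* ```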
*/
export const toDefinitions = (tools: Tools): ReadonlyArray<ToolDefinitionClass> =>
Object.entries(tools).map(([name, item]) =>
new ToolDefinition({
name,
description: item._definition.description,
inputSchema: item._definition.inputSchema,
}),
)
const toJsonSchema = (schema: Schema.Top): Record<string, unknown> => {
const document = Schema.toJsonSchemaDocument(schema)
if (Object.keys(document.definitions).length === 0) return document.schema as Record<string, unknown>
return { ...document.schema, $defs: document.definitions } as Record<string, unknown>
}
export { ToolFailure }
export * as Tool from "./tool"

View File

@@ -0,0 +1,312 @@
import { describe, expect } from "bun:test"
import { Effect, Schema, Stream } from "effect"
import { HttpClientRequest } from "effect/unstable/http"
import { LLM } from "../src"
import { Adapter, LLMClient } from "../src/adapter"
import { Patch } from "../src/patch"
import type { LLMEvent, LLMRequest, Message, ModelRef, ToolDefinition } from "../src/schema"
import { testEffect } from "./lib/effect"
import { dynamicResponse } from "./lib/http"
const updateMessageContent = (message: Message, content: Message["content"]) =>
LLM.message({
id: message.id,
role: message.role,
content,
metadata: message.metadata,
native: message.native,
})
const updateModel = (model: ModelRef, patch: Partial<LLM.ModelInput>) =>
LLM.model({
id: model.id,
provider: model.provider,
protocol: model.protocol,
baseURL: model.baseURL,
headers: model.headers,
capabilities: model.capabilities,
limits: model.limits,
native: model.native,
...patch,
})
const updateToolDefinition = (tool: ToolDefinition, patch: Partial<ToolDefinition>) =>
LLM.toolDefinition({
name: tool.name,
description: tool.description,
inputSchema: tool.inputSchema,
metadata: tool.metadata,
native: tool.native,
...patch,
})
const mapText = (fn: (text: string) => string) => (request: LLMRequest): LLMRequest =>
LLM.updateRequest(request, {
messages: request.messages.map((message) =>
updateMessageContent(
message,
message.content.map((part) => (part.type === "text" ? { ...part, text: fn(part.text) } : part)),
),
),
})
const Json = Schema.fromJsonString(Schema.Unknown)
const encodeJson = Schema.encodeSync(Json)
type FakeDraft = {
readonly body: string
readonly includeUsage?: boolean
}
const FakeChunk = Schema.Union([
Schema.Struct({ type: Schema.Literal("text"), text: Schema.String }),
Schema.Struct({ type: Schema.Literal("finish"), reason: Schema.Literal("stop") }),
])
type FakeChunk = Schema.Schema.Type<typeof FakeChunk>
const FakeChunks = Schema.Array(FakeChunk)
const request = LLM.request({
id: "req_1",
model: LLM.model({
id: "fake-model",
provider: "fake-provider",
protocol: "openai-chat",
}),
prompt: "hello",
})
const raiseChunk = (chunk: FakeChunk): LLMEvent =>
chunk.type === "finish"
? { type: "request-finish", reason: chunk.reason }
: { type: "text-delta", text: chunk.text }
const fake = Adapter.unsafe<FakeDraft, FakeDraft>({
id: "fake",
protocol: "openai-chat",
redact: (target) => ({ ...target, redacted: true }),
validate: (draft) => Effect.succeed(draft),
prepare: (request) =>
Effect.succeed({
body: [
...request.messages
.flatMap((message) => message.content)
.filter((part) => part.type === "text")
.map((part) => part.text),
...request.tools.map((tool) => `tool:${tool.name}:${tool.description}`),
].join("\n"),
}),
toHttp: (target) =>
Effect.succeed(
HttpClientRequest.post("https://fake.local/chat").pipe(
HttpClientRequest.setHeader("content-type", "application/json"),
HttpClientRequest.bodyText(encodeJson(target), "application/json"),
),
),
parse: (response) =>
Stream.fromEffect(
response.json.pipe(
Effect.flatMap(Schema.decodeUnknownEffect(FakeChunks)),
Effect.orDie,
),
).pipe(
Stream.flatMap(Stream.fromIterable),
Stream.map(raiseChunk),
),
})
const gemini = Adapter.unsafe<FakeDraft, FakeDraft>({
...fake,
id: "gemini-fake",
protocol: "gemini",
})
const echoLayer = dynamicResponse(({ text, respond }) =>
Effect.succeed(
respond(
encodeJson([
{ type: "text", text: `echo:${text}` },
{ type: "finish", reason: "stop" },
]),
),
),
)
const it = testEffect(echoLayer)
describe("llm adapter", () => {
it.effect("prepare applies target patches with trace", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({
adapters: [
fake.withPatches([
fake.patch("include-usage", {
reason: "fake target patch",
apply: (draft) => ({ ...draft, includeUsage: true }),
}),
]),
],
}).prepare(request)
expect(prepared.redactedTarget).toEqual({ body: "hello", includeUsage: true, redacted: true })
expect(prepared.patchTrace.map((item) => item.id)).toEqual(["target.fake.include-usage"])
}),
)
it.effect("stream and generate use the adapter pipeline", () =>
Effect.gen(function* () {
const llm = LLMClient.make({ adapters: [fake] })
const events = Array.from(yield* llm.stream(request).pipe(Stream.runCollect))
const response = yield* llm.generate(request)
expect(events.map((event) => event.type)).toEqual(["text-delta", "request-finish"])
expect(response.events.map((event) => event.type)).toEqual(["text-delta", "request-finish"])
}),
)
it.effect("selects adapters by request protocol", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [fake, gemini] }).prepare(
LLM.updateRequest(request, { model: updateModel(request.model, { protocol: "gemini" }) }),
)
expect(prepared.adapter).toBe("gemini-fake")
}),
)
it.effect("request, prompt, and tool-schema patches run before adapter prepare", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({
adapters: [fake],
patches: [
Patch.request("test.id", {
reason: "rewrite request id",
apply: (request) => LLM.updateRequest(request, { id: "req_patched" }),
}),
Patch.prompt("test.message", {
reason: "rewrite prompt text",
apply: mapText(() => "patched"),
}),
Patch.toolSchema("test.description", {
reason: "rewrite tool description",
apply: (tool) => updateToolDefinition(tool, { description: "patched tool" }),
}),
],
}).prepare(
LLM.updateRequest(request, {
tools: [{ name: "lookup", description: "original", inputSchema: {} }],
}),
)
expect(prepared.id).toBe("req_patched")
expect(prepared.target).toEqual({ body: "patched\ntool:lookup:patched tool" })
expect(prepared.patchTrace.map((item) => item.id)).toEqual([
"request.test.id",
"prompt.test.message",
"schema.test.description",
])
}),
)
it.effect("request patches feed into prompt-patch predicates so phases see updated context", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({
adapters: [fake],
patches: [
// The earlier phase rewrites the provider; the later phase only fires for the
// rewritten provider. If `compile` reuses a stale PatchContext, this test
// fails because the prompt patch's `when` would not match.
Patch.request("rewrite-provider", {
reason: "swap provider before prompt phase",
apply: (request) => LLM.updateRequest(request, { model: updateModel(request.model, { provider: "rewritten" }) }),
}),
Patch.prompt("rewrite-only-when-rewritten", {
reason: "rewrite prompt text only after provider swap",
when: (ctx) => ctx.model.provider === "rewritten",
apply: mapText((text) => `rewrote-${text}`),
}),
],
}).prepare(request)
expect(prepared.target).toEqual({ body: "rewrote-hello" })
expect(prepared.patchTrace.map((item) => item.id)).toEqual([
"request.rewrite-provider",
"prompt.rewrite-only-when-rewritten",
])
}),
)
it.effect("patches with the same order sort by id for deterministic application", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({
adapters: [fake],
patches: [
Patch.prompt("zeta", {
reason: "later id",
order: 1,
apply: mapText((text) => `${text}|zeta`),
}),
Patch.prompt("alpha", {
reason: "earlier id",
order: 1,
apply: mapText((text) => `${text}|alpha`),
}),
],
}).prepare(request)
expect(prepared.target).toEqual({ body: "hello|alpha|zeta" })
}),
)
it.effect("stream patches transform raised events", () =>
Effect.gen(function* () {
const llm = LLMClient.make({
adapters: [fake],
patches: [
Patch.stream("test.uppercase", {
reason: "uppercase text deltas",
apply: (event) => (event.type === "text-delta" ? { ...event, text: event.text.toUpperCase() } : event),
}),
],
})
const events = Array.from(yield* llm.stream(request).pipe(Stream.runCollect))
expect(events[0]).toEqual({ type: "text-delta", text: 'ECHO:{"BODY":"HELLO"}' })
}),
)
it.effect("stream patches transform multiple events per stream", () =>
Effect.gen(function* () {
// Verifies stream patches run on every event, not just the first.
const seen: string[] = []
const llm = LLMClient.make({
adapters: [fake],
patches: [
Patch.stream("test.tap", {
reason: "record every event type",
apply: (event) => {
seen.push(event.type)
return event
},
}),
],
})
yield* llm.stream(request).pipe(Stream.runDrain)
expect(seen).toEqual(["text-delta", "request-finish"])
}),
)
it.effect("rejects protocol mismatch", () =>
Effect.gen(function* () {
const error = yield* LLMClient.make({ adapters: [fake] })
.prepare(
LLM.updateRequest(request, { model: updateModel(request.model, { protocol: "gemini" }) }),
)
.pipe(Effect.flip)
expect(error.message).toContain("No LLM adapter")
}),
)
})

View File

@@ -0,0 +1,32 @@
{
"version": 1,
"metadata": {
"name": "anthropic-messages/streams-text",
"recordedAt": "2026-04-28T21:18:45.535Z",
"tags": [
"prefix:anthropic-messages",
"provider:anthropic",
"protocol:anthropic-messages"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-haiku-4-5-20251001\",\"system\":[{\"type\":\"text\",\"text\":\"You are concise.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Reply with exactly: Hello!\"}]}],\"stream\":true,\"max_tokens\":20,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-haiku-4-5-20251001\",\"id\":\"msg_01UodR8c3ezAK8rAfi8HAs8g\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":18,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":2,\"service_tier\":\"standard\",\"inference_geo\":\"not_available\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"Hello!\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":18,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":5} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n"
}
}
]
}

View File

@@ -0,0 +1,33 @@
{
"version": 1,
"metadata": {
"name": "anthropic-messages/streams-tool-call",
"recordedAt": "2026-04-28T21:18:46.878Z",
"tags": [
"prefix:anthropic-messages",
"provider:anthropic",
"protocol:anthropic-messages",
"tool"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-haiku-4-5-20251001\",\"system\":[{\"type\":\"text\",\"text\":\"Call tools exactly as requested.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}],\"tool_choice\":{\"type\":\"tool\",\"name\":\"get_weather\"},\"stream\":true,\"max_tokens\":80,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-haiku-4-5-20251001\",\"id\":\"msg_01RYgU7NUPMK4B9v8S7gVpCS\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":677,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":16,\"service_tier\":\"standard\",\"inference_geo\":\"not_available\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"tool_use\",\"id\":\"toolu_012rmAruviySvUXSjgCPWVRu\",\"name\":\"get_weather\",\"input\":{},\"caller\":{\"type\":\"direct\"}} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"{\\\"city\\\":\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\" \\\"Paris\\\"}\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"tool_use\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":677,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":33} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n"
}
}
]
}

View File

@@ -0,0 +1,33 @@
{
"version": 1,
"metadata": {
"name": "bedrock-converse/streams-a-tool-call",
"recordedAt": "2026-04-28T21:18:46.929Z",
"tags": [
"prefix:bedrock-converse",
"provider:amazon-bedrock",
"protocol:bedrock-converse",
"tool"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream",
"headers": {
"content-type": "application/json"
},
"body": "{\"modelId\":\"us.amazon.nova-micro-v1:0\",\"messages\":[{\"role\":\"user\",\"content\":[{\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"system\":[{\"text\":\"Call tools exactly as requested.\"}],\"inferenceConfig\":{\"maxTokens\":80,\"temperature\":0},\"toolConfig\":{\"tools\":[{\"toolSpec\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"inputSchema\":{\"json\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}}],\"toolChoice\":{\"tool\":{\"name\":\"get_weather\"}}}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/vnd.amazon.eventstream"
},
"body": "AAAAuQAAAFL9kIXUCzpldmVudC10eXBlBwAMbWVzc2FnZVN0YXJ0DTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzNDU2NyIsInJvbGUiOiJhc3Npc3RhbnQifWf51EkAAAEMAAAAV56BJZoLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tTdGFydA06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFUiLCJzdGFydCI6eyJ0b29sVXNlIjp7Im5hbWUiOiJnZXRfd2VhdGhlciIsInRvb2xVc2VJZCI6InRvb2x1c2VfNmExcFB2bmM5OUdMS08zS0drVUEyTiJ9fX2LR7PFAAAA4gAAAFfCOY+BCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidG9vbFVzZSI6eyJpbnB1dCI6IntcImNpdHlcIjpcIlBhcmlzXCJ9In19LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTiJ9RkW+2gAAAIcAAABW5OxHKgs6ZXZlbnQtdHlwZQcAEGNvbnRlbnRCbG9ja1N0b3ANOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwicCI6ImFiYyJ9y6nrtwAAAK4AAABRtlmf/As6ZXZlbnQtdHlwZQcAC21lc3NhZ2VTdG9wDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSUyIsInN0b3BSZWFzb24iOiJ0b29sX3VzZSJ9MTlQawAAAOIAAABOplInQQs6ZXZlbnQtdHlwZQcACG1ldGFkYXRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsibWV0cmljcyI6eyJsYXRlbmN5TXMiOjM1NX0sInAiOiJhYmNkZWZnaGlqayIsInVzYWdlIjp7ImlucHV0VG9rZW5zIjo0MTksIm91dHB1dFRva2VucyI6MTYsInNlcnZlclRvb2xVc2FnZSI6e30sInRvdGFsVG9rZW5zIjo0MzV9fU1tVJc=",
"bodyEncoding": "base64"
}
}
]
}

View File

@@ -0,0 +1,32 @@
{
"version": 1,
"metadata": {
"name": "bedrock-converse/streams-text",
"recordedAt": "2026-04-28T21:18:46.553Z",
"tags": [
"prefix:bedrock-converse",
"provider:amazon-bedrock",
"protocol:bedrock-converse"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream",
"headers": {
"content-type": "application/json"
},
"body": "{\"modelId\":\"us.amazon.nova-micro-v1:0\",\"messages\":[{\"role\":\"user\",\"content\":[{\"text\":\"Say hello.\"}]}],\"system\":[{\"text\":\"Reply with the single word 'Hello'.\"}],\"inferenceConfig\":{\"maxTokens\":16,\"temperature\":0}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/vnd.amazon.eventstream"
},
"body": "AAAAmQAAAFI8UarQCzpldmVudC10eXBlBwAMbWVzc2FnZVN0YXJ0DTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUIiLCJyb2xlIjoiYXNzaXN0YW50In3SL1jNAAAAvQAAAFd4etebCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IkhlbGxvIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFIn2B0NR6AAAAxgAAAFf2eAZFCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTIn3XaHMvAAAAhwAAAFbk7EcqCzpldmVudC10eXBlBwAQY29udGVudEJsb2NrU3RvcA06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJwIjoiYWJjIn3Lqeu3AAAAjwAAAFFK+JlICzpldmVudC10eXBlBwALbWVzc2FnZVN0b3ANOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJwIjoiYWJjZGVmZ2hpamtsbW4iLCJzdG9wUmVhc29uIjoiZW5kX3R1cm4ifZ+RQqEAAAECAAAATkXaMzsLOmV2ZW50LXR5cGUHAAhtZXRhZGF0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7Im1ldHJpY3MiOnsibGF0ZW5jeU1zIjozMDZ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVCIsInVzYWdlIjp7ImlucHV0VG9rZW5zIjoxMiwib3V0cHV0VG9rZW5zIjoyLCJzZXJ2ZXJUb29sVXNhZ2UiOnt9LCJ0b3RhbFRva2VucyI6MTR9fSnnkUk=",
"bodyEncoding": "base64"
}
}
]
}

View File

@@ -0,0 +1,31 @@
{
"version": 1,
"metadata": {
"name": "gemini/streams-text",
"recordedAt": "2026-04-28T21:18:47.483Z",
"tags": [
"prefix:gemini",
"provider:google",
"protocol:gemini"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse",
"headers": {
"content-type": "application/json"
},
"body": "{\"contents\":[{\"role\":\"user\",\"parts\":[{\"text\":\"Reply with exactly: Hello!\"}]}],\"systemInstruction\":{\"parts\":[{\"text\":\"You are concise.\"}]},\"generationConfig\":{\"maxOutputTokens\":80,\"temperature\":0}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream"
},
"body": "data: {\"candidates\": [{\"content\": {\"parts\": [{\"text\": \"Hello!\"}],\"role\": \"model\"},\"finishReason\": \"STOP\",\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": 11,\"candidatesTokenCount\": 2,\"totalTokenCount\": 29,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": 11}],\"thoughtsTokenCount\": 16},\"modelVersion\": \"gemini-2.5-flash\",\"responseId\": \"NyTxaczMAZ-b_uMP6u--iQg\"}\r\n\r\n"
}
}
]
}

View File

@@ -0,0 +1,32 @@
{
"version": 1,
"metadata": {
"name": "gemini/streams-tool-call",
"recordedAt": "2026-04-28T21:18:48.285Z",
"tags": [
"prefix:gemini",
"provider:google",
"protocol:gemini",
"tool"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse",
"headers": {
"content-type": "application/json"
},
"body": "{\"contents\":[{\"role\":\"user\",\"parts\":[{\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"systemInstruction\":{\"parts\":[{\"text\":\"Call tools exactly as requested.\"}]},\"tools\":[{\"functionDeclarations\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"required\":[\"city\"],\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}}}}]}],\"toolConfig\":{\"functionCallingConfig\":{\"mode\":\"ANY\",\"allowedFunctionNames\":[\"get_weather\"]}},\"generationConfig\":{\"maxOutputTokens\":80,\"temperature\":0}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream"
},
"body": "data: {\"candidates\": [{\"content\": {\"parts\": [{\"functionCall\": {\"name\": \"get_weather\",\"args\": {\"city\": \"Paris\"}},\"thoughtSignature\": \"CiQBDDnWx5RcSsS1UMbykQ5HWlrMu6wrxXGUhmZ0uRKLaMhDZaEKXwEMOdbHVoJAlfbOQyKB378pDZ/gkjWr3HP+dWw1us1kMG22g4G3oJvuTq/SrWS+7KYtSlvOxCKhW2l/2/TczpyGyGmANmsusDcxF1SKOYA5/8Hg0nI24MAlT3+91V/MCoUBAQw51seClFLy3E71v2H44F1kpmjgz8FeTRZofrjbaazfrT+w8Yxgdr3UgGagLMY4OadZemQTWckq9IAqRum78hrBg6NGtQvn15SbtfTNqI4PcxX/+qPo4/g4/ZT5kVORDhVqO8BVP/RA5GQ3ce3sRK8hSkvQlXSoXIPpHh6x7hBezIGXzw==\"}],\"role\": \"model\"},\"finishReason\": \"STOP\",\"index\": 0,\"finishMessage\": \"Model generated function call(s).\"}],\"usageMetadata\": {\"promptTokenCount\": 55,\"candidatesTokenCount\": 15,\"totalTokenCount\": 115,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": 55}],\"thoughtsTokenCount\": 45},\"modelVersion\": \"gemini-2.5-flash\",\"responseId\": \"NyTxaYuTJ_OW_uMPgIPKgAg\"}\r\n\r\n"
}
}
]
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,31 @@
{
"version": 1,
"metadata": {
"name": "openai-chat/streams-text",
"recordedAt": "2026-04-28T21:18:36.916Z",
"tags": [
"prefix:openai-chat",
"provider:openai",
"protocol:openai-chat"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Say hello in one short sentence.\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":20,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "data: {\"id\":\"chatcmpl-DZk7AS6GyEHvGu6oglm0lRAVPLKVl\",\"object\":\"chat.completion.chunk\",\"created\":1777411116,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_c42fec8f39\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\",\"refusal\":null},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"e2lwm6DLm\"}\n\ndata: {\"id\":\"chatcmpl-DZk7AS6GyEHvGu6oglm0lRAVPLKVl\",\"object\":\"chat.completion.chunk\",\"created\":1777411116,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_c42fec8f39\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"LMrPYw\"}\n\ndata: {\"id\":\"chatcmpl-DZk7AS6GyEHvGu6oglm0lRAVPLKVl\",\"object\":\"chat.completion.chunk\",\"created\":1777411116,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_c42fec8f39\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"bJfqjLPNB4\"}\n\ndata: {\"id\":\"chatcmpl-DZk7AS6GyEHvGu6oglm0lRAVPLKVl\",\"object\":\"chat.completion.chunk\",\"created\":1777411116,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_c42fec8f39\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":null,\"obfuscation\":\"P3gO2\"}\n\ndata: {\"id\":\"chatcmpl-DZk7AS6GyEHvGu6oglm0lRAVPLKVl\",\"object\":\"chat.completion.chunk\",\"created\":1777411116,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_c42fec8f39\",\"choices\":[],\"usage\":{\"prompt_tokens\":22,\"completion_tokens\":2,\"total_tokens\":24,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"audio_tokens\":0,\"accepted_prediction_tokens\":0,\"rejected_prediction_tokens\":0}},\"obfuscation\":\"lVqas0bcjNx\"}\n\ndata: [DONE]\n\n"
}
}
]
}

View File

@@ -0,0 +1,32 @@
{
"version": 1,
"metadata": {
"name": "openai-chat/streams-tool-call",
"recordedAt": "2026-04-28T21:18:38.053Z",
"tags": [
"prefix:openai-chat",
"provider:openai",
"protocol:openai-chat",
"tool"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"Call tools exactly as requested.\"},{\"role\":\"user\",\"content\":\"Call get_weather with city exactly Paris.\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"tool_choice\":{\"type\":\"function\",\"function\":{\"name\":\"get_weather\"}},\"stream\":true,\"max_tokens\":80,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "data: {\"id\":\"chatcmpl-DZk7BOHcY0wpwDDyT46mnFuldPW7H\",\"object\":\"chat.completion.chunk\",\"created\":1777411117,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b86b5e7355\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":null,\"tool_calls\":[{\"index\":0,\"id\":\"call_63S0l2F1i8sv9LmBLJ2eNAYS\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}],\"refusal\":null},\"logprobs\":null,\"finish_reason\":null}],\"obfuscation\":\"0\"}\n\ndata: {\"id\":\"chatcmpl-DZk7BOHcY0wpwDDyT46mnFuldPW7H\",\"object\":\"chat.completion.chunk\",\"created\":1777411117,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b86b5e7355\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"obfuscation\":\"2MSm0yVFD22\"}\n\ndata: {\"id\":\"chatcmpl-DZk7BOHcY0wpwDDyT46mnFuldPW7H\",\"object\":\"chat.completion.chunk\",\"created\":1777411117,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b86b5e7355\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"city\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"obfuscation\":\"47VRigngpL\"}\n\ndata: {\"id\":\"chatcmpl-DZk7BOHcY0wpwDDyT46mnFuldPW7H\",\"object\":\"chat.completion.chunk\",\"created\":1777411117,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b86b5e7355\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\":\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"obfuscation\":\"ZDLNnsyrQ\"}\n\ndata: {\"id\":\"chatcmpl-DZk7BOHcY0wpwDDyT46mnFuldPW7H\",\"object\":\"chat.completion.chunk\",\"created\":1777411117,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b86b5e7355\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"Paris\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"obfuscation\":\"EnjgG1OLD\"}\n\ndata: {\"id\":\"chatcmpl-DZk7BOHcY0wpwDDyT46mnFuldPW7H\",\"object\":\"chat.completion.chunk\",\"created\":1777411117,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b86b5e7355\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"}\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"obfuscation\":\"fnJiTWAyEwL\"}\n\ndata: {\"id\":\"chatcmpl-DZk7BOHcY0wpwDDyT46mnFuldPW7H\",\"object\":\"chat.completion.chunk\",\"created\":1777411117,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b86b5e7355\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"obfuscation\":\"V8\"}\n\ndata: [DONE]\n\n"
}
}
]
}

View File

@@ -0,0 +1,31 @@
{
"version": 1,
"metadata": {
"name": "openai-compatible-chat/deepseek-streams-text",
"recordedAt": "2026-04-28T21:18:49.498Z",
"tags": [
"prefix:openai-compatible-chat",
"protocol:openai-compatible-chat",
"provider:deepseek"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://api.deepseek.com/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"deepseek-chat\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Reply with exactly: Hello!\"}],\"stream\":true,\"max_tokens\":20,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "data: {\"id\":\"0c811926-1e0c-4160-baf8-6e71247c8ad7\",\"object\":\"chat.completion.chunk\",\"created\":1777411128,\"model\":\"deepseek-v4-flash\",\"system_fingerprint\":\"fp_058df29938_prod0820_fp8_kvcache_20260402\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"0c811926-1e0c-4160-baf8-6e71247c8ad7\",\"object\":\"chat.completion.chunk\",\"created\":1777411128,\"model\":\"deepseek-v4-flash\",\"system_fingerprint\":\"fp_058df29938_prod0820_fp8_kvcache_20260402\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"0c811926-1e0c-4160-baf8-6e71247c8ad7\",\"object\":\"chat.completion.chunk\",\"created\":1777411128,\"model\":\"deepseek-v4-flash\",\"system_fingerprint\":\"fp_058df29938_prod0820_fp8_kvcache_20260402\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"0c811926-1e0c-4160-baf8-6e71247c8ad7\",\"object\":\"chat.completion.chunk\",\"created\":1777411128,\"model\":\"deepseek-v4-flash\",\"system_fingerprint\":\"fp_058df29938_prod0820_fp8_kvcache_20260402\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"\"},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":14,\"completion_tokens\":2,\"total_tokens\":16,\"prompt_tokens_details\":{\"cached_tokens\":0},\"prompt_cache_hit_tokens\":0,\"prompt_cache_miss_tokens\":14}}\n\ndata: [DONE]\n\n"
}
}
]
}

View File

@@ -0,0 +1,31 @@
{
"version": 1,
"metadata": {
"name": "openai-compatible-chat/togetherai-streams-text",
"recordedAt": "2026-04-28T21:18:55.266Z",
"tags": [
"prefix:openai-compatible-chat",
"protocol:openai-compatible-chat",
"provider:togetherai"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Reply with exactly: Hello!\"}],\"stream\":true,\"max_tokens\":20,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream;charset=utf-8"
},
"body": "data: {\"id\":\"ogzjdpL-6Ng1vN-9f391a08f8af75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411129,\"choices\":[{\"index\":0,\"text\":\"Hello\",\"logprobs\":null,\"finish_reason\":null,\"seed\":null,\"delta\":{\"token_id\":9906,\"role\":\"assistant\",\"content\":\"Hello\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"usage\":null}\n\ndata: {\"id\":\"ogzjdpL-6Ng1vN-9f391a08f8af75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411129,\"choices\":[{\"index\":0,\"text\":\"!\",\"logprobs\":null,\"finish_reason\":null,\"seed\":null,\"delta\":{\"token_id\":null,\"role\":\"assistant\",\"content\":\"!\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"usage\":null}\n\ndata: {\"id\":\"ogzjdpL-6Ng1vN-9f391a08f8af75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411129,\"choices\":[{\"index\":0,\"text\":\"\",\"logprobs\":null,\"finish_reason\":\"stop\",\"seed\":15924764223251450000,\"delta\":{\"token_id\":128009,\"role\":\"assistant\",\"content\":\"\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"usage\":{\"prompt_tokens\":45,\"completion_tokens\":3,\"total_tokens\":48,\"cached_tokens\":0}}\n\ndata: [DONE]\n\n"
}
}
]
}

View File

@@ -0,0 +1,32 @@
{
"version": 1,
"metadata": {
"name": "openai-compatible-chat/togetherai-streams-tool-call",
"recordedAt": "2026-04-28T21:18:59.123Z",
"tags": [
"prefix:openai-compatible-chat",
"protocol:openai-compatible-chat",
"provider:togetherai",
"tool"
]
},
"interactions": [
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"messages\":[{\"role\":\"system\",\"content\":\"Call tools exactly as requested.\"},{\"role\":\"user\",\"content\":\"Call get_weather with city exactly Paris.\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"tool_choice\":{\"type\":\"function\",\"function\":{\"name\":\"get_weather\"}},\"stream\":true,\"max_tokens\":80,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream;charset=utf-8"
},
"body": "data: {\"id\":\"ogzjfRD-6Ng1vN-9f391a2bb8ca75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411135,\"choices\":[{\"index\":0,\"role\":\"assistant\",\"text\":\"\",\"logprobs\":null,\"finish_reason\":null,\"delta\":{\"token_id\":null,\"role\":\"assistant\",\"content\":\"\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\"}\n\ndata: {\"id\":\"ogzjfRD-6Ng1vN-9f391a2bb8ca75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411135,\"choices\":[{\"index\":0,\"text\":\"\",\"logprobs\":null,\"finish_reason\":null,\"delta\":{\"token_id\":null,\"role\":\"assistant\",\"content\":\"\",\"tool_calls\":[{\"index\":0,\"id\":\"call_yu1mxtmex7x48nximi9c8jpo\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}]}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\"}\n\ndata: {\"id\":\"ogzjfRD-6Ng1vN-9f391a2bb8ca75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411135,\"choices\":[{\"index\":0,\"text\":\"\",\"logprobs\":null,\"finish_reason\":\"tool_calls\",\"delta\":{\"token_id\":null,\"role\":\"assistant\",\"content\":\"\",\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"city\\\":\\\"Paris\\\"}\"}}]}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\"}\n\ndata: {\"id\":\"ogzjfRD-6Ng1vN-9f391a2bb8ca75e1\",\"object\":\"chat.completion.chunk\",\"created\":1777411135,\"choices\":[{\"index\":0,\"text\":\"\",\"logprobs\":null,\"finish_reason\":\"tool_calls\",\"seed\":9033012299842426000,\"delta\":{\"token_id\":128009,\"role\":\"assistant\",\"content\":\"\"}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"usage\":{\"prompt_tokens\":194,\"completion_tokens\":19,\"total_tokens\":213,\"cached_tokens\":0}}\n\ndata: [DONE]\n\n"
}
}
]
}

View File

@@ -0,0 +1,50 @@
import { test, type TestOptions } from "bun:test"
import { Cause, Effect, Exit, Layer } from "effect"
import type * as Scope from "effect/Scope"
import * as TestClock from "effect/testing/TestClock"
import * as TestConsole from "effect/testing/TestConsole"
type Body<A, E, R> = Effect.Effect<A, E, R> | (() => Effect.Effect<A, E, R>)
const body = <A, E, R>(value: Body<A, E, R>) => Effect.suspend(() => (typeof value === "function" ? value() : value))
const run = <A, E, R, E2>(value: Body<A, E, R | Scope.Scope>, layer: Layer.Layer<R, E2>) =>
Effect.gen(function* () {
const exit = yield* body(value).pipe(Effect.scoped, Effect.provide(layer), Effect.exit)
if (Exit.isFailure(exit)) {
for (const err of Cause.prettyErrors(exit.cause)) {
yield* Effect.logError(err)
}
}
return yield* exit
}).pipe(Effect.runPromise)
const make = <R, E>(testLayer: Layer.Layer<R, E>, liveLayer: Layer.Layer<R, E>) => {
const effect = <A, E2>(name: string, value: Body<A, E2, R | Scope.Scope>, opts?: number | TestOptions) =>
test(name, () => run(value, testLayer), opts)
effect.only = <A, E2>(name: string, value: Body<A, E2, R | Scope.Scope>, opts?: number | TestOptions) =>
test.only(name, () => run(value, testLayer), opts)
effect.skip = <A, E2>(name: string, value: Body<A, E2, R | Scope.Scope>, opts?: number | TestOptions) =>
test.skip(name, () => run(value, testLayer), opts)
const live = <A, E2>(name: string, value: Body<A, E2, R | Scope.Scope>, opts?: number | TestOptions) =>
test(name, () => run(value, liveLayer), opts)
live.only = <A, E2>(name: string, value: Body<A, E2, R | Scope.Scope>, opts?: number | TestOptions) =>
test.only(name, () => run(value, liveLayer), opts)
live.skip = <A, E2>(name: string, value: Body<A, E2, R | Scope.Scope>, opts?: number | TestOptions) =>
test.skip(name, () => run(value, liveLayer), opts)
return { effect, live }
}
const testEnv = Layer.mergeAll(TestConsole.layer, TestClock.layer())
const liveEnv = TestConsole.layer
export const it = make(testEnv, liveEnv)
export const testEffect = <R, E>(layer: Layer.Layer<R, E>) =>
make(Layer.provideMerge(layer, testEnv), Layer.provideMerge(layer, liveEnv))
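// Usage sketch (illustrative, not part of the original helper): a spec file
// builds `it` from its own layer and writes Effect-returning test bodies. The
// `MyService` layer below is a hypothetical stand-in; callers in this change
// use plain layers such as `testEffect(Layer.empty)`.
//
//   const it = testEffect(MyService.layer)
//   it.effect("computes the answer", () =>
//     Effect.gen(function* () {
//       const value = yield* Effect.succeed(42)
//       expect(value).toBe(42)
//     }),
//   )
//
// `it.live` runs the same body against the live layer, without the TestClock.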

View File

@@ -0,0 +1,86 @@
import { Effect, Layer, Ref } from "effect"
import { HttpClient, HttpClientRequest, HttpClientResponse } from "effect/unstable/http"
import { RequestExecutor } from "../../src/executor"
export type HandlerInput = {
readonly request: HttpClientRequest.HttpClientRequest
readonly text: string
readonly respond: (body: ConstructorParameters<typeof Response>[0], init?: ResponseInit) => HttpClientResponse.HttpClientResponse
}
export type Handler = (input: HandlerInput) => Effect.Effect<HttpClientResponse.HttpClientResponse>
const handlerLayer = (handler: Handler): Layer.Layer<HttpClient.HttpClient> =>
Layer.succeed(
HttpClient.HttpClient,
HttpClient.make((request) =>
Effect.gen(function* () {
const web = yield* HttpClientRequest.toWeb(request).pipe(Effect.orDie)
const text = yield* Effect.promise(() => web.text())
return yield* handler({
request,
text,
respond: (body, init) => HttpClientResponse.fromWeb(request, new Response(body, init)),
})
}),
),
)
const executorWith = (layer: Layer.Layer<HttpClient.HttpClient>) =>
RequestExecutor.layer.pipe(Layer.provide(layer))
const SSE_HEADERS = { "content-type": "text/event-stream" } as const
/**
* Layer that returns a single fixed response body. Use for stream-parser
* fixture tests where the request shape is irrelevant. The body type widens
* to whatever `Response` accepts so binary fixtures (`Uint8Array`,
* `ReadableStream`, etc.) flow through without casts.
*/
export const fixedResponse = (
body: ConstructorParameters<typeof Response>[0],
init: ResponseInit = { headers: SSE_HEADERS },
) => executorWith(handlerLayer((input) => Effect.succeed(input.respond(body, init))))
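// Usage sketch (mirrors the adapter tests added alongside this helper): pair a
// canned SSE body with `fixedResponse` and provide the layer to a generate
// call. `client`, `request`, and `sseEvents` come from the calling test file.
//
//   const body = sseEvents({ choices: [{ delta: { content: "Hi" }, finish_reason: "stop" }] })
//   const response = yield* client.generate(request).pipe(Effect.provide(fixedResponse(body)))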
/**
* Layer that builds a response per request. Useful for echo servers.
*/
export const dynamicResponse = (handler: Handler) => executorWith(handlerLayer(handler))
/**
* Layer that emits the supplied SSE chunks and then aborts mid-stream. Used to
* exercise transport errors that surface during parsing.
*/
export const truncatedStream = (chunks: ReadonlyArray<string>) =>
dynamicResponse((input) =>
Effect.sync(() => {
const encoder = new TextEncoder()
const stream = new ReadableStream({
start(controller) {
for (const chunk of chunks) controller.enqueue(encoder.encode(chunk))
controller.error(new Error("connection reset"))
},
})
return input.respond(stream, { headers: SSE_HEADERS })
}),
)
/**
* Layer that returns successive bodies on each request. Useful for scripting
* multi-step model exchanges (e.g. tool-call loops). The last body in the
* array is reused if the test makes more requests than scripted.
*/
export const scriptedResponses = (bodies: ReadonlyArray<string>, init: ResponseInit = { headers: SSE_HEADERS }) => {
if (bodies.length === 0) throw new Error("scriptedResponses requires at least one body")
return Layer.unwrap(
Effect.gen(function* () {
const cursor = yield* Ref.make(0)
return dynamicResponse((input) =>
Effect.gen(function* () {
const index = yield* Ref.getAndUpdate(cursor, (n) => n + 1)
return input.respond(bodies[index] ?? bodies[bodies.length - 1], init)
}),
)
}),
)
}
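// Usage sketch (hypothetical names): script a two-step tool-call loop. Both
// requests must run under a single `Effect.provide(layer)` so they share the
// cursor Ref; providing the layer twice rebuilds it and replays body 0.
//
//   const layer = scriptedResponses([toolCallBody, finalTextBody])
//   yield* Effect.gen(function* () {
//     const first = yield* client.generate(request) // served toolCallBody
//     const second = yield* client.generate(followUp) // served finalTextBody, then reused
//   }).pipe(Effect.provide(layer))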

View File

@@ -0,0 +1,27 @@
/**
* Shared chunk shapes for OpenAI Chat / OpenAI-compatible Chat fixture tests.
* Multiple test files build the same `{ id, choices: [{ delta, finish_reason }], usage }`
* envelope; consolidating here keeps tool-call event shapes consistent.
*/
const FIXTURE_ID = "chatcmpl_fixture"
export const deltaChunk = (delta: object, finishReason: string | null = null) => ({
id: FIXTURE_ID,
choices: [{ delta, finish_reason: finishReason }],
usage: null,
})
export const usageChunk = (usage: object) => ({
id: FIXTURE_ID,
choices: [],
usage,
})
export const finishChunk = (reason: string) => deltaChunk({}, reason)
export const toolCallChunk = (id: string, name: string, args: string, index = 0) =>
deltaChunk({
role: "assistant",
tool_calls: [{ index, id, function: { name, arguments: args } }],
})
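// Usage sketch: compose these builders into a full SSE body with the sibling
// `sseEvents` helper (assumed to live at "./sse" in this test lib).
//
//   const body = sseEvents(
//     deltaChunk({ role: "assistant", content: "Hello" }),
//     toolCallChunk("call_1", "get_weather", '{"city":"Paris"}'),
//     finishChunk("tool_calls"),
//     usageChunk({ prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }),
//   )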

View File

@@ -0,0 +1,20 @@
/**
* Helpers for building deterministic SSE bodies in tests.
*
* Inline template-literal SSE strings are hard to write and review when chunks
* contain JSON; this helper accepts plain values and serializes them, so test
* authors only think about the chunk shapes, not the wire format.
*/
export const sseEvents = (
...chunks: ReadonlyArray<unknown>
): string => `${chunks.map(formatChunk).join("")}data: [DONE]\n\n`
const formatChunk = (chunk: unknown) =>
`data: ${typeof chunk === "string" ? chunk : JSON.stringify(chunk)}\n\n`
/**
* Build an SSE body from already-serialized strings (used when the chunk shape
* itself is part of what's being tested, e.g. malformed chunks).
*/
export const sseRaw = (...lines: ReadonlyArray<string>): string =>
lines.map((line) => `${line}\n\n`).join("")
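// Worked example of the wire format these helpers produce:
//   sseEvents({ n: 1 }, "raw")  => 'data: {"n":1}\n\ndata: raw\n\ndata: [DONE]\n\n'
//   sseRaw("data: oops")        => 'data: oops\n\n' (no [DONE] terminator)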

View File

@@ -0,0 +1,74 @@
import { describe, expect, test } from "bun:test"
import { LLM } from "../src"
import { LLMRequest, Message, ModelRef, ToolChoice, ToolDefinition } from "../src/schema"
describe("llm constructors", () => {
test("builds canonical schema classes from ergonomic input", () => {
const request = LLM.request({
id: "req_1",
model: LLM.model({ id: "fake-model", provider: "fake", protocol: "openai-chat" }),
system: "You are concise.",
prompt: "Say hello.",
})
expect(request).toBeInstanceOf(LLMRequest)
expect(request.model).toBeInstanceOf(ModelRef)
expect(request.messages[0]).toBeInstanceOf(Message)
expect(request.system).toEqual([{ type: "text", text: "You are concise." }])
expect(request.messages[0]?.content).toEqual([{ type: "text", text: "Say hello." }])
expect(request.generation).toEqual({})
expect(request.tools).toEqual([])
})
test("updates requests without spreading schema class instances", () => {
const base = LLM.request({
id: "req_1",
model: LLM.model({ id: "fake-model", provider: "fake", protocol: "openai-chat" }),
prompt: "Say hello.",
})
const updated = LLM.updateRequest(base, {
generation: { maxTokens: 20 },
messages: [...base.messages, LLM.assistant("Hi.")],
})
expect(updated).toBeInstanceOf(LLMRequest)
expect(updated.id).toBe("req_1")
expect(updated.model).toEqual(base.model)
expect(updated.generation).toEqual({ maxTokens: 20 })
expect(updated.messages.map((message) => message.role)).toEqual(["user", "assistant"])
})
test("builds tool choices from names and tools", () => {
const tool = LLM.toolDefinition({ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } })
expect(tool).toBeInstanceOf(ToolDefinition)
expect(LLM.toolChoice("lookup")).toEqual(new ToolChoice({ type: "tool", name: "lookup" }))
expect(LLM.toolChoiceName("required")).toEqual(new ToolChoice({ type: "tool", name: "required" }))
expect(LLM.toolChoice(tool)).toEqual(new ToolChoice({ type: "tool", name: "lookup" }))
})
test("builds tool choice modes from reserved strings", () => {
expect(LLM.toolChoice("auto")).toEqual(new ToolChoice({ type: "auto" }))
expect(LLM.toolChoice("none")).toEqual(new ToolChoice({ type: "none" }))
expect(LLM.toolChoice("required")).toEqual(new ToolChoice({ type: "required" }))
expect(LLM.request({
model: LLM.model({ id: "fake-model", provider: "fake", protocol: "openai-chat" }),
prompt: "Use tools if needed.",
toolChoice: "required",
}).toolChoice).toEqual(new ToolChoice({ type: "required" }))
})
test("builds assistant tool calls and tool result messages", () => {
const call = LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })
const result = LLM.toolResult({ id: "call_1", name: "lookup", result: { temperature: 72 } })
expect(LLM.assistant([call]).content).toEqual([call])
expect(LLM.toolMessage(result).content).toEqual([
{ type: "tool-result", id: "call_1", name: "lookup", result: { type: "json", value: { temperature: 72 } } },
])
})
test("extracts output text from responses", () => {
expect(LLM.outputText({ events: [{ type: "text-delta", text: "hi" }, { type: "request-finish", reason: "stop" }] })).toBe("hi")
})
})

View File

@@ -0,0 +1,223 @@
import { describe, expect, test } from "bun:test"
import { LLM, ProviderPatch } from "../src"
import { Model, Patch, context, plan } from "../src/patch"
const request = LLM.request({
id: "req_1",
model: LLM.model({
id: "devstral-small",
provider: "mistral",
protocol: "openai-chat",
}),
prompt: "hi",
})
describe("llm patch", () => {
test("constructors prefix ids and registry groups by phase", () => {
const prompt = Patch.prompt("mistral.test", {
reason: "test prompt",
when: Model.provider("mistral"),
apply: (request) => request,
})
const target = Patch.target("fake.test", {
reason: "test target",
apply: (draft: { value: number }) => draft,
})
const registry = Patch.registry([prompt, target])
expect(prompt.id).toBe("prompt.mistral.test")
expect(target.id).toBe("target.fake.test")
expect(registry.prompt).toEqual([prompt])
expect(registry.target.map((item) => item.id)).toEqual([target.id])
})
test("predicates compose", () => {
const ctx = context({ request })
expect(Model.provider("mistral").and(Model.protocol("openai-chat"))(ctx)).toBe(true)
expect(Model.provider("anthropic").or(Model.idIncludes("devstral"))(ctx)).toBe(true)
expect(Model.provider("mistral").not()(ctx)).toBe(false)
})
test("plan filters, sorts, applies, and traces deterministically", () => {
const patches = [
Patch.prompt("b", {
reason: "second alphabetically",
order: 1,
apply: (request) => ({ ...request, metadata: { ...request.metadata, b: true } }),
}),
Patch.prompt("a", {
reason: "first alphabetically",
order: 1,
apply: (request) => ({ ...request, metadata: { ...request.metadata, a: true } }),
}),
Patch.prompt("skip", {
reason: "not selected",
when: Model.provider("anthropic"),
apply: (request) => ({ ...request, metadata: { ...request.metadata, skip: true } }),
}),
]
const patchPlan = plan({ phase: "prompt", context: context({ request }), patches })
const output = patchPlan.apply(request)
expect(patchPlan.trace.map((item) => item.id)).toEqual(["prompt.a", "prompt.b"])
expect(output.metadata).toEqual({ a: true, b: true })
})
test("provider patch examples remove empty Anthropic content", () => {
const input = LLM.request({
id: "anthropic_empty",
model: LLM.model({ id: "claude-sonnet", provider: "anthropic", protocol: "anthropic-messages" }),
system: "",
messages: [
LLM.user([{ type: "text", text: "" }, { type: "text", text: "hello" }]),
LLM.assistant({ type: "reasoning", text: "" }),
],
})
const output = plan({
phase: "prompt",
context: context({ request: input }),
patches: [ProviderPatch.removeEmptyAnthropicContent],
}).apply(input)
expect(output.system).toEqual([])
expect(output.messages).toHaveLength(1)
expect(output.messages[0]?.content).toEqual([{ type: "text", text: "hello" }])
})
test("provider patch examples scrub model-specific tool call ids", () => {
const input = LLM.request({
id: "mistral_tool_ids",
model: LLM.model({ id: "devstral-small", provider: "mistral", protocol: "openai-chat" }),
messages: [
LLM.assistant([LLM.toolCall({ id: "call.bad/value-long", name: "lookup", input: {} })]),
LLM.toolMessage({ id: "call.bad/value-long", name: "lookup", result: "ok", resultType: "text" }),
],
})
const output = plan({
phase: "prompt",
context: context({ request: input }),
patches: [ProviderPatch.scrubMistralToolIds],
}).apply(input)
expect(output.messages[0]?.content[0]).toMatchObject({ type: "tool-call", id: "callbadva" })
expect(output.messages[1]?.content[0]).toMatchObject({ type: "tool-result", id: "callbadva" })
})
// Cache hint policy: mark first-2 system + last-2 messages with ephemeral
// cache hints, gated on `model.capabilities.cache.prompt`. Adapters
// (Anthropic, Bedrock) lower the hint to `cache_control` / `cachePoint`.
describe("cachePromptHints", () => {
const cacheCapableModel = (overrides: { provider: string; protocol: "anthropic-messages" | "bedrock-converse" }) =>
LLM.model({
id: "test-model",
provider: overrides.provider,
protocol: overrides.protocol,
capabilities: LLM.capabilities({ cache: { prompt: true, contentBlocks: true } }),
})
const runCachePatch = (input: ReturnType<typeof LLM.request>) =>
plan({
phase: "prompt",
context: context({ request: input }),
patches: [ProviderPatch.cachePromptHints],
}).apply(input)
test("marks first 2 system parts with an ephemeral cache hint", () => {
const input = LLM.request({
id: "cache_system",
model: cacheCapableModel({ provider: "anthropic", protocol: "anthropic-messages" }),
system: ["First", "Second", "Third"].map(LLM.system),
prompt: "hello",
})
const output = runCachePatch(input)
expect(output.system).toHaveLength(3)
expect(output.system[0]).toMatchObject({ text: "First", cache: { type: "ephemeral" } })
expect(output.system[1]).toMatchObject({ text: "Second", cache: { type: "ephemeral" } })
expect(output.system[2]).toMatchObject({ text: "Third" })
expect(output.system[2]?.cache).toBeUndefined()
})
test("marks the last text part of the last 2 messages on cache-capable models", () => {
const input = LLM.request({
id: "cache_messages",
model: cacheCapableModel({ provider: "anthropic", protocol: "anthropic-messages" }),
messages: [
LLM.user([{ type: "text", text: "m0" }]),
LLM.user([{ type: "text", text: "m1" }]),
LLM.user([{ type: "text", text: "m2" }]),
],
})
const output = runCachePatch(input)
expect(output.messages).toHaveLength(3)
// First message untouched.
const first = output.messages[0].content[0]
expect(first).toMatchObject({ type: "text", text: "m0" })
expect("cache" in first ? first.cache : undefined).toBeUndefined()
// Last 2 messages: cache on the (only) text part.
expect(output.messages[1].content[0]).toMatchObject({ type: "text", text: "m1", cache: { type: "ephemeral" } })
expect(output.messages[2].content[0]).toMatchObject({ type: "text", text: "m2", cache: { type: "ephemeral" } })
})
test("targets the last text part when a message has trailing non-text content", () => {
const input = LLM.request({
id: "cache_trailing_tool",
model: cacheCapableModel({ provider: "anthropic", protocol: "anthropic-messages" }),
messages: [
LLM.assistant([
{ type: "text", text: "calling tool" },
LLM.toolCall({ id: "call_1", name: "lookup", input: { q: "weather" } }),
]),
],
})
const output = runCachePatch(input)
const content = output.messages[0].content
expect(content[0]).toMatchObject({ type: "text", text: "calling tool", cache: { type: "ephemeral" } })
expect(content[1]).toMatchObject({ type: "tool-call", id: "call_1" })
})
test("returns the message unchanged when it has no text part", () => {
const input = LLM.request({
id: "cache_no_text",
model: cacheCapableModel({ provider: "anthropic", protocol: "anthropic-messages" }),
messages: [
LLM.toolMessage({ id: "call_1", name: "lookup", result: { ok: true } }),
],
})
const output = runCachePatch(input)
expect(output.messages[0].content[0]).toMatchObject({ type: "tool-result", id: "call_1" })
// No text part to mark, so the content array is identity-equal — the
// `findLastIndex === -1` short-circuit avoids reallocating.
expect(output.messages[0].content).toBe(input.messages[0].content)
})
test("is a no-op when the model does not advertise prompt caching", () => {
const input = LLM.request({
id: "cache_no_capability",
model: LLM.model({
id: "gpt-5",
provider: "openai",
protocol: "openai-responses",
// capabilities.cache.prompt defaults to false
}),
system: ["A", "B"].map(LLM.system),
messages: [LLM.user([{ type: "text", text: "hi" }])],
})
const output = runCachePatch(input)
// Every text part should be free of cache hints.
for (const part of output.system) expect(part.cache).toBeUndefined()
for (const message of output.messages) {
for (const part of message.content) {
if (part.type === "text") expect(part.cache).toBeUndefined()
}
}
})
})
})

View File

@@ -0,0 +1,51 @@
import { describe, expect, test } from "bun:test"
import { Azure, GitHubCopilot, OpenAI, OpenAICompatibleFamily, ProviderResolver } from "../src"
describe("provider resolver", () => {
test("fixed providers resolve protocol and auth defaults", () => {
expect(OpenAI.resolver.resolve(ProviderResolver.input("gpt-5", "openai", {}))).toMatchObject({
provider: "openai",
protocol: "openai-responses",
auth: "key",
})
})
test("dynamic providers can select protocols from model metadata", () => {
expect(GitHubCopilot.resolver.resolve(ProviderResolver.input("gpt-5", "github-copilot", {}))).toMatchObject({
provider: "github-copilot",
protocol: "openai-responses",
auth: "key",
})
expect(GitHubCopilot.resolver.resolve(ProviderResolver.input("gpt-5-mini", "github-copilot", {}))).toMatchObject({
provider: "github-copilot",
protocol: "openai-chat",
auth: "key",
})
})
test("OpenAI-compatible families carry provider-specific defaults", () => {
expect(OpenAICompatibleFamily.resolver.resolve(ProviderResolver.input("llama", "togetherai", {}))).toMatchObject({
provider: "togetherai",
protocol: "openai-compatible-chat",
baseURL: "https://api.together.xyz/v1",
auth: "key",
})
})
test("Azure resolves resource URLs and API-version query params", () => {
expect(
Azure.resolver.resolve(
ProviderResolver.input("gpt-5", "azure", { resourceName: "opencode-test", apiVersion: "2025-04-01-preview" }),
),
).toMatchObject({
provider: "azure",
protocol: "openai-responses",
baseURL: "https://opencode-test.openai.azure.com/openai/v1",
queryParams: { "api-version": "2025-04-01-preview" },
})
expect(Azure.resolver.resolve(ProviderResolver.input("gpt-4.1", "azure", { useCompletionUrls: true }))).toMatchObject({
protocol: "openai-chat",
queryParams: { "api-version": "v1" },
})
})
})

View File

@@ -0,0 +1,46 @@
import { describe, expect } from "bun:test"
import { Effect } from "effect"
import { LLM } from "../../src"
import { LLMClient } from "../../src/adapter"
import { AnthropicMessages } from "../../src/provider/anthropic-messages"
import { expectFinish, expectWeatherToolCall, textRequest, weatherToolRequest } from "../recorded-scenarios"
import { recordedTests } from "../recorded-test"
const model = AnthropicMessages.model({
id: "claude-haiku-4-5-20251001",
apiKey: process.env.ANTHROPIC_API_KEY ?? "fixture",
})
const request = textRequest({ id: "recorded_anthropic_messages_text", model })
const toolRequest = weatherToolRequest({ id: "recorded_anthropic_messages_tool_call", model })
const recorded = recordedTests({
prefix: "anthropic-messages",
provider: "anthropic",
protocol: "anthropic-messages",
requires: ["ANTHROPIC_API_KEY"],
options: { requestHeaders: ["content-type", "anthropic-version"] },
})
const anthropic = LLMClient.make({ adapters: [AnthropicMessages.adapter] })
describe("Anthropic Messages recorded", () => {
recorded.effect("streams text", () =>
Effect.gen(function* () {
const response = yield* anthropic.generate(request)
expect(LLM.outputText(response)).toBe("Hello!")
expect(response.usage?.totalTokens).toBeGreaterThan(0)
expectFinish(response.events, "stop")
}),
)
recorded.effect.with("streams tool call", { tags: ["tool"] }, () =>
Effect.gen(function* () {
const response = yield* anthropic.generate(toolRequest)
expect(response.events.some((event) => event.type === "tool-input-delta")).toBe(true)
expectWeatherToolCall(response)
expectFinish(response.events, "tool-calls")
}),
)
})

View File

@@ -0,0 +1,375 @@
import { describe, expect } from "bun:test"
import { Effect, Layer } from "effect"
import { CacheHint, LLM, ProviderRequestError } from "../../src"
import { LLMClient } from "../../src/adapter"
import { AnthropicMessages } from "../../src/provider/anthropic-messages"
import { testEffect } from "../lib/effect"
import { fixedResponse } from "../lib/http"
import { sseEvents } from "../lib/sse"
const model = AnthropicMessages.model({
id: "claude-sonnet-4-5",
baseURL: "https://api.anthropic.test/v1/",
headers: { "x-api-key": "test" },
})
const request = LLM.request({
id: "req_1",
model,
system: { type: "text", text: "You are concise.", cache: new CacheHint({ type: "ephemeral" }) },
prompt: "Say hello.",
generation: { maxTokens: 20, temperature: 0 },
})
const it = testEffect(Layer.empty)
describe("Anthropic Messages adapter", () => {
it.effect("prepares Anthropic Messages target", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] }).prepare(request)
expect(prepared.target).toEqual({
model: "claude-sonnet-4-5",
system: [{ type: "text", text: "You are concise.", cache_control: { type: "ephemeral" } }],
messages: [{ role: "user", content: [{ type: "text", text: "Say hello." }] }],
stream: true,
max_tokens: 20,
temperature: 0,
})
}),
)
it.effect("prepares tool call and tool result messages", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] }).prepare(
LLM.request({
id: "req_tool_result",
model,
messages: [
LLM.user("What is the weather?"),
LLM.assistant([LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })]),
LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } }),
],
}),
)
expect(prepared.target).toEqual({
model: "claude-sonnet-4-5",
messages: [
{ role: "user", content: [{ type: "text", text: "What is the weather?" }] },
{ role: "assistant", content: [{ type: "tool_use", id: "call_1", name: "lookup", input: { query: "weather" } }] },
{ role: "user", content: [{ type: "tool_result", tool_use_id: "call_1", content: '{"forecast":"sunny"}' }] },
],
stream: true,
max_tokens: 4096,
})
}),
)
it.effect("parses text, reasoning, and usage stream fixtures", () =>
Effect.gen(function* () {
const body = sseEvents(
{ type: "message_start", message: { usage: { input_tokens: 5, cache_read_input_tokens: 1 } } },
{ type: "content_block_start", index: 0, content_block: { type: "text", text: "" } },
{ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Hello" } },
{ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "!" } },
{ type: "content_block_stop", index: 0 },
{ type: "content_block_start", index: 1, content_block: { type: "thinking", thinking: "" } },
{ type: "content_block_delta", index: 1, delta: { type: "thinking_delta", thinking: "thinking" } },
{ type: "content_block_stop", index: 1 },
{ type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 2 } },
{ type: "message_stop" },
)
const response = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
.generate(request)
.pipe(Effect.provide(fixedResponse(body)))
expect(LLM.outputText(response)).toBe("Hello!")
expect(LLM.outputReasoning(response)).toBe("thinking")
expect(LLM.outputUsage(response)).toMatchObject({
inputTokens: 5,
outputTokens: 2,
cacheReadInputTokens: 1,
totalTokens: 7,
})
expect(response.events.at(-1)).toMatchObject({ type: "request-finish", reason: "stop" })
}),
)
it.effect("round-trips streamed thinking signatures", () =>
Effect.gen(function* () {
const body = sseEvents(
{ type: "message_start", message: { usage: { input_tokens: 5 } } },
{ type: "content_block_start", index: 0, content_block: { type: "thinking", thinking: "" } },
{ type: "content_block_delta", index: 0, delta: { type: "thinking_delta", thinking: "thinking" } },
{ type: "content_block_delta", index: 0, delta: { type: "signature_delta", signature: "sig_123" } },
{ type: "content_block_stop", index: 0 },
{ type: "message_delta", delta: { stop_reason: "tool_use" }, usage: { output_tokens: 1 } },
)
const response = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
.generate(request)
.pipe(Effect.provide(fixedResponse(body)))
expect(response.events).toContainEqual({ type: "reasoning-delta", text: "", encrypted: "sig_123" })
const prepared = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] }).prepare(
LLM.request({
id: "req_signed_thinking",
model,
messages: [LLM.assistant({ type: "reasoning", text: "thinking", encrypted: "sig_123" })],
}),
)
expect(prepared.target).toMatchObject({
messages: [{ role: "assistant", content: [{ type: "thinking", thinking: "thinking", signature: "sig_123" }] }],
})
}),
)
it.effect("assembles streamed tool call input", () =>
Effect.gen(function* () {
const body = sseEvents(
{ type: "message_start", message: { usage: { input_tokens: 5 } } },
{ type: "content_block_start", index: 0, content_block: { type: "tool_use", id: "call_1", name: "lookup" } },
{ type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"query"' } },
{ type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: ':"weather"}' } },
{ type: "content_block_stop", index: 0 },
{ type: "message_delta", delta: { stop_reason: "tool_use" }, usage: { output_tokens: 1 } },
)
const response = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
.generate(
LLM.updateRequest(request, {
tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }],
}),
)
.pipe(Effect.provide(fixedResponse(body)))
expect(LLM.outputToolCalls(response)).toEqual([{ type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } }])
expect(response.events).toEqual([
{ type: "tool-input-delta", id: "call_1", name: "lookup", text: '{"query"' },
{ type: "tool-input-delta", id: "call_1", name: "lookup", text: ':"weather"}' },
{ type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } },
{
type: "request-finish",
reason: "tool-calls",
usage: { inputTokens: 5, outputTokens: 1, totalTokens: 6, native: { input_tokens: 5, output_tokens: 1 } },
},
])
}),
)
it.effect("emits provider-error events for mid-stream provider errors", () =>
Effect.gen(function* () {
const response = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
.generate(request)
.pipe(
Effect.provide(
fixedResponse(sseEvents({ type: "error", error: { type: "overloaded_error", message: "Overloaded" } })),
),
)
expect(response.events).toEqual([{ type: "provider-error", message: "Overloaded" }])
}),
)
it.effect("fails HTTP provider errors before stream parsing", () =>
Effect.gen(function* () {
const error = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
.generate(request)
.pipe(
Effect.provide(
fixedResponse('{"type":"error","error":{"type":"invalid_request_error","message":"Bad request"}}', {
status: 400,
headers: { "content-type": "application/json" },
}),
),
Effect.flip,
)
expect(error).toBeInstanceOf(ProviderRequestError)
expect(error).toMatchObject({ status: 400 })
expect(error.message).toContain("HTTP 400")
}),
)
it.effect("decodes server_tool_use + web_search_tool_result as provider-executed events", () =>
Effect.gen(function* () {
const body = sseEvents(
{ type: "message_start", message: { usage: { input_tokens: 5 } } },
{ type: "content_block_start", index: 0, content_block: { type: "server_tool_use", id: "srvtoolu_abc", name: "web_search" } },
{ type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"query":"effect 4"}' } },
{ type: "content_block_stop", index: 0 },
{
type: "content_block_start",
index: 1,
content_block: {
type: "web_search_tool_result",
tool_use_id: "srvtoolu_abc",
content: [{ type: "web_search_result", url: "https://example.com", title: "Example" }],
},
},
{ type: "content_block_stop", index: 1 },
{ type: "content_block_start", index: 2, content_block: { type: "text", text: "" } },
{ type: "content_block_delta", index: 2, delta: { type: "text_delta", text: "Found it." } },
{ type: "content_block_stop", index: 2 },
{ type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 8 } },
)
const response = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
.generate(
LLM.updateRequest(request, {
tools: [{ name: "web_search", description: "Web search", inputSchema: { type: "object" } }],
}),
)
.pipe(Effect.provide(fixedResponse(body)))
const toolCall = response.events.find((event) => event.type === "tool-call")
expect(toolCall).toEqual({
type: "tool-call",
id: "srvtoolu_abc",
name: "web_search",
input: { query: "effect 4" },
providerExecuted: true,
})
const toolResult = response.events.find((event) => event.type === "tool-result")
expect(toolResult).toEqual({
type: "tool-result",
id: "srvtoolu_abc",
name: "web_search",
result: { type: "json", value: [{ type: "web_search_result", url: "https://example.com", title: "Example" }] },
providerExecuted: true,
})
expect(LLM.outputText(response)).toBe("Found it.")
expect(response.events.at(-1)).toMatchObject({ type: "request-finish", reason: "stop" })
}),
)
it.effect("decodes web_search_tool_result_error as provider-executed error result", () =>
Effect.gen(function* () {
const body = sseEvents(
{ type: "message_start", message: { usage: { input_tokens: 5 } } },
{ type: "content_block_start", index: 0, content_block: { type: "server_tool_use", id: "srvtoolu_x", name: "web_search" } },
{ type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"query":"q"}' } },
{ type: "content_block_stop", index: 0 },
{
type: "content_block_start",
index: 1,
content_block: {
type: "web_search_tool_result",
tool_use_id: "srvtoolu_x",
content: { type: "web_search_tool_result_error", error_code: "max_uses_exceeded" },
},
},
{ type: "content_block_stop", index: 1 },
{ type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 1 } },
)
const response = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
.generate(
LLM.updateRequest(request, {
tools: [{ name: "web_search", description: "Web search", inputSchema: { type: "object" } }],
}),
)
.pipe(Effect.provide(fixedResponse(body)))
const toolResult = response.events.find((event) => event.type === "tool-result")
expect(toolResult).toMatchObject({
type: "tool-result",
id: "srvtoolu_x",
name: "web_search",
result: { type: "error" },
providerExecuted: true,
})
}),
)
it.effect("round-trips provider-executed assistant content into server tool blocks", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] }).prepare(
LLM.request({
id: "req_round_trip",
model,
messages: [
LLM.user("Search for something."),
LLM.assistant([
{
type: "tool-call",
id: "srvtoolu_abc",
name: "web_search",
input: { query: "effect 4" },
providerExecuted: true,
},
{
type: "tool-result",
id: "srvtoolu_abc",
name: "web_search",
result: { type: "json", value: [{ url: "https://example.com" }] },
providerExecuted: true,
},
{ type: "text", text: "Found it." },
]),
LLM.user("Thanks."),
],
}),
)
expect(prepared.target).toMatchObject({
messages: [
{ role: "user", content: [{ type: "text", text: "Search for something." }] },
{
role: "assistant",
content: [
{ type: "server_tool_use", id: "srvtoolu_abc", name: "web_search", input: { query: "effect 4" } },
{
type: "web_search_tool_result",
tool_use_id: "srvtoolu_abc",
content: [{ url: "https://example.com" }],
},
{ type: "text", text: "Found it." },
],
},
{ role: "user", content: [{ type: "text", text: "Thanks." }] },
],
})
}),
)
it.effect("rejects round-trip for unknown server tool names", () =>
Effect.gen(function* () {
const error = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
.prepare(
LLM.request({
id: "req_unknown_server_tool",
model,
messages: [
LLM.assistant([
{
type: "tool-result",
id: "srvtoolu_abc",
name: "future_server_tool",
result: { type: "json", value: {} },
providerExecuted: true,
},
]),
],
}),
)
.pipe(Effect.flip)
expect(error.message).toContain("future_server_tool")
}),
)
it.effect("rejects unsupported user media content", () =>
Effect.gen(function* () {
const error = yield* LLMClient.make({ adapters: [AnthropicMessages.adapter] })
.prepare(
LLM.request({
id: "req_media",
model,
messages: [LLM.user({ type: "media", mediaType: "image/png", data: "AAECAw==" })],
}),
)
.pipe(Effect.flip)
expect(error.message).toContain("Anthropic Messages user messages only support text content for now")
}),
)
})

View File

@@ -0,0 +1,533 @@
import { EventStreamCodec } from "@smithy/eventstream-codec"
import { fromUtf8, toUtf8 } from "@smithy/util-utf8"
import { describe, expect } from "bun:test"
import { Effect, Layer } from "effect"
import { CacheHint, LLM } from "../../src"
import { LLMClient } from "../../src/adapter"
import { BedrockConverse } from "../../src/provider/bedrock-converse"
import { testEffect } from "../lib/effect"
import { fixedResponse } from "../lib/http"
import { expectFinish, expectWeatherToolCall, weatherTool } from "../recorded-scenarios"
import { recordedTests } from "../recorded-test"
const codec = new EventStreamCodec(toUtf8, fromUtf8)
const utf8Encoder = new TextEncoder()
// Build a single AWS event-stream frame for a Converse stream event. Each
// frame carries `:message-type=event` + `:event-type=<name>` headers and a
// JSON payload body.
const eventFrame = (type: string, payload: object) =>
codec.encode({
headers: {
":message-type": { type: "string", value: "event" },
":event-type": { type: "string", value: type },
":content-type": { type: "string", value: "application/json" },
},
body: utf8Encoder.encode(JSON.stringify(payload)),
})
const concat = (frames: ReadonlyArray<Uint8Array>) => {
const total = frames.reduce((sum, frame) => sum + frame.length, 0)
const out = new Uint8Array(total)
let offset = 0
for (const frame of frames) {
out.set(frame, offset)
offset += frame.length
}
return out
}
const eventStreamBody = (...payloads: ReadonlyArray<readonly [string, object]>) =>
concat(payloads.map(([type, payload]) => eventFrame(type, payload)))
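// Round-trip sketch (assumes the codec's matching `decode` API): each frame in
// the concatenated body decodes back into its headers plus JSON payload, which
// is how the parser under test recovers events such as `messageStop`.
//
//   const frame = eventFrame("messageStop", { stopReason: "end_turn" })
//   const decoded = codec.decode(frame)
//   // decoded.headers[":event-type"].value === "messageStop"
//   // JSON.parse(toUtf8(decoded.body)).stopReason === "end_turn"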
// Override the default SSE content-type with the binary event-stream type so
// the cassette layer treats the body as bytes when recording.
const fixedBytes = (bytes: Uint8Array) =>
fixedResponse(bytes, { headers: { "content-type": "application/vnd.amazon.eventstream" } })
const model = BedrockConverse.model({
id: "anthropic.claude-3-5-sonnet-20240620-v1:0",
baseURL: "https://bedrock-runtime.test",
apiKey: "test-bearer",
})
const baseRequest = LLM.request({
id: "req_1",
model,
system: "You are concise.",
prompt: "Say hello.",
generation: { maxTokens: 64, temperature: 0 },
})
const it = testEffect(Layer.empty)
describe("Bedrock Converse adapter", () => {
it.effect("prepares Converse target with system, inference config, and messages", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(baseRequest)
expect(prepared.target).toEqual({
modelId: "anthropic.claude-3-5-sonnet-20240620-v1:0",
system: [{ text: "You are concise." }],
messages: [{ role: "user", content: [{ text: "Say hello." }] }],
inferenceConfig: { maxTokens: 64, temperature: 0 },
})
}),
)
it.effect("prepares tool config with toolSpec and toolChoice", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(
LLM.updateRequest(baseRequest, {
tools: [
{
name: "lookup",
description: "Lookup data",
inputSchema: { type: "object", properties: { query: { type: "string" } }, required: ["query"] },
},
],
toolChoice: LLM.toolChoice({ type: "required" }),
}),
)
expect(prepared.target).toMatchObject({
toolConfig: {
tools: [
{
toolSpec: {
name: "lookup",
description: "Lookup data",
inputSchema: {
json: { type: "object", properties: { query: { type: "string" } }, required: ["query"] },
},
},
},
],
toolChoice: { any: {} },
},
})
}),
)
it.effect("lowers assistant tool-call + tool-result message history", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(
LLM.request({
id: "req_history",
model,
messages: [
LLM.user("What is the weather?"),
LLM.assistant([LLM.toolCall({ id: "tool_1", name: "lookup", input: { query: "weather" } })]),
LLM.toolMessage({ id: "tool_1", name: "lookup", result: { forecast: "sunny" } }),
],
}),
)
expect(prepared.target).toMatchObject({
messages: [
{ role: "user", content: [{ text: "What is the weather?" }] },
{
role: "assistant",
content: [{ toolUse: { toolUseId: "tool_1", name: "lookup", input: { query: "weather" } } }],
},
{
role: "user",
content: [
{
toolResult: {
toolUseId: "tool_1",
content: [{ json: { forecast: "sunny" } }],
status: "success",
},
},
],
},
],
})
}),
)
it.effect("decodes text-delta + messageStop + metadata usage from binary event stream", () =>
Effect.gen(function* () {
const body = eventStreamBody(
["messageStart", { role: "assistant" }],
["contentBlockDelta", { contentBlockIndex: 0, delta: { text: "Hello" } }],
["contentBlockDelta", { contentBlockIndex: 0, delta: { text: "!" } }],
["contentBlockStop", { contentBlockIndex: 0 }],
["messageStop", { stopReason: "end_turn" }],
["metadata", { usage: { inputTokens: 5, outputTokens: 2, totalTokens: 7 } }],
)
const response = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] })
.generate(baseRequest)
.pipe(Effect.provide(fixedBytes(body)))
expect(LLM.outputText(response)).toBe("Hello!")
const finishes = response.events.filter((event) => event.type === "request-finish")
// Bedrock splits the finish across `messageStop` (carries reason) and
// `metadata` (carries usage). We consolidate them into a single
// terminal `request-finish` event with both.
expect(finishes).toHaveLength(1)
expect(finishes[0]).toMatchObject({ type: "request-finish", reason: "stop" })
expect(LLM.outputUsage(response)).toMatchObject({
inputTokens: 5,
outputTokens: 2,
totalTokens: 7,
})
}),
)
it.effect("assembles streamed tool call input", () =>
Effect.gen(function* () {
const body = eventStreamBody(
["messageStart", { role: "assistant" }],
[
"contentBlockStart",
{
contentBlockIndex: 0,
start: { toolUse: { toolUseId: "tool_1", name: "lookup" } },
},
],
["contentBlockDelta", { contentBlockIndex: 0, delta: { toolUse: { input: '{"query"' } } }],
["contentBlockDelta", { contentBlockIndex: 0, delta: { toolUse: { input: ':"weather"}' } } }],
["contentBlockStop", { contentBlockIndex: 0 }],
["messageStop", { stopReason: "tool_use" }],
)
const response = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] })
.generate(
LLM.updateRequest(baseRequest, {
tools: [{ name: "lookup", description: "Lookup", inputSchema: { type: "object" } }],
}),
)
.pipe(Effect.provide(fixedBytes(body)))
expect(LLM.outputToolCalls(response)).toEqual([
{ type: "tool-call", id: "tool_1", name: "lookup", input: { query: "weather" } },
])
const events = response.events.filter((event) => event.type === "tool-input-delta")
expect(events).toEqual([
{ type: "tool-input-delta", id: "tool_1", name: "lookup", text: '{"query"' },
{ type: "tool-input-delta", id: "tool_1", name: "lookup", text: ':"weather"}' },
])
expect(response.events.at(-1)).toMatchObject({ type: "request-finish", reason: "tool-calls" })
}),
)
it.effect("decodes reasoning deltas", () =>
Effect.gen(function* () {
const body = eventStreamBody(
["messageStart", { role: "assistant" }],
[
"contentBlockDelta",
{ contentBlockIndex: 0, delta: { reasoningContent: { text: "Let me think." } } },
],
["contentBlockStop", { contentBlockIndex: 0 }],
["messageStop", { stopReason: "end_turn" }],
)
const response = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] })
.generate(baseRequest)
.pipe(Effect.provide(fixedBytes(body)))
expect(LLM.outputReasoning(response)).toBe("Let me think.")
}),
)
it.effect("emits provider-error for throttlingException", () =>
Effect.gen(function* () {
const body = eventStreamBody(
["messageStart", { role: "assistant" }],
["throttlingException", { message: "Slow down" }],
)
const response = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] })
.generate(baseRequest)
.pipe(Effect.provide(fixedBytes(body)))
expect(response.events.find((event) => event.type === "provider-error")).toEqual({
type: "provider-error",
message: "Slow down",
retryable: true,
})
}),
)
it.effect("rejects requests with no auth path", () =>
Effect.gen(function* () {
const unsignedModel = BedrockConverse.model({
id: "anthropic.claude-3-5-sonnet-20240620-v1:0",
baseURL: "https://bedrock-runtime.test",
})
const error = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] })
.generate(LLM.updateRequest(baseRequest, { model: unsignedModel }))
.pipe(Effect.provide(fixedBytes(eventStreamBody(["messageStop", { stopReason: "end_turn" }]))), Effect.flip)
expect(error.message).toContain("Bedrock Converse requires either model.apiKey")
}),
)
it.effect("signs requests with SigV4 when AWS credentials are provided (deterministic plumbing check)", () =>
Effect.gen(function* () {
const signed = BedrockConverse.model({
id: "anthropic.claude-3-5-sonnet-20240620-v1:0",
baseURL: "https://bedrock-runtime.test",
credentials: {
region: "us-east-1",
accessKeyId: "AKIAIOSFODNN7EXAMPLE",
secretAccessKey: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
},
})
const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(
LLM.updateRequest(baseRequest, { model: signed }),
)
expect(prepared.adapter).toBe("bedrock-converse")
// The prepare phase doesn't sign — toHttp does. We assert the credential
// is plumbed onto the model native field for the signer to find.
expect(prepared.model.native).toMatchObject({
aws_credentials: { region: "us-east-1", accessKeyId: "AKIAIOSFODNN7EXAMPLE" },
aws_region: "us-east-1",
})
}),
)
it.effect("emits cachePoint markers after system, user-text, and assistant-text with cache hints", () =>
Effect.gen(function* () {
const cache = new CacheHint({ type: "ephemeral" })
const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(
LLM.request({
id: "req_cache",
model,
system: [{ type: "text", text: "System prefix.", cache }],
messages: [
LLM.user([{ type: "text", text: "User prefix.", cache }]),
LLM.assistant([{ type: "text", text: "Assistant prefix.", cache }]),
],
generation: { maxTokens: 16, temperature: 0 },
}),
)
expect(prepared.target).toMatchObject({
// System: text block followed by cachePoint marker.
system: [{ text: "System prefix." }, { cachePoint: { type: "default" } }],
messages: [
{
role: "user",
content: [{ text: "User prefix." }, { cachePoint: { type: "default" } }],
},
{
role: "assistant",
content: [{ text: "Assistant prefix." }, { cachePoint: { type: "default" } }],
},
],
})
}),
)
it.effect("does not emit cachePoint when no cache hint is set", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(baseRequest)
expect(prepared.target).toMatchObject({
system: [{ text: "You are concise." }],
messages: [{ role: "user", content: [{ text: "Say hello." }] }],
})
}),
)
it.effect("lowers image media into Bedrock image blocks", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(
LLM.request({
id: "req_image",
model,
messages: [
LLM.user([
{ type: "text", text: "What is in this image?" },
{ type: "media", mediaType: "image/png", data: "AAAA" },
{ type: "media", mediaType: "image/jpeg", data: "BBBB" },
{ type: "media", mediaType: "image/jpg", data: "CCCC" },
{ type: "media", mediaType: "image/webp", data: "DDDD" },
]),
],
}),
)
expect(prepared.target).toMatchObject({
messages: [
{
role: "user",
content: [
{ text: "What is in this image?" },
{ image: { format: "png", source: { bytes: "AAAA" } } },
{ image: { format: "jpeg", source: { bytes: "BBBB" } } },
// image/jpg is a non-standard alias; we map it to jpeg.
{ image: { format: "jpeg", source: { bytes: "CCCC" } } },
{ image: { format: "webp", source: { bytes: "DDDD" } } },
],
},
],
})
}),
)
it.effect("base64-encodes Uint8Array image bytes", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(
LLM.request({
id: "req_image_bytes",
model,
messages: [
LLM.user([
{ type: "media", mediaType: "image/png", data: new Uint8Array([1, 2, 3, 4, 5]) },
]),
],
}),
)
// Buffer.from([1,2,3,4,5]).toString("base64") === "AQIDBAU="
expect(prepared.target).toMatchObject({
messages: [
{
role: "user",
content: [{ image: { format: "png", source: { bytes: "AQIDBAU=" } } }],
},
],
})
}),
)
it.effect("lowers document media into Bedrock document blocks with format and name", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] }).prepare(
LLM.request({
id: "req_doc",
model,
messages: [
LLM.user([
{ type: "media", mediaType: "application/pdf", data: "PDFDATA", filename: "report.pdf" },
{ type: "media", mediaType: "text/csv", data: "CSVDATA" },
]),
],
}),
)
expect(prepared.target).toMatchObject({
messages: [
{
role: "user",
content: [
// Filename round-trips when supplied.
{ document: { format: "pdf", name: "report.pdf", source: { bytes: "PDFDATA" } } },
// Falls back to a stable placeholder when filename is missing.
{ document: { format: "csv", name: "document.csv", source: { bytes: "CSVDATA" } } },
],
},
],
})
}),
)
it.effect("rejects unsupported image media types", () =>
Effect.gen(function* () {
const error = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] })
.prepare(
LLM.request({
id: "req_bad_image",
model,
messages: [LLM.user([{ type: "media", mediaType: "image/svg+xml", data: "x" }])],
}),
)
.pipe(Effect.flip)
expect(error.message).toContain("Bedrock Converse does not support image media type image/svg+xml")
}),
)
it.effect("rejects unsupported document media types", () =>
Effect.gen(function* () {
const error = yield* LLMClient.make({ adapters: [BedrockConverse.adapter] })
.prepare(
LLM.request({
id: "req_bad_doc",
model,
messages: [
LLM.user([{ type: "media", mediaType: "application/x-tar", data: "x", filename: "a.tar" }]),
],
}),
)
.pipe(Effect.flip)
expect(error.message).toContain("Bedrock Converse does not support document media type application/x-tar")
}),
)
})
// Live recorded integration tests. Run with `RECORD=true AWS_ACCESS_KEY_ID=...
// AWS_SECRET_ACCESS_KEY=... [AWS_SESSION_TOKEN=...] bun run test ...` to refresh
// cassettes; replay is the default and works without credentials.
//
// Region is pinned to us-east-1 in tests so the request URL is stable across
// machines on replay. If you need to record from a different region (e.g. your
// account has access elsewhere), pass `BEDROCK_RECORDING_REGION=eu-west-1` —
// but then commit the resulting cassette and others should record from the
// same region too.
const RECORDING_REGION = process.env.BEDROCK_RECORDING_REGION ?? "us-east-1"
const recordedModel = () =>
BedrockConverse.model({
// Most newer Anthropic models on Bedrock require a cross-region inference
// profile (`us.` prefix). Nova does not require an Anthropic use-case form
// and is on-demand-throughput accessible by default for most accounts.
id: process.env.BEDROCK_MODEL_ID ?? "us.amazon.nova-micro-v1:0",
credentials: {
region: RECORDING_REGION,
accessKeyId: process.env.AWS_ACCESS_KEY_ID ?? "fixture",
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY ?? "fixture",
sessionToken: process.env.AWS_SESSION_TOKEN,
},
})
const recorded = recordedTests({
prefix: "bedrock-converse",
provider: "amazon-bedrock",
protocol: "bedrock-converse",
requires: ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
})
describe("Bedrock Converse recorded", () => {
recorded.effect("streams text", () =>
Effect.gen(function* () {
const llm = LLMClient.make({ adapters: [BedrockConverse.adapter] })
const response = yield* llm.generate(
LLM.request({
id: "recorded_bedrock_text",
model: recordedModel(),
system: "Reply with the single word 'Hello'.",
prompt: "Say hello.",
generation: { maxTokens: 16, temperature: 0 },
}),
)
expect(LLM.outputText(response)).toMatch(/hello/i)
expect(response.events.at(-1)).toMatchObject({ type: "request-finish" })
}),
)
recorded.effect.with("streams a tool call", { tags: ["tool"] }, () =>
Effect.gen(function* () {
const llm = LLMClient.make({ adapters: [BedrockConverse.adapter] })
const response = yield* llm.generate(
LLM.request({
id: "recorded_bedrock_tool_call",
model: recordedModel(),
system: "Call tools exactly as requested.",
prompt: "Call get_weather with city exactly Paris.",
tools: [weatherTool],
toolChoice: LLM.toolChoice(weatherTool),
generation: { maxTokens: 80, temperature: 0 },
}),
)
expect(response.events.some((event) => event.type === "tool-input-delta")).toBe(true)
expectWeatherToolCall(response)
expectFinish(response.events, "tool-calls")
}),
)
})

View File

@@ -0,0 +1,44 @@
import { describe, expect } from "bun:test"
import { Effect } from "effect"
import { LLM } from "../../src"
import { LLMClient } from "../../src/adapter"
import { Gemini } from "../../src/provider/gemini"
import { expectFinish, expectWeatherToolCall, textRequest, weatherToolRequest } from "../recorded-scenarios"
import { recordedTests } from "../recorded-test"
const model = Gemini.model({
id: "gemini-2.5-flash",
apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY ?? "fixture",
})
const request = textRequest({ id: "recorded_gemini_text", model, maxTokens: 80 })
const toolRequest = weatherToolRequest({ id: "recorded_gemini_tool_call", model })
const recorded = recordedTests({
prefix: "gemini",
provider: "google",
protocol: "gemini",
requires: ["GOOGLE_GENERATIVE_AI_API_KEY"],
})
const gemini = LLMClient.make({ adapters: [Gemini.adapter] })
describe("Gemini recorded", () => {
recorded.effect("streams text", () =>
Effect.gen(function* () {
const response = yield* gemini.generate(request)
expect(LLM.outputText(response)).toMatch(/^Hello!?$/)
expect(response.usage?.totalTokens).toBeGreaterThan(0)
expectFinish(response.events, "stop")
}),
)
recorded.effect.with("streams tool call", { tags: ["tool"] }, () =>
Effect.gen(function* () {
const response = yield* gemini.generate(toolRequest)
expectWeatherToolCall(response)
expectFinish(response.events, "tool-calls")
}),
)
})

View File

@@ -0,0 +1,414 @@
import { describe, expect } from "bun:test"
import { Effect, Layer } from "effect"
import { LLM, ProviderChunkError } from "../../src"
import { LLMClient } from "../../src/adapter"
import { Gemini } from "../../src/provider/gemini"
import { testEffect } from "../lib/effect"
import { fixedResponse } from "../lib/http"
import { sseEvents, sseRaw } from "../lib/sse"
const model = Gemini.model({
id: "gemini-2.5-flash",
baseURL: "https://generativelanguage.test/v1beta/",
headers: { "x-goog-api-key": "test" },
})
const request = LLM.request({
id: "req_1",
model,
system: "You are concise.",
prompt: "Say hello.",
generation: { maxTokens: 20, temperature: 0 },
})
const it = testEffect(Layer.empty)
describe("Gemini adapter", () => {
it.effect("prepares Gemini target", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [Gemini.adapter] }).prepare(request)
expect(prepared.target).toEqual({
contents: [{ role: "user", parts: [{ text: "Say hello." }] }],
systemInstruction: { parts: [{ text: "You are concise." }] },
generationConfig: { maxOutputTokens: 20, temperature: 0 },
})
}),
)
it.effect("prepares multimodal user input and tool history", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [Gemini.adapter] }).prepare(
LLM.request({
id: "req_tool_result",
model,
tools: [{
name: "lookup",
description: "Lookup data",
inputSchema: { type: "object", properties: { query: { type: "string" } } },
}],
toolChoice: { type: "tool", name: "lookup" },
messages: [
LLM.user([
{ type: "text", text: "What is in this image?" },
{ type: "media", mediaType: "image/png", data: "AAECAw==" },
]),
LLM.assistant([LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })]),
LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } }),
],
}),
)
expect(prepared.target).toEqual({
contents: [
{
role: "user",
parts: [
{ text: "What is in this image?" },
{ inlineData: { mimeType: "image/png", data: "AAECAw==" } },
],
},
{
role: "model",
parts: [{ functionCall: { id: "call_1", name: "lookup", args: { query: "weather" } } }],
},
{
role: "user",
parts: [{ functionResponse: { id: "call_1", name: "lookup", response: { name: "lookup", content: '{"forecast":"sunny"}' } } }],
},
],
tools: [{
functionDeclarations: [{
name: "lookup",
description: "Lookup data",
parameters: { type: "object", properties: { query: { type: "string" } } },
}],
}],
toolConfig: { functionCallingConfig: { mode: "ANY", allowedFunctionNames: ["lookup"] } },
})
}),
)
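// Thought signatures: incoming thoughtSignature values surface as google metadata on the emitted events, and the same metadata is written back onto outgoing parts when the follow-up request is prepared.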
it.effect("round-trips thought signatures on text, reasoning, and tool calls", () =>
Effect.gen(function* () {
const body = sseEvents({
candidates: [{
content: {
role: "model",
parts: [
{ text: "visible", thoughtSignature: "text_sig" },
{ text: "thinking", thought: true, thoughtSignature: "reasoning_sig" },
{ functionCall: { id: "gemini_call_1", name: "lookup", args: { query: "weather" } }, thoughtSignature: "tool_sig" },
],
},
finishReason: "STOP",
}],
})
const response = yield* LLMClient.make({ adapters: [Gemini.adapter] })
.generate(
LLM.updateRequest(request, {
tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }],
}),
)
.pipe(Effect.provide(fixedResponse(body)))
expect(response.events).toContainEqual({
type: "text-delta",
text: "visible",
metadata: { google: { thoughtSignature: "text_sig" } },
})
expect(response.events).toContainEqual({
type: "reasoning-delta",
text: "thinking",
metadata: { google: { thoughtSignature: "reasoning_sig" } },
})
expect(response.events).toContainEqual({
type: "tool-call",
id: "gemini_call_1",
name: "lookup",
input: { query: "weather" },
metadata: { google: { thoughtSignature: "tool_sig", functionCallId: "gemini_call_1" } },
})
const prepared = yield* LLMClient.make({ adapters: [Gemini.adapter] }).prepare(
LLM.request({
id: "req_thought_signatures",
model,
messages: [
LLM.assistant([
{ type: "text", text: "visible", metadata: { google: { thoughtSignature: "text_sig" } } },
{ type: "reasoning", text: "thinking", metadata: { google: { thoughtSignature: "reasoning_sig" } } },
LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" }, metadata: { google: { thoughtSignature: "tool_sig" } } }),
]),
],
}),
)
expect(prepared.target).toMatchObject({
contents: [{
role: "model",
parts: [
{ text: "visible", thoughtSignature: "text_sig" },
{ text: "thinking", thought: true, thoughtSignature: "reasoning_sig" },
{ functionCall: { id: "call_1", name: "lookup", args: { query: "weather" } }, thoughtSignature: "tool_sig" },
],
}],
})
}),
)
it.effect("omits tools when tool choice is none", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [Gemini.adapter] }).prepare(
LLM.request({
id: "req_no_tools",
model,
prompt: "Say hello.",
tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }],
toolChoice: { type: "none" },
}),
)
expect(prepared.target).toEqual({
contents: [{ role: "user", parts: [{ text: "Say hello." }] }],
})
}),
)
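// Schema sanitization expected below: integer enums are stringified, "required" entries without a matching property are dropped, untyped arrays default to string items, and object-only keywords on scalar types are stripped.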
it.effect("sanitizes integer enums, dangling required, untyped arrays, and scalar object keys", () =>
Effect.gen(function* () {
const prepared = yield* LLMClient.make({ adapters: [Gemini.adapter] }).prepare(
LLM.request({
id: "req_schema_patch",
model,
prompt: "Use the tool.",
tools: [{
name: "lookup",
description: "Lookup data",
inputSchema: {
type: "object",
required: ["status", "missing"],
properties: {
status: { type: "integer", enum: [1, 2] },
tags: { type: "array" },
name: { type: "string", properties: { ignored: { type: "string" } }, required: ["ignored"] },
},
},
}],
}),
)
expect(prepared.target).toMatchObject({
tools: [{
functionDeclarations: [{
parameters: {
type: "object",
required: ["status"],
properties: {
status: { type: "string", enum: ["1", "2"] },
tags: { type: "array", items: { type: "string" } },
name: { type: "string" },
},
},
}],
}],
})
}),
)
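// Streaming: parts flagged "thought: true" become reasoning deltas, and usageMetadata is normalized (prompt -> input, candidates -> output, thoughts -> reasoning, cachedContent -> cacheReadInput) with the raw counts preserved under usage.native.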
it.effect("parses text, reasoning, and usage stream fixtures", () =>
Effect.gen(function* () {
const body = sseEvents(
{
candidates: [{
content: { role: "model", parts: [{ text: "thinking", thought: true }] },
}],
},
{
candidates: [{
content: { role: "model", parts: [{ text: "Hello" }] },
}],
},
{
candidates: [{
content: { role: "model", parts: [{ text: "!" }] },
finishReason: "STOP",
}],
},
{
usageMetadata: {
promptTokenCount: 5,
candidatesTokenCount: 2,
totalTokenCount: 7,
thoughtsTokenCount: 1,
cachedContentTokenCount: 1,
},
},
)
const response = yield* LLMClient.make({ adapters: [Gemini.adapter] })
.generate(request)
.pipe(Effect.provide(fixedResponse(body)))
expect(LLM.outputText(response)).toBe("Hello!")
expect(LLM.outputReasoning(response)).toBe("thinking")
expect(LLM.outputUsage(response)).toMatchObject({
inputTokens: 5,
outputTokens: 2,
reasoningTokens: 1,
cacheReadInputTokens: 1,
totalTokens: 7,
})
expect(response.events).toEqual([
{ type: "reasoning-delta", text: "thinking" },
{ type: "text-delta", text: "Hello" },
{ type: "text-delta", text: "!" },
{
type: "request-finish",
reason: "stop",
usage: {
inputTokens: 5,
outputTokens: 2,
reasoningTokens: 1,
cacheReadInputTokens: 1,
totalTokens: 7,
native: {
promptTokenCount: 5,
candidatesTokenCount: 2,
totalTokenCount: 7,
thoughtsTokenCount: 1,
cachedContentTokenCount: 1,
},
},
},
])
}),
)
it.effect("emits streamed tool calls and maps finish reason", () =>
Effect.gen(function* () {
const body = sseEvents(
{
candidates: [{
content: {
role: "model",
parts: [{ functionCall: { name: "lookup", args: { query: "weather" } } }],
},
finishReason: "STOP",
}],
usageMetadata: { promptTokenCount: 5, candidatesTokenCount: 1 },
},
)
const response = yield* LLMClient.make({ adapters: [Gemini.adapter] })
.generate(
LLM.updateRequest(request, {
tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }],
}),
)
.pipe(Effect.provide(fixedResponse(body)))
expect(LLM.outputToolCalls(response)).toEqual([{ type: "tool-call", id: "tool_0", name: "lookup", input: { query: "weather" } }])
expect(response.events).toEqual([
{ type: "tool-call", id: "tool_0", name: "lookup", input: { query: "weather" } },
{
type: "request-finish",
reason: "tool-calls",
usage: { inputTokens: 5, outputTokens: 1, totalTokens: 6, native: { promptTokenCount: 5, candidatesTokenCount: 1 } },
},
])
}),
)
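// The streamed functionCall parts carry no ids, so the adapter assigns sequential ids (tool_0, tool_1, ...).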
it.effect("assigns unique ids to multiple streamed tool calls", () =>
Effect.gen(function* () {
const body = sseEvents(
{
candidates: [{
content: {
role: "model",
parts: [
{ functionCall: { name: "lookup", args: { query: "weather" } } },
{ functionCall: { name: "lookup", args: { query: "news" } } },
],
},
finishReason: "STOP",
}],
},
)
const response = yield* LLMClient.make({ adapters: [Gemini.adapter] })
.generate(
LLM.updateRequest(request, {
tools: [{ name: "lookup", description: "Lookup data", inputSchema: { type: "object" } }],
}),
)
.pipe(Effect.provide(fixedResponse(body)))
expect(LLM.outputToolCalls(response)).toEqual([
{ type: "tool-call", id: "tool_0", name: "lookup", input: { query: "weather" } },
{ type: "tool-call", id: "tool_1", name: "lookup", input: { query: "news" } },
])
expect(response.events.at(-1)).toMatchObject({ type: "request-finish", reason: "tool-calls" })
}),
)
it.effect("maps length and content-filter finish reasons", () =>
Effect.gen(function* () {
const length = yield* LLMClient.make({ adapters: [Gemini.adapter] })
.generate(request)
.pipe(
Effect.provide(
fixedResponse(sseEvents({ candidates: [{ content: { role: "model", parts: [] }, finishReason: "MAX_TOKENS" }] })),
),
)
const filtered = yield* LLMClient.make({ adapters: [Gemini.adapter] })
.generate(request)
.pipe(
Effect.provide(
fixedResponse(sseEvents({ candidates: [{ content: { role: "model", parts: [] }, finishReason: "SAFETY" }] })),
),
)
expect(length.events).toEqual([{ type: "request-finish", reason: "length" }])
expect(filtered.events).toEqual([{ type: "request-finish", reason: "content-filter" }])
}),
)
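// With only thoughtsTokenCount reported, no total is synthesized: reasoningTokens is set but totalTokens stays undefined.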
it.effect("leaves total usage undefined when component counts are missing", () =>
Effect.gen(function* () {
const response = yield* LLMClient.make({ adapters: [Gemini.adapter] })
.generate(request)
.pipe(Effect.provide(fixedResponse(sseEvents({ usageMetadata: { thoughtsTokenCount: 1 } }))))
expect(response.usage).toMatchObject({ reasoningTokens: 1 })
expect(response.usage?.totalTokens).toBeUndefined()
}),
)
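// Failure modes: malformed SSE chunks fail with ProviderChunkError, and unsupported assistant media content is rejected at prepare time.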
it.effect("fails invalid stream chunks", () =>
Effect.gen(function* () {
const error = yield* LLMClient.make({ adapters: [Gemini.adapter] })
.generate(request)
.pipe(
Effect.provide(fixedResponse(sseRaw("data: {not json}"))),
Effect.flip,
)
expect(error).toBeInstanceOf(ProviderChunkError)
expect(error.message).toContain("Invalid Gemini stream chunk")
}),
)
it.effect("rejects unsupported assistant media content", () =>
Effect.gen(function* () {
const error = yield* LLMClient.make({ adapters: [Gemini.adapter] })
.prepare(
LLM.request({
id: "req_media",
model,
messages: [LLM.assistant({ type: "media", mediaType: "image/png", data: "AAECAw==" })],
}),
)
.pipe(Effect.flip)
expect(error.message).toContain("Gemini assistant messages only support text, reasoning, and tool-call content for now")
}),
)
})

Some files were not shown because too many files have changed in this diff