360 lines
13 KiB
Python
360 lines
13 KiB
Python
"""Claude Code backend adapter.
|
|
|
|
One :class:`ClaudeCodeBackendAdapter` per :class:`ClaudeAgent`. The
|
|
underlying :class:`claude_code_api.ClaudeCodeBackend` bakes ``cwd`` /
|
|
``model`` / ``system_prompt`` / MCP wiring into a single
|
|
:class:`~claude_code_api.BackendOptions` at construction time, so a
|
|
single backend instance is conceptually bound to one agent (different
|
|
agents would mean different cwds / system prompts / exposed MCPs and
|
|
thus different live-session pools).
|
|
|
|
Per :meth:`complete` we:
|
|
|
|
* hand the full Anthropic-style ``messages`` list to
|
|
``ClaudeCodeBackend.complete`` — it does its own fingerprint-based
|
|
session lookup, so we never need to track sessions ourselves;
|
|
* re-emit each ``AssistantMessage`` as ``content_block_start`` +
  delta(s) + ``content_block_stop`` per content block (one delta for
  text and tool-use blocks; a thinking delta followed by a signature
  delta for thinking blocks), with monotonically increasing indices
  spanning the entire turn (one ``message_start`` … ``message_stop``
  envelope per ``complete`` call);
|
|
* close the envelope on the synthesized ``ResultMessage``.
|
|
|
|
The per-request ``system`` parameter is intentionally **ignored** —
|
|
``BackendOptions.system_prompt`` is fixed at session-spawn time, and the
|
|
agent's ``system_prompt`` is the canonical identity of the agent.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import uuid
|
|
from dataclasses import dataclass, field
|
|
from typing import TYPE_CHECKING, Any, Self
|
|
|
|
from claude_code_api import (
|
|
AssistantMessage,
|
|
BackendOptions,
|
|
ClaudeCodeBackend,
|
|
ResultMessage,
|
|
TextBlock,
|
|
ThinkingBlock,
|
|
ToolUseBlock,
|
|
synthesize_turn_messages,
|
|
)
|
|
|
|
from beaver_gateway.agents.claude import ClaudeAgent
|
|
from beaver_gateway.core.events import (
|
|
StopReason,
|
|
build_content_block_stop,
|
|
build_input_json_delta,
|
|
build_message_delta,
|
|
build_message_start,
|
|
build_message_stop,
|
|
build_signature_delta,
|
|
build_text_block_start,
|
|
build_text_delta,
|
|
build_thinking_block_start,
|
|
build_thinking_delta,
|
|
build_tool_use_block_start,
|
|
)
|
|
|
|
if TYPE_CHECKING:
|
|
from collections.abc import AsyncIterator, Iterable, Mapping
|
|
|
|
from anthropic.types import MessageParam
|
|
|
|
from beaver_gateway.agents.base import BaseAgent
|
|
from beaver_gateway.core.events import MessageStreamEvent
|
|
|
|
|
|
# Names exported by a star-import of this module: the adapter class and
# the per-turn capture sink that callers hand to ``complete``.
__all__ = ["ClaudeCodeBackendAdapter", "TurnCapture"]
|
|
|
|
|
|
@dataclass
class TurnCapture:
    """Out-of-band receiver for metadata produced during one turn.

    Hand an instance to ``ClaudeCodeBackendAdapter.complete(capture=...)``.
    Once the stream has run to completion the adapter overwrites
    :attr:`synthesized_messages` with the output of
    :func:`claude_code_api.synthesize_turn_messages` — the full
    assistant↔tool-result cycle, i.e. the exact list of canonical
    Anthropic-shape messages claude-code-api stashed the live session
    under. The markdown frontend persists that list as conversation
    history so the prefix fingerprint of the next turn resolves to the
    same session.

    Backends other than this one (anthropic, raycast) do nothing with
    the kwarg: it falls into their ``**options`` and is silently
    dropped.
    """

    # Starts as a fresh empty list per instance; replaced wholesale by the
    # adapter after the stream finishes.
    synthesized_messages: list[dict[str, Any]] = field(default_factory=list)
|
|
|
|
|
|
# Stop reasons claude-code reports that already exist in Anthropic's
# vocabulary. Every entry maps onto itself; the table's job is to act as
# an allow-list for ``_map_stop_reason``.
_CLAUDE_TO_ANTHROPIC_STOP: dict[str, StopReason] = {
    "end_turn": "end_turn",
    "tool_use": "tool_use",
    "max_tokens": "max_tokens",
    "stop_sequence": "stop_sequence",
    "refusal": "refusal",
}


def _map_stop_reason(raw: str | None) -> StopReason:
    """Translate a claude-code stop reason into an Anthropic one.

    Values listed in ``_CLAUDE_TO_ANTHROPIC_STOP`` pass through
    unchanged. ``None`` and any unrecognised value degrade to
    ``end_turn`` so the client observes a clean finish instead of a
    wire-format error.
    """
    fallback: StopReason = "end_turn"
    return fallback if raw is None else _CLAUDE_TO_ANTHROPIC_STOP.get(raw, fallback)
|
|
|
|
|
|
def _build_mcp_servers(
    agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str]
) -> dict[str, dict[str, Any]] | None:
    """Translate ``agent.expose_mcps`` into a ``BackendOptions.mcp_servers`` value.

    Every exposed MCP becomes a streamable-HTTP entry pointing at the
    gateway's internal aggregator (built by
    :mod:`beaver_gateway.mcp.internal_app`), keyed by the MCP's name.

    Returns ``None`` — not an empty dict — when the agent exposes
    nothing, which keeps claude-code from materializing an
    ``--mcp-config`` file.

    Raises :class:`ValueError` at the first exposed MCP whose name has no
    URL in ``mcp_internal_urls``.
    """
    exposed = agent.expose_mcps
    if not exposed:
        return None

    rendered: dict[str, dict[str, Any]] = {}
    for entry in exposed:
        mcp_name = entry.name
        endpoint = mcp_internal_urls.get(mcp_name)
        if endpoint is None:
            msg = (
                f"agent {agent.name!r} exposes MCP {mcp_name!r} "
                "but no internal URL is registered for it"
            )
            raise ValueError(msg)
        rendered[mcp_name] = {"type": "http", "url": endpoint}
    return rendered
|
|
|
|
|
|
def _build_backend_options(
    agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str]
) -> BackendOptions:
    """Assemble the :class:`BackendOptions` for one agent.

    Fields derived from the agent itself:

    * ``cwd`` / ``model`` are read straight off the agent (an empty
      ``model`` is passed as ``None``);
    * ``system_prompt`` is :attr:`BaseAgent.system_prompt` verbatim —
      i.e. wire-level ``--system-prompt`` (~8.6k tokens lighter than
      ``--append-system-prompt`` because claude-code's persona/planning
      conventions and dynamic sections drop out; tool schemas survive
      via the API ``tools=[]`` channel);
    * ``append_system_prompt`` is
      :attr:`ClaudeCodeOptions.append_system_prompt`, normally ``None``.
      Setting it re-attaches claude-code's built-in prompt *and* this
      delta — opt-in for "claude as a real coding session";
    * ``allowed_tools`` follows the PLAN: if the agent declares native
      tools, the list is those tools *plus* one ``mcp__<name>`` wildcard
      per exposed MCP so MCP tools stay reachable; with no native list
      it stays empty (= all tools allowed by claude-code's default);
    * ``mcp_servers`` is produced by :func:`_build_mcp_servers`.

    All remaining knobs are copied unchanged from
    :attr:`ClaudeAgent.options`. Our default overrides
    (``wait_for_turn_duration=True``,
    ``dangerously_skip_permissions=True``) live on
    :class:`ClaudeCodeOptions`, not here, so a user who builds
    ``ClaudeCodeOptions(...)`` explicitly inherits the same defaults
    instead of getting whatever claude-code-api ships.
    """
    if agent.available_native_tools:
        wildcards = [f"mcp__{em.name}" for em in agent.expose_mcps]
        allowed_tools: tuple[str, ...] = (*agent.available_native_tools, *wildcards)
    else:
        allowed_tools = ()

    tunables = agent.options
    return BackendOptions(
        # Agent-primary fields.
        cwd=agent.cwd,
        model=agent.model or None,
        system_prompt=agent.system_prompt,
        append_system_prompt=tunables.append_system_prompt,
        allowed_tools=allowed_tools,
        mcp_servers=_build_mcp_servers(agent, mcp_internal_urls),
        # Tool / permission policy.
        disallowed_tools=tunables.disallowed_tools,
        permission_mode=tunables.permission_mode,
        dangerously_skip_permissions=tunables.dangerously_skip_permissions,
        # Process invocation.
        effort=tunables.effort,
        add_dir=tunables.add_dir,
        settings=tunables.settings,
        extra_args=tunables.extra_args,
        extra_env=tunables.extra_env,
        preserve_provider_env=tunables.preserve_provider_env,
        # Session / turn handling.
        history_injection_mode=tunables.history_injection_mode,
        wait_for_turn_duration=tunables.wait_for_turn_duration,
        include_meta_user=tunables.include_meta_user,
        # Timing.
        startup_delay=tunables.startup_delay,
        file_wait_timeout=tunables.file_wait_timeout,
        turn_duration_timeout=tunables.turn_duration_timeout,
    )
|
|
|
|
|
|
class ClaudeCodeBackendAdapter:
    """One ``claude-code-api`` backend bound to a single :class:`ClaudeAgent`.

    Owns the underlying :class:`ClaudeCodeBackend`'s lifecycle through
    the async-context-manager protocol so :mod:`beaver_gateway.cli` can
    park it in its ``AsyncExitStack``.
    """

    def __init__(
        self, *, agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str]
    ) -> None:
        """Bind the adapter to *agent* and construct its backend.

        ``mcp_internal_urls`` maps exposed-MCP names to their internal
        URLs. Building the backend options raises :class:`ValueError`
        when the agent exposes an MCP that has no entry in that mapping.
        """
        self._agent = agent
        self._backend = ClaudeCodeBackend(
            _build_backend_options(agent, mcp_internal_urls)
        )

    @property
    def agent(self) -> ClaudeAgent:
        """The agent this adapter was constructed for."""
        return self._agent

    @property
    def live_session_count(self) -> int:
        """Pass-through of the backend's ``live_session_count``."""
        return self._backend.live_session_count

    async def __aenter__(self) -> Self:
        """Enter the backend's async context and return this adapter."""
        await self._backend.__aenter__()
        return self

    async def __aexit__(self, exc_type: object, exc: object, tb: object) -> None:
        """Forward context exit, including any exception info, to the backend."""
        await self._backend.__aexit__(exc_type, exc, tb)

    async def aclose(self) -> None:
        """Close the underlying backend."""
        await self._backend.aclose()

    async def complete(
        self,
        *,
        agent: BaseAgent,
        messages: Iterable[MessageParam],
        system: str | None = None,  # noqa: ARG002 — see module docstring
        capture: TurnCapture | None = None,
        **options: Any,  # noqa: ARG002 — no per-request knobs for claude-code yet
    ) -> AsyncIterator[MessageStreamEvent]:
        """Run one turn and stream it as Anthropic-style message events.

        Yields a single ``message_start`` … ``message_stop`` envelope.
        Inside it, every content block of every ``AssistantMessage`` the
        backend produces is rendered through :func:`_emit_block` under a
        contiguous, zero-based index. Blocks that render to no events do
        not consume an index, so a consumer that rebuilds the content
        list positionally never sees a gap.

        ``agent`` must be a :class:`ClaudeAgent` with the same name as
        the agent this adapter is bound to; otherwise :class:`TypeError`
        or :class:`ValueError` is raised. Because this is an async
        generator, those checks run when iteration starts, not when the
        method is called.

        ``system`` and ``**options`` are accepted for interface
        compatibility and ignored. When ``capture`` is given, its
        ``synthesized_messages`` is overwritten with
        ``synthesize_turn_messages(...)`` of every raw backend event once
        the backend stream has been fully consumed.
        """
        if not isinstance(agent, ClaudeAgent):
            msg = (
                "ClaudeCodeBackendAdapter requires ClaudeAgent, "
                f"got {type(agent).__name__}"
            )
            raise TypeError(msg)
        if agent.name != self._agent.name:
            # Adapter is per-agent; routing a different agent through it
            # would mean a different cwd / system_prompt / MCP set than
            # the live-session pool was spawned with.
            msg = (
                f"ClaudeCodeBackendAdapter bound to {self._agent.name!r} "
                f"got request for {agent.name!r}"
            )
            raise ValueError(msg)

        message_id = f"msg_{uuid.uuid4().hex}"
        yield build_message_start(message_id=message_id, model=agent.model)

        next_index = 0
        stop_reason: str | None = None
        usage: Mapping[str, Any] | None = None
        # We keep raw events so we can hand them to
        # ``synthesize_turn_messages`` after the stream closes — the
        # markdown frontend stores the result in its conversation
        # history so the next turn's prefix matches the backend's
        # session-pool fingerprint. UserMessage (tool_result) events
        # are silently discarded from the wire but kept here.
        raw_events: list[Any] = []

        async for event in self._backend.complete(list(messages)):
            raw_events.append(event)
            if isinstance(event, AssistantMessage):
                for block in event.content:
                    emitted_any = False
                    for ev in _emit_block(block, next_index):
                        emitted_any = True
                        yield ev
                    # Only consume an index for blocks that actually
                    # reached the wire. Advancing past a block that
                    # rendered to nothing would leave a hole in the
                    # index sequence, and consumers that address blocks
                    # by position would mis-index every later block.
                    if emitted_any:
                        next_index += 1
            elif isinstance(event, ResultMessage):
                # ResultMessage is the terminal event from TurnManager
                # — we capture its stop_reason / usage for the envelope
                # below. We DO NOT break here: an early break would
                # raise GeneratorExit inside claude-code-api's
                # ``complete`` coroutine before it gets a chance to
                # stash the live session under the post-turn
                # fingerprint, so every continuation would miss the
                # cache and reseed. Let the inner generator exit
                # naturally instead.
                stop_reason = event.stop_reason
                usage = event.usage
            # UserMessage (tool_result records) and SystemMessage
            # (turn_duration heartbeats) carry no content for the
            # /v1/messages caller — skip silently on the wire, but they
            # ARE retained in ``raw_events`` for synthesis below.

        if capture is not None:
            capture.synthesized_messages = synthesize_turn_messages(raw_events)

        yield build_message_delta(
            stop_reason=_map_stop_reason(stop_reason), usage=_normalize_usage(usage)
        )
        yield build_message_stop()
|
|
|
|
|
|
def _emit_block(
|
|
block: TextBlock | ThinkingBlock | ToolUseBlock | Any, index: int
|
|
) -> Iterable[MessageStreamEvent]:
|
|
"""Render one ``claude-code`` content block as Anthropic stream events.
|
|
|
|
``ToolResultBlock`` would arrive only on user-role records — we
|
|
don't emit it here because :meth:`complete` skips ``UserMessage``.
|
|
"""
|
|
if isinstance(block, TextBlock):
|
|
return (
|
|
build_text_block_start(index),
|
|
build_text_delta(index, block.text),
|
|
build_content_block_stop(index),
|
|
)
|
|
if isinstance(block, ThinkingBlock):
|
|
return (
|
|
build_thinking_block_start(index),
|
|
build_thinking_delta(index, block.thinking),
|
|
build_signature_delta(index, block.signature),
|
|
build_content_block_stop(index),
|
|
)
|
|
if isinstance(block, ToolUseBlock):
|
|
partial = json.dumps(block.input, separators=(",", ":"), ensure_ascii=False)
|
|
return (
|
|
build_tool_use_block_start(index, tool_use_id=block.id, name=block.name),
|
|
build_input_json_delta(index, partial),
|
|
build_content_block_stop(index),
|
|
)
|
|
return ()
|
|
|
|
|
|
def _coerce_token_count(value: object) -> int | None:
    """Return *value* as an integer token count, or ``None`` if it is not one."""
    # ``bool`` is a subclass of ``int``; a stray True/False is not a count.
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value
    if isinstance(value, str):
        try:
            return int(value)
        except ValueError:
            return None
    return None


def _normalize_usage(usage: Mapping[str, Any] | None) -> dict[str, int] | None:
    """Coerce claude-code's ``usage`` dict to Anthropic ``MessageDeltaUsage`` shape.

    claude-code copies whatever the JSONL ``usage`` record carried —
    fields can be missing, strings, or ints. Only the four token-count
    fields ``MessageDeltaUsage`` knows about are kept; everything else
    is discarded so an odd ``cache_creation`` object structure doesn't
    fail pydantic validation downstream.

    For each kept field, an ``int`` passes through, a string that parses
    as an integer is converted, and anything else (missing, ``bool``,
    float, non-numeric string, nested object) is dropped.

    Returns ``None`` when ``usage`` is empty / ``None`` or when no field
    survives, otherwise a dict containing only the surviving fields.
    """
    if not usage:
        return None
    out: dict[str, int] = {}
    for key in (
        "input_tokens",
        "output_tokens",
        "cache_creation_input_tokens",
        "cache_read_input_tokens",
    ):
        count = _coerce_token_count(usage.get(key))
        if count is not None:
            out[key] = count
    return out or None
|