"""Claude Code backend adapter. One :class:`ClaudeCodeBackendAdapter` per :class:`ClaudeAgent`. The underlying :class:`claude_code_api.ClaudeCodeBackend` bakes ``cwd`` / ``model`` / ``system_prompt`` / MCP wiring into a single :class:`~claude_code_api.BackendOptions` at construction time, so a single backend instance is conceptually bound to one agent (different agents would mean different cwds / system prompts / exposed MCPs and thus different live-session pools). Per :meth:`complete` we: * hand the full Anthropic-style ``messages`` list to ``ClaudeCodeBackend.complete`` — it does its own fingerprint-based session lookup, so we never need to track sessions ourselves; * re-emit each ``AssistantMessage`` as ``content_block_start`` + one delta + ``content_block_stop`` per content block, with monotonically increasing indices spanning the entire turn (one ``message_start`` … ``message_stop`` envelope per ``complete`` call); * close the envelope on the synthesized ``ResultMessage``. The per-request ``system`` parameter is intentionally **ignored** — ``BackendOptions.system_prompt`` is fixed at session-spawn time, and the agent's ``system_prompt`` is the canonical identity of the agent. """ from __future__ import annotations import json import uuid from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any, Self from claude_code_api import ( AssistantMessage, BackendOptions, ClaudeCodeBackend, ResultMessage, TextBlock, ThinkingBlock, ToolUseBlock, synthesize_turn_messages, ) from beaver_gateway.agents.claude import ClaudeAgent from beaver_gateway.core.events import ( StopReason, build_content_block_stop, build_input_json_delta, build_message_delta, build_message_start, build_message_stop, build_signature_delta, build_text_block_start, build_text_delta, build_thinking_block_start, build_thinking_delta, build_tool_use_block_start, ) if TYPE_CHECKING: from collections.abc import AsyncIterator, Iterable, Mapping from anthropic.types import MessageParam from beaver_gateway.agents.base import BaseAgent from beaver_gateway.core.events import MessageStreamEvent __all__ = ["ClaudeCodeBackendAdapter", "TurnCapture"] @dataclass class TurnCapture: """Side-channel sink for per-turn metadata. Pass an instance via ``ClaudeCodeBackendAdapter.complete(capture=...)``. After the stream finishes, :attr:`synthesized_messages` holds the full assistant↔tool-result cycle (from :func:`claude_code_api.synthesize_turn_messages`) — i.e. the exact list of canonical Anthropic-shape messages claude-code-api stashed the live session under. The markdown frontend uses this to write the conversation history to its DB so a subsequent turn's prefix fingerprint hits the same session. Other backends (anthropic, raycast) ignore the kwarg — it lands in their ``**options`` and is silently dropped. """ synthesized_messages: list[dict[str, Any]] = field(default_factory=list) _CLAUDE_TO_ANTHROPIC_STOP: dict[str, StopReason] = { "end_turn": "end_turn", "tool_use": "tool_use", "max_tokens": "max_tokens", "stop_sequence": "stop_sequence", "refusal": "refusal", } def _map_stop_reason(raw: str | None) -> StopReason: """Map claude-code's stop reason into Anthropic's vocabulary. Unknown / missing values collapse to ``end_turn`` so the client sees a clean finish rather than a wire-format error. """ if raw is None: return "end_turn" return _CLAUDE_TO_ANTHROPIC_STOP.get(raw, "end_turn") def _build_mcp_servers( agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str] ) -> dict[str, dict[str, Any]] | None: """Render ``agent.expose_mcps`` into ``BackendOptions.mcp_servers``. Each exposed MCP is a streamable-HTTP pointer at the gateway's internal aggregator (built by :mod:`beaver_gateway.mcp.internal_app`). ``None`` keeps claude-code from materializing an ``--mcp-config`` file when the agent exposes nothing. """ if not agent.expose_mcps: return None servers: dict[str, dict[str, Any]] = {} for em in agent.expose_mcps: url = mcp_internal_urls.get(em.name) if url is None: msg = ( f"agent {agent.name!r} exposes MCP {em.name!r} " "but no internal URL is registered for it" ) raise ValueError(msg) servers[em.name] = {"type": "http", "url": url} return servers def _build_backend_options( agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str] ) -> BackendOptions: """Compose the per-agent :class:`BackendOptions`. Agent-primary fields: * ``cwd`` / ``model`` come from the agent directly; * ``system_prompt`` carries :attr:`BaseAgent.system_prompt` verbatim — i.e. wire-level ``--system-prompt`` (~8.6k tokens lighter than ``--append-system-prompt`` because claude-code's persona/planning conventions and dynamic sections drop out; tool schemas survive via the API ``tools=[]`` channel); * ``append_system_prompt`` carries :attr:`ClaudeCodeOptions.append_system_prompt`, normally ``None``. Setting it re-attaches claude-code's built-in prompt *and* this delta — opt-in for "claude as a real coding session"; * ``allowed_tools`` follows the PLAN: when the user lists native tools we restrict to those *plus* a per-MCP wildcard so MCP tools stay reachable; when no native list is declared we leave ``allowed_tools`` empty (= all tools allowed by claude-code's default); * ``mcp_servers`` comes from :func:`_build_mcp_servers`. Every other tunable knob is passed through from :attr:`ClaudeAgent.options`. Our default overrides (``wait_for_turn_duration=True``, ``dangerously_skip_permissions=True``) live on :class:`ClaudeCodeOptions`, not here, so a user who builds ``ClaudeCodeOptions(...)`` explicitly inherits the same defaults instead of getting whatever claude-code-api ships. """ allowed_tools: tuple[str, ...] = () if agent.available_native_tools: mcp_wildcards = tuple(f"mcp__{em.name}" for em in agent.expose_mcps) allowed_tools = tuple(agent.available_native_tools) + mcp_wildcards opt = agent.options return BackendOptions( cwd=agent.cwd, model=agent.model or None, system_prompt=agent.system_prompt, append_system_prompt=opt.append_system_prompt, allowed_tools=allowed_tools, mcp_servers=_build_mcp_servers(agent, mcp_internal_urls), disallowed_tools=opt.disallowed_tools, permission_mode=opt.permission_mode, dangerously_skip_permissions=opt.dangerously_skip_permissions, effort=opt.effort, add_dir=opt.add_dir, settings=opt.settings, extra_args=opt.extra_args, extra_env=opt.extra_env, preserve_provider_env=opt.preserve_provider_env, history_injection_mode=opt.history_injection_mode, wait_for_turn_duration=opt.wait_for_turn_duration, include_meta_user=opt.include_meta_user, startup_delay=opt.startup_delay, file_wait_timeout=opt.file_wait_timeout, turn_duration_timeout=opt.turn_duration_timeout, ) class ClaudeCodeBackendAdapter: """One ``claude-code-api`` backend bound to a single :class:`ClaudeAgent`. Owns the underlying :class:`ClaudeCodeBackend`'s lifecycle through the async-context-manager protocol so :mod:`beaver_gateway.cli` can park it in its ``AsyncExitStack``. """ def __init__( self, *, agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str] ) -> None: self._agent = agent self._backend = ClaudeCodeBackend( _build_backend_options(agent, mcp_internal_urls) ) @property def agent(self) -> ClaudeAgent: return self._agent @property def live_session_count(self) -> int: return self._backend.live_session_count async def __aenter__(self) -> Self: await self._backend.__aenter__() return self async def __aexit__(self, exc_type: object, exc: object, tb: object) -> None: await self._backend.__aexit__(exc_type, exc, tb) async def aclose(self) -> None: await self._backend.aclose() async def complete( self, *, agent: BaseAgent, messages: Iterable[MessageParam], system: str | None = None, # noqa: ARG002 — see module docstring capture: TurnCapture | None = None, **options: Any, # noqa: ARG002 — no per-request knobs for claude-code yet ) -> AsyncIterator[MessageStreamEvent]: if not isinstance(agent, ClaudeAgent): msg = ( "ClaudeCodeBackendAdapter requires ClaudeAgent, " f"got {type(agent).__name__}" ) raise TypeError(msg) if agent.name != self._agent.name: # Adapter is per-agent; routing a different agent through it # would mean a different cwd / system_prompt / MCP set than # the live-session pool was spawned with. msg = ( f"ClaudeCodeBackendAdapter bound to {self._agent.name!r} " f"got request for {agent.name!r}" ) raise ValueError(msg) message_id = f"msg_{uuid.uuid4().hex}" yield build_message_start(message_id=message_id, model=agent.model) next_index = 0 stop_reason: str | None = None usage: Mapping[str, Any] | None = None # We keep raw events so we can hand them to # ``synthesize_turn_messages`` after the stream closes — the # markdown frontend stores the result in its conversation # history so the next turn's prefix matches the backend's # session-pool fingerprint. UserMessage (tool_result) events # are silently discarded from the wire but kept here. raw_events: list[Any] = [] async for event in self._backend.complete(list(messages)): raw_events.append(event) if isinstance(event, AssistantMessage): for block in event.content: for ev in _emit_block(block, next_index): yield ev next_index += 1 elif isinstance(event, ResultMessage): # ResultMessage is the terminal event from TurnManager # — we capture its stop_reason / usage for the envelope # below. We DO NOT break here: an early break would # raise GeneratorExit inside claude-code-api's # ``complete`` coroutine before it gets a chance to # stash the live session under the post-turn # fingerprint, so every continuation would miss the # cache and reseed. Let the inner generator exit # naturally instead. stop_reason = event.stop_reason usage = event.usage # UserMessage (tool_result records) and SystemMessage # (turn_duration heartbeats) carry no content for the # /v1/messages caller — skip silently on the wire, but they # ARE retained in ``raw_events`` for synthesis below. if capture is not None: capture.synthesized_messages = synthesize_turn_messages(raw_events) yield build_message_delta( stop_reason=_map_stop_reason(stop_reason), usage=_normalize_usage(usage) ) yield build_message_stop() def _emit_block( block: TextBlock | ThinkingBlock | ToolUseBlock | Any, index: int ) -> Iterable[MessageStreamEvent]: """Render one ``claude-code`` content block as Anthropic stream events. ``ToolResultBlock`` would arrive only on user-role records — we don't emit it here because :meth:`complete` skips ``UserMessage``. """ if isinstance(block, TextBlock): return ( build_text_block_start(index), build_text_delta(index, block.text), build_content_block_stop(index), ) if isinstance(block, ThinkingBlock): return ( build_thinking_block_start(index), build_thinking_delta(index, block.thinking), build_signature_delta(index, block.signature), build_content_block_stop(index), ) if isinstance(block, ToolUseBlock): partial = json.dumps(block.input, separators=(",", ":"), ensure_ascii=False) return ( build_tool_use_block_start(index, tool_use_id=block.id, name=block.name), build_input_json_delta(index, partial), build_content_block_stop(index), ) return () def _normalize_usage(usage: Mapping[str, Any] | None) -> dict[str, int] | None: """Coerce claude-code's ``usage`` dict to Anthropic ``MessageDeltaUsage`` shape. claude-code copies whatever the JSONL ``usage`` record carried — fields can be missing, strings, or ints. We pass through only the fields ``MessageDeltaUsage`` knows about and discard the rest so an odd ``cache_creation`` object structure doesn't fail pydantic validation downstream. """ if not usage: return None out: dict[str, int] = {} for key in ( "input_tokens", "output_tokens", "cache_creation_input_tokens", "cache_read_input_tokens", ): value = usage.get(key) if isinstance(value, int): out[key] = value return out or None