feat: implement claude code backend

2026-05-19 23:11:07 +02:00
parent 757065f21c
commit 99a30f256d
8 changed files with 797 additions and 7 deletions
@@ -0,0 +1,327 @@
+"""Claude Code backend adapter.
+
+One :class:`ClaudeCodeBackendAdapter` per :class:`ClaudeAgent`. The
+underlying :class:`claude_code_api.ClaudeCodeBackend` bakes ``cwd`` /
+``model`` / ``system_prompt`` / MCP wiring into a single
+:class:`~claude_code_api.BackendOptions` at construction time, so a
+single backend instance is conceptually bound to one agent (different
+agents would mean different cwds / system prompts / exposed MCPs and
+thus different live-session pools).
+
+Per :meth:`complete` we:
+
+* hand the full Anthropic-style ``messages`` list to
+  ``ClaudeCodeBackend.complete`` — it does its own fingerprint-based
+  session lookup, so we never need to track sessions ourselves;
+* re-emit each ``AssistantMessage`` as ``content_block_start`` +
+  one delta + ``content_block_stop`` per content block, with
+  monotonically increasing indices spanning the entire turn (one
+  ``message_start`` … ``message_stop`` envelope per ``complete`` call);
+* close the envelope on the synthesized ``ResultMessage``.
+
+The per-request ``system`` parameter is intentionally **ignored** —
+``BackendOptions.system_prompt`` is fixed at session-spawn time, and the
+agent's ``system_prompt`` is the canonical identity of the agent.
+"""
+
+from __future__ import annotations
+
+import json
+import uuid
+from typing import TYPE_CHECKING, Any, Self
+
+from claude_code_api import (
+    AssistantMessage,
+    BackendOptions,
+    ClaudeCodeBackend,
+    ResultMessage,
+    TextBlock,
+    ThinkingBlock,
+    ToolUseBlock,
+)
+
+from beaver_gateway.agents.claude import ClaudeAgent
+from beaver_gateway.core.events import (
+    StopReason,
+    build_content_block_stop,
+    build_input_json_delta,
+    build_message_delta,
+    build_message_start,
+    build_message_stop,
+    build_signature_delta,
+    build_text_block_start,
+    build_text_delta,
+    build_thinking_block_start,
+    build_thinking_delta,
+    build_tool_use_block_start,
+)
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator, Iterable, Mapping
+
+    from anthropic.types import MessageParam
+
+    from beaver_gateway.agents.base import BaseAgent
+    from beaver_gateway.core.events import MessageStreamEvent
+
+
+__all__ = ["ClaudeCodeBackendAdapter"]
+
+
+_CLAUDE_TO_ANTHROPIC_STOP: dict[str, StopReason] = {
+    "end_turn": "end_turn",
+    "tool_use": "tool_use",
+    "max_tokens": "max_tokens",
+    "stop_sequence": "stop_sequence",
+    "refusal": "refusal",
+}
+
+
+def _map_stop_reason(raw: str | None) -> StopReason:
+    """Map claude-code's stop reason into Anthropic's vocabulary.
+
+    Unknown / missing values collapse to ``end_turn`` so the client sees
+    a clean finish rather than a wire-format error.
+    """
+    if raw is None:
+        return "end_turn"
+    return _CLAUDE_TO_ANTHROPIC_STOP.get(raw, "end_turn")
+
+
+def _build_mcp_servers(
+    agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str]
+) -> dict[str, dict[str, Any]] | None:
+    """Render ``agent.expose_mcps`` into ``BackendOptions.mcp_servers``.
+
+    Each exposed MCP is a streamable-HTTP pointer at the gateway's
+    internal aggregator (built by :mod:`beaver_gateway.mcp.internal_app`).
+    ``None`` keeps claude-code from materializing an ``--mcp-config``
+    file when the agent exposes nothing.
+    """
+    if not agent.expose_mcps:
+        return None
+    servers: dict[str, dict[str, Any]] = {}
+    for em in agent.expose_mcps:
+        url = mcp_internal_urls.get(em.name)
+        if url is None:
+            msg = (
+                f"agent {agent.name!r} exposes MCP {em.name!r} "
+                "but no internal URL is registered for it"
+            )
+            raise ValueError(msg)
+        servers[em.name] = {"type": "http", "url": url}
+    return servers
+
+
+def _build_backend_options(
+    agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str]
+) -> BackendOptions:
+    """Compose the per-agent :class:`BackendOptions`.
+
+    Agent-primary fields:
+
+    * ``cwd`` / ``model`` come from the agent directly;
+    * ``system_prompt`` carries :attr:`BaseAgent.system_prompt`
+      verbatim — i.e. wire-level ``--system-prompt`` (~8.6k tokens
+      lighter than ``--append-system-prompt`` because claude-code's
+      persona/planning conventions and dynamic sections drop out;
+      tool schemas survive via the API ``tools=[]`` channel);
+    * ``append_system_prompt`` carries
+      :attr:`ClaudeCodeOptions.append_system_prompt`, normally
+      ``None``. Setting it re-attaches claude-code's built-in prompt
+      *and* this delta — opt-in for "claude as a real coding session";
+    * ``allowed_tools`` follows the PLAN: when the user lists native
+      tools we restrict to those *plus* a per-MCP wildcard so MCP tools
+      stay reachable; when no native list is declared we leave
+      ``allowed_tools`` empty (= all tools allowed by claude-code's
+      default);
+    * ``mcp_servers`` comes from :func:`_build_mcp_servers`.
+
+    Every other tunable knob is passed through from
+    :attr:`ClaudeAgent.options`. Our default overrides
+    (``wait_for_turn_duration=True``,
+    ``dangerously_skip_permissions=True``) live on
+    :class:`ClaudeCodeOptions`, not here, so a user who builds
+    ``ClaudeCodeOptions(...)`` explicitly inherits the same defaults
+    instead of getting whatever claude-code-api ships.
+    """
+    allowed_tools: tuple[str, ...] = ()
+    if agent.available_native_tools:
+        mcp_wildcards = tuple(f"mcp__{em.name}" for em in agent.expose_mcps)
+        allowed_tools = tuple(agent.available_native_tools) + mcp_wildcards
+
+    opt = agent.options
+    return BackendOptions(
+        cwd=agent.cwd,
+        model=agent.model or None,
+        system_prompt=agent.system_prompt,
+        append_system_prompt=opt.append_system_prompt,
+        allowed_tools=allowed_tools,
+        mcp_servers=_build_mcp_servers(agent, mcp_internal_urls),
+        disallowed_tools=opt.disallowed_tools,
+        permission_mode=opt.permission_mode,
+        dangerously_skip_permissions=opt.dangerously_skip_permissions,
+        effort=opt.effort,
+        add_dir=opt.add_dir,
+        settings=opt.settings,
+        extra_args=opt.extra_args,
+        extra_env=opt.extra_env,
+        preserve_provider_env=opt.preserve_provider_env,
+        history_injection_mode=opt.history_injection_mode,
+        wait_for_turn_duration=opt.wait_for_turn_duration,
+        include_meta_user=opt.include_meta_user,
+        startup_delay=opt.startup_delay,
+        file_wait_timeout=opt.file_wait_timeout,
+        turn_duration_timeout=opt.turn_duration_timeout,
+    )
+
+
+class ClaudeCodeBackendAdapter:
+    """One ``claude-code-api`` backend bound to a single :class:`ClaudeAgent`.
+
+    Owns the underlying :class:`ClaudeCodeBackend`'s lifecycle through
+    the async-context-manager protocol so :mod:`beaver_gateway.cli` can
+    park it in its ``AsyncExitStack``.
+    """
+
+    def __init__(
+        self,
+        *,
+        agent: ClaudeAgent,
+        mcp_internal_urls: Mapping[str, str],
+    ) -> None:
+        self._agent = agent
+        self._backend = ClaudeCodeBackend(
+            _build_backend_options(agent, mcp_internal_urls)
+        )
+
+    @property
+    def agent(self) -> ClaudeAgent:
+        return self._agent
+
+    @property
+    def live_session_count(self) -> int:
+        return self._backend.live_session_count
+
+    async def __aenter__(self) -> Self:
+        await self._backend.__aenter__()
+        return self
+
+    async def __aexit__(
+        self, exc_type: object, exc: object, tb: object
+    ) -> None:
+        await self._backend.__aexit__(exc_type, exc, tb)
+
+    async def aclose(self) -> None:
+        await self._backend.aclose()
+
+    async def complete(
+        self,
+        *,
+        agent: BaseAgent,
+        messages: Iterable[MessageParam],
+        system: str | None = None,  # noqa: ARG002 — see module docstring
+        **options: Any,  # noqa: ARG002 — no per-request knobs for claude-code yet
+    ) -> AsyncIterator[MessageStreamEvent]:
+        if not isinstance(agent, ClaudeAgent):
+            msg = (
+                "ClaudeCodeBackendAdapter requires ClaudeAgent, "
+                f"got {type(agent).__name__}"
+            )
+            raise TypeError(msg)
+        if agent.name != self._agent.name:
+            # Adapter is per-agent; routing a different agent through it
+            # would mean a different cwd / system_prompt / MCP set than
+            # the live-session pool was spawned with.
+            msg = (
+                f"ClaudeCodeBackendAdapter bound to {self._agent.name!r} "
+                f"got request for {agent.name!r}"
+            )
+            raise ValueError(msg)
+
+        message_id = f"msg_{uuid.uuid4().hex}"
+        yield build_message_start(message_id=message_id, model=agent.model)
+
+        next_index = 0
+        stop_reason: str | None = None
+        usage: Mapping[str, Any] | None = None
+
+        async for event in self._backend.complete(list(messages)):
+            if isinstance(event, AssistantMessage):
+                for block in event.content:
+                    for ev in _emit_block(block, next_index):
+                        yield ev
+                    next_index += 1
+            elif isinstance(event, ResultMessage):
+                stop_reason = event.stop_reason
+                usage = event.usage
+                # ResultMessage is always last (TurnManager synthesizes
+                # it as the terminal event), so we break after emitting
+                # the envelope close.
+                break
+            # UserMessage (tool_result records) and SystemMessage
+            # (turn_duration heartbeats) carry no content for the
+            # /v1/messages caller — skip silently.
+
+        yield build_message_delta(
+            stop_reason=_map_stop_reason(stop_reason),
+            usage=_normalize_usage(usage),
+        )
+        yield build_message_stop()
+
+
+def _emit_block(
+    block: TextBlock | ThinkingBlock | ToolUseBlock | Any, index: int
+) -> Iterable[MessageStreamEvent]:
+    """Render one ``claude-code`` content block as Anthropic stream events.
+
+    ``ToolResultBlock`` would arrive only on user-role records — we
+    don't emit it here because :meth:`complete` skips ``UserMessage``.
+    """
+    if isinstance(block, TextBlock):
+        return (
+            build_text_block_start(index),
+            build_text_delta(index, block.text),
+            build_content_block_stop(index),
+        )
+    if isinstance(block, ThinkingBlock):
+        return (
+            build_thinking_block_start(index),
+            build_thinking_delta(index, block.thinking),
+            build_signature_delta(index, block.signature),
+            build_content_block_stop(index),
+        )
+    if isinstance(block, ToolUseBlock):
+        partial = json.dumps(
+            block.input, separators=(",", ":"), ensure_ascii=False
+        )
+        return (
+            build_tool_use_block_start(index, tool_use_id=block.id, name=block.name),
+            build_input_json_delta(index, partial),
+            build_content_block_stop(index),
+        )
+    return ()
+
+
+def _normalize_usage(usage: Mapping[str, Any] | None) -> dict[str, int] | None:
+    """Coerce claude-code's ``usage`` dict to Anthropic ``MessageDeltaUsage`` shape.
+
+    claude-code copies whatever the JSONL ``usage`` record carried —
+    fields can be missing, strings, or ints. We pass through only the
+    fields ``MessageDeltaUsage`` knows about and discard the rest so an
+    odd ``cache_creation`` object structure doesn't fail pydantic
+    validation downstream.
+    """
+    if not usage:
+        return None
+    out: dict[str, int] = {}
+    for key in (
+        "input_tokens",
+        "output_tokens",
+        "cache_creation_input_tokens",
+        "cache_read_input_tokens",
+    ):
+        value = usage.get(key)
+        if isinstance(value, int):
+            out[key] = value
+    return out or None