# beaver-gateway/src/beaver_gateway/backends/claude_code.py

"""Claude Code backend adapter.

One :class:`ClaudeCodeBackendAdapter` per :class:`ClaudeAgent`. The
underlying :class:`claude_code_api.ClaudeCodeBackend` bakes ``cwd`` /
``model`` / ``system_prompt`` / MCP wiring into a single
:class:`~claude_code_api.BackendOptions` at construction time, so a
single backend instance is conceptually bound to one agent (different
agents would mean different cwds / system prompts / exposed MCPs and
thus different live-session pools).

Per :meth:`complete` we:

* hand the full Anthropic-style ``messages`` list to
  ``ClaudeCodeBackend.complete`` — it does its own fingerprint-based
  session lookup, so we never need to track sessions ourselves;
* re-emit each ``AssistantMessage`` as ``content_block_start`` +
  one delta + ``content_block_stop`` per content block, with
  monotonically increasing indices spanning the entire turn (one
  ``message_start`` … ``message_stop`` envelope per ``complete`` call);
* close the envelope on the synthesized ``ResultMessage``.

The per-request ``system`` parameter is intentionally **ignored** —
``BackendOptions.system_prompt`` is fixed at session-spawn time, and the
agent's ``system_prompt`` is the canonical identity of the agent.
"""

from __future__ import annotations

import json
import uuid
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Self

from claude_code_api import (
    AssistantMessage,
    BackendOptions,
    ClaudeCodeBackend,
    ResultMessage,
    TextBlock,
    ThinkingBlock,
    ToolUseBlock,
    synthesize_turn_messages,
)

from beaver_gateway.agents.claude import ClaudeAgent
from beaver_gateway.core.events import (
    StopReason,
    build_content_block_stop,
    build_input_json_delta,
    build_message_delta,
    build_message_start,
    build_message_stop,
    build_signature_delta,
    build_text_block_start,
    build_text_delta,
    build_thinking_block_start,
    build_thinking_delta,
    build_tool_use_block_start,
)

if TYPE_CHECKING:
    from collections.abc import AsyncIterator, Iterable, Mapping

    from anthropic.types import MessageParam

    from beaver_gateway.agents.base import BaseAgent
    from beaver_gateway.core.events import MessageStreamEvent


__all__ = ["ClaudeCodeBackendAdapter", "TurnCapture"]


@dataclass
class TurnCapture:
    """Out-of-band collector for what a single turn produced.

    Hand an instance to ``ClaudeCodeBackendAdapter.complete(capture=...)``.
    Once the stream has run to completion, :attr:`synthesized_messages`
    contains the whole assistant↔tool-result cycle as returned by
    :func:`claude_code_api.synthesize_turn_messages` — the canonical
    Anthropic-shape messages under which claude-code-api stashed the
    live session. The markdown frontend persists that list as its
    conversation history, so the prefix fingerprint of the following
    turn resolves to the same session.

    Backends other than this one (anthropic, raycast) do not consume
    the kwarg: it ends up in their ``**options`` and is dropped without
    complaint.
    """

    # Starts out as a fresh empty list for every instance; replaced by
    # the adapter after the backend stream has been fully consumed.
    synthesized_messages: list[dict[str, Any]] = field(default_factory=list)


# Stop reasons that are forwarded to the client unchanged. Every entry
# maps to itself; the table exists so that ``_map_stop_reason`` can
# collapse any value that is *not* listed here to ``"end_turn"``.
_CLAUDE_TO_ANTHROPIC_STOP: dict[str, StopReason] = {
    "end_turn": "end_turn",
    "tool_use": "tool_use",
    "max_tokens": "max_tokens",
    "stop_sequence": "stop_sequence",
    "refusal": "refusal",
}


def _map_stop_reason(raw: str | None) -> StopReason:
    """Translate a claude-code stop reason into Anthropic's vocabulary.

    A value listed in ``_CLAUDE_TO_ANTHROPIC_STOP`` is returned as
    mapped there. ``None`` and any unrecognized value become
    ``end_turn``, so the client observes an ordinary finish instead of
    a wire-format error.
    """
    if raw is not None and raw in _CLAUDE_TO_ANTHROPIC_STOP:
        return _CLAUDE_TO_ANTHROPIC_STOP[raw]
    return "end_turn"


def _build_mcp_servers(
    agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str]
) -> dict[str, dict[str, Any]] | None:
    """Turn ``agent.expose_mcps`` into a ``BackendOptions.mcp_servers`` value.

    Every exposed MCP becomes a streamable-HTTP entry pointing at the
    gateway's internal aggregator (see
    :mod:`beaver_gateway.mcp.internal_app`). An agent that exposes
    nothing yields ``None``, which keeps claude-code from materializing
    an ``--mcp-config`` file.

    Raises :class:`ValueError` for the first exposed MCP whose name has
    no URL (or a ``None`` URL) in *mcp_internal_urls*.
    """
    exposed = agent.expose_mcps
    if not exposed:
        return None

    def _entry(mcp_name: str) -> dict[str, Any]:
        # Resolve one MCP name to its server stanza, or fail loudly.
        endpoint = mcp_internal_urls.get(mcp_name)
        if endpoint is not None:
            return {"type": "http", "url": endpoint}
        msg = (
            f"agent {agent.name!r} exposes MCP {mcp_name!r} "
            "but no internal URL is registered for it"
        )
        raise ValueError(msg)

    return {mcp.name: _entry(mcp.name) for mcp in exposed}


def _build_backend_options(
    agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str]
) -> BackendOptions:
    """Assemble the :class:`BackendOptions` for one agent.

    Fields decided by the agent itself:

    * ``cwd`` and ``model`` are read straight off the agent (a falsy
      ``model`` is sent as ``None``);
    * ``system_prompt`` is :attr:`BaseAgent.system_prompt`, unmodified —
      the wire-level ``--system-prompt`` (~8.6k tokens lighter than
      ``--append-system-prompt``, since claude-code's persona/planning
      conventions and dynamic sections drop out while tool schemas
      still travel via the API ``tools=[]`` channel);
    * ``append_system_prompt`` is
      :attr:`ClaudeCodeOptions.append_system_prompt`, normally ``None``.
      Setting it re-attaches claude-code's built-in prompt *and* this
      delta — opt-in for "claude as a real coding session";
    * ``allowed_tools`` follows the PLAN: a declared native-tool list
      is used as the restriction *plus* one ``mcp__<name>`` entry per
      exposed MCP so MCP tools stay reachable; with no native list the
      tuple stays empty (= everything claude-code allows by default);
    * ``mcp_servers`` is whatever :func:`_build_mcp_servers` returns.

    All remaining knobs are forwarded unchanged from
    :attr:`ClaudeAgent.options`. The gateway's default overrides
    (``wait_for_turn_duration=True``,
    ``dangerously_skip_permissions=True``) are defined on
    :class:`ClaudeCodeOptions` rather than here, so an explicitly
    constructed ``ClaudeCodeOptions(...)`` picks up the same defaults
    instead of whatever claude-code-api ships.
    """
    native_tools = agent.available_native_tools
    if native_tools:
        allowed_tools: tuple[str, ...] = (
            *native_tools,
            *(f"mcp__{em.name}" for em in agent.expose_mcps),
        )
    else:
        allowed_tools = ()

    mcp_servers = _build_mcp_servers(agent, mcp_internal_urls)
    tuning = agent.options

    return BackendOptions(
        # --- agent-primary fields -------------------------------------
        cwd=agent.cwd,
        model=agent.model or None,
        system_prompt=agent.system_prompt,
        allowed_tools=allowed_tools,
        mcp_servers=mcp_servers,
        # --- prompt / permission knobs from ClaudeCodeOptions ---------
        append_system_prompt=tuning.append_system_prompt,
        disallowed_tools=tuning.disallowed_tools,
        permission_mode=tuning.permission_mode,
        dangerously_skip_permissions=tuning.dangerously_skip_permissions,
        # --- process / CLI pass-through -------------------------------
        effort=tuning.effort,
        add_dir=tuning.add_dir,
        settings=tuning.settings,
        extra_args=tuning.extra_args,
        extra_env=tuning.extra_env,
        preserve_provider_env=tuning.preserve_provider_env,
        # --- session / turn behaviour ---------------------------------
        history_injection_mode=tuning.history_injection_mode,
        wait_for_turn_duration=tuning.wait_for_turn_duration,
        include_meta_user=tuning.include_meta_user,
        # --- timing ---------------------------------------------------
        startup_delay=tuning.startup_delay,
        file_wait_timeout=tuning.file_wait_timeout,
        turn_duration_timeout=tuning.turn_duration_timeout,
    )


class ClaudeCodeBackendAdapter:
    """One ``claude-code-api`` backend bound to a single :class:`ClaudeAgent`.

    Owns the underlying :class:`ClaudeCodeBackend`'s lifecycle through
    the async-context-manager protocol so :mod:`beaver_gateway.cli` can
    park it in its ``AsyncExitStack``.
    """

    def __init__(
        self, *, agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str]
    ) -> None:
        """Create the backend for *agent*.

        *mcp_internal_urls* maps MCP names to their internal URLs; it
        is consulted while rendering the agent's exposed MCPs, and a
        :class:`ValueError` propagates from there when an exposed MCP
        has no URL registered.
        """
        self._agent = agent
        self._backend = ClaudeCodeBackend(
            _build_backend_options(agent, mcp_internal_urls)
        )

    @property
    def agent(self) -> ClaudeAgent:
        """The agent this adapter was constructed for."""
        return self._agent

    @property
    def live_session_count(self) -> int:
        """Pass-through of the backend's ``live_session_count``."""
        return self._backend.live_session_count

    async def __aenter__(self) -> Self:
        """Enter the backend's async context and return this adapter."""
        await self._backend.__aenter__()
        return self

    async def __aexit__(self, exc_type: object, exc: object, tb: object) -> None:
        """Forward the exit to the backend's async context."""
        await self._backend.__aexit__(exc_type, exc, tb)

    async def aclose(self) -> None:
        """Close the underlying backend."""
        await self._backend.aclose()

    async def complete(
        self,
        *,
        agent: BaseAgent,
        messages: Iterable[MessageParam],
        system: str | None = None,  # noqa: ARG002 — see module docstring
        capture: TurnCapture | None = None,
        **options: Any,  # noqa: ARG002 — no per-request knobs for claude-code yet
    ) -> AsyncIterator[MessageStreamEvent]:
        """Run one turn and stream it as Anthropic-style events.

        Yields a single ``message_start`` … ``message_stop`` envelope.
        In between, every rendered content block of every
        ``AssistantMessage`` is emitted under consecutive indices
        starting at 0. Blocks that :func:`_emit_block` does not render
        are skipped and do **not** consume an index, so indices on the
        wire stay gap-free.

        *system* and ``**options`` are accepted for interface
        compatibility and ignored. When *capture* is given, its
        ``synthesized_messages`` is set from
        :func:`synthesize_turn_messages` after the backend stream has
        been fully consumed.

        Because this is an async generator, the checks below run on
        first iteration: :class:`TypeError` if *agent* is not a
        :class:`ClaudeAgent`, :class:`ValueError` if its name differs
        from the bound agent's name.
        """
        if not isinstance(agent, ClaudeAgent):
            msg = (
                "ClaudeCodeBackendAdapter requires ClaudeAgent, "
                f"got {type(agent).__name__}"
            )
            raise TypeError(msg)
        if agent.name != self._agent.name:
            # Adapter is per-agent; routing a different agent through it
            # would mean a different cwd / system_prompt / MCP set than
            # the live-session pool was spawned with.
            msg = (
                f"ClaudeCodeBackendAdapter bound to {self._agent.name!r} "
                f"got request for {agent.name!r}"
            )
            raise ValueError(msg)

        message_id = f"msg_{uuid.uuid4().hex}"
        yield build_message_start(message_id=message_id, model=agent.model)

        next_index = 0
        stop_reason: str | None = None
        usage: Mapping[str, Any] | None = None
        # We keep raw events so we can hand them to
        # ``synthesize_turn_messages`` after the stream closes — the
        # markdown frontend stores the result in its conversation
        # history so the next turn's prefix matches the backend's
        # session-pool fingerprint. UserMessage (tool_result) events
        # are silently discarded from the wire but kept here.
        raw_events: list[Any] = []

        async for event in self._backend.complete(list(messages)):
            raw_events.append(event)
            if isinstance(event, AssistantMessage):
                for block in event.content:
                    # Materialize before yielding: a block type that
                    # ``_emit_block`` does not render produces nothing
                    # and must not burn an index. The streaming
                    # protocol ties each index to the block's position
                    # in the final content array, so a gap would make
                    # later blocks point past what the client has
                    # accumulated.
                    block_events = tuple(_emit_block(block, next_index))
                    if not block_events:
                        continue
                    for ev in block_events:
                        yield ev
                    next_index += 1
            elif isinstance(event, ResultMessage):
                # ResultMessage is the terminal event from TurnManager
                # — we capture its stop_reason / usage for the envelope
                # below. We DO NOT break here: an early break would
                # raise GeneratorExit inside claude-code-api's
                # ``complete`` coroutine before it gets a chance to
                # stash the live session under the post-turn
                # fingerprint, so every continuation would miss the
                # cache and reseed. Let the inner generator exit
                # naturally instead.
                stop_reason = event.stop_reason
                usage = event.usage
            # UserMessage (tool_result records) and SystemMessage
            # (turn_duration heartbeats) carry no content for the
            # /v1/messages caller — skip silently on the wire, but they
            # ARE retained in ``raw_events`` for synthesis below.

        if capture is not None:
            capture.synthesized_messages = synthesize_turn_messages(raw_events)

        yield build_message_delta(
            stop_reason=_map_stop_reason(stop_reason), usage=_normalize_usage(usage)
        )
        yield build_message_stop()


def _emit_block(
    block: TextBlock | ThinkingBlock | ToolUseBlock | Any, index: int
) -> Iterable[MessageStreamEvent]:
    """Translate a single ``claude-code`` content block into stream events.

    Text, thinking and tool-use blocks each become a start event, their
    delta(s) and a ``content_block_stop``, all carrying *index*. Any
    other block type produces an empty tuple. ``ToolResultBlock`` is
    such a type: it would only show up on user-role records, and
    :meth:`complete` does not render ``UserMessage`` events.
    """
    if isinstance(block, TextBlock):
        opener = build_text_block_start(index)
        deltas: tuple[Any, ...] = (build_text_delta(index, block.text),)
    elif isinstance(block, ThinkingBlock):
        opener = build_thinking_block_start(index)
        deltas = (
            build_thinking_delta(index, block.thinking),
            build_signature_delta(index, block.signature),
        )
    elif isinstance(block, ToolUseBlock):
        # The complete tool input goes out as one compact JSON delta.
        encoded_input = json.dumps(
            block.input, separators=(",", ":"), ensure_ascii=False
        )
        opener = build_tool_use_block_start(
            index, tool_use_id=block.id, name=block.name
        )
        deltas = (build_input_json_delta(index, encoded_input),)
    else:
        return ()

    # Every rendered block shares the same closing event.
    return (opener, *deltas, build_content_block_stop(index))


def _normalize_usage(usage: Mapping[str, Any] | None) -> dict[str, int] | None:
    """Coerce claude-code's ``usage`` dict to Anthropic ``MessageDeltaUsage`` shape.

    claude-code copies whatever the JSONL ``usage`` record carried —
    fields can be missing, strings, or ints. We pass through only the
    fields ``MessageDeltaUsage`` knows about and discard the rest so an
    odd ``cache_creation`` object structure doesn't fail pydantic
    validation downstream.
    """
    if not usage:
        return None
    out: dict[str, int] = {}
    for key in (
        "input_tokens",
        "output_tokens",
        "cache_creation_input_tokens",
        "cache_read_input_tokens",
    ):
        value = usage.get(key)
        if isinstance(value, int):
            out[key] = value
    return out or None