feat: implement claude code backend

2026-05-19 23:11:07 +02:00
parent 757065f21c
commit 99a30f256d
8 changed files with 797 additions and 7 deletions
@@ -4,20 +4,65 @@
 # ClaudeAgent, RaycastAgent, McpServer, ExposedMcp, Gateway already
 # bound — so importing them is optional. We import explicitly here so
 # IDEs and type-checkers see real symbols instead of free variables.
 import tempfile
 from datetime import date
 from pathlib import Path
 from beaver_gateway.agents.base import ExposedMcp
 from beaver_gateway.agents.claude import ClaudeAgent
 from beaver_gateway.agents.raycast import RaycastAgent, RemoteTool, UserPreferences
 from beaver_gateway.core.registry import Gateway
 from beaver_gateway.frontends.anthropic import AnthropicMessagesFrontend
 from beaver_gateway.mcp.types import McpServer
 def current_time() -> str:
    """Report the local wall-clock time as an ISO-8601 string.

    Deliberately tiny: it is the demo tool registered under the ``time``
    ``python_tool`` namespace, used to confirm that the Phase 2.1
    internal MCP aggregator is reachable on
    ``http://127.0.0.1:<INTERNAL_MCP_PORT>/mcp/time``.

    Returns:
        The current time, timezone-aware in the machine's local zone,
        rendered with ``datetime.isoformat``.
    """
    from datetime import datetime

    # ``astimezone()`` with no argument attaches the system local zone,
    # so the rendered string carries an explicit UTC offset.
    local_now = datetime.now().astimezone()
    return local_now.isoformat()
 gateway = Gateway(
    agents=[
        # Phase 2.2 — ClaudeCodeBackendAdapter routes this agent's
        # ``/v1/messages`` calls through ``claude-code-api``. The
        # ``time`` MCP gets exposed as ``mcp__time__current_time`` to
        # the subscription claude session via
        # ``BackendOptions.mcp_servers`` pointing at the internal
        # aggregator on ``127.0.0.1:INTERNAL_MCP_PORT/mcp/time/``.
        #
        # Fresh empty tempdir (not a hardcoded ``/tmp``) for two
        # reasons: claude-code-api derives the JSONL project-key from
        # ``cwd``, but claude itself writes the JSONL using the cwd's
        # realpath — on macOS ``/tmp`` and ``/var/folders/...`` are
        # both ``/private/...`` symlinks, so unresolved cwds make
        # ``JsonlWatcher`` time out waiting on the wrong path. The
        # explicit ``.resolve()`` collapses the symlink before claude
        # ever sees the dir, and ``mkdtemp`` guarantees the directory
        # is empty so claude does not pick up leftover files.
        ClaudeAgent(
            name="stub",
            model="claude-sonnet-4-6",
-            system_prompt="You are a stub agent used to validate the Phase 0 skeleton.",
+            # ``system_prompt`` is passed to claude as ``--system-prompt``
-            cwd="/tmp",
+            # — it replaces claude-code's built-in textual prompt rather
            # than being appended to it (tool schemas still reach the
            # model through the API ``tools=[]`` field). To layer text
            # on top of the built-in prompt instead, or to reach the
            # other ``BackendOptions`` knobs (timeouts, extra_args,
            # history mode, etc.), import ``ClaudeCodeOptions`` and pass
            # ``options=ClaudeCodeOptions(append_system_prompt=..., ...)``.
            system_prompt=(
                "You are a stub agent used to validate the Phase 0 skeleton.\n"
                "If asked the current time, call the `current_time`"
                " MCP tool instead of guessing."
            ),
            cwd=Path(tempfile.mkdtemp(prefix="beaver-stub-cwd-")).resolve(),
            expose_mcps=(ExposedMcp(name="time"),),
        ),
        # Phase 1.2 — a RaycastAgent the AnthropicMessagesFrontend will
        # route via RaycastBackend. Phase 1.5 added the per-agent knobs
@@ -45,7 +90,14 @@ gateway = Gateway(
            ),
        ),
    ],
-    mcps=[],
+    mcps=[
        # Phase 2.1 — bundle of plain Python callables exposed as one
        # FastMCP namespace. The internal aggregator mounts it under
        # ``/mcp/time`` on ``127.0.0.1:INTERNAL_MCP_PORT``; Phase 2.2's
        # ClaudeCode adapter will forward that URL into
        # ``BackendOptions.mcp_servers``.
        McpServer.python_tool(name="time", tools=[current_time]),
    ],
    frontends=[
        # Phase 1.4 — expose the agents as `model=<name>` on an
        # Anthropic-compatible Messages endpoint. Auth comes from
@@ -3,14 +3,169 @@
 Notice the absence of a ``streaming`` field — claude-code does not emit
 token-level deltas, and that fact is encoded in the type, not in a
 runtime branch.
 ``BaseAgent.system_prompt`` maps onto the claude CLI's
 ``--system-prompt`` — i.e. it really *is* the agent's system prompt,
 not "added on top of claude-code's giant built-in". The additive slot
 ``--append-system-prompt`` is exposed via
 :attr:`ClaudeCodeOptions.append_system_prompt` for the rare case
 when the user wants claude-code's planning conventions / dynamic
 sections *and* a delta on top. Difference, measured empirically:
 * tools (names, JSON schemas, embedded guidance like Bash's "prefer
  Read over cat/head/tail") survive both flags — they ride the
  Anthropic API ``tools=[]`` field, not the system prompt text;
 * ``--system-prompt`` drops ~8.6k tokens of claude-code's *textual*
  baseline — agent persona, multi-step-work conventions ("use
  TaskCreate proactively", etc.), and the dynamic per-machine sections
  (cwd, env info, git status, memory paths — see the
  ``--exclude-dynamic-system-prompt-sections`` flag note in
  ``claude --help``: "Only applies with the default system prompt
  (ignored with --system-prompt)").
 We pick override as the default because it preserves the principle of
 least surprise: the ``system_prompt=...`` you wrote on the agent is
 what claude actually receives. If you need claude-code's full
 planning/dynamic-context behaviour on top of your prompt, opt in via
 ``options=ClaudeCodeOptions(append_system_prompt=...)`` and leave
 ``system_prompt`` for your agent's identity (or vice versa — set
 ``system_prompt=""``-ish and put everything in ``append_*``).
 Per-agent passthrough of ``claude_code_api.BackendOptions`` lives in
 :class:`ClaudeCodeOptions`, attached to :class:`ClaudeAgent` as
 ``options``. Every tunable knob ``BackendOptions`` supports — except
 the ones derived from the agent's own primary surface
 (``cwd`` / ``model`` / ``system_prompt`` / ``available_native_tools`` →
 ``allowed_tools`` / ``expose_mcps`` → ``mcp_servers``) — is exposed
 there. Defaults match upstream except where correctness demands
 otherwise (``wait_for_turn_duration=True``,
 ``dangerously_skip_permissions=True``).
 """
 from __future__ import annotations
-from pathlib import Path  # noqa: TC003 — runtime use by pydantic
+from collections.abc import Mapping  # noqa: TC003 — pydantic runtime
 from pathlib import Path  # noqa: TC003 — pydantic runtime
 from typing import Literal
 from pydantic import BaseModel, ConfigDict, Field
 from beaver_gateway.agents.base import BaseAgent
 # Closed set of history-seeding strategies accepted by
 # ``ClaudeCodeOptions.history_injection_mode``.
 # NOTE(review): the note below says "import cycle", but the reason it
 # actually gives is an optional dependency (configs loading without
 # ``claude-code-api`` installed) — consider rewording.
 HistoryInjectionMode = Literal["native_jsonl", "concat_message"]
 """Mirrors ``claude_code_api.HistoryInjectionMode`` to avoid an import
 cycle on the user-facing path (configs may be loaded without
 ``claude-code-api`` installed, e.g. ``--extra prod`` minus claude)."""
 class ClaudeCodeOptions(BaseModel):
    """Per-agent passthrough for ``claude_code_api.BackendOptions``.
    Mirrors every field of ``BackendOptions`` except those that come
    from the agent's primary surface
    (``cwd`` / ``model`` / ``system_prompt`` /
    ``available_native_tools`` / ``expose_mcps``). Defaults match
    upstream, with two exceptions for correctness:
    * ``wait_for_turn_duration=True`` — without it, extended-thinking
      turns drop the text response because ``TurnManager`` returns on
      the first terminal assistant record (the thinking snapshot) and
      never reads the second one (the actual text). Phase 2.2 PROGRESS
      describes the incident in detail.
    * ``dangerously_skip_permissions=True`` — Beaver Gateway is a
      single-user trusted-config product; the user already wrote the
      ``config.py`` that spawns claude. Permission prompts on a
      headless backend mean stuck turns.
    Any field on :class:`claude_code_api.BackendOptions` we forward
    here lives on this model with the same name and type, so a future
    ``BackendOptions`` field addition is a one-liner here plus a
    one-liner in :mod:`backends.claude_code`.
    """
    # ``frozen=True``: instances are immutable once constructed, so one
    # options object can be shared safely between agents.
    # NOTE(review): ``arbitrary_types_allowed`` does not look required by
    # any field type declared below — confirm before removing.
    model_config = ConfigDict(frozen=True, arbitrary_types_allowed=True)
    append_system_prompt: str | None = None
    """Maps to claude CLI's ``--append-system-prompt``. Opting in
    re-attaches claude-code's full built-in prompt (persona, planning
    conventions like "use TaskCreate proactively", and the dynamic
    per-machine sections — cwd, env info, git status, memory paths)
    plus this string on top. By default we ship only
    :attr:`BaseAgent.system_prompt` via ``--system-prompt`` (~8.6k
    tokens lighter); use this when the agent should behave like a
    real claude-code coding session and your text is a delta on top
    of those built-ins. Tool schemas survive either way — they ride
    the Anthropic ``tools=[]`` channel, not the prompt text."""
    disallowed_tools: tuple[str, ...] = ()
    """Tool names that claude must refuse to call. Combines with the
    agent's ``available_native_tools`` allowlist — disallow wins."""
    # Plain ``str``: the value is not validated on this model; it is
    # handed to ``BackendOptions`` unchanged.
    permission_mode: str = "bypassPermissions"
    """``claude --permission-mode`` value. Only meaningful when
    ``dangerously_skip_permissions=False``; otherwise the CLI bypasses
    the prompt path entirely."""
    dangerously_skip_permissions: bool = True
    """Pass ``--dangerously-skip-permissions`` to claude. ``True`` by
    default — see the class docstring for why."""
    effort: str | None = None
    """``claude --effort`` value (typically ``low`` / ``medium`` /
    ``high``). Tunes reasoning effort budget for newer models."""
    add_dir: tuple[str, ...] = ()
    """Extra directories claude is allowed to read/edit beyond
    ``cwd``. Maps to repeated ``--add-dir`` flags."""
    settings: str | None = None
    """Path to a ``--settings`` JSON file claude should load (hooks,
    MCP servers, etc. that don't belong in our internal aggregator)."""
    extra_args: tuple[str, ...] = ()
    """Raw extra argv to append to the claude command. Escape hatch
    for flags we haven't surfaced as first-class options."""
    # ``default_factory=dict`` gives every instance its own empty mapping
    # instead of sharing one mutable default.
    extra_env: Mapping[str, str] = Field(default_factory=dict)
    """Additional environment variables for the spawned claude
    process. Layered on top of the gateway's own env after
    ``preserve_provider_env`` is applied."""
    preserve_provider_env: bool = False
    """When ``False`` (default), the spawn env strips
    ``ANTHROPIC_API_KEY`` / ``ANTHROPIC_AUTH_TOKEN`` /
    ``ANTHROPIC_BASE_URL`` so claude uses subscription auth instead of
    leaking through whatever the gateway process inherited."""
    history_injection_mode: HistoryInjectionMode = "native_jsonl"
    """How prior turns are seeded into a fresh session when no live
    session matches: ``native_jsonl`` writes a hand-crafted transcript
    and ``--resume``s; ``concat_message`` instead folds history into
    the first user prompt. ``native_jsonl`` is more faithful."""
    wait_for_turn_duration: bool = True
    """Keep reading JSONL until the ``turn_duration`` heartbeat
    arrives, instead of returning on the first terminal assistant
    record. ``True`` by default — see class docstring."""
    include_meta_user: bool = False
    """Surface claude's ``isMeta=True`` user records (local-command
    caveats) as ``UserMessage`` events. Off by default — they're not
    part of the real conversation."""
    startup_delay: float = 1.0
    """Seconds to wait after spawning the PTY before the first
    write — claude's TUI takes a beat to settle."""
    # NOTE(review): unit is not stated; presumably seconds, like
    # ``startup_delay`` — confirm against ``BackendOptions``.
    file_wait_timeout: float = 30.0
    """How long to wait for the session JSONL to appear after spawn.
    Failure here usually means a CLI auth / config problem."""
    # NOTE(review): unit is not stated; presumably seconds, like
    # ``startup_delay`` — confirm against ``BackendOptions``.
    turn_duration_timeout: float = 5.0
    """How long to wait for the ``turn_duration`` heartbeat once a
    terminal assistant has been seen. Bound on extra latency when
    ``wait_for_turn_duration=True``."""
 class ClaudeAgent(BaseAgent):
    """Agent backed by ``claude-code-api``.
@@ -25,3 +180,6 @@ class ClaudeAgent(BaseAgent):
    cwd: Path
    available_native_tools: tuple[str, ...] = ()
    options: ClaudeCodeOptions = Field(default_factory=ClaudeCodeOptions)
    """Per-agent passthrough for the underlying claude-code-api
    ``BackendOptions``. See :class:`ClaudeCodeOptions`."""
@@ -0,0 +1,327 @@
 """Claude Code backend adapter.
 One :class:`ClaudeCodeBackendAdapter` per :class:`ClaudeAgent`. The
 underlying :class:`claude_code_api.ClaudeCodeBackend` bakes ``cwd`` /
 ``model`` / ``system_prompt`` / MCP wiring into a single
 :class:`~claude_code_api.BackendOptions` at construction time, so a
 single backend instance is conceptually bound to one agent (different
 agents would mean different cwds / system prompts / exposed MCPs and
 thus different live-session pools).
 Per :meth:`complete` we:
 * hand the full Anthropic-style ``messages`` list to
  ``ClaudeCodeBackend.complete`` — it does its own fingerprint-based
  session lookup, so we never need to track sessions ourselves;
 * re-emit each ``AssistantMessage`` as ``content_block_start`` +
  one delta + ``content_block_stop`` per content block, with
  monotonically increasing indices spanning the entire turn (one
  ``message_start`` … ``message_stop`` envelope per ``complete`` call);
 * close the envelope on the synthesized ``ResultMessage``.
 The per-request ``system`` parameter is intentionally **ignored** —
 ``BackendOptions.system_prompt`` is fixed at session-spawn time, and the
 agent's ``system_prompt`` is the canonical identity of the agent.
 """
 from __future__ import annotations
 import json
 import uuid
 from typing import TYPE_CHECKING, Any, Self
 from claude_code_api import (
    AssistantMessage,
    BackendOptions,
    ClaudeCodeBackend,
    ResultMessage,
    TextBlock,
    ThinkingBlock,
    ToolUseBlock,
 )
 from beaver_gateway.agents.claude import ClaudeAgent
 from beaver_gateway.core.events import (
    StopReason,
    build_content_block_stop,
    build_input_json_delta,
    build_message_delta,
    build_message_start,
    build_message_stop,
    build_signature_delta,
    build_text_block_start,
    build_text_delta,
    build_thinking_block_start,
    build_thinking_delta,
    build_tool_use_block_start,
 )
 if TYPE_CHECKING:
    from collections.abc import AsyncIterator, Iterable, Mapping
    from anthropic.types import MessageParam
    from beaver_gateway.agents.base import BaseAgent
    from beaver_gateway.core.events import MessageStreamEvent
 __all__ = ["ClaudeCodeBackendAdapter"]
 # Stop reasons that are forwarded to the client unchanged. Every entry
 # maps to itself, so the table works as an allowlist: ``_map_stop_reason``
 # looks values up here and falls back to ``"end_turn"`` for anything
 # that is not listed.
 _CLAUDE_TO_ANTHROPIC_STOP: dict[str, StopReason] = {
    "end_turn": "end_turn",
    "tool_use": "tool_use",
    "max_tokens": "max_tokens",
    "stop_sequence": "stop_sequence",
    "refusal": "refusal",
 }
 def _map_stop_reason(raw: str | None) -> StopReason:
    """Translate a claude-code stop reason into an Anthropic ``StopReason``.

    Args:
        raw: The stop reason reported by claude-code, or ``None`` when
            none was reported.

    Returns:
        The matching entry of ``_CLAUDE_TO_ANTHROPIC_STOP``. A missing
        or unrecognised value becomes ``"end_turn"``, so the client sees
        a clean finish rather than a wire-format error.
    """
    fallback: StopReason = "end_turn"
    return fallback if raw is None else _CLAUDE_TO_ANTHROPIC_STOP.get(raw, fallback)
 def _build_mcp_servers(
    agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str]
 ) -> dict[str, dict[str, Any]] | None:
    """Turn ``agent.expose_mcps`` into a ``BackendOptions.mcp_servers`` value.

    Every exposed MCP becomes an ``{"type": "http", "url": ...}`` entry
    keyed by the MCP's name, with the URL taken from
    ``mcp_internal_urls`` (the gateway's internal aggregator, built by
    :mod:`beaver_gateway.mcp.internal_app`).

    Args:
        agent: The agent whose ``expose_mcps`` should be rendered.
        mcp_internal_urls: MCP name → internal URL.

    Returns:
        The server map, or ``None`` when the agent exposes nothing, so
        claude-code does not materialize an ``--mcp-config`` file.

    Raises:
        ValueError: An exposed MCP has no URL in ``mcp_internal_urls``.
    """
    exposed = agent.expose_mcps
    if not exposed:
        return None

    rendered: dict[str, dict[str, Any]] = {}
    for entry in exposed:
        if (endpoint := mcp_internal_urls.get(entry.name)) is None:
            raise ValueError(
                f"agent {agent.name!r} exposes MCP {entry.name!r} "
                "but no internal URL is registered for it"
            )
        rendered[entry.name] = {"type": "http", "url": endpoint}
    return rendered
 def _build_backend_options(
    agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str]
 ) -> BackendOptions:
    """Translate a :class:`ClaudeAgent` into its :class:`BackendOptions`.

    Fields derived from the agent itself:

    * ``cwd`` is forwarded as-is; ``model`` is forwarded too, with a
      falsy value turned into ``None``;
    * ``system_prompt`` is :attr:`BaseAgent.system_prompt`, verbatim;
    * ``allowed_tools`` — when the agent lists native tools, those
      names plus one ``mcp__<name>`` entry per exposed MCP (so MCP
      tools stay reachable); when it lists none, an empty tuple, which
      leaves the decision to claude-code's default;
    * ``mcp_servers`` comes from :func:`_build_mcp_servers`.

    Everything else — including ``append_system_prompt`` — is copied
    field-by-field from :attr:`ClaudeAgent.options`. The gateway's own
    default overrides live on :class:`ClaudeCodeOptions`, not here, so
    this function never invents a value.

    Args:
        agent: The agent to describe.
        mcp_internal_urls: MCP name → internal aggregator URL.

    Returns:
        The options object used to construct the agent's backend.

    Raises:
        ValueError: Propagated from :func:`_build_mcp_servers` when an
            exposed MCP has no registered URL.
    """
    native_tools = tuple(agent.available_native_tools)
    if native_tools:
        allowed_tools: tuple[str, ...] = native_tools + tuple(
            f"mcp__{em.name}" for em in agent.expose_mcps
        )
    else:
        allowed_tools = ()

    # Knobs that share name and meaning between ``ClaudeCodeOptions`` and
    # ``BackendOptions`` are copied by name.
    knobs = agent.options
    passthrough = {
        field_name: getattr(knobs, field_name)
        for field_name in (
            "append_system_prompt",
            "disallowed_tools",
            "permission_mode",
            "dangerously_skip_permissions",
            "effort",
            "add_dir",
            "settings",
            "extra_args",
            "extra_env",
            "preserve_provider_env",
            "history_injection_mode",
            "wait_for_turn_duration",
            "include_meta_user",
            "startup_delay",
            "file_wait_timeout",
            "turn_duration_timeout",
        )
    }
    return BackendOptions(
        cwd=agent.cwd,
        model=agent.model or None,
        system_prompt=agent.system_prompt,
        allowed_tools=allowed_tools,
        mcp_servers=_build_mcp_servers(agent, mcp_internal_urls),
        **passthrough,
    )
 class ClaudeCodeBackendAdapter:
    """One ``claude-code-api`` backend bound to a single :class:`ClaudeAgent`.

    Owns the underlying :class:`ClaudeCodeBackend`'s lifecycle through
    the async-context-manager protocol so :mod:`beaver_gateway.cli` can
    park it in its ``AsyncExitStack``.
    """

    def __init__(
        self,
        *,
        agent: ClaudeAgent,
        mcp_internal_urls: Mapping[str, str],
    ) -> None:
        """Build the backend for ``agent``.

        Args:
            agent: The agent this adapter serves for its whole lifetime.
            mcp_internal_urls: MCP name → internal aggregator URL, used
                to render the agent's exposed MCPs.

        Raises:
            ValueError: Propagated from option building when an exposed
                MCP has no registered URL.
        """
        self._agent = agent
        self._backend = ClaudeCodeBackend(
            _build_backend_options(agent, mcp_internal_urls)
        )

    @property
    def agent(self) -> ClaudeAgent:
        """The agent this adapter was constructed for."""
        return self._agent

    @property
    def live_session_count(self) -> int:
        """Live-session count as reported by the wrapped backend."""
        return self._backend.live_session_count

    async def __aenter__(self) -> Self:
        """Enter the wrapped backend's context and return this adapter."""
        await self._backend.__aenter__()
        return self

    async def __aexit__(
        self, exc_type: object, exc: object, tb: object
    ) -> None:
        """Exit the wrapped backend's context, forwarding exception info."""
        await self._backend.__aexit__(exc_type, exc, tb)

    async def aclose(self) -> None:
        """Close the wrapped backend."""
        await self._backend.aclose()

    async def complete(
        self,
        *,
        agent: BaseAgent,
        messages: Iterable[MessageParam],
        system: str | None = None,  # noqa: ARG002 — see module docstring
        **options: Any,  # noqa: ARG002 — no per-request knobs for claude-code yet
    ) -> AsyncIterator[MessageStreamEvent]:
        """Stream one turn as a ``message_start`` … ``message_stop`` envelope.

        Each content block of every ``AssistantMessage`` is re-emitted
        as block-start / delta(s) / block-stop events. Block indices are
        contiguous across the whole turn: a block that renders to no
        events does not consume an index.

        Args:
            agent: Must be the :class:`ClaudeAgent` this adapter is
                bound to (matched by name).
            messages: Anthropic-style conversation, handed to the
                backend as a list.
            system: Accepted for interface compatibility and ignored.
            **options: Accepted for interface compatibility and ignored.

        Yields:
            The stream events of the turn, closed by a message delta
            (stop reason and usage) and a message stop.

        Raises:
            TypeError: ``agent`` is not a :class:`ClaudeAgent`.
            ValueError: ``agent`` is a different agent than the one this
                adapter is bound to.
        """
        if not isinstance(agent, ClaudeAgent):
            msg = (
                "ClaudeCodeBackendAdapter requires ClaudeAgent, "
                f"got {type(agent).__name__}"
            )
            raise TypeError(msg)
        if agent.name != self._agent.name:
            # Adapter is per-agent; routing a different agent through it
            # would mean a different cwd / system_prompt / MCP set than
            # the live-session pool was spawned with.
            msg = (
                f"ClaudeCodeBackendAdapter bound to {self._agent.name!r} "
                f"got request for {agent.name!r}"
            )
            raise ValueError(msg)

        message_id = f"msg_{uuid.uuid4().hex}"
        yield build_message_start(message_id=message_id, model=agent.model)

        next_index = 0
        stop_reason: str | None = None
        usage: Mapping[str, Any] | None = None
        async for event in self._backend.complete(list(messages)):
            if isinstance(event, AssistantMessage):
                for block in event.content:
                    block_events = tuple(_emit_block(block, next_index))
                    if not block_events:
                        # Nothing was rendered for this block type. It must
                        # not consume an index: clients treat ``index`` as
                        # the block's position in the final ``content``
                        # array, so a gap would misplace later blocks.
                        continue
                    for ev in block_events:
                        yield ev
                    next_index += 1
            elif isinstance(event, ResultMessage):
                stop_reason = event.stop_reason
                usage = event.usage
                # ResultMessage is always last (TurnManager synthesizes
                # it as the terminal event), so we break after emitting
                # the envelope close.
                break
            # UserMessage (tool_result records) and SystemMessage
            # (turn_duration heartbeats) carry no content for the
            # /v1/messages caller — skip silently.

        yield build_message_delta(
            stop_reason=_map_stop_reason(stop_reason),
            usage=_normalize_usage(usage),
        )
        yield build_message_stop()
 def _emit_block(
    block: TextBlock | ThinkingBlock | ToolUseBlock | Any, index: int
 ) -> Iterable[MessageStreamEvent]:
    """Render one ``claude-code`` content block as Anthropic stream events.
    ``ToolResultBlock`` would arrive only on user-role records — we
    don't emit it here because :meth:`complete` skips ``UserMessage``.
    """
    if isinstance(block, TextBlock):
        return (
            build_text_block_start(index),
            build_text_delta(index, block.text),
            build_content_block_stop(index),
        )
    if isinstance(block, ThinkingBlock):
        return (
            build_thinking_block_start(index),
            build_thinking_delta(index, block.thinking),
            build_signature_delta(index, block.signature),
            build_content_block_stop(index),
        )
    if isinstance(block, ToolUseBlock):
        partial = json.dumps(
            block.input, separators=(",", ":"), ensure_ascii=False
        )
        return (
            build_tool_use_block_start(index, tool_use_id=block.id, name=block.name),
            build_input_json_delta(index, partial),
            build_content_block_stop(index),
        )
    return ()
 def _normalize_usage(usage: Mapping[str, Any] | None) -> dict[str, int] | None:
    """Reduce claude-code's ``usage`` dict to Anthropic ``MessageDeltaUsage`` shape.

    claude-code copies whatever the JSONL ``usage`` record carried —
    fields can be missing, strings, or ints. Only the token counters
    ``MessageDeltaUsage`` knows about are kept, and only when they are
    real integers, so an odd ``cache_creation`` object structure (or any
    other non-integer value) doesn't fail pydantic validation downstream.

    Args:
        usage: The raw usage mapping, or ``None``.

    Returns:
        A dict holding the recognised integer counters, or ``None`` when
        ``usage`` is empty / ``None`` or no counter survived filtering.
    """
    if not usage:
        return None
    out: dict[str, int] = {}
    for key in (
        "input_tokens",
        "output_tokens",
        "cache_creation_input_tokens",
        "cache_read_input_tokens",
    ):
        value = usage.get(key)
        # ``bool`` is a subclass of ``int``; a stray ``True``/``False`` is
        # not a token count and must not be forwarded as one.
        if isinstance(value, int) and not isinstance(value, bool):
            out[key] = value
    return out or None
@@ -6,6 +6,12 @@ each frontend with a ``GatewayRuntime``, and run all
 ``frontend.serve()`` coroutines concurrently. Without any frontends we
 still print the Phase 0 DoD line and exit cleanly so the bare skeleton
 keeps working.
 Phase 2.1 — when the user declares any ``McpServer``, we additionally
 build the internal MCP aggregator app and run it on
 ``127.0.0.1:INTERNAL_MCP_PORT`` as another task inside the same
 TaskGroup. URLs are surfaced through ``GatewayRuntime.mcp_internal_urls``
 so Phase 2.2's ClaudeCode adapter can find them.
 """
 from __future__ import annotations
@@ -15,20 +21,27 @@ import logging
 from contextlib import AsyncExitStack
 from typing import TYPE_CHECKING
 import uvicorn
 import uvloop
 from raycast_api import Client as RaycastClient
 from raycast_api.config import Config as RaycastConfig
 from beaver_gateway import config_loader
 from beaver_gateway.agents.claude import ClaudeAgent
 from beaver_gateway.agents.raycast import RaycastAgent
 from beaver_gateway.backends.claude_code import ClaudeCodeBackendAdapter
 from beaver_gateway.backends.raycast import RaycastBackend
 from beaver_gateway.core.auth import TokenStore
 from beaver_gateway.core.registry import AgentRegistry, McpRegistry
 from beaver_gateway.frontends.base import GatewayRuntime
 from beaver_gateway.mcp.internal_app import build_internal_app
 from beaver_gateway.settings import Settings
 if TYPE_CHECKING:
    from starlette.applications import Starlette
    from beaver_gateway.backends.base import Backend
    from beaver_gateway.mcp.types import McpServerT
 _log = logging.getLogger("beaver_gateway.cli")
@@ -51,8 +64,18 @@ async def _async_main() -> None:
    token_store = TokenStore.from_env(settings.bootstrap_tokens)
    async with AsyncExitStack() as stack:
        # Internal MCP URLs must exist before we construct any
        # ClaudeCodeBackendAdapter — adapters bake the URLs into their
        # ``BackendOptions.mcp_servers`` at construction time.
        internal_app, internal_urls = _build_internal_mcp(
            gateway.mcps, settings=settings
        )
        backends: dict[str, Backend] = await _build_backends(
-            settings=settings, agents=agents, stack=stack
+            settings=settings,
            agents=agents,
            stack=stack,
            mcp_internal_urls=internal_urls,
        )
        runtime = GatewayRuntime(
@@ -60,6 +83,7 @@ async def _async_main() -> None:
            mcps=mcps,
            backends=backends,
            token_store=token_store,
            mcp_internal_urls=internal_urls,
        )
        for fe in gateway.frontends:
@@ -80,18 +104,66 @@ async def _async_main() -> None:
        )
        if not gateway.frontends:
            # No external listeners → nothing to serve. The internal
            # MCP app has no consumer on its own, so we skip running
            # it in this path and exit cleanly (Phase 0 DoD).
            return
        async with asyncio.TaskGroup() as tg:
            if internal_app is not None:
                tg.create_task(
                    _serve_internal_mcp(internal_app, settings=settings)
                )
            for fe in gateway.frontends:
                tg.create_task(fe.serve())
 def _build_internal_mcp(
    mcps: list[McpServerT], *, settings: Settings
 ) -> tuple[Starlette | None, dict[str, str]]:
    """Create the internal aggregator app and its name → URL map.

    Args:
        mcps: The MCP servers declared in the user's config.
        settings: Supplies ``internal_mcp_port`` for the aggregator.

    Returns:
        ``(app, urls)`` from ``build_internal_app``, bound to
        ``127.0.0.1``. With no MCPs declared the result is
        ``(None, {})``: the URL map is still handed out, while the
        missing app tells the caller to skip the uvicorn task.
    """
    if mcps:
        aggregator, url_map = build_internal_app(
            mcps, host="127.0.0.1", port=settings.internal_mcp_port
        )
        return aggregator, url_map
    return None, {}
 async def _serve_internal_mcp(app: Starlette, *, settings: Settings) -> None:
    """Serve the internal MCP aggregator on the loopback interface.

    The server listens on ``127.0.0.1`` at ``settings.internal_mcp_port``
    and runs until ``uvicorn.Server.serve`` returns. uvicorn's own log
    level is ``warning`` so per-request noise stays out of the gateway's
    logs; the listening URL is logged once, at ``info``, before serving.

    Args:
        app: The aggregator ASGI application.
        settings: Supplies ``internal_mcp_port``.
    """
    port = settings.internal_mcp_port
    server = uvicorn.Server(
        uvicorn.Config(
            app,
            host="127.0.0.1",
            port=port,
            log_level="warning",
            loop="uvloop",
        )
    )
    _log.info(
        "internal MCP aggregator on http://127.0.0.1:%d/mcp/<name>",
        port,
    )
    await server.serve()
 async def _build_backends(
    *,
    settings: Settings,
    agents: AgentRegistry,
    stack: AsyncExitStack,
    mcp_internal_urls: dict[str, str],
 ) -> dict[str, Backend]:
    """Construct one backend per agent name.
@@ -99,6 +171,12 @@ async def _build_backends(
    (bearer + device-id are process-wide), so we open it lazily — only
    when at least one ``RaycastAgent`` is present — and close it via
    the caller's exit stack.
    Each :class:`ClaudeAgent` gets its own
    :class:`ClaudeCodeBackendAdapter`: ``BackendOptions`` pins
    ``cwd`` / ``model`` / ``system_prompt`` / ``mcp_servers`` for the
    lifetime of the underlying ``ClaudeCodeBackend``, so different
    agents can't share one.
    """
    backends: dict[str, Backend] = {}
@@ -110,7 +188,13 @@ async def _build_backends(
            for a in raycast_agents:
                backends[a.name] = raycast_backend
-    # Phase 2: ClaudeAgent → ClaudeCodeBackendAdapter goes here.
+    for a in agents:
        if isinstance(a, ClaudeAgent):
            adapter = ClaudeCodeBackendAdapter(
                agent=a, mcp_internal_urls=mcp_internal_urls
            )
            await stack.enter_async_context(adapter)
            backends[a.name] = adapter
    return backends
@@ -11,10 +11,12 @@ and hands it to each frontend's ``configure``.
 from __future__ import annotations
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
    from collections.abc import Mapping
    from beaver_gateway.backends.base import Backend
    from beaver_gateway.core.auth import TokenStore
    from beaver_gateway.core.registry import AgentRegistry, McpRegistry
@@ -28,12 +30,18 @@ class GatewayRuntime:
    instance can serve many ``RaycastAgent`` instances, but the lookup
    site (an inbound request with ``model=<agent.name>``) already has
    the name in hand, so the indirection lives one step earlier.
    ``mcp_internal_urls`` is filled in Phase 2.1: one loopback URL per
    declared ``McpServer`` so ``ClaudeCodeBackendAdapter`` (Phase 2.2)
    can pass them to ``BackendOptions.mcp_servers`` without re-running
    discovery.
    """
    agents: AgentRegistry
    mcps: McpRegistry
    backends: dict[str, Backend]
    token_store: TokenStore
    mcp_internal_urls: Mapping[str, str] = field(default_factory=dict)
 class Frontend(ABC):
@@ -0,0 +1,48 @@
 """Proxy ``FastMCP`` servers for user-declared external MCPs (``stdio``/``http``).
 Both flavours end up as a ``FastMCPProxy`` instance, built via
 ``fastmcp.server.create_proxy``. The proxy lazily opens the underlying
 client transport when the first MCP request arrives, so we don't pay
 for connections that nothing routes to. From the aggregator app's
 point of view a proxy is indistinguishable from a regular ``FastMCP``
 namespace — same ``http_app`` surface, same mount semantics.
 """
 from __future__ import annotations
 from typing import TYPE_CHECKING
 from fastmcp import Client
 from fastmcp.client.transports import StdioTransport, StreamableHttpTransport
 from fastmcp.server import create_proxy
 if TYPE_CHECKING:
    from fastmcp import FastMCP
    from beaver_gateway.mcp.types import HttpMcp, StdioMcp
 def build_stdio_proxy(spec: StdioMcp) -> FastMCP:
    """Build a mountable proxy server in front of a stdio-launched MCP.

    The head of ``spec.command`` is used as the executable and the
    remaining items as its CLI arguments. ``spec.env`` is forwarded
    untouched; ``spec.cwd`` is stringified when set and passed as
    ``None`` otherwise. The wrapping client is named after ``spec.name``.

    Raises:
        ValueError: if ``spec.command`` is empty.
    """
    argv = spec.command
    if not argv:
        msg = f"stdio MCP {spec.name!r} has empty command"
        raise ValueError(msg)

    # Star-unpacking already yields a fresh list for the tail, so it
    # can be handed to the transport as-is.
    executable, *cli_args = argv

    working_dir = None
    if spec.cwd is not None:
        working_dir = str(spec.cwd)

    stdio = StdioTransport(
        command=executable,
        args=cli_args,
        env=spec.env,
        cwd=working_dir,
    )
    client = Client(stdio, name=spec.name)
    return create_proxy(client)
 def build_http_proxy(spec: HttpMcp) -> FastMCP:
    """Build a mountable proxy server in front of a remote HTTP MCP.

    ``spec.url`` and ``spec.auth`` are forwarded to a
    ``StreamableHttpTransport``; the wrapping client is named after
    ``spec.name`` and handed to ``create_proxy``.
    """
    upstream = Client(
        StreamableHttpTransport(url=spec.url, auth=spec.auth),
        name=spec.name,
    )
    return create_proxy(upstream)
@@ -0,0 +1,87 @@
 """Internal MCP aggregator — one ASGI app, N FastMCP namespaces.
 Each ``McpServer`` declared in the user's config becomes its own
 ``FastMCP`` instance (regular for ``python_tool``, ``FastMCPProxy`` for
 ``stdio``/``http``) and is mounted under ``/mcp/<name>`` on a single
 Starlette app. This app runs on ``127.0.0.1:INTERNAL_MCP_PORT`` (not
 EXPOSE'd in Docker) so the ClaudeCode subprocess can reach each
 namespace via loopback as a distinct MCP server URL — preserving
 per-domain framing while costing only one process worth of RAM
 (PRD §6).
 The aggregator returns both the app and a ``{name: url}`` map; Phase
 2.2's ``ClaudeCodeBackendAdapter`` plugs the map directly into
 ``BackendOptions.mcp_servers``.
 """
 from __future__ import annotations
 from contextlib import AsyncExitStack, asynccontextmanager
 from typing import TYPE_CHECKING
 from starlette.applications import Starlette
 from starlette.routing import Mount
 from beaver_gateway.mcp.client_pool import build_http_proxy, build_stdio_proxy
 from beaver_gateway.mcp.types import HttpMcp, PythonToolMcp, StdioMcp
 from beaver_gateway.mcp.wrap import build_python_tool_server
 if TYPE_CHECKING:
    from collections.abc import AsyncIterator, Iterable
    from fastmcp import FastMCP
    from beaver_gateway.mcp.types import McpServerT
 def build_internal_app(
    mcps: Iterable[McpServerT], *, host: str, port: int
 ) -> tuple[Starlette, dict[str, str]]:
    """Assemble the aggregator app and the ``{name: url}`` lookup.

    Every spec in ``mcps`` is turned into its own server, whose HTTP app
    is mounted under ``/mcp/<name>`` on one parent ``Starlette`` app.

    ``host`` and ``port`` are only interpolated into the returned URL
    strings; nothing in this function binds a socket. Formatting the
    URLs here keeps callers from re-deriving the ``/mcp/<name>``
    convention on their own.

    Returns:
        A ``(app, urls)`` pair: the parent app and a mapping from each
        server name to its URL (with a trailing slash).
    """
    # Phase 1: one server per declared spec, keyed by its name.
    servers: dict[str, FastMCP] = {}
    for spec in mcps:
        key = spec.name
        servers[key] = _build_server(spec)

    # Phase 2: derive the ASGI sub-app for each server, still keyed by
    # name so the mounts and the lifespan walk the same objects.
    mounted = {
        name: server.http_app(transport="http", path="/")
        for name, server in servers.items()
    }
    routes = [
        Mount(f"/mcp/{name}", app=sub_app) for name, sub_app in mounted.items()
    ]

    @asynccontextmanager
    async def lifespan(_parent: Starlette) -> AsyncIterator[None]:
        # The session-manager setup of each FastMCP http_app lives in
        # that sub-app's own lifespan; if it is never entered, the
        # streamable-HTTP layer answers every request with a 500. The
        # exit stack starts all of them together and tears them down in
        # reverse order when the parent shuts down.
        async with AsyncExitStack() as children:
            for sub_app in mounted.values():
                child_lifespan = sub_app.router.lifespan_context(sub_app)
                await children.enter_async_context(child_lifespan)
            yield

    app = Starlette(routes=routes, lifespan=lifespan)

    # Publish URLs with the trailing slash already in place: a child
    # route at ``/`` makes ``Mount`` answer ``/mcp/<name>`` with a 307
    # redirect to ``/mcp/<name>/``, which this avoids.
    urls: dict[str, str] = {}
    for name in servers:
        urls[name] = f"http://{host}:{port}/mcp/{name}/"
    return app, urls
 def _build_server(spec: McpServerT) -> FastMCP:
    """Dispatch on the discriminated union to the matching builder."""
    if isinstance(spec, PythonToolMcp):
        return build_python_tool_server(spec)
    if isinstance(spec, StdioMcp):
        return build_stdio_proxy(spec)
    if isinstance(spec, HttpMcp):
        return build_http_proxy(spec)
    # `McpServerT` is a closed union; this is unreachable but keeps
    # type-narrowing honest if a new variant lands without updates here.
    msg = f"unsupported McpServer variant: {type(spec).__name__}"
    raise TypeError(msg)
@@ -0,0 +1,26 @@
 """Wrap a ``PythonToolMcp`` spec into a mountable ``FastMCP`` instance.
 Each ``python_tool`` McpServer in the user's config becomes a separate
 ``FastMCP`` namespace — one domain, one server URL — so models keep the
 per-domain framing they were trained on (see PRD §6).
 """
 from __future__ import annotations
 from typing import TYPE_CHECKING
 from fastmcp import FastMCP
 if TYPE_CHECKING:
    from beaver_gateway.mcp.types import PythonToolMcp
 def build_python_tool_server(spec: PythonToolMcp) -> FastMCP:
    """Create the ``FastMCP`` namespace described by a ``PythonToolMcp``.

    The server takes its name from ``spec.name``. The callables in
    ``spec.tools`` are copied into a list and passed through the
    ``tools=`` constructor argument, so no decorator wiring happens
    here.
    """
    tool_callables = [*spec.tools]
    return FastMCP(name=spec.name, tools=tool_callables)