feat: implement claude code backend
This commit is contained in:
+55
-3
@@ -4,20 +4,65 @@
|
||||
# ClaudeAgent, RaycastAgent, McpServer, ExposedMcp, Gateway already
|
||||
# bound — so importing them is optional. We import explicitly here so
|
||||
# IDEs and type-checkers see real symbols instead of free variables.
|
||||
import tempfile
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
|
||||
from beaver_gateway.agents.base import ExposedMcp
|
||||
from beaver_gateway.agents.claude import ClaudeAgent
|
||||
from beaver_gateway.agents.raycast import RaycastAgent, RemoteTool, UserPreferences
|
||||
from beaver_gateway.core.registry import Gateway
|
||||
from beaver_gateway.frontends.anthropic import AnthropicMessagesFrontend
|
||||
from beaver_gateway.mcp.types import McpServer
|
||||
|
||||
|
||||
def current_time() -> str:
|
||||
"""Return the current local time as an ISO-8601 string.
|
||||
|
||||
Trivial demo tool for the Phase 2.1 internal MCP aggregator —
|
||||
confirms a ``python_tool`` namespace is reachable on
|
||||
``http://127.0.0.1:<INTERNAL_MCP_PORT>/mcp/time``.
|
||||
"""
|
||||
from datetime import datetime
|
||||
|
||||
return datetime.now().astimezone().isoformat()
|
||||
|
||||
gateway = Gateway(
|
||||
agents=[
|
||||
# Phase 2.2 — ClaudeCodeBackendAdapter routes this agent's
|
||||
# ``/v1/messages`` calls through ``claude-code-api``. The
|
||||
# ``time`` MCP gets exposed as ``mcp__time__current_time`` to
|
||||
# the subscription claude session via
|
||||
# ``BackendOptions.mcp_servers`` pointing at the internal
|
||||
# aggregator on ``127.0.0.1:INTERNAL_MCP_PORT/mcp/time/``.
|
||||
#
|
||||
# Fresh empty tempdir (not a hardcoded ``/tmp``) for two
|
||||
# reasons: claude-code-api derives the JSONL project-key from
|
||||
# ``cwd``, but claude itself writes the JSONL using the cwd's
|
||||
# realpath — on macOS ``/tmp`` and ``/var/folders/...`` are
|
||||
# both ``/private/...`` symlinks, so unresolved cwds make
|
||||
# ``JsonlWatcher`` time out waiting on the wrong path. The
|
||||
# explicit ``.resolve()`` collapses the symlink before claude
|
||||
# ever sees the dir, and ``mkdtemp`` guarantees the directory
|
||||
# is empty so claude does not pick up leftover files.
|
||||
ClaudeAgent(
|
||||
name="stub",
|
||||
model="claude-sonnet-4-6",
|
||||
system_prompt="You are a stub agent used to validate the Phase 0 skeleton.",
|
||||
cwd="/tmp",
|
||||
# ``system_prompt`` is appended to claude's built-in agent
|
||||
# prompt (via ``--append-system-prompt``) — so it adds the
|
||||
# agent's identity on top of claude-code's baseline tool
|
||||
# knowledge, rather than replacing it. Same shape as the
|
||||
# RaycastAgent's ``system_prompt → additional_system_instructions``
|
||||
# mapping. For full ``BackendOptions`` knobs (timeouts,
|
||||
# extra_args, history mode, etc.) import ``ClaudeCodeOptions``
|
||||
# and pass ``options=ClaudeCodeOptions(...)``.
|
||||
system_prompt=(
|
||||
"You are a stub agent used to validate the Phase 0 skeleton.\n"
|
||||
"If asked the current time, call the `current_time`"
|
||||
" MCP tool instead of guessing."
|
||||
),
|
||||
cwd=Path(tempfile.mkdtemp(prefix="beaver-stub-cwd-")).resolve(),
|
||||
expose_mcps=(ExposedMcp(name="time"),),
|
||||
),
|
||||
# Phase 1.2 — a RaycastAgent the AnthropicMessagesFrontend will
|
||||
# route via RaycastBackend. Phase 1.5 added the per-agent knobs
|
||||
@@ -45,7 +90,14 @@ gateway = Gateway(
|
||||
),
|
||||
),
|
||||
],
|
||||
mcps=[],
|
||||
mcps=[
|
||||
# Phase 2.1 — bundle of plain Python callables exposed as one
|
||||
# FastMCP namespace. The internal aggregator mounts it under
|
||||
# ``/mcp/time`` on ``127.0.0.1:INTERNAL_MCP_PORT``; Phase 2.2's
|
||||
# ClaudeCode adapter will forward that URL into
|
||||
# ``BackendOptions.mcp_servers``.
|
||||
McpServer.python_tool(name="time", tools=[current_time]),
|
||||
],
|
||||
frontends=[
|
||||
# Phase 1.4 — expose the agents as `model=<name>` on an
|
||||
# Anthropic-compatible Messages endpoint. Auth comes from
|
||||
|
||||
@@ -3,14 +3,169 @@
|
||||
Notice the absence of a ``streaming`` field — claude-code does not emit
|
||||
token-level deltas, and that fact is encoded in the type, not in a
|
||||
runtime branch.
|
||||
|
||||
``BaseAgent.system_prompt`` maps onto the claude CLI's
|
||||
``--system-prompt`` — i.e. it really *is* the agent's system prompt,
|
||||
not "added on top of claude-code's giant built-in". The additive slot
|
||||
``--append-system-prompt`` is exposed via
|
||||
:attr:`ClaudeCodeOptions.append_system_prompt` for the rare case
|
||||
when the user wants claude-code's planning conventions / dynamic
|
||||
sections *and* a delta on top. Difference, measured empirically:
|
||||
|
||||
* tools (names, JSON schemas, embedded guidance like Bash's "prefer
|
||||
Read over cat/head/tail") survive both flags — they ride the
|
||||
Anthropic API ``tools=[]`` field, not the system prompt text;
|
||||
* ``--system-prompt`` drops ~8.6k tokens of claude-code's *textual*
|
||||
baseline — agent persona, multi-step-work conventions ("use
|
||||
TaskCreate proactively", etc.), and the dynamic per-machine sections
|
||||
(cwd, env info, git status, memory paths — see the
|
||||
``--exclude-dynamic-system-prompt-sections`` flag note in
|
||||
``claude --help``: "Only applies with the default system prompt
|
||||
(ignored with --system-prompt)").
|
||||
|
||||
We pick override as the default because it preserves the principle of
|
||||
least surprise: the ``system_prompt=...`` you wrote on the agent is
|
||||
what claude actually receives. If you need claude-code's full
|
||||
planning/dynamic-context behaviour on top of your prompt, opt in via
|
||||
``options=ClaudeCodeOptions(append_system_prompt=...)`` and leave
|
||||
``system_prompt`` for your agent's identity (or vice versa — set
|
||||
``system_prompt=""``-ish and put everything in ``append_*``).
|
||||
|
||||
Per-agent passthrough of ``claude_code_api.BackendOptions`` lives in
|
||||
:class:`ClaudeCodeOptions`, attached to :class:`ClaudeAgent` as
|
||||
``options``. Every tunable knob ``BackendOptions`` supports — except
|
||||
the ones derived from the agent's own primary surface
|
||||
(``cwd`` / ``model`` / ``system_prompt`` / ``available_native_tools`` →
|
||||
``allowed_tools`` / ``expose_mcps`` → ``mcp_servers``) — is exposed
|
||||
there. Defaults match upstream except where correctness demands
|
||||
otherwise (``wait_for_turn_duration=True``,
|
||||
``dangerously_skip_permissions=True``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path # noqa: TC003 — runtime use by pydantic
|
||||
from collections.abc import Mapping # noqa: TC003 — pydantic runtime
|
||||
from pathlib import Path # noqa: TC003 — pydantic runtime
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from beaver_gateway.agents.base import BaseAgent
|
||||
|
||||
HistoryInjectionMode = Literal["native_jsonl", "concat_message"]
|
||||
"""Mirrors ``claude_code_api.HistoryInjectionMode`` to avoid an import
|
||||
cycle on the user-facing path (configs may be loaded without
|
||||
``claude-code-api`` installed, e.g. ``--extra prod`` minus claude)."""
|
||||
|
||||
|
||||
class ClaudeCodeOptions(BaseModel):
|
||||
"""Per-agent passthrough for ``claude_code_api.BackendOptions``.
|
||||
|
||||
Mirrors every field of ``BackendOptions`` except those that come
|
||||
from the agent's primary surface
|
||||
(``cwd`` / ``model`` / ``system_prompt`` /
|
||||
``available_native_tools`` / ``expose_mcps``). Defaults match
|
||||
upstream, with two exceptions for correctness:
|
||||
|
||||
* ``wait_for_turn_duration=True`` — without it, extended-thinking
|
||||
turns drop the text response because ``TurnManager`` returns on
|
||||
the first terminal assistant record (the thinking snapshot) and
|
||||
never reads the second one (the actual text). Phase 2.2 PROGRESS
|
||||
describes the incident in detail.
|
||||
* ``dangerously_skip_permissions=True`` — Beaver Gateway is a
|
||||
single-user trusted-config product; the user already wrote the
|
||||
``config.py`` that spawns claude. Permission prompts on a
|
||||
headless backend mean stuck turns.
|
||||
|
||||
Any field on :class:`claude_code_api.BackendOptions` we forward
|
||||
here lives on this model with the same name and type, so a future
|
||||
``BackendOptions`` field addition is a one-liner here plus a
|
||||
one-liner in :mod:`backends.claude_code`.
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(frozen=True, arbitrary_types_allowed=True)
|
||||
|
||||
append_system_prompt: str | None = None
|
||||
"""Maps to claude CLI's ``--append-system-prompt``. Opting in
|
||||
re-attaches claude-code's full built-in prompt (persona, planning
|
||||
conventions like "use TaskCreate proactively", and the dynamic
|
||||
per-machine sections — cwd, env info, git status, memory paths)
|
||||
plus this string on top. By default we ship only
|
||||
:attr:`BaseAgent.system_prompt` via ``--system-prompt`` (~8.6k
|
||||
tokens lighter); use this when the agent should behave like a
|
||||
real claude-code coding session and your text is a delta on top
|
||||
of those built-ins. Tool schemas survive either way — they ride
|
||||
the Anthropic ``tools=[]`` channel, not the prompt text."""
|
||||
|
||||
disallowed_tools: tuple[str, ...] = ()
|
||||
"""Tool names that claude must refuse to call. Combines with the
|
||||
agent's ``available_native_tools`` allowlist — disallow wins."""
|
||||
|
||||
permission_mode: str = "bypassPermissions"
|
||||
"""``claude --permission-mode`` value. Only meaningful when
|
||||
``dangerously_skip_permissions=False``; otherwise the CLI bypasses
|
||||
the prompt path entirely."""
|
||||
|
||||
dangerously_skip_permissions: bool = True
|
||||
"""Pass ``--dangerously-skip-permissions`` to claude. ``True`` by
|
||||
default — see the class docstring for why."""
|
||||
|
||||
effort: str | None = None
|
||||
"""``claude --effort`` value (typically ``low`` / ``medium`` /
|
||||
``high``). Tunes reasoning effort budget for newer models."""
|
||||
|
||||
add_dir: tuple[str, ...] = ()
|
||||
"""Extra directories claude is allowed to read/edit beyond
|
||||
``cwd``. Maps to repeated ``--add-dir`` flags."""
|
||||
|
||||
settings: str | None = None
|
||||
"""Path to a ``--settings`` JSON file claude should load (hooks,
|
||||
MCP servers, etc. that don't belong in our internal aggregator)."""
|
||||
|
||||
extra_args: tuple[str, ...] = ()
|
||||
"""Raw extra argv to append to the claude command. Escape hatch
|
||||
for flags we haven't surfaced as first-class options."""
|
||||
|
||||
extra_env: Mapping[str, str] = Field(default_factory=dict)
|
||||
"""Additional environment variables for the spawned claude
|
||||
process. Layered on top of the gateway's own env after
|
||||
``preserve_provider_env`` is applied."""
|
||||
|
||||
preserve_provider_env: bool = False
|
||||
"""When ``False`` (default), the spawn env strips
|
||||
``ANTHROPIC_API_KEY`` / ``ANTHROPIC_AUTH_TOKEN`` /
|
||||
``ANTHROPIC_BASE_URL`` so claude uses subscription auth instead of
|
||||
leaking through whatever the gateway process inherited."""
|
||||
|
||||
history_injection_mode: HistoryInjectionMode = "native_jsonl"
|
||||
"""How prior turns are seeded into a fresh session when no live
|
||||
session matches: ``native_jsonl`` writes a hand-crafted transcript
|
||||
and ``--resume``s; ``concat_message`` instead folds history into
|
||||
the first user prompt. ``native_jsonl`` is more faithful."""
|
||||
|
||||
wait_for_turn_duration: bool = True
|
||||
"""Keep reading JSONL until the ``turn_duration`` heartbeat
|
||||
arrives, instead of returning on the first terminal assistant
|
||||
record. ``True`` by default — see class docstring."""
|
||||
|
||||
include_meta_user: bool = False
|
||||
"""Surface claude's ``isMeta=True`` user records (local-command
|
||||
caveats) as ``UserMessage`` events. Off by default — they're not
|
||||
part of the real conversation."""
|
||||
|
||||
startup_delay: float = 1.0
|
||||
"""Seconds to wait after spawning the PTY before the first
|
||||
write — claude's TUI takes a beat to settle."""
|
||||
|
||||
file_wait_timeout: float = 30.0
|
||||
"""How long to wait for the session JSONL to appear after spawn.
|
||||
Failure here usually means a CLI auth / config problem."""
|
||||
|
||||
turn_duration_timeout: float = 5.0
|
||||
"""How long to wait for the ``turn_duration`` heartbeat once a
|
||||
terminal assistant has been seen. Bound on extra latency when
|
||||
``wait_for_turn_duration=True``."""
|
||||
|
||||
|
||||
class ClaudeAgent(BaseAgent):
|
||||
"""Agent backed by ``claude-code-api``.
|
||||
@@ -25,3 +180,6 @@ class ClaudeAgent(BaseAgent):
|
||||
|
||||
cwd: Path
|
||||
available_native_tools: tuple[str, ...] = ()
|
||||
options: ClaudeCodeOptions = Field(default_factory=ClaudeCodeOptions)
|
||||
"""Per-agent passthrough for the underlying claude-code-api
|
||||
``BackendOptions``. See :class:`ClaudeCodeOptions`."""
|
||||
|
||||
@@ -0,0 +1,327 @@
|
||||
"""Claude Code backend adapter.
|
||||
|
||||
One :class:`ClaudeCodeBackendAdapter` per :class:`ClaudeAgent`. The
|
||||
underlying :class:`claude_code_api.ClaudeCodeBackend` bakes ``cwd`` /
|
||||
``model`` / ``system_prompt`` / MCP wiring into a single
|
||||
:class:`~claude_code_api.BackendOptions` at construction time, so a
|
||||
single backend instance is conceptually bound to one agent (different
|
||||
agents would mean different cwds / system prompts / exposed MCPs and
|
||||
thus different live-session pools).
|
||||
|
||||
Per :meth:`complete` we:
|
||||
|
||||
* hand the full Anthropic-style ``messages`` list to
|
||||
``ClaudeCodeBackend.complete`` — it does its own fingerprint-based
|
||||
session lookup, so we never need to track sessions ourselves;
|
||||
* re-emit each ``AssistantMessage`` as ``content_block_start`` +
|
||||
one delta + ``content_block_stop`` per content block, with
|
||||
monotonically increasing indices spanning the entire turn (one
|
||||
``message_start`` … ``message_stop`` envelope per ``complete`` call);
|
||||
* close the envelope on the synthesized ``ResultMessage``.
|
||||
|
||||
The per-request ``system`` parameter is intentionally **ignored** —
|
||||
``BackendOptions.system_prompt`` is fixed at session-spawn time, and the
|
||||
agent's ``system_prompt`` is the canonical identity of the agent.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from typing import TYPE_CHECKING, Any, Self
|
||||
|
||||
from claude_code_api import (
|
||||
AssistantMessage,
|
||||
BackendOptions,
|
||||
ClaudeCodeBackend,
|
||||
ResultMessage,
|
||||
TextBlock,
|
||||
ThinkingBlock,
|
||||
ToolUseBlock,
|
||||
)
|
||||
|
||||
from beaver_gateway.agents.claude import ClaudeAgent
|
||||
from beaver_gateway.core.events import (
|
||||
StopReason,
|
||||
build_content_block_stop,
|
||||
build_input_json_delta,
|
||||
build_message_delta,
|
||||
build_message_start,
|
||||
build_message_stop,
|
||||
build_signature_delta,
|
||||
build_text_block_start,
|
||||
build_text_delta,
|
||||
build_thinking_block_start,
|
||||
build_thinking_delta,
|
||||
build_tool_use_block_start,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import AsyncIterator, Iterable, Mapping
|
||||
|
||||
from anthropic.types import MessageParam
|
||||
|
||||
from beaver_gateway.agents.base import BaseAgent
|
||||
from beaver_gateway.core.events import MessageStreamEvent
|
||||
|
||||
|
||||
__all__ = ["ClaudeCodeBackendAdapter"]
|
||||
|
||||
|
||||
_CLAUDE_TO_ANTHROPIC_STOP: dict[str, StopReason] = {
|
||||
"end_turn": "end_turn",
|
||||
"tool_use": "tool_use",
|
||||
"max_tokens": "max_tokens",
|
||||
"stop_sequence": "stop_sequence",
|
||||
"refusal": "refusal",
|
||||
}
|
||||
|
||||
|
||||
def _map_stop_reason(raw: str | None) -> StopReason:
|
||||
"""Map claude-code's stop reason into Anthropic's vocabulary.
|
||||
|
||||
Unknown / missing values collapse to ``end_turn`` so the client sees
|
||||
a clean finish rather than a wire-format error.
|
||||
"""
|
||||
if raw is None:
|
||||
return "end_turn"
|
||||
return _CLAUDE_TO_ANTHROPIC_STOP.get(raw, "end_turn")
|
||||
|
||||
|
||||
def _build_mcp_servers(
|
||||
agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str]
|
||||
) -> dict[str, dict[str, Any]] | None:
|
||||
"""Render ``agent.expose_mcps`` into ``BackendOptions.mcp_servers``.
|
||||
|
||||
Each exposed MCP is a streamable-HTTP pointer at the gateway's
|
||||
internal aggregator (built by :mod:`beaver_gateway.mcp.internal_app`).
|
||||
``None`` keeps claude-code from materializing an ``--mcp-config``
|
||||
file when the agent exposes nothing.
|
||||
"""
|
||||
if not agent.expose_mcps:
|
||||
return None
|
||||
servers: dict[str, dict[str, Any]] = {}
|
||||
for em in agent.expose_mcps:
|
||||
url = mcp_internal_urls.get(em.name)
|
||||
if url is None:
|
||||
msg = (
|
||||
f"agent {agent.name!r} exposes MCP {em.name!r} "
|
||||
"but no internal URL is registered for it"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
servers[em.name] = {"type": "http", "url": url}
|
||||
return servers
|
||||
|
||||
|
||||
def _build_backend_options(
|
||||
agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str]
|
||||
) -> BackendOptions:
|
||||
"""Compose the per-agent :class:`BackendOptions`.
|
||||
|
||||
Agent-primary fields:
|
||||
|
||||
* ``cwd`` / ``model`` come from the agent directly;
|
||||
* ``system_prompt`` carries :attr:`BaseAgent.system_prompt`
|
||||
verbatim — i.e. wire-level ``--system-prompt`` (~8.6k tokens
|
||||
lighter than ``--append-system-prompt`` because claude-code's
|
||||
persona/planning conventions and dynamic sections drop out;
|
||||
tool schemas survive via the API ``tools=[]`` channel);
|
||||
* ``append_system_prompt`` carries
|
||||
:attr:`ClaudeCodeOptions.append_system_prompt`, normally
|
||||
``None``. Setting it re-attaches claude-code's built-in prompt
|
||||
*and* this delta — opt-in for "claude as a real coding session";
|
||||
* ``allowed_tools`` follows the PLAN: when the user lists native
|
||||
tools we restrict to those *plus* a per-MCP wildcard so MCP tools
|
||||
stay reachable; when no native list is declared we leave
|
||||
``allowed_tools`` empty (= all tools allowed by claude-code's
|
||||
default);
|
||||
* ``mcp_servers`` comes from :func:`_build_mcp_servers`.
|
||||
|
||||
Every other tunable knob is passed through from
|
||||
:attr:`ClaudeAgent.options`. Our default overrides
|
||||
(``wait_for_turn_duration=True``,
|
||||
``dangerously_skip_permissions=True``) live on
|
||||
:class:`ClaudeCodeOptions`, not here, so a user who builds
|
||||
``ClaudeCodeOptions(...)`` explicitly inherits the same defaults
|
||||
instead of getting whatever claude-code-api ships.
|
||||
"""
|
||||
allowed_tools: tuple[str, ...] = ()
|
||||
if agent.available_native_tools:
|
||||
mcp_wildcards = tuple(f"mcp__{em.name}" for em in agent.expose_mcps)
|
||||
allowed_tools = tuple(agent.available_native_tools) + mcp_wildcards
|
||||
|
||||
opt = agent.options
|
||||
return BackendOptions(
|
||||
cwd=agent.cwd,
|
||||
model=agent.model or None,
|
||||
system_prompt=agent.system_prompt,
|
||||
append_system_prompt=opt.append_system_prompt,
|
||||
allowed_tools=allowed_tools,
|
||||
mcp_servers=_build_mcp_servers(agent, mcp_internal_urls),
|
||||
disallowed_tools=opt.disallowed_tools,
|
||||
permission_mode=opt.permission_mode,
|
||||
dangerously_skip_permissions=opt.dangerously_skip_permissions,
|
||||
effort=opt.effort,
|
||||
add_dir=opt.add_dir,
|
||||
settings=opt.settings,
|
||||
extra_args=opt.extra_args,
|
||||
extra_env=opt.extra_env,
|
||||
preserve_provider_env=opt.preserve_provider_env,
|
||||
history_injection_mode=opt.history_injection_mode,
|
||||
wait_for_turn_duration=opt.wait_for_turn_duration,
|
||||
include_meta_user=opt.include_meta_user,
|
||||
startup_delay=opt.startup_delay,
|
||||
file_wait_timeout=opt.file_wait_timeout,
|
||||
turn_duration_timeout=opt.turn_duration_timeout,
|
||||
)
|
||||
|
||||
|
||||
class ClaudeCodeBackendAdapter:
|
||||
"""One ``claude-code-api`` backend bound to a single :class:`ClaudeAgent`.
|
||||
|
||||
Owns the underlying :class:`ClaudeCodeBackend`'s lifecycle through
|
||||
the async-context-manager protocol so :mod:`beaver_gateway.cli` can
|
||||
park it in its ``AsyncExitStack``.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
agent: ClaudeAgent,
|
||||
mcp_internal_urls: Mapping[str, str],
|
||||
) -> None:
|
||||
self._agent = agent
|
||||
self._backend = ClaudeCodeBackend(
|
||||
_build_backend_options(agent, mcp_internal_urls)
|
||||
)
|
||||
|
||||
@property
|
||||
def agent(self) -> ClaudeAgent:
|
||||
return self._agent
|
||||
|
||||
@property
|
||||
def live_session_count(self) -> int:
|
||||
return self._backend.live_session_count
|
||||
|
||||
async def __aenter__(self) -> Self:
|
||||
await self._backend.__aenter__()
|
||||
return self
|
||||
|
||||
async def __aexit__(
|
||||
self, exc_type: object, exc: object, tb: object
|
||||
) -> None:
|
||||
await self._backend.__aexit__(exc_type, exc, tb)
|
||||
|
||||
async def aclose(self) -> None:
|
||||
await self._backend.aclose()
|
||||
|
||||
async def complete(
|
||||
self,
|
||||
*,
|
||||
agent: BaseAgent,
|
||||
messages: Iterable[MessageParam],
|
||||
system: str | None = None, # noqa: ARG002 — see module docstring
|
||||
**options: Any, # noqa: ARG002 — no per-request knobs for claude-code yet
|
||||
) -> AsyncIterator[MessageStreamEvent]:
|
||||
if not isinstance(agent, ClaudeAgent):
|
||||
msg = (
|
||||
"ClaudeCodeBackendAdapter requires ClaudeAgent, "
|
||||
f"got {type(agent).__name__}"
|
||||
)
|
||||
raise TypeError(msg)
|
||||
if agent.name != self._agent.name:
|
||||
# Adapter is per-agent; routing a different agent through it
|
||||
# would mean a different cwd / system_prompt / MCP set than
|
||||
# the live-session pool was spawned with.
|
||||
msg = (
|
||||
f"ClaudeCodeBackendAdapter bound to {self._agent.name!r} "
|
||||
f"got request for {agent.name!r}"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
message_id = f"msg_{uuid.uuid4().hex}"
|
||||
yield build_message_start(message_id=message_id, model=agent.model)
|
||||
|
||||
next_index = 0
|
||||
stop_reason: str | None = None
|
||||
usage: Mapping[str, Any] | None = None
|
||||
|
||||
async for event in self._backend.complete(list(messages)):
|
||||
if isinstance(event, AssistantMessage):
|
||||
for block in event.content:
|
||||
for ev in _emit_block(block, next_index):
|
||||
yield ev
|
||||
next_index += 1
|
||||
elif isinstance(event, ResultMessage):
|
||||
stop_reason = event.stop_reason
|
||||
usage = event.usage
|
||||
# ResultMessage is always last (TurnManager synthesizes
|
||||
# it as the terminal event), so we break after emitting
|
||||
# the envelope close.
|
||||
break
|
||||
# UserMessage (tool_result records) and SystemMessage
|
||||
# (turn_duration heartbeats) carry no content for the
|
||||
# /v1/messages caller — skip silently.
|
||||
|
||||
yield build_message_delta(
|
||||
stop_reason=_map_stop_reason(stop_reason),
|
||||
usage=_normalize_usage(usage),
|
||||
)
|
||||
yield build_message_stop()
|
||||
|
||||
|
||||
def _emit_block(
|
||||
block: TextBlock | ThinkingBlock | ToolUseBlock | Any, index: int
|
||||
) -> Iterable[MessageStreamEvent]:
|
||||
"""Render one ``claude-code`` content block as Anthropic stream events.
|
||||
|
||||
``ToolResultBlock`` would arrive only on user-role records — we
|
||||
don't emit it here because :meth:`complete` skips ``UserMessage``.
|
||||
"""
|
||||
if isinstance(block, TextBlock):
|
||||
return (
|
||||
build_text_block_start(index),
|
||||
build_text_delta(index, block.text),
|
||||
build_content_block_stop(index),
|
||||
)
|
||||
if isinstance(block, ThinkingBlock):
|
||||
return (
|
||||
build_thinking_block_start(index),
|
||||
build_thinking_delta(index, block.thinking),
|
||||
build_signature_delta(index, block.signature),
|
||||
build_content_block_stop(index),
|
||||
)
|
||||
if isinstance(block, ToolUseBlock):
|
||||
partial = json.dumps(
|
||||
block.input, separators=(",", ":"), ensure_ascii=False
|
||||
)
|
||||
return (
|
||||
build_tool_use_block_start(index, tool_use_id=block.id, name=block.name),
|
||||
build_input_json_delta(index, partial),
|
||||
build_content_block_stop(index),
|
||||
)
|
||||
return ()
|
||||
|
||||
|
||||
def _normalize_usage(usage: Mapping[str, Any] | None) -> dict[str, int] | None:
|
||||
"""Coerce claude-code's ``usage`` dict to Anthropic ``MessageDeltaUsage`` shape.
|
||||
|
||||
claude-code copies whatever the JSONL ``usage`` record carried —
|
||||
fields can be missing, strings, or ints. We pass through only the
|
||||
fields ``MessageDeltaUsage`` knows about and discard the rest so an
|
||||
odd ``cache_creation`` object structure doesn't fail pydantic
|
||||
validation downstream.
|
||||
"""
|
||||
if not usage:
|
||||
return None
|
||||
out: dict[str, int] = {}
|
||||
for key in (
|
||||
"input_tokens",
|
||||
"output_tokens",
|
||||
"cache_creation_input_tokens",
|
||||
"cache_read_input_tokens",
|
||||
):
|
||||
value = usage.get(key)
|
||||
if isinstance(value, int):
|
||||
out[key] = value
|
||||
return out or None
|
||||
@@ -6,6 +6,12 @@ each frontend with a ``GatewayRuntime``, and run all
|
||||
``frontend.serve()`` coroutines concurrently. Without any frontends we
|
||||
still print the Phase 0 DoD line and exit cleanly so the bare skeleton
|
||||
keeps working.
|
||||
|
||||
Phase 2.1 — when the user declares any ``McpServer``, we additionally
|
||||
build the internal MCP aggregator app and run it on
|
||||
``127.0.0.1:INTERNAL_MCP_PORT`` as another task inside the same
|
||||
TaskGroup. URLs are surfaced through ``GatewayRuntime.mcp_internal_urls``
|
||||
so Phase 2.2's ClaudeCode adapter can find them.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -15,20 +21,27 @@ import logging
|
||||
from contextlib import AsyncExitStack
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import uvicorn
|
||||
import uvloop
|
||||
from raycast_api import Client as RaycastClient
|
||||
from raycast_api.config import Config as RaycastConfig
|
||||
|
||||
from beaver_gateway import config_loader
|
||||
from beaver_gateway.agents.claude import ClaudeAgent
|
||||
from beaver_gateway.agents.raycast import RaycastAgent
|
||||
from beaver_gateway.backends.claude_code import ClaudeCodeBackendAdapter
|
||||
from beaver_gateway.backends.raycast import RaycastBackend
|
||||
from beaver_gateway.core.auth import TokenStore
|
||||
from beaver_gateway.core.registry import AgentRegistry, McpRegistry
|
||||
from beaver_gateway.frontends.base import GatewayRuntime
|
||||
from beaver_gateway.mcp.internal_app import build_internal_app
|
||||
from beaver_gateway.settings import Settings
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from starlette.applications import Starlette
|
||||
|
||||
from beaver_gateway.backends.base import Backend
|
||||
from beaver_gateway.mcp.types import McpServerT
|
||||
|
||||
|
||||
_log = logging.getLogger("beaver_gateway.cli")
|
||||
@@ -51,8 +64,18 @@ async def _async_main() -> None:
|
||||
token_store = TokenStore.from_env(settings.bootstrap_tokens)
|
||||
|
||||
async with AsyncExitStack() as stack:
|
||||
# Internal MCP URLs must exist before we construct any
|
||||
# ClaudeCodeBackendAdapter — adapters bake the URLs into their
|
||||
# ``BackendOptions.mcp_servers`` at construction time.
|
||||
internal_app, internal_urls = _build_internal_mcp(
|
||||
gateway.mcps, settings=settings
|
||||
)
|
||||
|
||||
backends: dict[str, Backend] = await _build_backends(
|
||||
settings=settings, agents=agents, stack=stack
|
||||
settings=settings,
|
||||
agents=agents,
|
||||
stack=stack,
|
||||
mcp_internal_urls=internal_urls,
|
||||
)
|
||||
|
||||
runtime = GatewayRuntime(
|
||||
@@ -60,6 +83,7 @@ async def _async_main() -> None:
|
||||
mcps=mcps,
|
||||
backends=backends,
|
||||
token_store=token_store,
|
||||
mcp_internal_urls=internal_urls,
|
||||
)
|
||||
|
||||
for fe in gateway.frontends:
|
||||
@@ -80,18 +104,66 @@ async def _async_main() -> None:
|
||||
)
|
||||
|
||||
if not gateway.frontends:
|
||||
# No external listeners → nothing to serve. The internal
|
||||
# MCP app has no consumer on its own, so we skip running
|
||||
# it in this path and exit cleanly (Phase 0 DoD).
|
||||
return
|
||||
|
||||
async with asyncio.TaskGroup() as tg:
|
||||
if internal_app is not None:
|
||||
tg.create_task(
|
||||
_serve_internal_mcp(internal_app, settings=settings)
|
||||
)
|
||||
for fe in gateway.frontends:
|
||||
tg.create_task(fe.serve())
|
||||
|
||||
|
||||
def _build_internal_mcp(
|
||||
mcps: list[McpServerT], *, settings: Settings
|
||||
) -> tuple[Starlette | None, dict[str, str]]:
|
||||
"""Build the aggregator app + URL map, or return ``(None, {})``.
|
||||
|
||||
The URL map is always handed out (frontends may still introspect
|
||||
``runtime.mcp_internal_urls`` even if nothing is configured); the
|
||||
app is ``None`` when there are no MCPs to mount, so the caller
|
||||
skips the uvicorn task entirely.
|
||||
"""
|
||||
if not mcps:
|
||||
return None, {}
|
||||
app, urls = build_internal_app(
|
||||
mcps, host="127.0.0.1", port=settings.internal_mcp_port
|
||||
)
|
||||
return app, urls
|
||||
|
||||
|
||||
async def _serve_internal_mcp(app: Starlette, *, settings: Settings) -> None:
|
||||
"""Run the internal MCP aggregator on loopback.
|
||||
|
||||
Bound to ``127.0.0.1`` (never EXPOSE'd) — only the in-process
|
||||
ClaudeCode subprocess reaches it. Logged at ``warning`` level so
|
||||
we don't drown the gateway's own logs in per-request noise.
|
||||
"""
|
||||
config = uvicorn.Config(
|
||||
app,
|
||||
host="127.0.0.1",
|
||||
port=settings.internal_mcp_port,
|
||||
log_level="warning",
|
||||
loop="uvloop",
|
||||
)
|
||||
server = uvicorn.Server(config)
|
||||
_log.info(
|
||||
"internal MCP aggregator on http://127.0.0.1:%d/mcp/<name>",
|
||||
settings.internal_mcp_port,
|
||||
)
|
||||
await server.serve()
|
||||
|
||||
|
||||
async def _build_backends(
|
||||
*,
|
||||
settings: Settings,
|
||||
agents: AgentRegistry,
|
||||
stack: AsyncExitStack,
|
||||
mcp_internal_urls: dict[str, str],
|
||||
) -> dict[str, Backend]:
|
||||
"""Construct one backend per agent name.
|
||||
|
||||
@@ -99,6 +171,12 @@ async def _build_backends(
|
||||
(bearer + device-id are process-wide), so we open it lazily — only
|
||||
when at least one ``RaycastAgent`` is present — and close it via
|
||||
the caller's exit stack.
|
||||
|
||||
Each :class:`ClaudeAgent` gets its own
|
||||
:class:`ClaudeCodeBackendAdapter`: ``BackendOptions`` pins
|
||||
``cwd`` / ``model`` / ``system_prompt`` / ``mcp_servers`` for the
|
||||
lifetime of the underlying ``ClaudeCodeBackend``, so different
|
||||
agents can't share one.
|
||||
"""
|
||||
backends: dict[str, Backend] = {}
|
||||
|
||||
@@ -110,7 +188,13 @@ async def _build_backends(
|
||||
for a in raycast_agents:
|
||||
backends[a.name] = raycast_backend
|
||||
|
||||
# Phase 2: ClaudeAgent → ClaudeCodeBackendAdapter goes here.
|
||||
for a in agents:
|
||||
if isinstance(a, ClaudeAgent):
|
||||
adapter = ClaudeCodeBackendAdapter(
|
||||
agent=a, mcp_internal_urls=mcp_internal_urls
|
||||
)
|
||||
await stack.enter_async_context(adapter)
|
||||
backends[a.name] = adapter
|
||||
|
||||
return backends
|
||||
|
||||
|
||||
@@ -11,10 +11,12 @@ and hands it to each frontend's ``configure``.
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Mapping
|
||||
|
||||
from beaver_gateway.backends.base import Backend
|
||||
from beaver_gateway.core.auth import TokenStore
|
||||
from beaver_gateway.core.registry import AgentRegistry, McpRegistry
|
||||
@@ -28,12 +30,18 @@ class GatewayRuntime:
|
||||
instance can serve many ``RaycastAgent`` instances, but the lookup
|
||||
site (an inbound request with ``model=<agent.name>``) already has
|
||||
the name in hand, so the indirection lives one step earlier.
|
||||
|
||||
``mcp_internal_urls`` is filled in Phase 2.1: one loopback URL per
|
||||
declared ``McpServer`` so ``ClaudeCodeBackendAdapter`` (Phase 2.2)
|
||||
can pass them to ``BackendOptions.mcp_servers`` without re-running
|
||||
discovery.
|
||||
"""
|
||||
|
||||
agents: AgentRegistry
|
||||
mcps: McpRegistry
|
||||
backends: dict[str, Backend]
|
||||
token_store: TokenStore
|
||||
mcp_internal_urls: Mapping[str, str] = field(default_factory=dict)
|
||||
|
||||
|
||||
class Frontend(ABC):
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
"""Proxy ``FastMCP`` servers for user-declared external MCPs (``stdio``/``http``).
|
||||
|
||||
Both flavours end up as a ``FastMCPProxy`` instance, built via
|
||||
``fastmcp.server.create_proxy``. The proxy lazily opens the underlying
|
||||
client transport when the first MCP request arrives, so we don't pay
|
||||
for connections that nothing routes to. From the aggregator app's
|
||||
point of view a proxy is indistinguishable from a regular ``FastMCP``
|
||||
namespace — same ``http_app`` surface, same mount semantics.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from fastmcp import Client
|
||||
from fastmcp.client.transports import StdioTransport, StreamableHttpTransport
|
||||
from fastmcp.server import create_proxy
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fastmcp import FastMCP
|
||||
|
||||
from beaver_gateway.mcp.types import HttpMcp, StdioMcp
|
||||
|
||||
|
||||
def build_stdio_proxy(spec: StdioMcp) -> FastMCP:
|
||||
"""Wrap a stdio subprocess MCP into a mountable ``FastMCPProxy``.
|
||||
|
||||
``spec.command`` is a non-empty tuple; the first element is the
|
||||
executable and the rest are CLI args. ``StdioTransport`` keeps the
|
||||
subprocess alive across calls.
|
||||
"""
|
||||
if not spec.command:
|
||||
msg = f"stdio MCP {spec.name!r} has empty command"
|
||||
raise ValueError(msg)
|
||||
command, *args = spec.command
|
||||
transport = StdioTransport(
|
||||
command=command,
|
||||
args=list(args),
|
||||
env=spec.env,
|
||||
cwd=str(spec.cwd) if spec.cwd is not None else None,
|
||||
)
|
||||
return create_proxy(Client(transport, name=spec.name))
|
||||
|
||||
|
||||
def build_http_proxy(spec: HttpMcp) -> FastMCP:
|
||||
"""Wrap a remote streamable-HTTP MCP into a mountable ``FastMCPProxy``."""
|
||||
transport = StreamableHttpTransport(url=spec.url, auth=spec.auth)
|
||||
return create_proxy(Client(transport, name=spec.name))
|
||||
@@ -0,0 +1,87 @@
|
||||
"""Internal MCP aggregator — one ASGI app, N FastMCP namespaces.
|
||||
|
||||
Each ``McpServer`` declared in the user's config becomes its own
|
||||
``FastMCP`` instance (regular for ``python_tool``, ``FastMCPProxy`` for
|
||||
``stdio``/``http``) and is mounted under ``/mcp/<name>`` on a single
|
||||
Starlette app. This app runs on ``127.0.0.1:INTERNAL_MCP_PORT`` (not
|
||||
EXPOSE'd in Docker) so the ClaudeCode subprocess can reach each
|
||||
namespace via loopback as a distinct MCP server URL — preserving
|
||||
per-domain framing while costing only one process worth of RAM
|
||||
(PRD §6).
|
||||
|
||||
The aggregator returns both the app and a ``{name: url}`` map; Phase
|
||||
2.2's ``ClaudeCodeBackendAdapter`` plugs the map directly into
|
||||
``BackendOptions.mcp_servers``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import AsyncExitStack, asynccontextmanager
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from starlette.applications import Starlette
|
||||
from starlette.routing import Mount
|
||||
|
||||
from beaver_gateway.mcp.client_pool import build_http_proxy, build_stdio_proxy
|
||||
from beaver_gateway.mcp.types import HttpMcp, PythonToolMcp, StdioMcp
|
||||
from beaver_gateway.mcp.wrap import build_python_tool_server
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import AsyncIterator, Iterable
|
||||
|
||||
from fastmcp import FastMCP
|
||||
|
||||
from beaver_gateway.mcp.types import McpServerT
|
||||
|
||||
|
||||
def build_internal_app(
|
||||
mcps: Iterable[McpServerT], *, host: str, port: int
|
||||
) -> tuple[Starlette, dict[str, str]]:
|
||||
"""Build the aggregator ``Starlette`` app and the ``{name: url}`` map.
|
||||
|
||||
``host``/``port`` only flavour the URL strings handed back — actually
|
||||
listening on them is the caller's job (``cli.main`` runs a uvicorn
|
||||
server in a TaskGroup). We accept the address here so callers don't
|
||||
have to format the URLs themselves and risk drifting from the
|
||||
``/mcp/<name>`` convention.
|
||||
"""
|
||||
servers: dict[str, FastMCP] = {spec.name: _build_server(spec) for spec in mcps}
|
||||
|
||||
child_apps = [s.http_app(transport="http", path="/") for s in servers.values()]
|
||||
routes = [
|
||||
Mount(f"/mcp/{name}", app=app)
|
||||
for name, app in zip(servers, child_apps, strict=True)
|
||||
]
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(_parent: Starlette) -> AsyncIterator[None]:
|
||||
# Each FastMCP http_app stores its session manager init in its
|
||||
# own lifespan. Without entering them the streamable-HTTP layer
|
||||
# 500s on every request. AsyncExitStack composes them so all
|
||||
# children come up together and unwind in reverse order on
|
||||
# shutdown.
|
||||
async with AsyncExitStack() as stack:
|
||||
for child in child_apps:
|
||||
await stack.enter_async_context(child.router.lifespan_context(child))
|
||||
yield
|
||||
|
||||
app = Starlette(routes=routes, lifespan=lifespan)
|
||||
# Trailing slash on the published URL skips Starlette's
|
||||
# 307 redirect from ``/mcp/<name>`` to ``/mcp/<name>/`` that
|
||||
# ``Mount`` produces when a child route lives at ``/``.
|
||||
urls = {name: f"http://{host}:{port}/mcp/{name}/" for name in servers}
|
||||
return app, urls
|
||||
|
||||
|
||||
def _build_server(spec: McpServerT) -> FastMCP:
|
||||
"""Dispatch on the discriminated union to the matching builder."""
|
||||
if isinstance(spec, PythonToolMcp):
|
||||
return build_python_tool_server(spec)
|
||||
if isinstance(spec, StdioMcp):
|
||||
return build_stdio_proxy(spec)
|
||||
if isinstance(spec, HttpMcp):
|
||||
return build_http_proxy(spec)
|
||||
# `McpServerT` is a closed union; this is unreachable but keeps
|
||||
# type-narrowing honest if a new variant lands without updates here.
|
||||
msg = f"unsupported McpServer variant: {type(spec).__name__}"
|
||||
raise TypeError(msg)
|
||||
@@ -0,0 +1,26 @@
|
||||
"""Wrap a ``PythonToolMcp`` spec into a mountable ``FastMCP`` instance.
|
||||
|
||||
Each ``python_tool`` McpServer in the user's config becomes a separate
|
||||
``FastMCP`` namespace — one domain, one server URL — so models keep the
|
||||
per-domain framing they were trained on (see PRD §6).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from fastmcp import FastMCP
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from beaver_gateway.mcp.types import PythonToolMcp
|
||||
|
||||
|
||||
def build_python_tool_server(spec: PythonToolMcp) -> FastMCP:
|
||||
"""Construct a ``FastMCP`` namespace from a ``PythonToolMcp``.
|
||||
|
||||
The callables in ``spec.tools`` are registered as MCP tools using
|
||||
FastMCP's introspection — names, docstrings, and type hints turn
|
||||
into the tool schema. No decorator wiring is needed: the
|
||||
``tools=`` constructor argument accepts plain callables.
|
||||
"""
|
||||
return FastMCP(name=spec.name, tools=list(spec.tools))
|
||||
Reference in New Issue
Block a user