feat: implement raycast backend
This commit is contained in:
@@ -14,3 +14,6 @@ docs/
|
||||
|
||||
# IDE
|
||||
.idea
|
||||
|
||||
# Local env
|
||||
.env
|
||||
|
||||
+43
-6
@@ -1,18 +1,55 @@
|
||||
# Stub user config for the Phase 0 DoD.
|
||||
# Sample user config — grows alongside the implementation phases.
|
||||
#
|
||||
# Loader (beaver_gateway/config_loader.py) execs this file with
|
||||
# ClaudeAgent, RaycastAgent, McpServer, ExposedMcp, Gateway already
|
||||
# bound. The top-level `gateway = Gateway(...)` is what gets picked up.
|
||||
# bound — so importing them is optional. We import explicitly here so
|
||||
# IDEs and type-checkers see real symbols instead of free variables.
|
||||
from datetime import date
|
||||
|
||||
gateway = Gateway( # type: ignore[name-defined] # noqa: F821
|
||||
from beaver_gateway.agents.claude import ClaudeAgent
|
||||
from beaver_gateway.agents.raycast import RaycastAgent, RemoteTool, UserPreferences
|
||||
from beaver_gateway.core.registry import Gateway
|
||||
from beaver_gateway.frontends.anthropic import AnthropicMessagesFrontend
|
||||
|
||||
gateway = Gateway(
|
||||
agents=[
|
||||
ClaudeAgent( # type: ignore[name-defined] # noqa: F821
|
||||
ClaudeAgent(
|
||||
name="stub",
|
||||
model="claude-sonnet-4-5",
|
||||
model="claude-sonnet-4-6",
|
||||
system_prompt="You are a stub agent used to validate the Phase 0 skeleton.",
|
||||
cwd="/tmp",
|
||||
),
|
||||
# Phase 1.2 — a RaycastAgent the AnthropicMessagesFrontend will
|
||||
# route via RaycastBackend. Phase 1.5 added the per-agent knobs
|
||||
# (`temperature` / `additional_system_instructions` / etc.) —
|
||||
# only `model`, `system_prompt`, and at least one of the others
|
||||
# is mandatory.
|
||||
RaycastAgent(
|
||||
name="research",
|
||||
model="Gemini 3.1 Flash Lite",
|
||||
system_prompt=(
|
||||
"You are a research assistant. "
|
||||
"Reply in the user's language. Cite URLs when you use web search."
|
||||
),
|
||||
temperature=0.5,
|
||||
available_native_tools=(RemoteTool.WEB_SEARCH, RemoteTool.READ_PAGE),
|
||||
# Lambda so today's date is rebuilt on every request while
|
||||
# locale/timezone stay pinned. ``True`` would give the same
|
||||
# fresh-date behaviour but would also auto-pick host locale
|
||||
# and timezone (``en-US`` / system tz), which isn't what we
|
||||
# want here.
|
||||
user_preferences=lambda: UserPreferences(
|
||||
locale="en-GB",
|
||||
timezone="Europe/Berlin",
|
||||
current_date=date.today().isoformat(), # noqa: DTZ011 — local date is intended
|
||||
),
|
||||
),
|
||||
],
|
||||
mcps=[],
|
||||
frontends=[],
|
||||
frontends=[
|
||||
# Phase 1.4 — expose the agents as `model=<name>` on an
|
||||
# Anthropic-compatible Messages endpoint. Auth comes from
|
||||
# `BOOTSTRAP_TOKENS` in the env (`name1:value1,name2:value2`).
|
||||
AnthropicMessagesFrontend(host="0.0.0.0", port=8000),
|
||||
],
|
||||
)
|
||||
|
||||
@@ -16,5 +16,14 @@ services:
|
||||
ADMIN_USER: "admin"
|
||||
ADMIN_PASS: "change-me"
|
||||
SESSION_SECRET: "dev-secret-change-me"
|
||||
# Phase 1.4 — comma-separated `name:value` pairs. The frontend
|
||||
# 401s every request until at least one token is set.
|
||||
BOOTSTRAP_TOKENS: "cursor:dev-token-change-me"
|
||||
# RAYCAST_BEARER / RAYCAST_DEVICE_ID + a raycast.json bind go
|
||||
# here once you wire up an actual RaycastAgent. The example
|
||||
# config.py declares one, so set these (or remove the agent)
|
||||
# before exposing port 8000.
|
||||
ports:
|
||||
- "8000:8000"
|
||||
volumes:
|
||||
- ./config.py:/config/config.py:ro
|
||||
|
||||
@@ -7,11 +7,21 @@ runtime branch.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path # noqa: TC003 — runtime use by pydantic
|
||||
|
||||
from beaver_gateway.agents.base import BaseAgent
|
||||
|
||||
|
||||
class ClaudeAgent(BaseAgent):
|
||||
"""Agent backed by ``claude-code-api``."""
|
||||
"""Agent backed by ``claude-code-api``.
|
||||
|
||||
cwd: str
|
||||
``cwd`` is the working directory the claude CLI is spawned in;
|
||||
typed as ``Path`` (Pydantic coerces strings) since claude-code-api
|
||||
accepts ``str | os.PathLike[str]``. ``available_native_tools`` stays
|
||||
``tuple[str, ...]`` because Claude Code's tool set is a moving target
|
||||
(Bash/Read/Edit/Write/WebSearch/etc. — versions add and rename), so
|
||||
pinning it as a ``Literal`` would rot the type each release.
|
||||
"""
|
||||
|
||||
cwd: Path
|
||||
available_native_tools: tuple[str, ...] = ()
|
||||
|
||||
@@ -1,15 +1,79 @@
|
||||
"""Raycast agent definition."""
|
||||
"""Raycast agent definition.
|
||||
|
||||
Field set is the union of ``raycast_api.ChatAPI.stream`` parameters that
|
||||
make sense as **per-agent defaults**. Per-request values (currently:
|
||||
``temperature``) win when both are set; the rest fall back to whatever
|
||||
the agent declared, then to Raycast's own defaults.
|
||||
|
||||
``BaseAgent.system_prompt`` maps onto Raycast's wire field
|
||||
``additional_system_instructions`` (the slot the real client uses for
|
||||
*content*); the wire field ``system_instructions`` stays at the Raycast
|
||||
source default — ``"markdown"`` for ``AI_CHAT``, ``"plain"`` otherwise.
|
||||
We don't expose that wire dichotomy to the user — they get one
|
||||
conceptual "system prompt".
|
||||
|
||||
Excluded on purpose:
|
||||
|
||||
* ``buffer_id``/``message_id``/``current_date`` — per-call ephemeral
|
||||
* ``provider`` override — escape hatch for non-catalog models, no clear
|
||||
use case yet (revisit in PRD §14 when discovery lands)
|
||||
* ``locale`` — process-wide via ``Settings.raycast_locale`` because we
|
||||
only spin up one ``raycast_api.Client`` per gateway
|
||||
* ``system_instructions`` (wire) — that's a format marker, not content;
|
||||
the SDK fills it from the source default and we let it
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from raycast_api import Source
|
||||
from raycast_api import ( # noqa: F401 — UserPreferences re-exported for user configs
|
||||
RemoteTool,
|
||||
Source,
|
||||
UserPreferences,
|
||||
UserPreferencesArg,
|
||||
)
|
||||
|
||||
from beaver_gateway.agents.base import BaseAgent
|
||||
|
||||
|
||||
class RaycastAgent(BaseAgent):
|
||||
"""Agent backed by ``raycast-api``."""
|
||||
"""Agent backed by ``raycast-api``.
|
||||
|
||||
``available_native_tools`` is the closed set of Raycast's server-side
|
||||
"remote tools" (``web_search``, ``search_images``, ``read_page``) —
|
||||
typed as ``RemoteTool`` so config-time IDE completion lists exactly
|
||||
the three valid values. Pydantic also coerces string literals, so
|
||||
``("web_search", "read_page")`` keeps working unchanged.
|
||||
|
||||
``user_preferences`` toggles the auto-generated ``<user-preferences>``
|
||||
block Raycast prepends to ``additional_system_instructions``:
|
||||
|
||||
* ``True`` (default) → auto from host locale/timezone/today, rebuilt
|
||||
every request so the date stays fresh;
|
||||
* ``False`` → omit the block entirely;
|
||||
* ``UserPreferences(...)`` instance → used verbatim (frozen at the
|
||||
time the agent was loaded, so the date won't auto-update);
|
||||
* ``Callable[[], UserPreferencesArg]`` → re-invoked on every request.
|
||||
Use this for the common case "fresh date but custom
|
||||
locale/timezone": ``user_preferences=lambda:
|
||||
UserPreferences(locale="ru-RU", timezone="Europe/Berlin",
|
||||
current_date=date.today().isoformat())``. Callables may nest
|
||||
(a lambda returning a lambda…) but there's no real reason to.
|
||||
|
||||
The library uses this block for date/locale-aware formatting, not
|
||||
for personalisation/memory — those are out of scope upstream.
|
||||
|
||||
``reasoning_effort`` values vary by model: GPT-5 takes
|
||||
``"minimal"|"low"|"medium"|"high"``; Anthropic exposes nothing here
|
||||
(Claude reasoning lives in a separate ``…-reasoning`` model variant
|
||||
in the catalog). Unknown effort for the chosen model is ignored
|
||||
server-side, so we stay loose as ``str | None``.
|
||||
"""
|
||||
|
||||
streaming: bool = True
|
||||
available_native_tools: tuple[str, ...] = ()
|
||||
available_native_tools: tuple[RemoteTool, ...] = ()
|
||||
source: Source = Source.AI_CHAT
|
||||
|
||||
temperature: float | None = None
|
||||
reasoning_effort: str | None = None
|
||||
tool_choice: str | None = None
|
||||
user_preferences: UserPreferencesArg = True
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
"""Backend adapters.
|
||||
|
||||
Each backend wraps a provider-specific SDK (``raycast-api``, ``claude-code-api``)
|
||||
and yields the unified :class:`~beaver_gateway.core.events.MessageStreamEvent`
|
||||
family. The Anthropic-style frontend serialises events straight to SSE.
|
||||
"""
|
||||
@@ -0,0 +1,37 @@
|
||||
"""Backend protocol.
|
||||
|
||||
A backend turns an Anthropic-style turn (``messages`` + agent definition)
|
||||
into a stream of :class:`~beaver_gateway.core.events.MessageStreamEvent`
|
||||
records. The frontend serializes whatever comes out straight to SSE, so
|
||||
backends are the only place where provider quirks are translated.
|
||||
|
||||
Implementations are plain :class:`typing.Protocol` conformers — no ABC
|
||||
subclassing — to keep them swappable in tests with bare async generators.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Protocol
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import AsyncIterator, Iterable
|
||||
|
||||
from anthropic.types import MessageParam
|
||||
|
||||
from beaver_gateway.agents.base import BaseAgent
|
||||
from beaver_gateway.core.events import MessageStreamEvent
|
||||
|
||||
|
||||
class Backend(Protocol):
|
||||
"""Single-method protocol; ``complete`` returns an async iterator of events."""
|
||||
|
||||
def complete(
|
||||
self,
|
||||
*,
|
||||
agent: BaseAgent,
|
||||
messages: Iterable[MessageParam],
|
||||
system: str | None = None,
|
||||
**options: Any,
|
||||
) -> AsyncIterator[MessageStreamEvent]:
|
||||
"""Yield Anthropic stream events for one turn against ``agent``."""
|
||||
...
|
||||
@@ -0,0 +1,468 @@
|
||||
"""Raycast backend adapter.
|
||||
|
||||
Translates between Anthropic's ``/v1/messages`` wire vocabulary (incoming
|
||||
``MessageParam`` history, outgoing ``MessageStreamEvent`` SSE) and the
|
||||
``raycast-api`` SDK (``Message`` history, ``ChatStreamChunk`` SSE).
|
||||
|
||||
Two halves live here:
|
||||
|
||||
* :func:`_to_raycast_messages` — pure conversion of an Anthropic message
|
||||
list into ``list[raycast_api.Message]``. ``tool_result`` blocks carry no
|
||||
tool name in Anthropic; we recover it by remembering each ``tool_use``
|
||||
id we saw upstream.
|
||||
* :meth:`RaycastBackend.complete` — opens a ``client.chat.stream`` and
|
||||
walks chunks through a tiny block-state machine. The state machine
|
||||
exists only because Raycast streams ``tool_calls`` in three phases
|
||||
(open with id+name, deltas with empty id, final summary with the full
|
||||
``arguments``) — Anthropic wants one ``content_block_start`` → deltas
|
||||
→ ``content_block_stop`` per block, so we de-duplicate the final
|
||||
summary against the per-delta increments already emitted.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from typing import TYPE_CHECKING, Any, cast
|
||||
|
||||
from raycast_api import Message as RaycastMessage
|
||||
from raycast_api import RemoteTool, Tool, ToolCall
|
||||
|
||||
from beaver_gateway.agents.raycast import RaycastAgent
|
||||
from beaver_gateway.core.events import (
|
||||
StopReason,
|
||||
build_content_block_stop,
|
||||
build_input_json_delta,
|
||||
build_message_delta,
|
||||
build_message_start,
|
||||
build_message_stop,
|
||||
build_text_block_start,
|
||||
build_text_delta,
|
||||
build_thinking_block_start,
|
||||
build_thinking_delta,
|
||||
build_tool_use_block_start,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import AsyncIterator, Iterable, Mapping, Sequence
|
||||
|
||||
from anthropic.types import MessageParam
|
||||
from raycast_api import ChatStreamChunk, Client
|
||||
|
||||
from beaver_gateway.agents.base import BaseAgent
|
||||
from beaver_gateway.core.events import MessageStreamEvent
|
||||
else:
|
||||
from collections.abc import Mapping
|
||||
|
||||
|
||||
__all__ = ["RaycastBackend"]
|
||||
|
||||
|
||||
_RAYCAST_TO_ANTHROPIC_STOP: dict[str, StopReason] = {
|
||||
"stop": "end_turn",
|
||||
"STOP": "end_turn",
|
||||
"end_turn": "end_turn",
|
||||
"tool_calls": "tool_use",
|
||||
"tool_use": "tool_use",
|
||||
"length": "max_tokens",
|
||||
"max_tokens": "max_tokens",
|
||||
"stop_sequence": "stop_sequence",
|
||||
}
|
||||
|
||||
|
||||
def _first_set[T](*values: T | None) -> T | None:
|
||||
"""Return the first value that isn't ``None``, else ``None``.
|
||||
|
||||
Used to layer per-request options over per-agent defaults: a real
|
||||
``0.0`` temperature on the request must override the agent's
|
||||
``None``, but the agent's value must take effect when the request
|
||||
omits it. ``or``-chaining is wrong here because ``0.0`` / ``""`` are
|
||||
legitimate values and falsy.
|
||||
"""
|
||||
for v in values:
|
||||
if v is not None:
|
||||
return v
|
||||
return None
|
||||
|
||||
|
||||
def _map_stop_reason(raw: str | None) -> StopReason:
|
||||
"""Map Raycast ``finish_reason`` strings into Anthropic stop reasons.
|
||||
|
||||
Unknown values collapse to ``end_turn`` — Anthropic clients treat that
|
||||
as a clean finish, which is the right user-visible behaviour when the
|
||||
upstream simply went off-vocabulary.
|
||||
"""
|
||||
if raw is None:
|
||||
return "end_turn"
|
||||
return _RAYCAST_TO_ANTHROPIC_STOP.get(raw, "end_turn")
|
||||
|
||||
|
||||
def _as_mapping(block: object) -> Mapping[str, Any] | None:
|
||||
"""Narrow a block param (TypedDict | dict | anything) to a read-only mapping.
|
||||
|
||||
ty refuses to assign a TypedDict to ``dict[str, Any]`` (TypedDicts are
|
||||
not freely-mutable dicts in its model), and our access pattern is
|
||||
strictly read-only — so we go through ``Mapping``.
|
||||
"""
|
||||
if isinstance(block, Mapping):
|
||||
return cast("Mapping[str, Any]", block)
|
||||
return None
|
||||
|
||||
|
||||
def _extract_tool_result_text(content: object) -> str:
|
||||
"""Flatten an Anthropic ``tool_result`` block's content into plain text.
|
||||
|
||||
Anthropic accepts either a string or a list of typed blocks (text /
|
||||
image / etc.). Raycast only carries text in tool results, so we keep
|
||||
text blocks and JSON-encode anything richer rather than dropping it.
|
||||
"""
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
parts: list[str] = []
|
||||
for block in content:
|
||||
block_map = _as_mapping(block)
|
||||
if block_map is not None and block_map.get("type") == "text":
|
||||
parts.append(str(block_map.get("text", "")))
|
||||
else:
|
||||
parts.append(
|
||||
json.dumps(block, separators=(",", ":"), ensure_ascii=False)
|
||||
)
|
||||
return "\n".join(parts)
|
||||
return json.dumps(content, separators=(",", ":"), ensure_ascii=False)
|
||||
|
||||
|
||||
def _to_raycast_messages(
|
||||
messages: Iterable[MessageParam],
|
||||
) -> list[RaycastMessage]:
|
||||
"""Convert an Anthropic message history into a Raycast one.
|
||||
|
||||
``tool_use_id → name`` is tracked across the iteration so that
|
||||
Raycast ``tool`` messages — which require a tool name the Anthropic
|
||||
side does not carry on ``tool_result`` blocks — can be reconstructed.
|
||||
"""
|
||||
out: list[RaycastMessage] = []
|
||||
tool_use_names: dict[str, str] = {}
|
||||
|
||||
for msg in messages:
|
||||
role = msg["role"]
|
||||
content = msg.get("content", "")
|
||||
|
||||
if isinstance(content, str):
|
||||
if role == "user":
|
||||
out.append(RaycastMessage.user(content))
|
||||
else:
|
||||
out.append(RaycastMessage.assistant(text=content))
|
||||
continue
|
||||
|
||||
if role == "user":
|
||||
text_parts: list[str] = []
|
||||
tool_results: list[Mapping[str, Any]] = []
|
||||
for block in content:
|
||||
block_map = _as_mapping(block)
|
||||
if block_map is None:
|
||||
continue
|
||||
btype = block_map.get("type")
|
||||
if btype == "text":
|
||||
text_parts.append(str(block_map.get("text", "")))
|
||||
elif btype == "tool_result":
|
||||
tool_results.append(block_map)
|
||||
if text_parts:
|
||||
out.append(RaycastMessage.user("\n".join(text_parts)))
|
||||
for tr in tool_results:
|
||||
tool_use_id = str(tr.get("tool_use_id", ""))
|
||||
out.append(
|
||||
RaycastMessage.tool(
|
||||
tool_call_id=tool_use_id,
|
||||
name=tool_use_names.get(tool_use_id, ""),
|
||||
result=_extract_tool_result_text(tr.get("content", "")),
|
||||
)
|
||||
)
|
||||
continue
|
||||
|
||||
text_parts = []
|
||||
tool_calls: list[ToolCall] = []
|
||||
for block in content:
|
||||
block_map = _as_mapping(block)
|
||||
if block_map is None:
|
||||
continue
|
||||
btype = block_map.get("type")
|
||||
if btype == "text":
|
||||
text_parts.append(str(block_map.get("text", "")))
|
||||
elif btype == "tool_use":
|
||||
tu_id = str(block_map.get("id", ""))
|
||||
tu_name = str(block_map.get("name", ""))
|
||||
tool_use_names[tu_id] = tu_name
|
||||
tool_calls.append(
|
||||
ToolCall(
|
||||
id=tu_id,
|
||||
name=tu_name,
|
||||
arguments=json.dumps(
|
||||
block_map.get("input", {}),
|
||||
separators=(",", ":"),
|
||||
ensure_ascii=False,
|
||||
),
|
||||
)
|
||||
)
|
||||
out.append(
|
||||
RaycastMessage.assistant(
|
||||
text="\n".join(text_parts),
|
||||
tool_calls=tool_calls or None,
|
||||
)
|
||||
)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
def _build_tool_list(
|
||||
agent: RaycastAgent,
|
||||
) -> list[Tool | RemoteTool | str] | None:
|
||||
"""Wrap each native-tool name as a Raycast remote tool.
|
||||
|
||||
Phase 1.2 scope: only the three model-agnostic remote tools
|
||||
(`web_search`, `search_images`, `read_page`). Client-defined tools
|
||||
coming through the Anthropic body's ``tools`` field stay out — that's
|
||||
Phase 1.3+ alongside ``accept_client_tools``.
|
||||
"""
|
||||
if not agent.available_native_tools:
|
||||
return None
|
||||
return [Tool.remote(name) for name in agent.available_native_tools]
|
||||
|
||||
|
||||
class _BlockState:
|
||||
"""Tracks the currently-open Anthropic content block, if any.
|
||||
|
||||
Anthropic events are sequential per block (``content_block_start`` →
|
||||
deltas → ``content_block_stop``). Raycast streams text and tool_calls
|
||||
interleaved across chunks; we keep one slot open at a time and close
|
||||
it whenever the kind changes.
|
||||
|
||||
Tool-call routing needs two side tables because Raycast streams ids
|
||||
in phase 1 only and ``index`` in every chunk: ``tool_id_to_block``
|
||||
keys by Raycast tool-call id, ``tool_idx_to_id`` resolves chunk
|
||||
indices back to that id for the no-id delta chunks.
|
||||
"""
|
||||
|
||||
__slots__ = ("index", "kind", "tool_id_to_block", "tool_idx_to_id")
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.index: int = -1
|
||||
self.kind: str | None = None # "text" | "thinking" | "tool_use" | None
|
||||
self.tool_id_to_block: dict[str, int] = {}
|
||||
self.tool_idx_to_id: dict[int, str] = {}
|
||||
|
||||
|
||||
class RaycastBackend:
|
||||
"""Adapter from ``raycast-api`` chat streams to Anthropic stream events.
|
||||
|
||||
Construction takes a long-lived :class:`raycast_api.Client` (one per
|
||||
gateway — bearer + device_id are process-wide). Each
|
||||
:meth:`complete` call opens one ``chat.stream`` and yields a fully
|
||||
Anthropic-shaped event sequence.
|
||||
"""
|
||||
|
||||
def __init__(self, client: Client) -> None:
|
||||
self._client = client
|
||||
|
||||
async def complete(
|
||||
self,
|
||||
*,
|
||||
agent: BaseAgent,
|
||||
messages: Iterable[MessageParam],
|
||||
system: str | None = None,
|
||||
**options: Any,
|
||||
) -> AsyncIterator[MessageStreamEvent]:
|
||||
if not isinstance(agent, RaycastAgent):
|
||||
msg = f"RaycastBackend requires RaycastAgent, got {type(agent).__name__}"
|
||||
raise TypeError(msg)
|
||||
|
||||
raycast_messages = _to_raycast_messages(messages)
|
||||
tools = _build_tool_list(agent)
|
||||
|
||||
# On the wire Raycast uses ``system_instructions`` as a format
|
||||
# marker (``"markdown"`` for AI_CHAT, ``"plain"`` otherwise —
|
||||
# filled in by the SDK from the source default when we pass
|
||||
# ``None``) and ``additional_system_instructions`` as the actual
|
||||
# prompt content. So our ``system_prompt`` (or the per-request
|
||||
# Anthropic ``system``, if present) flows into the *additional*
|
||||
# slot. The SDK still prepends ``<user-preferences>`` to whatever
|
||||
# we hand it via ``_build_preamble``.
|
||||
prompt_content = system if system is not None else agent.system_prompt
|
||||
|
||||
# Per-request options win over agent defaults; agent defaults
|
||||
# win over Raycast SDK defaults. ``None`` means "fall back".
|
||||
async for event in self._stream(
|
||||
agent=agent,
|
||||
raycast_messages=raycast_messages,
|
||||
tools=tools,
|
||||
prompt_content=prompt_content,
|
||||
temperature=_first_set(options.get("temperature"), agent.temperature),
|
||||
reasoning_effort=_first_set(
|
||||
options.get("reasoning_effort"), agent.reasoning_effort
|
||||
),
|
||||
tool_choice=_first_set(options.get("tool_choice"), agent.tool_choice),
|
||||
):
|
||||
yield event
|
||||
|
||||
async def _stream(
|
||||
self,
|
||||
*,
|
||||
agent: RaycastAgent,
|
||||
raycast_messages: Sequence[RaycastMessage],
|
||||
tools: list[Tool | RemoteTool | str] | None,
|
||||
prompt_content: str | None,
|
||||
temperature: float | None,
|
||||
reasoning_effort: str | None,
|
||||
tool_choice: str | None,
|
||||
) -> AsyncIterator[MessageStreamEvent]:
|
||||
message_id = f"msg_{uuid.uuid4().hex}"
|
||||
yield build_message_start(message_id=message_id, model=agent.model)
|
||||
|
||||
state = _BlockState()
|
||||
final_finish: str | None = None
|
||||
final_usage: dict[str, int] | None = None
|
||||
|
||||
stream = self._client.chat.stream(
|
||||
model=agent.model,
|
||||
messages=list(raycast_messages),
|
||||
source=agent.source,
|
||||
# ``system_instructions=None`` → SDK substitutes the source
|
||||
# default (``"markdown"`` / ``"plain"``). Real prompt goes
|
||||
# into ``additional_system_instructions``.
|
||||
additional_system_instructions=prompt_content,
|
||||
user_preferences=agent.user_preferences,
|
||||
tools=tools,
|
||||
tool_choice=tool_choice,
|
||||
temperature=temperature,
|
||||
reasoning_effort=reasoning_effort,
|
||||
)
|
||||
|
||||
async for chunk in stream:
|
||||
for event in self._handle_chunk(chunk, state):
|
||||
yield event
|
||||
if chunk.finish_reason:
|
||||
final_finish = chunk.finish_reason
|
||||
if chunk.usage:
|
||||
final_usage = chunk.usage
|
||||
|
||||
# Close whatever block is still open before the message delta.
|
||||
if state.kind is not None:
|
||||
yield build_content_block_stop(state.index)
|
||||
state.kind = None
|
||||
|
||||
yield build_message_delta(
|
||||
stop_reason=_map_stop_reason(final_finish),
|
||||
usage=final_usage,
|
||||
)
|
||||
yield build_message_stop()
|
||||
|
||||
def _handle_chunk(
|
||||
self, chunk: ChatStreamChunk, state: _BlockState
|
||||
) -> Iterable[MessageStreamEvent]:
|
||||
"""Translate one Raycast chunk into zero or more Anthropic events.
|
||||
|
||||
Branch order matters: we close the previous block kind before
|
||||
opening a new one (text → tool_use, tool_use → text, etc.) and we
|
||||
intentionally fall through ``tool_calls`` only if there's a real
|
||||
delta — the final-summary chunk re-sends the full arguments
|
||||
string we've already streamed delta-by-delta.
|
||||
"""
|
||||
events: list[MessageStreamEvent] = []
|
||||
is_final_summary = chunk.finish_reason is not None
|
||||
|
||||
if chunk.text:
|
||||
events.extend(self._ensure_kind(state, "text"))
|
||||
events.append(build_text_delta(state.index, chunk.text))
|
||||
|
||||
if chunk.reasoning:
|
||||
events.extend(self._ensure_kind(state, "thinking"))
|
||||
events.append(build_thinking_delta(state.index, chunk.reasoning))
|
||||
|
||||
if chunk.tool_calls:
|
||||
events.extend(
|
||||
self._handle_tool_calls(chunk, state, is_final_summary=is_final_summary)
|
||||
)
|
||||
|
||||
return events
|
||||
|
||||
def _ensure_kind(
|
||||
self, state: _BlockState, kind: str
|
||||
) -> Iterable[MessageStreamEvent]:
|
||||
"""Open a block of ``kind``, closing any current block first."""
|
||||
if state.kind == kind:
|
||||
return []
|
||||
events: list[MessageStreamEvent] = []
|
||||
if state.kind is not None:
|
||||
events.append(build_content_block_stop(state.index))
|
||||
state.index += 1
|
||||
state.kind = kind
|
||||
if kind == "text":
|
||||
events.append(build_text_block_start(state.index))
|
||||
elif kind == "thinking":
|
||||
events.append(build_thinking_block_start(state.index))
|
||||
else:
|
||||
msg = f"unexpected block kind: {kind!r}"
|
||||
raise ValueError(msg)
|
||||
return events
|
||||
|
||||
def _handle_tool_calls(
|
||||
self,
|
||||
chunk: ChatStreamChunk,
|
||||
state: _BlockState,
|
||||
*,
|
||||
is_final_summary: bool,
|
||||
) -> Iterable[MessageStreamEvent]:
|
||||
"""Translate one chunk's ``tool_calls`` payload into Anthropic events.
|
||||
|
||||
Mirrors the keying logic of ``raycast_api.ChatResult._merge_tool_calls``
|
||||
so the same dedupe behaviour lands here: tool calls are tracked
|
||||
by id when present, otherwise by their wire ``index`` field, and
|
||||
the final-summary chunk's arguments string is dropped because the
|
||||
deltas already streamed it.
|
||||
"""
|
||||
events: list[MessageStreamEvent] = []
|
||||
raw_tcs = chunk.raw.get("tool_calls") or []
|
||||
|
||||
for i, tc in enumerate(chunk.tool_calls or []):
|
||||
raw_tc = raw_tcs[i] if i < len(raw_tcs) else {}
|
||||
idx_field = raw_tc.get("index") if isinstance(raw_tc, dict) else None
|
||||
|
||||
# Resolve this entry to a tool-id key, mirroring
|
||||
# `raycast_api.ChatResult._merge_tool_calls`. Phase 1 carries
|
||||
# id+index, phase 2 only index, phase 3 only id.
|
||||
tool_id: str | None = None
|
||||
if tc.id:
|
||||
tool_id = tc.id
|
||||
if isinstance(idx_field, int):
|
||||
state.tool_idx_to_id[idx_field] = tc.id
|
||||
elif isinstance(idx_field, int):
|
||||
tool_id = state.tool_idx_to_id.get(idx_field)
|
||||
if tool_id is None:
|
||||
continue
|
||||
|
||||
block_idx = state.tool_id_to_block.get(tool_id)
|
||||
if block_idx is None:
|
||||
# New tool_use block. Close any open text/thinking block first.
|
||||
if state.kind is not None:
|
||||
events.append(build_content_block_stop(state.index))
|
||||
state.index += 1
|
||||
state.kind = "tool_use"
|
||||
block_idx = state.index
|
||||
state.tool_id_to_block[tool_id] = block_idx
|
||||
events.append(
|
||||
build_tool_use_block_start(
|
||||
block_idx, tool_use_id=tool_id, name=tc.name or ""
|
||||
)
|
||||
)
|
||||
if tc.arguments and not is_final_summary:
|
||||
events.append(build_input_json_delta(block_idx, tc.arguments))
|
||||
continue
|
||||
|
||||
# Existing block. Skip args on the final summary — they're a
|
||||
# full restatement of what's already been delta'd.
|
||||
if is_final_summary:
|
||||
continue
|
||||
if tc.arguments:
|
||||
events.append(build_input_json_delta(block_idx, tc.arguments))
|
||||
|
||||
return events
|
||||
+136
-9
@@ -1,28 +1,155 @@
|
||||
"""Process entrypoint.
|
||||
|
||||
Phase 0.3 — load the user's ``/config/config.py`` via
|
||||
``config_loader``, build registries, print the
|
||||
``loaded N agents, M mcps, K frontends`` line from the Phase 0 DoD,
|
||||
exit cleanly. Phase 0.4 will install uvloop and start uvicorn(s) for
|
||||
the frontends and the internal MCP app.
|
||||
Phase 1.4 — async ``main``: install uvloop, load the user config, build
|
||||
registries + per-agent backends (only ``RaycastBackend`` so far), wire
|
||||
each frontend with a ``GatewayRuntime``, and run all
|
||||
``frontend.serve()`` coroutines concurrently. Without any frontends we
|
||||
still print the Phase 0 DoD line and exit cleanly so the bare skeleton
|
||||
keeps working.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from contextlib import AsyncExitStack
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import uvloop
|
||||
from raycast_api import Client as RaycastClient
|
||||
from raycast_api.config import Config as RaycastConfig
|
||||
|
||||
from beaver_gateway import config_loader
|
||||
from beaver_gateway.agents.raycast import RaycastAgent
|
||||
from beaver_gateway.backends.raycast import RaycastBackend
|
||||
from beaver_gateway.core.auth import TokenStore
|
||||
from beaver_gateway.core.registry import AgentRegistry, McpRegistry
|
||||
from beaver_gateway.frontends.base import GatewayRuntime
|
||||
from beaver_gateway.settings import Settings
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from beaver_gateway.backends.base import Backend
|
||||
|
||||
|
||||
_log = logging.getLogger("beaver_gateway.cli")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
settings = Settings() # ty: ignore[missing-argument]
|
||||
"""Sync wrapper: uvloop loop factory + asyncio.run."""
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s"
|
||||
)
|
||||
asyncio.run(_async_main(), loop_factory=uvloop.new_event_loop)
|
||||
|
||||
|
||||
async def _async_main() -> None:
|
||||
settings = Settings() # ty: ignore[missing-argument]
|
||||
gateway = config_loader.load(settings.config_path)
|
||||
|
||||
agents = AgentRegistry(gateway.agents)
|
||||
mcps = McpRegistry(gateway.mcps)
|
||||
token_store = TokenStore.from_env(settings.bootstrap_tokens)
|
||||
|
||||
print(
|
||||
f"beaver-gateway: loaded {len(agents)} agents, "
|
||||
f"{len(mcps)} mcps, {len(gateway.frontends)} frontends"
|
||||
async with AsyncExitStack() as stack:
|
||||
backends: dict[str, Backend] = await _build_backends(
|
||||
settings=settings, agents=agents, stack=stack
|
||||
)
|
||||
|
||||
runtime = GatewayRuntime(
|
||||
agents=agents,
|
||||
mcps=mcps,
|
||||
backends=backends,
|
||||
token_store=token_store,
|
||||
)
|
||||
|
||||
for fe in gateway.frontends:
|
||||
fe.configure(runtime)
|
||||
|
||||
_log.info(
|
||||
"beaver-gateway: loaded %d agents, %d mcps, %d frontends",
|
||||
len(agents),
|
||||
len(mcps),
|
||||
len(gateway.frontends),
|
||||
)
|
||||
|
||||
# Keep the Phase 0 DoD line on stdout for grep-friendly smoke
|
||||
# tests, in addition to the structured log line above.
|
||||
print(
|
||||
f"beaver-gateway: loaded {len(agents)} agents, "
|
||||
f"{len(mcps)} mcps, {len(gateway.frontends)} frontends"
|
||||
)
|
||||
|
||||
if not gateway.frontends:
|
||||
return
|
||||
|
||||
async with asyncio.TaskGroup() as tg:
|
||||
for fe in gateway.frontends:
|
||||
tg.create_task(fe.serve())
|
||||
|
||||
|
||||
async def _build_backends(
|
||||
*,
|
||||
settings: Settings,
|
||||
agents: AgentRegistry,
|
||||
stack: AsyncExitStack,
|
||||
) -> dict[str, Backend]:
|
||||
"""Construct one backend per agent name.
|
||||
|
||||
The Raycast ``Client`` is shared across every ``RaycastAgent``
|
||||
(bearer + device-id are process-wide), so we open it lazily — only
|
||||
when at least one ``RaycastAgent`` is present — and close it via
|
||||
the caller's exit stack.
|
||||
"""
|
||||
backends: dict[str, Backend] = {}
|
||||
|
||||
raycast_agents = [a for a in agents if isinstance(a, RaycastAgent)]
|
||||
if raycast_agents:
|
||||
client = await _try_open_raycast_client(settings, stack)
|
||||
if client is not None:
|
||||
raycast_backend = RaycastBackend(client)
|
||||
for a in raycast_agents:
|
||||
backends[a.name] = raycast_backend
|
||||
|
||||
# Phase 2: ClaudeAgent → ClaudeCodeBackendAdapter goes here.
|
||||
|
||||
return backends
|
||||
|
||||
|
||||
async def _try_open_raycast_client(
|
||||
settings: Settings, stack: AsyncExitStack
|
||||
) -> RaycastClient | None:
|
||||
"""Open a ``raycast_api.Client`` if creds are available, else warn + skip.
|
||||
|
||||
Skipping is gentler than failing startup: the user can bring up the
|
||||
gateway, list their agents, and still hit a ``ClaudeAgent``; affected
|
||||
``RaycastAgent`` instances 503 with a specific message at request time.
|
||||
Missing ``raycast.json`` falls into the same bucket — first-run
|
||||
users won't have it yet.
|
||||
"""
|
||||
if not settings.raycast_bearer:
|
||||
_log.warning(
|
||||
"RaycastAgent present but RAYCAST_BEARER is unset — those agents will 503"
|
||||
)
|
||||
return None
|
||||
if not settings.raycast_device_id:
|
||||
_log.warning(
|
||||
"RaycastAgent present but RAYCAST_DEVICE_ID is unset — those agents "
|
||||
"will 503 (generate once with `python -c 'import secrets; "
|
||||
"print(secrets.token_hex(32))'`)"
|
||||
)
|
||||
return None
|
||||
if not settings.raycast_config_path.exists():
|
||||
_log.warning(
|
||||
"RaycastAgent present but %s is missing — those agents will 503",
|
||||
settings.raycast_config_path,
|
||||
)
|
||||
return None
|
||||
|
||||
config = RaycastConfig.load(settings.raycast_config_path)
|
||||
client = RaycastClient(
|
||||
config=config,
|
||||
bearer_token=settings.raycast_bearer,
|
||||
device_id=settings.raycast_device_id,
|
||||
locale=settings.raycast_locale,
|
||||
)
|
||||
return await stack.enter_async_context(client)
|
||||
|
||||
@@ -0,0 +1,105 @@
|
||||
"""Bearer-token verification (Phase 1.3 — in-memory only).
|
||||
|
||||
Phase 4 will replace this with a DB-backed store (PRD §8 ``tokens``
|
||||
table, Argon2 hashes, scopes, ``last_used_at`` batching). Until then,
|
||||
frontends authenticate callers against an in-memory ``{value: name}``
|
||||
dict seeded from the ``BOOTSTRAP_TOKENS`` env var.
|
||||
|
||||
Format::
|
||||
|
||||
BOOTSTRAP_TOKENS=cursor:s3cret,laptop:hunter2
|
||||
|
||||
The *name* side is for audit lines — :py:meth:`TokenStore.verify`
|
||||
returns it on hit so callers can attribute the request without
|
||||
exposing the raw secret. ``None`` means "no such token"; callers
|
||||
turn that into 401.
|
||||
|
||||
This module deliberately knows nothing about HTTP frameworks — it
|
||||
takes a raw token (or the verbatim ``Authorization`` header value)
|
||||
and returns a name-or-None. Frontends own the response shape.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Mapping
|
||||
|
||||
|
||||
class TokenStoreError(ValueError):
|
||||
"""Malformed ``BOOTSTRAP_TOKENS`` value."""
|
||||
|
||||
|
||||
class TokenStore:
|
||||
"""Constant-time-ish bearer verification over a static name→value map.
|
||||
|
||||
The store is keyed by value internally (``verify`` lookup is O(1))
|
||||
but constructed from a name→value mapping because that's how the
|
||||
user thinks about it — one human-readable label per caller.
|
||||
"""
|
||||
|
||||
__slots__ = ("_by_value",)
|
||||
|
||||
def __init__(self, tokens: Mapping[str, str]) -> None:
|
||||
by_value: dict[str, str] = {}
|
||||
for name, value in tokens.items():
|
||||
if not name or not value:
|
||||
msg = f"empty name or value in token map (name={name!r})"
|
||||
raise TokenStoreError(msg)
|
||||
if value in by_value:
|
||||
msg = (
|
||||
f"duplicate token value for names "
|
||||
f"{by_value[value]!r} and {name!r}"
|
||||
)
|
||||
raise TokenStoreError(msg)
|
||||
by_value[value] = name
|
||||
self._by_value = by_value
|
||||
|
||||
@classmethod
|
||||
def from_env(cls, raw: str) -> TokenStore:
|
||||
"""Parse ``name1:value1,name2:value2`` (the ``BOOTSTRAP_TOKENS`` form)."""
|
||||
tokens: dict[str, str] = {}
|
||||
for chunk in raw.split(","):
|
||||
entry = chunk.strip()
|
||||
if not entry:
|
||||
continue
|
||||
name, sep, value = entry.partition(":")
|
||||
if not sep:
|
||||
msg = f"token entry missing ':' separator: {entry!r}"
|
||||
raise TokenStoreError(msg)
|
||||
name, value = name.strip(), value.strip()
|
||||
if name in tokens:
|
||||
msg = f"duplicate token name: {name!r}"
|
||||
raise TokenStoreError(msg)
|
||||
tokens[name] = value
|
||||
return cls(tokens)
|
||||
|
||||
def verify(self, token: str | None) -> str | None:
|
||||
"""Return the token's name if known, else ``None``."""
|
||||
if not token:
|
||||
return None
|
||||
return self._by_value.get(token)
|
||||
|
||||
def verify_bearer(self, authorization: str | None) -> str | None:
|
||||
"""Strip the ``Bearer`` prefix (case-insensitive) then verify.
|
||||
|
||||
Accepts a bare token too — Cursor's MCP transport sometimes
|
||||
passes the raw value via ``?token=`` and reuses the same
|
||||
verifier; treating an unprefixed header as a bare token keeps
|
||||
both call sites on one method.
|
||||
"""
|
||||
if not authorization:
|
||||
return None
|
||||
head, sep, rest = authorization.partition(" ")
|
||||
token = rest.strip() if sep and head.lower() == "bearer" else authorization
|
||||
return self.verify(token)
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self._by_value)
|
||||
|
||||
def __bool__(self) -> bool:
|
||||
return bool(self._by_value)
|
||||
|
||||
|
||||
__all__ = ["TokenStore", "TokenStoreError"]
|
||||
@@ -0,0 +1,175 @@
|
||||
"""Unified streaming-event protocol shared by every backend adapter.
|
||||
|
||||
We piggyback on :mod:`anthropic.types` rather than reinvent the wire format:
|
||||
``AnthropicMessagesFrontend`` ultimately serializes whatever a backend yields
|
||||
straight to SSE via :py:meth:`pydantic.BaseModel.model_dump_json`, so events
|
||||
must be valid Anthropic ``message_stream`` records. The aliases below give
|
||||
the rest of the codebase one import path; the ``build_*`` helpers keep the
|
||||
verbose constructors out of every adapter.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Literal
|
||||
|
||||
from anthropic.types import (
|
||||
InputJSONDelta,
|
||||
Message,
|
||||
MessageDeltaUsage,
|
||||
RawContentBlockDeltaEvent,
|
||||
RawContentBlockStartEvent,
|
||||
RawContentBlockStopEvent,
|
||||
RawMessageDeltaEvent,
|
||||
RawMessageStartEvent,
|
||||
RawMessageStopEvent,
|
||||
RawMessageStreamEvent,
|
||||
SignatureDelta,
|
||||
TextBlock,
|
||||
TextDelta,
|
||||
ThinkingBlock,
|
||||
ThinkingDelta,
|
||||
ToolUseBlock,
|
||||
Usage,
|
||||
)
|
||||
from anthropic.types.raw_message_delta_event import Delta as MessageDelta
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Mapping
|
||||
|
||||
StopReason = Literal[
|
||||
"end_turn", "max_tokens", "stop_sequence", "tool_use", "pause_turn", "refusal"
|
||||
]
|
||||
"""Anthropic-canonical stop reasons. Backends map their native vocabulary into this."""
|
||||
|
||||
|
||||
MessageStreamEvent = RawMessageStreamEvent
|
||||
"""Single public alias for the event family yielded by ``Backend.complete``."""
|
||||
|
||||
|
||||
__all__ = [
|
||||
"MessageDelta",
|
||||
"MessageDeltaUsage",
|
||||
"MessageStreamEvent",
|
||||
"RawContentBlockDeltaEvent",
|
||||
"RawContentBlockStartEvent",
|
||||
"RawContentBlockStopEvent",
|
||||
"RawMessageDeltaEvent",
|
||||
"RawMessageStartEvent",
|
||||
"RawMessageStopEvent",
|
||||
"StopReason",
|
||||
"Usage",
|
||||
"build_content_block_stop",
|
||||
"build_input_json_delta",
|
||||
"build_message_delta",
|
||||
"build_message_start",
|
||||
"build_message_stop",
|
||||
"build_signature_delta",
|
||||
"build_text_block_start",
|
||||
"build_text_delta",
|
||||
"build_thinking_block_start",
|
||||
"build_thinking_delta",
|
||||
"build_tool_use_block_start",
|
||||
]
|
||||
|
||||
|
||||
def build_message_start(
|
||||
*, message_id: str, model: str, input_tokens: int = 0, output_tokens: int = 0
|
||||
) -> RawMessageStartEvent:
|
||||
"""Open a turn. Token counts get finalized later via ``message_delta``."""
|
||||
return RawMessageStartEvent(
|
||||
type="message_start",
|
||||
message=Message(
|
||||
id=message_id,
|
||||
type="message",
|
||||
role="assistant",
|
||||
model=model,
|
||||
content=[],
|
||||
stop_reason=None,
|
||||
stop_sequence=None,
|
||||
usage=Usage(input_tokens=input_tokens, output_tokens=output_tokens),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def build_text_block_start(index: int) -> RawContentBlockStartEvent:
|
||||
return RawContentBlockStartEvent(
|
||||
type="content_block_start",
|
||||
index=index,
|
||||
content_block=TextBlock(type="text", text="", citations=None),
|
||||
)
|
||||
|
||||
|
||||
def build_text_delta(index: int, text: str) -> RawContentBlockDeltaEvent:
|
||||
return RawContentBlockDeltaEvent(
|
||||
type="content_block_delta",
|
||||
index=index,
|
||||
delta=TextDelta(type="text_delta", text=text),
|
||||
)
|
||||
|
||||
|
||||
def build_tool_use_block_start(
|
||||
index: int, *, tool_use_id: str, name: str
|
||||
) -> RawContentBlockStartEvent:
|
||||
return RawContentBlockStartEvent(
|
||||
type="content_block_start",
|
||||
index=index,
|
||||
content_block=ToolUseBlock(
|
||||
type="tool_use", id=tool_use_id, name=name, input={}
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def build_input_json_delta(index: int, partial_json: str) -> RawContentBlockDeltaEvent:
|
||||
return RawContentBlockDeltaEvent(
|
||||
type="content_block_delta",
|
||||
index=index,
|
||||
delta=InputJSONDelta(type="input_json_delta", partial_json=partial_json),
|
||||
)
|
||||
|
||||
|
||||
def build_thinking_block_start(index: int) -> RawContentBlockStartEvent:
|
||||
return RawContentBlockStartEvent(
|
||||
type="content_block_start",
|
||||
index=index,
|
||||
content_block=ThinkingBlock(type="thinking", thinking="", signature=""),
|
||||
)
|
||||
|
||||
|
||||
def build_thinking_delta(index: int, thinking: str) -> RawContentBlockDeltaEvent:
|
||||
return RawContentBlockDeltaEvent(
|
||||
type="content_block_delta",
|
||||
index=index,
|
||||
delta=ThinkingDelta(type="thinking_delta", thinking=thinking),
|
||||
)
|
||||
|
||||
|
||||
def build_signature_delta(index: int, signature: str) -> RawContentBlockDeltaEvent:
|
||||
return RawContentBlockDeltaEvent(
|
||||
type="content_block_delta",
|
||||
index=index,
|
||||
delta=SignatureDelta(type="signature_delta", signature=signature),
|
||||
)
|
||||
|
||||
|
||||
def build_content_block_stop(index: int) -> RawContentBlockStopEvent:
|
||||
return RawContentBlockStopEvent(type="content_block_stop", index=index)
|
||||
|
||||
|
||||
def build_message_delta(
|
||||
*,
|
||||
stop_reason: StopReason | None,
|
||||
usage: Mapping[str, int] | None = None,
|
||||
stop_sequence: str | None = None,
|
||||
) -> RawMessageDeltaEvent:
|
||||
"""Close a turn. ``usage`` is a partial dict (input/output tokens, cache_*)."""
|
||||
return RawMessageDeltaEvent(
|
||||
type="message_delta",
|
||||
delta=MessageDelta(stop_reason=stop_reason, stop_sequence=stop_sequence),
|
||||
usage=MessageDeltaUsage.model_validate(
|
||||
dict(usage) if usage else {"output_tokens": 0}
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def build_message_stop() -> RawMessageStopEvent:
|
||||
return RawMessageStopEvent(type="message_stop")
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from beaver_gateway.frontends.base import Frontend
|
||||
from beaver_gateway.frontends.anthropic import AnthropicMessagesFrontend
|
||||
from beaver_gateway.frontends.base import Frontend, GatewayRuntime
|
||||
|
||||
__all__ = ["Frontend"]
|
||||
__all__ = ["AnthropicMessagesFrontend", "Frontend", "GatewayRuntime"]
|
||||
|
||||
@@ -0,0 +1,299 @@
|
||||
"""``POST /v1/messages`` frontend.
|
||||
|
||||
Exposes the gateway as an Anthropic-compatible Messages endpoint, so any
|
||||
client that already speaks Anthropic (Cursor, Cline, the official SDK,
|
||||
``curl``) can hit a configured agent by passing its name as ``model``.
|
||||
|
||||
The frontend is intentionally thin: it authenticates the bearer token,
|
||||
resolves ``body.model`` to an agent + its backend, and then either
|
||||
streams the backend's events straight to SSE or accumulates them into a
|
||||
single ``Message`` for ``stream=false`` callers. All provider quirks
|
||||
already live in the backend adapters; we don't translate here.
|
||||
|
||||
Phase 1.4 wires only ``RaycastAgent`` through ``RaycastBackend``;
|
||||
``ClaudeAgent`` lands in Phase 2 and will plug into the same dispatch
|
||||
table without changes to this module.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from anthropic.types import (
|
||||
InputJSONDelta,
|
||||
Message,
|
||||
RawContentBlockDeltaEvent,
|
||||
RawContentBlockStartEvent,
|
||||
RawContentBlockStopEvent,
|
||||
RawMessageDeltaEvent,
|
||||
RawMessageStartEvent,
|
||||
SignatureDelta,
|
||||
TextBlock,
|
||||
TextDelta,
|
||||
ThinkingBlock,
|
||||
ThinkingDelta,
|
||||
ToolUseBlock,
|
||||
Usage,
|
||||
)
|
||||
from fastapi import FastAPI, HTTPException, Request, status
|
||||
from fastapi.responses import JSONResponse, StreamingResponse
|
||||
|
||||
from beaver_gateway.frontends.base import Frontend
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import AsyncIterator
|
||||
|
||||
from beaver_gateway.core.events import MessageStreamEvent
|
||||
from beaver_gateway.frontends.base import GatewayRuntime
|
||||
|
||||
|
||||
_log = logging.getLogger("beaver_gateway.frontends.anthropic")
|
||||
|
||||
|
||||
__all__ = ["AnthropicMessagesFrontend"]
|
||||
|
||||
|
||||
class AnthropicMessagesFrontend(Frontend):
|
||||
"""FastAPI app behind ``POST /v1/messages`` + ``GET /v1/models``."""
|
||||
|
||||
def __init__(self, *, host: str = "0.0.0.0", port: int = 8000) -> None: # noqa: S104
|
||||
self.host = host
|
||||
self.port = port
|
||||
self._runtime: GatewayRuntime | None = None
|
||||
self._app: FastAPI | None = None
|
||||
|
||||
def configure(self, runtime: GatewayRuntime) -> None:
|
||||
self._runtime = runtime
|
||||
self._app = self._build_app(runtime)
|
||||
|
||||
async def serve(self) -> None:
|
||||
# Local import: uvicorn pulls in a lot, no reason to load it when
|
||||
# something else (a test, a script) imports this module just for
|
||||
# the FastAPI factory.
|
||||
import uvicorn
|
||||
|
||||
if self._app is None:
|
||||
msg = "configure() must be called before serve()"
|
||||
raise RuntimeError(msg)
|
||||
|
||||
config = uvicorn.Config(
|
||||
self._app, host=self.host, port=self.port, log_level="info"
|
||||
)
|
||||
server = uvicorn.Server(config)
|
||||
await server.serve()
|
||||
|
||||
def _build_app(self, runtime: GatewayRuntime) -> FastAPI:
|
||||
app = FastAPI(title="beaver-gateway / Anthropic Messages")
|
||||
|
||||
@app.get("/healthz")
|
||||
async def healthz() -> dict[str, str]:
|
||||
return {"status": "ok"}
|
||||
|
||||
@app.get("/v1/models")
|
||||
async def list_models(request: Request) -> dict[str, Any]:
|
||||
_require_token(request, runtime)
|
||||
data = [
|
||||
{
|
||||
"type": "model",
|
||||
"id": a.name,
|
||||
"display_name": a.name,
|
||||
"created_at": None,
|
||||
}
|
||||
for a in runtime.agents
|
||||
]
|
||||
return {"data": data, "has_more": False, "first_id": None, "last_id": None}
|
||||
|
||||
@app.post("/v1/messages")
|
||||
async def create_message(request: Request) -> Any:
|
||||
token_name = _require_token(request, runtime)
|
||||
try:
|
||||
body = await request.json()
|
||||
except json.JSONDecodeError as exc:
|
||||
raise HTTPException(
|
||||
status.HTTP_400_BAD_REQUEST, f"invalid JSON: {exc}"
|
||||
) from exc
|
||||
|
||||
model = body.get("model")
|
||||
if not isinstance(model, str):
|
||||
raise HTTPException(
|
||||
status.HTTP_400_BAD_REQUEST, "missing or non-string `model`"
|
||||
)
|
||||
|
||||
agent = runtime.agents.get(model)
|
||||
if agent is None:
|
||||
raise HTTPException(
|
||||
status.HTTP_404_NOT_FOUND, f"unknown agent: {model!r}"
|
||||
)
|
||||
|
||||
backend = runtime.backends.get(agent.name)
|
||||
if backend is None:
|
||||
# Agent exists in config but its backend isn't wired in
|
||||
# this phase (e.g. ClaudeAgent before Phase 2, or a
|
||||
# RaycastAgent without RAYCAST_BEARER set at startup).
|
||||
raise HTTPException(
|
||||
status.HTTP_503_SERVICE_UNAVAILABLE,
|
||||
f"no backend configured for agent {agent.name!r}",
|
||||
)
|
||||
|
||||
messages = body.get("messages") or []
|
||||
system = body.get("system")
|
||||
stream_flag = bool(body.get("stream", False))
|
||||
|
||||
_log.info(
|
||||
"messages: actor=%s agent=%s stream=%s msgs=%d",
|
||||
token_name,
|
||||
agent.name,
|
||||
stream_flag,
|
||||
len(messages),
|
||||
)
|
||||
|
||||
# Forward per-request knobs the Anthropic body may carry —
|
||||
# backend adapters layer these over per-agent defaults. Only
|
||||
# values explicitly present (not Anthropic-defaulted ones we
|
||||
# never received) are forwarded, so the agent's default still
|
||||
# wins when the caller omits the field.
|
||||
options: dict[str, Any] = {}
|
||||
if isinstance(body.get("temperature"), int | float):
|
||||
options["temperature"] = body["temperature"]
|
||||
|
||||
events = backend.complete(
|
||||
agent=agent,
|
||||
messages=messages,
|
||||
system=system if isinstance(system, str) else None,
|
||||
**options,
|
||||
)
|
||||
|
||||
if stream_flag:
|
||||
return StreamingResponse(
|
||||
_sse(events), media_type="text/event-stream"
|
||||
)
|
||||
message = await _accumulate(events, model=model)
|
||||
return JSONResponse(content=message.model_dump(mode="json"))
|
||||
|
||||
return app
|
||||
|
||||
|
||||
def _require_token(request: Request, runtime: GatewayRuntime) -> str:
|
||||
"""Verify the request's bearer and return the token's audit name.
|
||||
|
||||
Accepts both ``X-Api-Key: <token>`` (what the official Anthropic
|
||||
SDK sends — LibreChat, the CLI, third-party clients) and
|
||||
``Authorization: Bearer <token>`` (curl, Cursor). Raises 401 on
|
||||
miss. ``TokenStore`` doesn't know about HTTP, so response shape
|
||||
is owned here.
|
||||
"""
|
||||
api_key = request.headers.get("x-api-key")
|
||||
name = (
|
||||
runtime.token_store.verify(api_key)
|
||||
if api_key
|
||||
else runtime.token_store.verify_bearer(request.headers.get("authorization"))
|
||||
)
|
||||
if name is None:
|
||||
raise HTTPException(
|
||||
status.HTTP_401_UNAUTHORIZED,
|
||||
"invalid or missing bearer token",
|
||||
headers={"WWW-Authenticate": "Bearer"},
|
||||
)
|
||||
return name
|
||||
|
||||
|
||||
async def _sse(events: AsyncIterator[MessageStreamEvent]) -> AsyncIterator[bytes]:
|
||||
r"""Serialize an event stream into Anthropic's ``text/event-stream`` form.
|
||||
|
||||
Each event becomes ``event: <type>\ndata: <json>\n\n`` — the shape
|
||||
the Anthropic SDK's SSE decoder expects. Errors mid-stream are
|
||||
swallowed into a synthetic ``error`` event so the client sees the
|
||||
failure rather than a hung connection.
|
||||
"""
|
||||
try:
|
||||
async for ev in events:
|
||||
payload = ev.model_dump_json()
|
||||
yield f"event: {ev.type}\ndata: {payload}\n\n".encode()
|
||||
except Exception as exc: # noqa: BLE001
|
||||
_log.exception("backend stream failed")
|
||||
err = json.dumps(
|
||||
{"type": "error", "error": {"type": "api_error", "message": str(exc)}}
|
||||
)
|
||||
yield f"event: error\ndata: {err}\n\n".encode()
|
||||
|
||||
|
||||
async def _accumulate(
|
||||
events: AsyncIterator[MessageStreamEvent], *, model: str
|
||||
) -> Message:
|
||||
"""Collapse a stream-event sequence into one ``Message`` (``stream=false``).
|
||||
|
||||
Mirrors the Anthropic SDK's own accumulator: walk events, build
|
||||
block dicts indexed by their ``content_block`` index, fold text /
|
||||
thinking deltas in, buffer ``input_json_delta`` chunks until the
|
||||
block closes (then JSON-parse them once).
|
||||
"""
|
||||
message_id = ""
|
||||
role = "assistant"
|
||||
usage = Usage(input_tokens=0, output_tokens=0)
|
||||
blocks: dict[int, dict[str, Any]] = {}
|
||||
json_buffers: dict[int, str] = {}
|
||||
stop_reason: str | None = None
|
||||
stop_sequence: str | None = None
|
||||
|
||||
async for ev in events:
|
||||
# isinstance, not ``ev.type == "..."``: ty narrows on the
|
||||
# discriminator only via the class, and the raw event union
|
||||
# carries its own discriminators (``Raw*Event``) the SDK
|
||||
# already promises.
|
||||
if isinstance(ev, RawMessageStartEvent):
|
||||
message_id = ev.message.id
|
||||
role = ev.message.role
|
||||
usage = ev.message.usage
|
||||
elif isinstance(ev, RawContentBlockStartEvent):
|
||||
blocks[ev.index] = ev.content_block.model_dump()
|
||||
if blocks[ev.index].get("type") == "tool_use":
|
||||
json_buffers[ev.index] = ""
|
||||
elif isinstance(ev, RawContentBlockDeltaEvent):
|
||||
blk = blocks.setdefault(ev.index, {})
|
||||
delta = ev.delta
|
||||
if isinstance(delta, TextDelta):
|
||||
blk["text"] = blk.get("text", "") + delta.text
|
||||
elif isinstance(delta, InputJSONDelta):
|
||||
json_buffers[ev.index] = (
|
||||
json_buffers.get(ev.index, "") + delta.partial_json
|
||||
)
|
||||
elif isinstance(delta, ThinkingDelta):
|
||||
blk["thinking"] = blk.get("thinking", "") + delta.thinking
|
||||
elif isinstance(delta, SignatureDelta):
|
||||
blk["signature"] = delta.signature
|
||||
elif isinstance(ev, RawContentBlockStopEvent):
|
||||
blk = blocks.get(ev.index, {})
|
||||
if blk.get("type") == "tool_use":
|
||||
raw = json_buffers.pop(ev.index, "")
|
||||
blk["input"] = json.loads(raw) if raw.strip() else {}
|
||||
elif isinstance(ev, RawMessageDeltaEvent):
|
||||
stop_reason = ev.delta.stop_reason
|
||||
stop_sequence = ev.delta.stop_sequence
|
||||
if ev.usage.output_tokens:
|
||||
usage = Usage.model_validate(
|
||||
{**usage.model_dump(), "output_tokens": ev.usage.output_tokens}
|
||||
)
|
||||
|
||||
content = []
|
||||
for idx in sorted(blocks):
|
||||
bd = blocks[idx]
|
||||
btype = bd.get("type")
|
||||
if btype == "text":
|
||||
content.append(TextBlock.model_validate(bd))
|
||||
elif btype == "tool_use":
|
||||
content.append(ToolUseBlock.model_validate(bd))
|
||||
elif btype == "thinking":
|
||||
content.append(ThinkingBlock.model_validate(bd))
|
||||
|
||||
return Message(
|
||||
id=message_id or "msg_unknown",
|
||||
type="message",
|
||||
role=role,
|
||||
model=model,
|
||||
content=content,
|
||||
stop_reason=stop_reason, # type: ignore[arg-type]
|
||||
stop_sequence=stop_sequence,
|
||||
usage=usage,
|
||||
)
|
||||
@@ -1,24 +1,46 @@
|
||||
"""Frontend ABC.
|
||||
"""Frontend ABC + the runtime context handed to ``configure``.
|
||||
|
||||
A frontend is anything that listens on a port and routes inbound traffic
|
||||
into the agent/MCP registries. ``configure`` is called once after the
|
||||
``Gateway`` is built; ``serve`` runs the listening loop.
|
||||
into the gateway. ``GatewayRuntime`` carries everything a frontend may
|
||||
need that isn't user-config: built registries, per-agent backends, and
|
||||
the in-memory token store. The user's ``/config/config.py`` defines a
|
||||
``Gateway`` (lists); ``cli.main`` turns that into a ``GatewayRuntime``
|
||||
and hands it to each frontend's ``configure``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from beaver_gateway.core.registry import Gateway
|
||||
from beaver_gateway.backends.base import Backend
|
||||
from beaver_gateway.core.auth import TokenStore
|
||||
from beaver_gateway.core.registry import AgentRegistry, McpRegistry
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class GatewayRuntime:
|
||||
"""Post-build state of the gateway, shared with every frontend.
|
||||
|
||||
Backends are keyed by **agent name**, not type — one ``RaycastBackend``
|
||||
instance can serve many ``RaycastAgent`` instances, but the lookup
|
||||
site (an inbound request with ``model=<agent.name>``) already has
|
||||
the name in hand, so the indirection lives one step earlier.
|
||||
"""
|
||||
|
||||
agents: AgentRegistry
|
||||
mcps: McpRegistry
|
||||
backends: dict[str, Backend]
|
||||
token_store: TokenStore
|
||||
|
||||
|
||||
class Frontend(ABC):
|
||||
"""Listens on a port, dispatches into the gateway."""
|
||||
|
||||
@abstractmethod
|
||||
def configure(self, gateway: Gateway) -> None: ...
|
||||
def configure(self, runtime: GatewayRuntime) -> None: ...
|
||||
|
||||
@abstractmethod
|
||||
async def serve(self) -> None: ...
|
||||
|
||||
@@ -7,6 +7,7 @@ discriminated-union members so downstream code can ``match`` on ``kind``.
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable # noqa: TC003 — runtime use by pydantic
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Annotated, Literal
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
@@ -27,7 +28,7 @@ class StdioMcp(_BaseMcp):
|
||||
kind: Literal["stdio"] = "stdio"
|
||||
command: tuple[str, ...]
|
||||
env: dict[str, str] | None = None
|
||||
cwd: str | None = None
|
||||
cwd: Path | None = None
|
||||
|
||||
|
||||
class HttpMcp(_BaseMcp):
|
||||
@@ -60,9 +61,14 @@ class McpServer:
|
||||
name: str,
|
||||
command: Iterable[str],
|
||||
env: dict[str, str] | None = None,
|
||||
cwd: str | None = None,
|
||||
cwd: Path | str | None = None,
|
||||
) -> StdioMcp:
|
||||
return StdioMcp(name=name, command=tuple(command), env=env, cwd=cwd)
|
||||
return StdioMcp(
|
||||
name=name,
|
||||
command=tuple(command),
|
||||
env=env,
|
||||
cwd=Path(cwd) if isinstance(cwd, str) else cwd,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def http(
|
||||
|
||||
@@ -29,3 +29,18 @@ class Settings(BaseSettings):
|
||||
|
||||
raycast_bearer: str | None = None
|
||||
raycast_config_path: Path = Path("/config/raycast.json")
|
||||
raycast_device_id: str | None = None
|
||||
"""64-hex-char stable per-install id. Required when any ``RaycastAgent``
|
||||
is configured — generate once via ``secrets.token_hex(32)`` and keep
|
||||
in ``.env`` so Raycast doesn't see every restart as a new device."""
|
||||
|
||||
raycast_locale: str = "en-US"
|
||||
"""``Accept-Language`` header sent by the shared ``raycast_api.Client``
|
||||
and the default locale used to render auto ``UserPreferences``. One
|
||||
value per gateway since we open one Client total."""
|
||||
|
||||
bootstrap_tokens: str = ""
|
||||
"""Phase 1.3 seed: ``name1:value1,name2:value2``. Empty = no auth wired yet.
|
||||
|
||||
Phase 4 moves tokens into the DB. Until then this is the only source.
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user