feat: add admin panel

This commit is contained in:
h
2026-05-20 13:00:08 +02:00
parent 7970d4be9b
commit 0128191ac3
26 changed files with 2985 additions and 115 deletions
+1
View File
@@ -17,3 +17,4 @@ docs/
# Local env
.env
db.db
+35 -4
View File
@@ -12,6 +12,7 @@ from beaver_gateway.agents.base import ExposedMcp
from beaver_gateway.agents.claude import ClaudeAgent
from beaver_gateway.agents.raycast import RaycastAgent, RemoteTool, UserPreferences
from beaver_gateway.core.registry import Gateway
from beaver_gateway.frontends.admin import AdminFrontend
from beaver_gateway.frontends.anthropic import AnthropicMessagesFrontend
from beaver_gateway.frontends.mcp_server import McpServerFrontend
from beaver_gateway.mcp.types import McpServer
@@ -28,6 +29,7 @@ def current_time() -> str:
return datetime.now().astimezone().isoformat()
gateway = Gateway(
agents=[
# Phase 2.2 — ClaudeCodeBackendAdapter routes this agent's
@@ -98,7 +100,7 @@ gateway = Gateway(
# ClaudeCode adapter forwards that URL into
# ``BackendOptions.mcp_servers``. Phase 3's ``McpServerFrontend``
# reverse-proxies the same internal URL out to external clients.
McpServer.python_tool(name="time", tools=[current_time]),
McpServer.python_tool(name="time", tools=[current_time])
# Phase 3 — illustrates the ``lenient`` flag. Real-world stdio MCPs
# sometimes print "Processing..." or other chatter to stdout before
# their actual JSON-RPC frames; the default mcp client forwards
@@ -124,12 +126,41 @@ gateway = Gateway(
# Phase 1.4 — expose the agents as `model=<name>` on an
# Anthropic-compatible Messages endpoint. Auth comes from
# `BOOTSTRAP_TOKENS` in the env (`name1:value1,name2:value2`).
#
# Behind a reverse proxy (Caddy / nginx / Cloudflare) pass
# `public_base_url=` so the admin dashboard advertises the
# outside URL instead of `host:port`. Caddy strips its own
# prefix and the frontend's internal paths (`/v1/messages`,
# `/v1/models`) get appended:
# Caddy: handle_path /ai/* { reverse_proxy localhost:8000 }
# Config: AnthropicMessagesFrontend(
# port=8000,
# public_base_url="https://domain.com/ai")
# Result: https://domain.com/ai/v1/messages
AnthropicMessagesFrontend(host="0.0.0.0", port=8000),
# Phase 3 — re-exposes every declared `McpServer` outside the
# gateway with bearer auth + audit log. Per-namespace endpoints
# at `/mcp/<name>/`; flat bundle at `/mcp/all/`. Discovery page
# (HTML, auth-gated) at `/` with copy-pastable Cursor / Claude
# gateway with bearer auth + audit log. Each namespace lives
# at `/<name>/` on this port (the port itself disambiguates
# MCP traffic — no extra `/mcp` segment in the route); a flat
# bundle is published at `/all/`. Discovery page (HTML,
# auth-gated) at `/` with copy-pastable Cursor / Claude
# Desktop snippets. Auth re-uses `BOOTSTRAP_TOKENS`.
#
# Same `public_base_url=` knob as above. Caddy strips its
# prefix; the frontend's `/<name>/` segment gets appended:
# Caddy: handle_path /mcp/* { reverse_proxy localhost:8001 }
# Config: McpServerFrontend(
# port=8001,
# public_base_url="https://domain.com/mcp")
# Result: https://domain.com/mcp/<name>/ (and /mcp/all/)
McpServerFrontend(host="0.0.0.0", port=8001),
# Phase 4.3 — browser admin UI. Creds come from
# `ADMIN_USER`/`ADMIN_PASS`; the session cookie is signed with
# `SESSION_SECRET`. Use it to mint tokens (Argon2-hashed in
# the DB), revoke them, and watch the audit log. Scope is
# enforced on the bearer frontends: tokens minted with scope
# `messages` only work on `/v1/messages`; `mcp` only on
# `/mcp/<name>`; `*` works everywhere.
AdminFrontend(host="0.0.0.0", port=8002),
],
)
+5
View File
@@ -24,6 +24,11 @@ services:
# config.py declares one, so set these (or remove the agent)
# before exposing port 8000.
ports:
# /v1/messages frontend
- "8000:8000"
# MCP server frontend
- "8001:8001"
# Admin UI (Phase 4.3) — change ADMIN_USER/ADMIN_PASS/SESSION_SECRET
- "8002:8002"
volumes:
- ./config.py:/config/config.py:ro
+2
View File
@@ -8,11 +8,13 @@ authors = [
requires-python = ">=3.13"
dependencies = [
"aiohttp>=3.13.5",
"aiosqlite>=0.22.1",
"anthropic>=0.103.0",
"anyio>=4.13.0",
"argon2-cffi>=25.1.0",
"fastapi>=0.136.1",
"fastmcp>=3.3.1",
"greenlet>=3.5.0",
"itsdangerous>=2.2.0",
"jinja2>=3.1.6",
"psycopg[binary]>=3.3.4",
+10
View File
@@ -0,0 +1,10 @@
{
"venvPath": ".",
"venv": ".venv",
"pythonVersion": "3.13",
"include": ["src"],
"exclude": [".venv", "**/__pycache__", ".pytest_cache"],
"extraPaths": ["."],
"reportMissingImports": "error",
"reportMissingTypeStubs": "none"
}
+23 -4
View File
@@ -40,6 +40,7 @@ from beaver_gateway.core.registry import AgentRegistry, McpRegistry
from beaver_gateway.frontends.base import GatewayRuntime
from beaver_gateway.mcp.internal_app import build_internal_app
from beaver_gateway.settings import Settings
from beaver_gateway.storage import Database
if TYPE_CHECKING:
from starlette.applications import Starlette
@@ -65,9 +66,24 @@ async def _async_main() -> None:
agents = AgentRegistry(gateway.agents)
mcps = McpRegistry(gateway.mcps)
token_store = TokenStore.from_env(settings.bootstrap_tokens)
# Phase 4.1 — open the async DB and run create_all once. Engine
# pool is process-wide; ``dispose()`` after the TaskGroup unwinds.
db = Database(settings.database_url)
await db.create_all()
# Phase 4.2 — TokenStore now reads from the DB (in-memory cache
# primed at start, TTL-refreshed, last_used_at flushed by a
# background task). BOOTSTRAP_TOKENS layers on top so first-run /
# examples still work without DB writes.
token_store = TokenStore(
db, bootstrap=TokenStore.parse_bootstrap(settings.bootstrap_tokens)
)
async with AsyncExitStack() as stack:
stack.push_async_callback(db.dispose)
await token_store.start()
stack.push_async_callback(token_store.stop)
# Internal MCP URLs must exist before we construct any
# ClaudeCodeBackendAdapter — adapters bake the URLs into their
# ``BackendOptions.mcp_servers`` at construction time.
@@ -87,7 +103,12 @@ async def _async_main() -> None:
mcps=mcps,
backends=backends,
token_store=token_store,
db=db,
mcp_internal_urls=internal_urls,
admin_user=settings.admin_user,
admin_pass=settings.admin_pass,
session_secret=settings.session_secret,
frontends=tuple(gateway.frontends),
)
for fe in gateway.frontends:
@@ -115,9 +136,7 @@ async def _async_main() -> None:
async with asyncio.TaskGroup() as tg:
if internal_app is not None:
tg.create_task(
_serve_internal_mcp(internal_app, settings=settings)
)
tg.create_task(_serve_internal_mcp(internal_app, settings=settings))
for fe in gateway.frontends:
tg.create_task(fe.serve())
+93
View File
@@ -0,0 +1,93 @@
"""Single entry point for writing :class:`AuditLog` rows.
Every frontend ends up needing the same three-line pattern — open a DB
session, append a row, swallow failures so the user-visible request
still succeeds. Phase 4.3 inlined that pattern in the admin frontend
under a private ``_audit()`` helper; Phase 4.4 lifts it here so the
Messages and MCP frontends can call the same function and so the
swallow-and-log policy lives in one place.
The contract:
* ``log(runtime, actor=..., kind=...)`` is fire-and-forget. It awaits
the DB write (so callers can ``await`` it before responding and get
ordering), but never raises — if the audit insert fails, the function
emits an ``exception`` log line and returns.
* ``actor`` is a free-form string. By convention: ``"token:<name>"``
for bearer-authenticated traffic, ``"admin:<user>"`` for admin-UI
actions, ``"anon"`` for failed-auth paths we still want to record.
* ``kind`` is a short tag — see :data:`KNOWN_KINDS` for the set the
current frontends emit; new tags don't need a code change here, the
column is free-form.
* ``**detail`` is JSON-serialised by :func:`append_audit`. Keep it
small: paths, methods, status codes — not request bodies. Anything
passed here lands in ``AuditLog.detail_json`` verbatim.
Why a thin wrapper rather than ``append_audit`` directly: callers want
"write if you can, otherwise carry on", and pulling the try/except into
every frontend was already starting to drift (admin had it, bearer
frontends would have copy-pasted). One module, one policy.
"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Any
from beaver_gateway.storage import append_audit
if TYPE_CHECKING:
from beaver_gateway.frontends.base import GatewayRuntime
_log = logging.getLogger("beaver_gateway.audit")
# Tags currently emitted by the gateway. The set is informational —
# ``AuditLog.kind`` is free-form so new code can introduce new tags
# without touching this list — but listing them here gives the admin UI
# and any downstream log consumers one canonical reference.
KNOWN_KINDS: frozenset[str] = frozenset(
{
"messages", # POST /v1/messages accepted
"mcp_call", # /mcp/<ns>/... proxied
"login_ok",
"login_failed",
"logout",
"token_create",
"token_revoke",
}
)
async def log(
runtime: GatewayRuntime,
*,
actor: str,
kind: str,
agent_name: str | None = None,
**detail: Any,
) -> None:
"""Best-effort audit insert. Never raises.
Opens its own short-lived :class:`AsyncSession` so callers don't
have to thread one through. If the DB hiccups (table missing,
disk full, connection drop), we log and move on — the audit trail
is observability, not a hard precondition for serving the request.
"""
try:
async with runtime.db.session() as session:
await append_audit(
session,
actor=actor,
kind=kind,
agent_name=agent_name,
detail=detail or None,
)
except Exception: # noqa: BLE001
_log.exception(
"audit write failed: actor=%s kind=%s agent=%s", actor, kind, agent_name
)
__all__ = ["KNOWN_KINDS", "log"]
+305 -36
View File
@@ -1,63 +1,195 @@
"""Bearer-token verification (Phase 1.3 — in-memory only).
"""Bearer-token verification (Phase 4.2 — DB-backed with in-memory cache).
Phase 4 will replace this with a DB-backed store (PRD §8 ``tokens``
table, Argon2 hashes, scopes, ``last_used_at`` batching). Until then,
frontends authenticate callers against an in-memory ``{value: name}``
dict seeded from the ``BOOTSTRAP_TOKENS`` env var.
The store is fed by two sources:
Format::
1. **DB** (``Token`` table from Phase 4.1) — the primary source. Rows
carry Argon2id hashes; the admin UI (Phase 4.3) will be the only
writer at steady state.
2. **`BOOTSTRAP_TOKENS`** env — a name→plaintext map kept around for
first-run, disaster-recovery, and ``examples/`` smoke tests. These
entries live alongside DB rows in the cache and are never persisted.
BOOTSTRAP_TOKENS=cursor:s3cret,laptop:hunter2
Hot path is in-memory: at :meth:`start` we pull every non-revoked DB
row and stash it in a list; subsequent :meth:`verify` calls re-load
when the cache is older than ``ttl_seconds``. ``last_used_at`` updates
are coalesced into a small dict and flushed by a background task every
``flush_interval`` seconds — one transaction per flush rather than one
per request.
The *name* side is for audit lines — :py:meth:`TokenStore.verify`
returns it on hit so callers can attribute the request without
exposing the raw secret. ``None`` means "no such token"; callers
turn that into 401.
We can't index DB rows by a derived plaintext key because Argon2 salts
are random — so verify does a linear scan over candidates, calling
``argon2.PasswordHasher.verify`` on each. N is small by design (single
operator, ~10 tokens at most); the cost is irrelevant. The scan runs
through ``asyncio.to_thread`` to keep the event loop free of the ~50ms
KDF block.
This module deliberately knows nothing about HTTP frameworks — it
takes a raw token (or the verbatim ``Authorization`` header value)
and returns a name-or-None. Frontends own the response shape.
The module knows nothing about HTTP frameworks. It takes a raw token
(or a verbatim ``Authorization`` header value) and returns a
:class:`TokenIdentity` (name + scope + db-id), or ``None`` for a miss.
Frontends own the 401 response shape.
"""
from __future__ import annotations
import asyncio
import contextlib
import hmac
import logging
import time
from dataclasses import dataclass
from datetime import UTC, datetime
from typing import TYPE_CHECKING
from argon2 import PasswordHasher
from argon2.exceptions import InvalidHashError, VerifyMismatchError
from beaver_gateway.storage import list_active_tokens, touch_token
if TYPE_CHECKING:
from collections.abc import Mapping
from beaver_gateway.storage import Database
_log = logging.getLogger("beaver_gateway.auth")
_BOOTSTRAP_SCOPE = "*"
class TokenStoreError(ValueError):
"""Malformed ``BOOTSTRAP_TOKENS`` value."""
"""Malformed ``BOOTSTRAP_TOKENS`` value or duplicate token."""
VALID_SCOPES: frozenset[str] = frozenset({"*", "messages", "mcp", "admin"})
"""The scopes a ``Token.scope`` may hold (Phase 4.3 admin UI enforces).
* ``*`` — wildcard, may use any frontend
* ``messages`` — Anthropic Messages frontend only
* ``mcp`` — MCP server frontend only
* ``admin`` — reserved for programmatic admin access; the AdminFrontend
itself authenticates via session cookies, not bearer tokens, so this
scope is unused today and kept for forward compatibility.
"""
@dataclass(frozen=True, slots=True)
class TokenIdentity:
"""What :meth:`TokenStore.verify` resolves to on success.
``token_id`` is the DB row id for persisted tokens, or ``None`` for
an env-bootstrap match (those have no DB row to touch). ``scope``
gates which frontend the token may hit (see :data:`VALID_SCOPES`);
bootstrap tokens implicitly get ``"*"``.
"""
name: str
scope: str
token_id: int | None
def allows(self, required: str) -> bool:
"""``True`` when this identity may access a route gated by ``required``.
``"*"`` is the wildcard; an exact match satisfies a single scope.
Unknown ``required`` values intentionally fall through to a
strict equality check — callers should pass one of
:data:`VALID_SCOPES`.
"""
return self.scope in ("*", required)
@dataclass(frozen=True, slots=True)
class _CachedToken:
"""One non-revoked row, copied out of the DB into the hot-path cache."""
id: int
name: str
scope: str
hashed_value: str
# Default Argon2id parameters from ``argon2-cffi`` are fine for our scope.
# They target ~50ms on a modern CPU — enough to make a stolen-hash brute
# force expensive, cheap enough to verify a handful per request.
_HASHER = PasswordHasher()
def hash_token(plaintext: str) -> str:
"""Return an Argon2id hash for ``plaintext`` (admin / seed-only path).
Phase 4.3 will call this when the admin creates a token; Phase 4.2
exposes it so smoke scripts can seed the DB without re-implementing
the same line.
"""
return _HASHER.hash(plaintext)
class TokenStore:
"""Constant-time-ish bearer verification over a static name→value map.
"""DB-backed verifier with in-memory cache + TTL + batched touches.
The store is keyed by value internally (``verify`` lookup is O(1))
but constructed from a name→value mapping because that's how the
user thinks about it — one human-readable label per caller.
Construct in ``cli.main`` after :class:`Database` is up, then
``await store.start()`` to prime the cache and spin up the flusher
task. ``await store.stop()`` on shutdown drains the touch queue.
Bootstrap entries (from ``BOOTSTRAP_TOKENS``) sit alongside DB rows
in the same lookup path; we check them first, in constant time, so
they remain usable even if the DB is unreachable. They never appear
in ``last_used_at`` flushes because they have no DB row.
"""
__slots__ = ("_by_value",)
__slots__ = (
"_bootstrap_by_value",
"_bootstrap_scopes",
"_cache",
"_db",
"_flush_interval",
"_flusher_task",
"_loaded_at",
"_lock",
"_touch_queue",
"_ttl",
)
def __init__(self, tokens: Mapping[str, str]) -> None:
def __init__(
self,
db: Database | None = None,
*,
bootstrap: Mapping[str, str] | None = None,
ttl_seconds: float = 30.0,
flush_interval: float = 5.0,
) -> None:
# Bootstrap is keyed by value internally so verify is O(1) over
# plaintext. Each value also keeps its name for audit lines.
by_value: dict[str, str] = {}
for name, value in tokens.items():
for name, value in (bootstrap or {}).items():
if not name or not value:
msg = f"empty name or value in token map (name={name!r})"
msg = f"empty name or value in bootstrap map (name={name!r})"
raise TokenStoreError(msg)
if value in by_value:
msg = (
f"duplicate token value for names "
f"duplicate bootstrap token value for names "
f"{by_value[value]!r} and {name!r}"
)
raise TokenStoreError(msg)
by_value[value] = name
self._by_value = by_value
self._bootstrap_by_value: dict[str, str] = by_value
self._bootstrap_scopes: dict[str, str] = dict.fromkeys(
by_value.values(), _BOOTSTRAP_SCOPE
)
@classmethod
def from_env(cls, raw: str) -> TokenStore:
self._db = db
self._ttl = ttl_seconds
self._flush_interval = flush_interval
self._cache: list[_CachedToken] = []
self._loaded_at: float = 0.0
self._lock = asyncio.Lock()
self._touch_queue: dict[int, datetime] = {}
self._flusher_task: asyncio.Task[None] | None = None
# ---- bootstrap parsing (kept for `cli` / tests) ---------------------
@staticmethod
def parse_bootstrap(raw: str) -> dict[str, str]:
"""Parse ``name1:value1,name2:value2`` (the ``BOOTSTRAP_TOKENS`` form)."""
tokens: dict[str, str] = {}
for chunk in raw.split(","):
@@ -73,15 +205,82 @@ class TokenStore:
msg = f"duplicate token name: {name!r}"
raise TokenStoreError(msg)
tokens[name] = value
return cls(tokens)
return tokens
def verify(self, token: str | None) -> str | None:
"""Return the token's name if known, else ``None``."""
@classmethod
def from_env(cls, raw: str, db: Database | None = None) -> TokenStore:
"""Legacy entrypoint: bootstrap-only (or bootstrap + db).
Phase 1.3 call sites still expect a one-liner; we keep the
classmethod so they don't have to learn the new constructor.
"""
return cls(db, bootstrap=cls.parse_bootstrap(raw))
# ---- lifecycle ------------------------------------------------------
async def start(self) -> None:
"""Prime the cache and (if a DB is attached) start the flusher loop."""
await self._refresh()
if self._db is not None:
self._flusher_task = asyncio.create_task(
self._flusher_loop(), name="beaver-gateway.token-flusher"
)
async def stop(self) -> None:
"""Cancel the flusher and run one final drain."""
if self._flusher_task is not None:
self._flusher_task.cancel()
with contextlib.suppress(asyncio.CancelledError):
await self._flusher_task
self._flusher_task = None
await self._flush_now()
async def invalidate(self) -> None:
"""Force the next verify to re-read from DB (Phase 4.3 admin hook)."""
self._loaded_at = 0.0
# ---- verify path ----------------------------------------------------
async def verify(self, token: str | None) -> TokenIdentity | None:
"""Return the matching identity, or ``None`` for unknown/empty tokens."""
if not token:
return None
return self._by_value.get(token)
def verify_bearer(self, authorization: str | None) -> str | None:
# Bootstrap first: constant-time compare per entry, never hits DB.
# `compare_digest` is overkill for a name→value lookup but cheap
# and removes one timing variable for free.
for value, name in self._bootstrap_by_value.items():
if hmac.compare_digest(token, value):
return TokenIdentity(
name=name,
scope=self._bootstrap_scopes.get(name, _BOOTSTRAP_SCOPE),
token_id=None,
)
if self._db is None:
return None
await self._ensure_fresh()
# Snapshot the cache reference so a refresh mid-scan doesn't
# surprise us. List itself is immutable per refresh (we swap,
# not mutate).
cache = self._cache
for entry in cache:
try:
await asyncio.to_thread(_HASHER.verify, entry.hashed_value, token)
except VerifyMismatchError:
continue
except InvalidHashError:
_log.warning(
"token row %d has an unparseable hash — skipping", entry.id
)
continue
self._touch_queue[entry.id] = datetime.now(UTC)
return TokenIdentity(name=entry.name, scope=entry.scope, token_id=entry.id)
return None
async def verify_bearer(self, authorization: str | None) -> TokenIdentity | None:
"""Strip the ``Bearer`` prefix (case-insensitive) then verify.
Accepts a bare token too — Cursor's MCP transport sometimes
@@ -93,13 +292,83 @@ class TokenStore:
return None
head, sep, rest = authorization.partition(" ")
token = rest.strip() if sep and head.lower() == "bearer" else authorization
return self.verify(token)
return await self.verify(token)
def __len__(self) -> int:
return len(self._by_value)
return len(self._cache) + len(self._bootstrap_by_value)
def __bool__(self) -> bool:
return bool(self._by_value)
return bool(self._cache) or bool(self._bootstrap_by_value)
# ---- internals ------------------------------------------------------
async def _ensure_fresh(self) -> None:
if self._db is None:
return
now = time.monotonic()
if now - self._loaded_at <= self._ttl:
return
async with self._lock:
# Re-check under the lock — first arrival reloaded, others
# should fall through.
now = time.monotonic()
if now - self._loaded_at <= self._ttl:
return
await self._refresh()
async def _refresh(self) -> None:
if self._db is None:
self._loaded_at = time.monotonic()
return
async with self._db.session() as session:
rows = await list_active_tokens(session)
next_cache: list[_CachedToken] = []
for row in rows:
if row.id is None:
# Defensive: SQLModel will assign an id on insert; a
# None here would mean someone handed us an unsaved row.
continue
next_cache.append(
_CachedToken(
id=row.id,
name=row.name,
scope=row.scope,
hashed_value=row.hashed_value,
)
)
self._cache = next_cache
self._loaded_at = time.monotonic()
_log.debug("token cache refreshed: %d active row(s)", len(next_cache))
async def _flusher_loop(self) -> None:
try:
while True:
await asyncio.sleep(self._flush_interval)
await self._flush_now()
except asyncio.CancelledError:
raise
except Exception: # noqa: BLE001 — never let the flusher die silently
_log.exception("token flusher crashed; touches will stop")
async def _flush_now(self) -> None:
if self._db is None or not self._touch_queue:
return
# Detach the queue so concurrent verify() writes don't bleed
# into the in-flight transaction.
pending, self._touch_queue = self._touch_queue, {}
async with self._db.session() as session:
for token_id in pending:
# We don't pass the timestamp through — `touch_token`
# stamps `now` itself, and we'd rather have one source
# of truth than reconcile clocks.
await touch_token(session, token_id=token_id)
_log.debug("flushed %d token touch(es)", len(pending))
__all__ = ["TokenStore", "TokenStoreError"]
__all__ = [
"VALID_SCOPES",
"TokenIdentity",
"TokenStore",
"TokenStoreError",
"hash_token",
]
@@ -0,0 +1,11 @@
"""Admin UI (Phase 4.3).
Browser-facing console: login, dashboard, token CRUD, audit viewer.
Templates live in ``./templates``; the package loader picks them up via
``importlib.resources`` so the layout works editable and inside the
Docker image without copy hacks.
"""
from beaver_gateway.frontends.admin.frontend import AdminFrontend
__all__ = ["AdminFrontend"]
@@ -0,0 +1,707 @@
"""Single-operator admin console.
Jinja2 + HTMX, no SPA framework. Session cookies signed with
``SESSION_SECRET`` via ``itsdangerous`` (8h TTL). CSRF is double-submit:
the signed session payload carries a random token that must echo back
on every state-changing request as a ``csrf_token`` form field.
What lives in here:
* ``GET /login`` / ``POST /login`` — env-cred check (``ADMIN_USER`` /
``ADMIN_PASS`` compared with :func:`hmac.compare_digest`).
* ``POST /logout`` — clears the cookie.
* ``GET /`` — dashboard: declared agents + MCP namespaces + the last
audit slice.
* ``GET /tokens`` + ``POST /tokens`` + ``POST /tokens/{id}/revoke`` —
bearer-token CRUD. Plaintext is rendered exactly once at creation; the
DB only ever holds the Argon2 hash. HTMX fragments swap rows in place.
* ``GET /audit`` — paginated audit list (id-cursor).
Things the admin frontend is *not* responsible for: HTTP-token verify
on ``/v1/messages`` and ``/mcp`` — that lives on those frontends and
uses :class:`TokenStore`. Admin's only authentication path is the
cookie session.
"""
from __future__ import annotations
import hmac
import json
import logging
import secrets
import time
from collections import deque
from typing import TYPE_CHECKING, Annotated, Any
import itsdangerous
from fastapi import FastAPI, Form, HTTPException, Request, status
from fastapi.responses import (
HTMLResponse,
RedirectResponse,
Response,
StreamingResponse,
)
from jinja2 import Environment, PackageLoader, select_autoescape
from beaver_gateway.core import audit
from beaver_gateway.core.auth import VALID_SCOPES, hash_token
from beaver_gateway.frontends.base import Frontend
from beaver_gateway.storage import (
create_token,
list_audit_records,
list_tokens,
revoke_token,
)
if TYPE_CHECKING:
from collections.abc import AsyncIterator
from datetime import datetime
from beaver_gateway.core.events import MessageStreamEvent
from beaver_gateway.frontends.base import GatewayRuntime
_log = logging.getLogger("beaver_gateway.frontends.admin")
# Cookie name is namespaced so a user can run multiple gateway instances
# behind one host header without sessions colliding. ``_v1`` lets us
# bump the payload schema later by invalidating old cookies via salt.
SESSION_COOKIE = "beaver_admin_session"
SESSION_MAX_AGE = 8 * 3600
SESSION_SALT = "beaver-gateway.admin.session.v1"
# Scopes the admin is allowed to mint. ``admin`` is reserved (no bearer
# route consumes it yet, but listing it keeps future programmatic
# admin access first-class) — see ``core.auth.VALID_SCOPES``.
CREATABLE_SCOPES = ("*", "messages", "mcp", "admin")
# Audit-log page size — also used by the dashboard's "recent activity"
# panel. Small enough to keep the dashboard cheap, big enough to be
# useful at a glance.
AUDIT_PAGE_SIZE = 50
# /login brute-force ceiling: 5 failed attempts per source IP within
# LOGIN_WINDOW_SECONDS shuts the door with a 429 until the oldest
# failure ages out. Single-operator admin, so a legit user only ever
# burns one IP — a forgiving window beats a tight one with a lockout.
LOGIN_MAX_ATTEMPTS = 5
LOGIN_WINDOW_SECONDS = 300.0
__all__ = ["AdminFrontend"]
class AdminFrontend(Frontend):
"""FastAPI app behind ``/login`` / ``/tokens`` / ``/audit`` / ``/``."""
def __init__(self, *, host: str = "0.0.0.0", port: int = 8002) -> None: # noqa: S104
self.host = host
self.port = port
self._runtime: GatewayRuntime | None = None
self._app: FastAPI | None = None
def configure(self, runtime: GatewayRuntime) -> None:
# Refuse to wire up if the env didn't carry the bits we need —
# an admin UI with empty creds is a footgun, and an unsigned
# session cookie is no session at all.
if not runtime.session_secret:
msg = "AdminFrontend requires SESSION_SECRET in env"
raise RuntimeError(msg)
if not runtime.admin_user or not runtime.admin_pass:
msg = "AdminFrontend requires ADMIN_USER and ADMIN_PASS in env"
raise RuntimeError(msg)
self._runtime = runtime
self._app = self._build_app(runtime)
async def serve(self) -> None:
import uvicorn
if self._app is None:
msg = "configure() must be called before serve()"
raise RuntimeError(msg)
config = uvicorn.Config(
self._app, host=self.host, port=self.port, log_level="info"
)
await uvicorn.Server(config).serve()
# ---- app construction ----------------------------------------------
def _build_app(self, runtime: GatewayRuntime) -> FastAPI: # noqa: PLR0915
# Routes are declared inline so they close over ``runtime`` /
# ``signer`` / ``templates`` instead of threading them through
# self-state. Splitting the function would just trade size for
# bookkeeping — same total surface, harder to follow.
templates = _build_template_env()
signer = itsdangerous.URLSafeTimedSerializer(
runtime.session_secret, salt=SESSION_SALT
)
login_limit = _LoginRateLimit(
max_attempts=LOGIN_MAX_ATTEMPTS, window=LOGIN_WINDOW_SECONDS
)
app = FastAPI(title="beaver-gateway / admin", docs_url=None, redoc_url=None)
def render(name: str, **ctx: Any) -> str:
return templates.get_template(name).render(**ctx)
# ---- login ----
@app.get("/login", response_class=HTMLResponse)
async def login_page(request: Request) -> Response:
# Already-signed-in users skip the form. Login flash is
# carried in a one-shot query param so a failed POST can
# redirect back here without leaking creds in the URL.
if _current_user(request, signer):
return RedirectResponse("/", status_code=status.HTTP_303_SEE_OTHER)
error = request.query_params.get("error")
return HTMLResponse(render("login.html", error=error))
@app.post("/login", response_class=HTMLResponse)
async def login_submit(
request: Request,
username: Annotated[str, Form(...)],
password: Annotated[str, Form(...)],
) -> Response:
ip = _client_ip(request)
if not login_limit.check(ip):
_log.warning("admin login rate-limited: ip=%s user=%r", ip, username)
await audit.log(
runtime,
actor=f"admin:{username}",
kind="login_failed",
reason="rate_limited",
ip=ip,
)
# 429 carries the rendered form back inline so the user
# sees the same page with an error banner — no redirect
# roundtrip, and an explicit Retry-After for any well-
# behaved client (or Caddy doing its own bookkeeping).
return HTMLResponse(
render(
"login.html",
error="too many attempts; try again in a few minutes",
),
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
headers={"Retry-After": str(int(LOGIN_WINDOW_SECONDS))},
)
ok = hmac.compare_digest(
username, runtime.admin_user
) and hmac.compare_digest(password, runtime.admin_pass)
if not ok:
login_limit.record_failure(ip)
_log.info("admin login failed: user=%r ip=%s", username, ip)
await audit.log(
runtime,
actor=f"admin:{username}",
kind="login_failed",
reason="bad_credentials",
ip=ip,
)
# 303-redirect after POST so the browser doesn't replay
# the form on refresh. Status-303 forces GET on the
# follow-up regardless of the original method.
return RedirectResponse(
"/login?error=invalid+credentials",
status_code=status.HTTP_303_SEE_OTHER,
)
login_limit.clear(ip)
csrf = secrets.token_urlsafe(24)
cookie = signer.dumps({"user": username, "csrf": csrf})
response = RedirectResponse("/", status_code=status.HTTP_303_SEE_OTHER)
_set_session_cookie(response, cookie)
_log.info("admin login ok: user=%s ip=%s", username, ip)
await audit.log(runtime, actor=f"admin:{username}", kind="login_ok", ip=ip)
return response
@app.post("/logout")
async def logout(request: Request) -> Response:
session = _require_session(request, signer)
await _require_csrf(request, session)
response = RedirectResponse("/login", status_code=status.HTTP_303_SEE_OTHER)
response.delete_cookie(SESSION_COOKIE)
await audit.log(runtime, actor=f"admin:{session['user']}", kind="logout")
return response
# ---- dashboard ----
@app.get("/", response_class=HTMLResponse)
async def dashboard(request: Request) -> Response:
session = _require_session(request, signer)
async with runtime.db.session() as db_session:
audit = await list_audit_records(db_session, limit=AUDIT_PAGE_SIZE)
tokens = await list_tokens(db_session, include_revoked=False)
endpoints = _build_endpoint_catalog(request, runtime)
return HTMLResponse(
render(
"dashboard.html",
user=session["user"],
csrf=session["csrf"],
agents=list(runtime.agents),
mcps=list(runtime.mcps),
audit=audit,
tokens=tokens,
endpoints=endpoints,
)
)
# ---- tokens ----
@app.get("/tokens", response_class=HTMLResponse)
async def tokens_page(request: Request) -> Response:
session = _require_session(request, signer)
include_revoked = request.query_params.get("include_revoked") == "1"
async with runtime.db.session() as db_session:
tokens = await list_tokens(db_session, include_revoked=include_revoked)
return HTMLResponse(
render(
"tokens.html",
user=session["user"],
csrf=session["csrf"],
tokens=tokens,
scopes=CREATABLE_SCOPES,
include_revoked=include_revoked,
)
)
@app.post("/tokens", response_class=HTMLResponse)
async def tokens_create(
request: Request,
name: Annotated[str, Form(...)],
scope: Annotated[str, Form(...)],
) -> Response:
session = _require_session(request, signer)
await _require_csrf(request, session)
name = name.strip()
if not name:
raise HTTPException(
status.HTTP_400_BAD_REQUEST, "token name is required"
)
if scope not in VALID_SCOPES:
raise HTTPException(
status.HTTP_400_BAD_REQUEST, f"invalid scope: {scope!r}"
)
# Generate the plaintext server-side — clients never pick
# their own. URL-safe so it copies cleanly into ``.env``.
plaintext = secrets.token_urlsafe(32)
hashed = hash_token(plaintext)
async with runtime.db.session() as db_session:
try:
row = await create_token(
db_session, name=name, scope=scope, hashed_value=hashed
)
except Exception as exc: # noqa: BLE001
# Likely UNIQUE(name) violation; the column is
# indexed so collisions are common. We surface a
# readable HTMX-friendly response rather than 500.
_log.warning("token create failed: %s", exc)
return HTMLResponse(
render(
"_token_error.html",
message=f"could not create token {name!r}: {exc}",
),
status_code=status.HTTP_409_CONFLICT,
)
await runtime.token_store.invalidate()
await audit.log(
runtime,
actor=f"admin:{session['user']}",
kind="token_create",
name=name,
scope=scope,
token_id=row.id,
)
return HTMLResponse(
render(
"_token_created.html",
token=row,
plaintext=plaintext,
csrf=session["csrf"],
)
)
@app.post("/tokens/{token_id}/revoke", response_class=HTMLResponse)
async def tokens_revoke(request: Request, token_id: int) -> Response:
session = _require_session(request, signer)
await _require_csrf(request, session)
async with runtime.db.session() as db_session:
ok = await revoke_token(db_session, token_id=token_id)
if not ok:
raise HTTPException(
status.HTTP_404_NOT_FOUND, f"no active token with id {token_id}"
)
await runtime.token_store.invalidate()
await audit.log(
runtime,
actor=f"admin:{session['user']}",
kind="token_revoke",
token_id=token_id,
)
# Re-fetch so the row reflects the just-stamped revoked_at.
async with runtime.db.session() as db_session:
rows = await list_tokens(db_session, include_revoked=True)
row = next((r for r in rows if r.id == token_id), None)
if row is None:
# Shouldn't happen — revoke_token said ok — but stay
# honest: respond empty so the row disappears from the
# table.
return HTMLResponse("")
return HTMLResponse(
render("_token_row.html", token=row, csrf=session["csrf"])
)
# ---- audit ----
@app.get("/audit", response_class=HTMLResponse)
async def audit_page(request: Request) -> Response:
session = _require_session(request, signer)
before_raw = request.query_params.get("before")
before_id: int | None = None
if before_raw and before_raw.isdigit():
before_id = int(before_raw)
async with runtime.db.session() as db_session:
rows = await list_audit_records(
db_session, limit=AUDIT_PAGE_SIZE, before_id=before_id
)
# Cursor for the "next page" link — the smallest id on this
# page; ``None`` if we've run out of rows.
next_before = rows[-1].id if rows and len(rows) == AUDIT_PAGE_SIZE else None
return HTMLResponse(
render(
"audit.html",
user=session["user"],
csrf=session["csrf"],
audit=rows,
next_before=next_before,
)
)
# ---- chat ----
# In-process playground: lets the operator drive any configured
# agent without minting a bearer token. Auth comes from the
# admin cookie; CSRF rides an ``X-CSRF-Token`` header because
# the body is JSON (no form to read). We call the backend
# directly — no HTTP hop to ``/v1/messages`` — so a chat turn
# bypasses the token store but is still audited as
# ``kind="messages"`` with ``actor="admin:<user>"`` and
# ``source="admin_chat"`` in the detail.
@app.get("/chat", response_class=HTMLResponse)
async def chat_page(request: Request) -> Response:
session = _require_session(request, signer)
available = [
a for a in runtime.agents if runtime.backends.get(a.name) is not None
]
return HTMLResponse(
render(
"chat.html",
user=session["user"],
csrf=session["csrf"],
agents=available,
)
)
@app.post("/chat/send")
async def chat_send(request: Request) -> Response:
session = _require_session(request, signer)
submitted = request.headers.get("x-csrf-token")
if not isinstance(submitted, str) or not hmac.compare_digest(
submitted, session["csrf"]
):
raise HTTPException(status.HTTP_403_FORBIDDEN, "csrf check failed")
try:
body = await request.json()
except json.JSONDecodeError as exc:
raise HTTPException(
status.HTTP_400_BAD_REQUEST, f"invalid JSON: {exc}"
) from exc
model = body.get("model")
if not isinstance(model, str):
raise HTTPException(
status.HTTP_400_BAD_REQUEST, "missing or non-string `model`"
)
agent = runtime.agents.get(model)
if agent is None:
raise HTTPException(
status.HTTP_404_NOT_FOUND, f"unknown agent: {model!r}"
)
backend = runtime.backends.get(agent.name)
if backend is None:
raise HTTPException(
status.HTTP_503_SERVICE_UNAVAILABLE,
f"no backend configured for agent {agent.name!r}",
)
messages = body.get("messages") or []
if not isinstance(messages, list):
raise HTTPException(
status.HTTP_400_BAD_REQUEST, "`messages` must be a list"
)
system = body.get("system")
await audit.log(
runtime,
actor=f"admin:{session['user']}",
kind="messages",
agent_name=agent.name,
source="admin_chat",
msgs=len(messages),
)
events = backend.complete(
agent=agent,
messages=messages,
system=system if isinstance(system, str) else None,
)
return StreamingResponse(
_sse_events(events), media_type="text/event-stream"
)
return app
# ---- helpers ------------------------------------------------------------
def _build_template_env() -> Environment:
env = Environment(
loader=PackageLoader("beaver_gateway.frontends.admin", "templates"),
autoescape=select_autoescape(["html"]),
trim_blocks=True,
lstrip_blocks=True,
)
env.filters["fmt_dt"] = _fmt_dt
env.filters["fmt_detail"] = _fmt_detail
return env
def _fmt_dt(value: datetime | None) -> str:
if value is None:
return ""
# ISO without microseconds, with explicit ``Z`` when UTC — easier to
# scan than the default ``+00:00``.
s = value.replace(microsecond=0).isoformat()
return s.replace("+00:00", "Z")
def _fmt_detail(raw: str) -> str:
"""Pretty-print the audit detail blob; leave invalid JSON alone."""
if not raw or raw == "{}":
return ""
try:
return json.dumps(json.loads(raw), separators=(", ", ": "))
except json.JSONDecodeError:
return raw
def _build_endpoint_catalog(
request: Request, runtime: GatewayRuntime
) -> dict[str, Any]:
"""Collect copy-pastable URLs for sibling bearer frontends.
Precedence per frontend:
1. ``frontend.public_base_url`` if set — the operator's explicit
statement of "this is the URL my reverse proxy (Caddy / nginx /
Cloudflare / …) puts in front of me". Used verbatim with the
internal path (``/v1/messages``, ``/<ns>/`` for MCP) appended.
2. ``{scheme}://{request_hostname}:{frontend_port}`` — derived from
the browser's own request so dev / no-proxy setups Just Work.
Scheme honours ``X-Forwarded-Proto`` so a TLS terminator in
front of the admin gets the right protocol.
Imports happen inside the function to avoid a hard dep from
``admin.frontend`` on the other frontend modules — they're optional
and may have non-trivial transitive deps (aiohttp etc.).
"""
from beaver_gateway.frontends.anthropic import AnthropicMessagesFrontend
from beaver_gateway.frontends.mcp_server import McpServerFrontend
scheme = request.headers.get("x-forwarded-proto") or request.url.scheme
hostname = request.url.hostname or "localhost"
def _base_for(fe: AnthropicMessagesFrontend | McpServerFrontend) -> str:
if fe.public_base_url:
return fe.public_base_url
return f"{scheme}://{hostname}:{fe.port}"
anthropic_base: str | None = None
mcp_base: str | None = None
for fe in runtime.frontends:
if isinstance(fe, AnthropicMessagesFrontend) and anthropic_base is None:
anthropic_base = _base_for(fe)
elif isinstance(fe, McpServerFrontend) and mcp_base is None:
mcp_base = _base_for(fe)
agent_rows: list[dict[str, Any]] = []
if anthropic_base is not None:
messages_url = f"{anthropic_base}/v1/messages"
agent_rows.extend(
{
"agent": a.name,
"model": a.model,
"agent_type": a.__class__.__name__,
"url": messages_url,
}
for a in runtime.agents
)
mcp_rows: list[dict[str, Any]] = []
if mcp_base is not None:
mcp_rows.extend(
{"namespace": m.name, "kind": m.kind, "url": f"{mcp_base}/{m.name}/"}
for m in runtime.mcps
)
# ``all`` is synthesised by the aggregator whenever at least
# one MCP is configured — see McpServerFrontend._upstream_url.
if runtime.mcps:
mcp_rows.append(
{"namespace": "all", "kind": "bundle", "url": f"{mcp_base}/all/"}
)
return {
"anthropic_base": anthropic_base,
"mcp_base": mcp_base,
"agents": agent_rows,
"mcps": mcp_rows,
}
async def _sse_events(
events: AsyncIterator[MessageStreamEvent],
) -> AsyncIterator[bytes]:
r"""Serialize a backend stream into Anthropic's ``text/event-stream`` form.
Same wire shape as :mod:`beaver_gateway.frontends.anthropic` —
duplicated rather than imported so the admin frontend stays
independent of that module's private helpers, and so a mid-stream
failure surfaces as an in-band ``error`` event the chat UI can
render rather than a dangling connection.
"""
try:
async for ev in events:
payload = ev.model_dump_json()
yield f"event: {ev.type}\ndata: {payload}\n\n".encode()
except Exception as exc: # noqa: BLE001
_log.exception("admin chat backend stream failed")
err = json.dumps(
{"type": "error", "error": {"type": "api_error", "message": str(exc)}}
)
yield f"event: error\ndata: {err}\n\n".encode()
def _set_session_cookie(response: Response, value: str) -> None:
# ``samesite=lax`` keeps the cookie out of cross-site POSTs but
# follows top-level navigation; ``httponly`` keeps it out of JS;
# ``secure`` is gated on the deployment scheme — toggled by reverse
# proxies in front. Skip the secure flag here so localhost dev
# works over plain HTTP; production deployments behind a TLS
# terminator should set ``Secure`` via the proxy.
response.set_cookie(
SESSION_COOKIE,
value,
max_age=SESSION_MAX_AGE,
httponly=True,
samesite="lax",
path="/",
)
def _current_user(
request: Request, signer: itsdangerous.URLSafeTimedSerializer
) -> dict[str, Any] | None:
raw = request.cookies.get(SESSION_COOKIE)
if not raw:
return None
try:
payload = signer.loads(raw, max_age=SESSION_MAX_AGE)
except itsdangerous.BadSignature:
return None
if not isinstance(payload, dict):
return None
user = payload.get("user")
csrf = payload.get("csrf")
if not isinstance(user, str) or not isinstance(csrf, str):
return None
return {"user": user, "csrf": csrf}
def _require_session(
request: Request, signer: itsdangerous.URLSafeTimedSerializer
) -> dict[str, Any]:
session = _current_user(request, signer)
if session is None:
# GET endpoints want a redirect (so the browser walks the user
# to the login form), not a JSON 401. Mutating endpoints will
# still trip CSRF below, so the redirect is harmless for those.
raise HTTPException(status.HTTP_303_SEE_OTHER, headers={"Location": "/login"})
return session
def _client_ip(request: Request) -> str:
"""Best-effort source IP, in precedence order.
1. ``Cf-Connecting-IP`` — Cloudflare's edge writes this and strips
anything inbound, so when it's present it's authoritative.
2. ``X-Forwarded-For`` leftmost entry — what Caddy / nginx set when
they're the only proxy. We trust this because the deploy plan
puts Caddy directly in front; if the chain ever grows untrusted
hops, this header becomes spoofable from the public side.
3. Socket peer — direct Tailscale / localhost hits.
"""
cf = request.headers.get("cf-connecting-ip")
if cf:
return cf.strip()
xff = request.headers.get("x-forwarded-for")
if xff:
first = xff.split(",", 1)[0].strip()
if first:
return first
return request.client.host if request.client else "unknown"
class _LoginRateLimit:
"""In-memory sliding-window failure counter for ``POST /login``.
Keyed by source IP (see :func:`_client_ip`). Each failure appends a
monotonic timestamp; :meth:`check` drops timestamps older than
``window`` and refuses once ``max_attempts`` remain in the bucket.
A successful login calls :meth:`clear` to wipe the IP's bucket.
No external store: a single-operator admin only needs to survive
process lifetime. Lives entirely on the event loop thread, so no
lock is needed — every method is synchronous and doesn't ``await``.
"""
__slots__ = ("_failures", "_max_attempts", "_window")
def __init__(self, *, max_attempts: int, window: float) -> None:
self._failures: dict[str, deque[float]] = {}
self._max_attempts = max_attempts
self._window = window
def check(self, ip: str) -> bool:
bucket = self._failures.get(ip)
if bucket is None:
return True
cutoff = time.monotonic() - self._window
while bucket and bucket[0] < cutoff:
bucket.popleft()
if not bucket:
self._failures.pop(ip, None)
return True
return len(bucket) < self._max_attempts
def record_failure(self, ip: str) -> None:
bucket = self._failures.setdefault(ip, deque())
cutoff = time.monotonic() - self._window
while bucket and bucket[0] < cutoff:
bucket.popleft()
bucket.append(time.monotonic())
def clear(self, ip: str) -> None:
self._failures.pop(ip, None)
async def _require_csrf(request: Request, session: dict[str, Any]) -> None:
form = await request.form()
submitted = form.get("csrf_token")
if not isinstance(submitted, str) or not hmac.compare_digest(
submitted, session["csrf"]
):
raise HTTPException(status.HTTP_403_FORBIDDEN, "csrf check failed")
@@ -0,0 +1,205 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{% block title %}beaver-gateway · admin{% endblock %}</title>
<script src="https://unpkg.com/htmx.org@2.0.4" defer></script>
<style>
:root {
--bg: #fbfbfd;
--fg: #1d1d1f;
--muted: #6e6e73;
--line: #e5e5ea;
--accent: #0071e3;
--danger: #d70015;
--surface: #ffffff;
--code-bg: #f5f5f7;
}
* { box-sizing: border-box; }
html, body { margin: 0; padding: 0; background: var(--bg); color: var(--fg); }
body {
font-family: -apple-system, "SF Pro Display", "SF Pro Text",
BlinkMacSystemFont, system-ui, sans-serif;
font-size: 15px;
line-height: 1.55;
letter-spacing: -0.005em;
}
a { color: var(--accent); text-decoration: none; }
a:hover { text-decoration: underline; }
code, pre, kbd, samp {
font-family: ui-monospace, "SF Mono", Menlo, Consolas, monospace;
font-size: 0.92em;
}
pre {
background: var(--code-bg);
padding: 1rem 1.25rem;
border-radius: 12px;
overflow-x: auto;
}
header.top {
border-bottom: 1px solid var(--line);
background: var(--surface);
}
header.top .inner {
max-width: 1080px;
margin: 0 auto;
padding: 1rem 1.5rem;
display: flex;
align-items: center;
gap: 1.25rem;
}
header.top h1 {
font-size: 1.05rem;
font-weight: 600;
letter-spacing: -0.01em;
margin: 0;
}
nav.tabs { display: flex; gap: 1.1rem; flex: 1; }
nav.tabs a {
color: var(--fg);
padding: 0.25rem 0.1rem;
border-bottom: 2px solid transparent;
}
nav.tabs a.active {
border-bottom-color: var(--fg);
}
header.top .actor {
color: var(--muted);
font-size: 0.9em;
}
main {
max-width: 1080px;
margin: 0 auto;
padding: 2.25rem 1.5rem 4rem;
}
h2 {
font-weight: 600;
letter-spacing: -0.015em;
margin: 2rem 0 0.75rem;
}
h2:first-of-type { margin-top: 0; }
.muted { color: var(--muted); }
.card {
background: var(--surface);
border: 1px solid var(--line);
border-radius: 14px;
padding: 1.25rem 1.5rem;
margin-bottom: 1.25rem;
}
table { width: 100%; border-collapse: collapse; }
th, td {
text-align: left;
padding: 0.6rem 0.85rem;
border-bottom: 1px solid var(--line);
vertical-align: top;
}
th {
font-weight: 500;
color: var(--muted);
font-size: 0.85em;
text-transform: uppercase;
letter-spacing: 0.04em;
}
tr:last-child td { border-bottom: none; }
.revoked td { color: var(--muted); }
button, .btn {
font-family: inherit;
font-size: 0.92em;
padding: 0.45rem 0.95rem;
border-radius: 8px;
border: 1px solid var(--line);
background: var(--surface);
color: var(--fg);
cursor: pointer;
transition: background 120ms ease;
}
button:hover, .btn:hover { background: var(--code-bg); }
button.primary, .btn.primary {
background: var(--accent);
border-color: var(--accent);
color: white;
}
button.primary:hover, .btn.primary:hover {
background: #005bb5;
}
button.danger { color: var(--danger); border-color: #f0c5c9; }
button.danger:hover { background: #fdeff1; }
form.inline { display: inline; margin: 0; }
.form-grid {
display: grid;
grid-template-columns: minmax(180px, 1fr) 160px auto;
gap: 0.75rem;
align-items: end;
}
.form-grid label {
display: block;
font-size: 0.8em;
color: var(--muted);
text-transform: uppercase;
letter-spacing: 0.05em;
margin-bottom: 0.3rem;
}
input[type="text"], input[type="password"], select {
width: 100%;
padding: 0.55rem 0.7rem;
border: 1px solid var(--line);
border-radius: 8px;
font-family: inherit;
font-size: 0.95em;
background: var(--surface);
color: var(--fg);
}
input:focus, select:focus { outline: 2px solid var(--accent); outline-offset: 1px; }
.banner {
padding: 1rem 1.25rem;
border-radius: 12px;
background: #f0f9ff;
border: 1px solid #cfe8ff;
margin: 0 0 1.25rem;
}
.banner.error {
background: #fff0f0;
border-color: #f5c2c5;
color: var(--danger);
}
.banner.warn {
background: #fff7e6;
border-color: #fadf9c;
}
.pill {
display: inline-block;
padding: 0.1rem 0.55rem;
border-radius: 999px;
background: var(--code-bg);
font-size: 0.78em;
color: var(--muted);
}
.pill.scope-wild { background: #ecfdf3; color: #027a48; }
.pill.scope-admin { background: #fff4e6; color: #b54708; }
</style>
</head>
<body>
{% block header %}
<header class="top">
<div class="inner">
<h1>beaver-gateway</h1>
<nav class="tabs">
<a href="/" class="{% if active == 'dashboard' %}active{% endif %}">Dashboard</a>
<a href="/chat" class="{% if active == 'chat' %}active{% endif %}">Chat</a>
<a href="/tokens" class="{% if active == 'tokens' %}active{% endif %}">Tokens</a>
<a href="/audit" class="{% if active == 'audit' %}active{% endif %}">Audit</a>
</nav>
<span class="actor">Signed in as <strong>{{ user }}</strong></span>
<form class="inline" method="post" action="/logout">
<input type="hidden" name="csrf_token" value="{{ csrf }}">
<button type="submit">Log out</button>
</form>
</div>
</header>
{% endblock %}
<main>
{% block content %}{% endblock %}
</main>
</body>
</html>
@@ -0,0 +1,12 @@
{# Renders into #token-create-result (hx-target on the form). The
two OOB swaps reach the rest of the page: prepend the new row,
and erase the empty-state placeholder if it's still in the DOM. #}
<div class="banner warn">
<strong>Token created: {{ token.name }}</strong>
<p class="muted" style="margin:0.25rem 0 0.5rem;">Copy it now — this is the only time it will be shown.</p>
<pre style="margin:0;">{{ plaintext }}</pre>
</div>
<tbody id="tokens-rows" hx-swap-oob="afterbegin">
{% include "_token_row.html" %}
</tbody>
<tr id="tokens-empty" hx-swap-oob="delete"></tr>
@@ -0,0 +1 @@
<div class="banner error">{{ message }}</div>
@@ -0,0 +1,25 @@
<tr id="token-row-{{ token.id }}" {% if token.revoked_at %}class="revoked"{% endif %}>
<td><strong>{{ token.name }}</strong></td>
<td>
<span class="pill {% if token.scope == '*' %}scope-wild{% elif token.scope == 'admin' %}scope-admin{% endif %}">{{ token.scope }}</span>
</td>
<td><code>{{ token.created_at | fmt_dt }}</code></td>
<td><code>{{ token.last_used_at | fmt_dt }}</code></td>
<td><code>{{ token.revoked_at | fmt_dt }}</code></td>
<td style="text-align:right;">
{% if not token.revoked_at %}
<form
class="inline"
hx-post="/tokens/{{ token.id }}/revoke"
hx-target="#token-row-{{ token.id }}"
hx-swap="outerHTML"
hx-confirm="Revoke token {{ token.name }}? This cannot be undone."
>
<input type="hidden" name="csrf_token" value="{{ csrf }}">
<button class="danger" type="submit">Revoke</button>
</form>
{% else %}
<span class="muted">revoked</span>
{% endif %}
</td>
</tr>
@@ -0,0 +1,37 @@
{% extends "_layout.html" %}
{% set active = "audit" %}
{% block title %}beaver-gateway · audit{% endblock %}
{% block content %}
<h2>Audit log</h2>
<div class="card">
{% if audit %}
<table>
<thead>
<tr>
<th>Time</th><th>Actor</th><th>Kind</th><th>Agent</th><th>Detail</th>
</tr>
</thead>
<tbody>
{% for row in audit %}
<tr>
<td><code>{{ row.ts | fmt_dt }}</code></td>
<td>{{ row.actor }}</td>
<td><span class="pill">{{ row.kind }}</span></td>
<td>{{ row.agent_name or "—" }}</td>
<td><code>{{ row.detail_json | fmt_detail }}</code></td>
</tr>
{% endfor %}
</tbody>
</table>
<p class="muted" style="margin-top:1rem;">
{% if next_before %}
<a href="/audit?before={{ next_before }}">Older entries →</a>
{% else %}
End of log.
{% endif %}
</p>
{% else %}
<p class="muted">Nothing logged yet.</p>
{% endif %}
</div>
{% endblock %}
@@ -0,0 +1,354 @@
{% extends "_layout.html" %}
{% set active = "chat" %}
{% block title %}beaver-gateway · chat{% endblock %}
{% block content %}
<div class="chat-wrap">
<div class="chat-toolbar">
<label>
<span>Agent</span>
<select id="agent-select">
{% for a in agents %}
<option value="{{ a.name }}">{{ a.name }} · {{ a.model }}</option>
{% else %}
<option disabled>no agents with a backend</option>
{% endfor %}
</select>
</label>
<div class="spacer"></div>
<button id="new-chat-btn" type="button">New chat</button>
</div>
<div id="messages" class="chat-messages" aria-live="polite"></div>
<form id="chat-form" class="chat-input">
<textarea id="chat-text" rows="3" placeholder="Message… (⌘/Ctrl+Enter to send)" required></textarea>
<button type="submit" class="primary" id="send-btn">Send</button>
</form>
</div>
<style>
.chat-wrap {
display: flex; flex-direction: column;
height: calc(100vh - 200px); min-height: 480px;
}
.chat-toolbar {
display: flex; gap: 0.85rem; align-items: end;
margin-bottom: 1rem;
}
.chat-toolbar .spacer { flex: 1; }
.chat-toolbar label {
display: flex; flex-direction: column; gap: 0.3rem;
min-width: 260px;
}
.chat-toolbar label > span {
font-size: 0.8em; color: var(--muted);
text-transform: uppercase; letter-spacing: 0.05em;
}
.chat-messages {
flex: 1; overflow-y: auto;
background: var(--surface); border: 1px solid var(--line);
border-radius: 14px; padding: 1.25rem;
display: flex; flex-direction: column; gap: 1rem;
}
.chat-empty { color: var(--muted); margin: auto; }
.msg { display: flex; }
.msg.user { justify-content: flex-end; }
.msg.user .bubble {
max-width: 78%; padding: 0.7rem 0.95rem; border-radius: 14px;
background: var(--accent); color: white;
white-space: pre-wrap; word-wrap: break-word;
}
.msg.assistant .blocks {
display: flex; flex-direction: column; gap: 0.5rem;
max-width: 88%;
}
.block-text {
background: var(--code-bg); padding: 0.7rem 0.95rem;
border-radius: 14px;
white-space: pre-wrap; word-wrap: break-word;
}
details.tool-call, details.thinking-block {
border: 1px solid var(--line); border-radius: 10px;
background: var(--surface); padding: 0 0.85rem;
font-size: 0.9em;
}
details.tool-call > summary,
details.thinking-block > summary {
cursor: pointer; padding: 0.55rem 0;
list-style: none;
display: flex; align-items: center; gap: 0.55rem;
}
details > summary::-webkit-details-marker { display: none; }
details.tool-call > summary::before,
details.thinking-block > summary::before {
content: "▸"; color: var(--muted); font-size: 0.78em;
}
details.tool-call[open] > summary::before,
details.thinking-block[open] > summary::before { content: "▾"; }
.tool-name {
font-family: ui-monospace, "SF Mono", Menlo, monospace;
font-weight: 500;
}
.tool-id, .tool-label {
color: var(--muted); font-size: 0.78em;
font-family: ui-monospace, "SF Mono", Menlo, monospace;
}
details.tool-call pre, details.thinking-block pre {
margin: 0 0 0.6rem; padding: 0.7rem 0.85rem;
background: var(--code-bg); border-radius: 8px;
font-size: 0.85em; max-height: 360px; overflow: auto;
}
.tool-label { display: block; margin: 0.15rem 0 0.25rem; }
.chat-input { display: flex; gap: 0.75rem; margin-top: 1rem; }
.chat-input textarea {
flex: 1; padding: 0.7rem 0.9rem;
border: 1px solid var(--line); border-radius: 12px;
font-family: inherit; font-size: 0.95em;
background: var(--surface); color: var(--fg); resize: vertical;
}
.chat-input textarea:focus { outline: 2px solid var(--accent); outline-offset: 1px; }
.chat-input button { align-self: stretch; padding-left: 1.4rem; padding-right: 1.4rem; }
.chat-error {
color: var(--danger); font-size: 0.85em;
padding: 0.55rem 0.75rem;
background: #fff0f0; border-radius: 10px;
border: 1px solid #f5c2c5;
}
.chat-input button:disabled { opacity: 0.55; cursor: progress; }
</style>
<script>
(() => {
const CSRF = {{ csrf | tojson }};
const agentSelect = document.getElementById("agent-select");
const messagesEl = document.getElementById("messages");
const form = document.getElementById("chat-form");
const textEl = document.getElementById("chat-text");
const sendBtn = document.getElementById("send-btn");
const newBtn = document.getElementById("new-chat-btn");
// Anthropic-style history sent to the backend. We keep assistant
// content text-only — tool_use blocks can't round-trip without
// matching tool_result, and backends (claude-code) run tools
// internally anyway.
let apiMessages = [];
function renderEmpty() {
messagesEl.innerHTML = '<div class="chat-empty">No messages yet — say something.</div>';
}
renderEmpty();
newBtn.addEventListener("click", () => {
apiMessages = [];
renderEmpty();
});
function clearEmpty() {
const e = messagesEl.querySelector(".chat-empty");
if (e) e.remove();
}
function scrollDown() { messagesEl.scrollTop = messagesEl.scrollHeight; }
function appendUser(text) {
clearEmpty();
const row = document.createElement("div");
row.className = "msg user";
const bub = document.createElement("div");
bub.className = "bubble";
bub.textContent = text;
row.appendChild(bub);
messagesEl.appendChild(row);
scrollDown();
}
function appendAssistant() {
clearEmpty();
const row = document.createElement("div");
row.className = "msg assistant";
const blocks = document.createElement("div");
blocks.className = "blocks";
row.appendChild(blocks);
messagesEl.appendChild(row);
scrollDown();
return blocks;
}
function appendError(text) {
clearEmpty();
const e = document.createElement("div");
e.className = "chat-error";
e.textContent = text;
messagesEl.appendChild(e);
scrollDown();
}
function ensureBlock(state, index, type, extra) {
if (state.blocks[index]) return state.blocks[index];
const block = { type };
if (type === "text") {
const el = document.createElement("div");
el.className = "block-text";
state.container.appendChild(el);
block.el = el;
} else if (type === "thinking") {
const det = document.createElement("details");
det.className = "thinking-block";
const sum = document.createElement("summary");
sum.textContent = "Thinking";
det.appendChild(sum);
const pre = document.createElement("pre");
det.appendChild(pre);
state.container.appendChild(det);
block.el = pre;
} else if (type === "tool_use") {
const det = document.createElement("details");
det.className = "tool-call";
const sum = document.createElement("summary");
const name = document.createElement("span");
name.className = "tool-name";
name.textContent = "🔧 " + (extra.name || "tool");
sum.appendChild(name);
if (extra.id) {
const idEl = document.createElement("span");
idEl.className = "tool-id";
idEl.textContent = extra.id;
sum.appendChild(idEl);
}
det.appendChild(sum);
const label = document.createElement("span");
label.className = "tool-label";
label.textContent = "input";
det.appendChild(label);
const pre = document.createElement("pre");
det.appendChild(pre);
state.container.appendChild(det);
block.el = pre;
block.jsonBuf = "";
block.seedInput = extra.input;
}
state.blocks[index] = block;
return block;
}
function applyEvent(state, ev) {
const t = ev.type;
if (t === "content_block_start") {
const cb = ev.content_block || {};
if (cb.type === "text") {
ensureBlock(state, ev.index, "text", {});
} else if (cb.type === "thinking") {
ensureBlock(state, ev.index, "thinking", {});
} else if (cb.type === "tool_use") {
ensureBlock(state, ev.index, "tool_use",
{ name: cb.name, id: cb.id, input: cb.input });
}
} else if (t === "content_block_delta") {
const d = ev.delta || {};
const b = state.blocks[ev.index];
if (!b) return;
if (d.type === "text_delta") {
b.el.textContent += d.text || "";
state.assistantText += d.text || "";
scrollDown();
} else if (d.type === "thinking_delta") {
b.el.textContent += d.thinking || "";
scrollDown();
} else if (d.type === "input_json_delta") {
b.jsonBuf += d.partial_json || "";
}
} else if (t === "content_block_stop") {
const b = state.blocks[ev.index];
if (!b) return;
if (b.type === "tool_use") {
let input = null;
if (b.jsonBuf && b.jsonBuf.trim()) {
try { input = JSON.parse(b.jsonBuf); }
catch { input = b.jsonBuf; }
} else if (b.seedInput !== undefined && b.seedInput !== null) {
input = b.seedInput;
}
b.el.textContent = input === null
? "(no input)"
: (typeof input === "string"
? input
: JSON.stringify(input, null, 2));
}
} else if (t === "error") {
const msg = (ev.error && ev.error.message) || "stream error";
appendError(msg);
}
}
async function send() {
const text = textEl.value.trim();
if (!text) return;
const model = agentSelect.value;
if (!model) { appendError("no agent selected"); return; }
apiMessages.push({ role: "user", content: text });
appendUser(text);
textEl.value = "";
sendBtn.disabled = true;
const container = appendAssistant();
const state = { container, blocks: {}, assistantText: "" };
let resp;
try {
resp = await fetch("/chat/send", {
method: "POST",
headers: {
"Content-Type": "application/json",
"X-CSRF-Token": CSRF,
},
body: JSON.stringify({ model, messages: apiMessages }),
});
} catch (e) {
appendError("network error: " + e.message);
sendBtn.disabled = false;
return;
}
if (!resp.ok || !resp.body) {
let msg = resp.status + " " + resp.statusText;
try { const body = await resp.text(); if (body) msg = body; } catch {}
appendError(msg);
sendBtn.disabled = false;
return;
}
const reader = resp.body.getReader();
const dec = new TextDecoder();
let buf = "";
while (true) {
const { value, done } = await reader.read();
if (done) break;
buf += dec.decode(value, { stream: true });
let i;
while ((i = buf.indexOf("\n\n")) >= 0) {
const raw = buf.slice(0, i);
buf = buf.slice(i + 2);
if (!raw) continue;
let dataLine = "";
for (const line of raw.split("\n")) {
if (line.startsWith("data:")) dataLine += line.slice(5).trimStart();
}
if (!dataLine) continue;
let payload;
try { payload = JSON.parse(dataLine); } catch { continue; }
applyEvent(state, payload);
}
}
if (state.assistantText) {
apiMessages.push({ role: "assistant", content: state.assistantText });
}
sendBtn.disabled = false;
textEl.focus();
}
form.addEventListener("submit", (e) => { e.preventDefault(); send(); });
textEl.addEventListener("keydown", (e) => {
if ((e.metaKey || e.ctrlKey) && e.key === "Enter") {
e.preventDefault();
send();
}
});
})();
</script>
{% endblock %}
@@ -0,0 +1,430 @@
{% extends "_layout.html" %}
{% set active = "dashboard" %}
{% block title %}beaver-gateway · dashboard{% endblock %}
{% block content %}
<h2>Agents</h2>
<div class="card">
{% if agents %}
<table>
<thead><tr><th>Name</th><th>Type</th><th>Model</th><th>Exposed MCPs</th></tr></thead>
<tbody>
{% for a in agents %}
<tr>
<td><code>{{ a.name }}</code></td>
<td><span class="pill">{{ a.__class__.__name__ }}</span></td>
<td><code>{{ a.model }}</code></td>
<td>
{% for em in a.expose_mcps %}<code>{{ em.name }}</code>{% if not loop.last %}, {% endif %}{% else %}<span class="muted"></span>{% endfor %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
{% else %}
<p class="muted">No agents configured.</p>
{% endif %}
</div>
<h2>MCP namespaces</h2>
<div class="card">
{% if mcps %}
<table>
<thead><tr><th>Name</th><th>Kind</th></tr></thead>
<tbody>
{% for m in mcps %}
<tr>
<td><code>{{ m.name }}</code></td>
<td><span class="pill">{{ m.kind }}</span></td>
</tr>
{% endfor %}
</tbody>
</table>
{% else %}
<p class="muted">No MCP servers configured.</p>
{% endif %}
</div>
<h2>Endpoints</h2>
<div class="card endpoints">
{% if endpoints.agents or endpoints.mcps %}
<div class="ep-controls">
<label class="ep-token">
<span>Token (hint)</span>
<select id="ep-token-select">
<option value="" data-scope="">— select a known token —</option>
{% for t in tokens %}
<option value="{{ t.name }}" data-scope="{{ t.scope }}">{{ t.name }} · scope {{ t.scope }}</option>
{% endfor %}
</select>
</label>
<label class="ep-secret">
<span>Bearer secret</span>
<input type="password" id="ep-token-secret" autocomplete="off" spellcheck="false"
placeholder="paste plaintext (we only store the Argon2 hash)">
</label>
<label class="ep-show">
<input type="checkbox" id="ep-show-secret"> <span>show</span>
</label>
</div>
<p class="muted ep-note">
Beaver stores only an Argon2 hash of each token, so the plaintext can't be reconstructed.
Paste the value you saved at creation; if you've lost it, <a href="/tokens">mint a new one</a>.
Below: pick a token to see which endpoints its scope covers, paste the secret to fill it
into URL / curl, then click Copy.
</p>
{% if endpoints.agents %}
<h3 class="ep-h3">Agents — <code>POST /v1/messages</code></h3>
<table class="ep-table" data-required-scope="messages">
<thead><tr><th>Agent</th><th>Model</th><th>URL</th><th class="ep-actions-th"></th></tr></thead>
<tbody>
{% for ep in endpoints.agents %}
<tr class="ep-row" data-kind="messages" data-agent="{{ ep.agent }}" data-url="{{ ep.url }}">
<td>
<code>{{ ep.agent }}</code>
<span class="pill">{{ ep.agent_type }}</span>
<span class="ep-scope-warn" hidden>scope mismatch</span>
</td>
<td><code>{{ ep.model }}</code></td>
<td><code class="ep-url">{{ ep.url }}</code></td>
<td class="ep-actions">
<button type="button" data-action="copy-url">Copy URL</button>
<button type="button" data-action="copy-curl">Copy curl</button>
<button type="button" data-action="toggle-curl" aria-expanded="false">▸ curl</button>
</td>
</tr>
<tr class="ep-curl-row" hidden><td colspan="4"><pre class="ep-curl"></pre></td></tr>
{% endfor %}
</tbody>
</table>
{% elif endpoints.anthropic_base is none and agents %}
<p class="muted">
Agents are declared but no <code>AnthropicMessagesFrontend</code> is configured —
add one to <code>Gateway(frontends=[...])</code> to expose them over HTTP.
</p>
{% endif %}
{% if endpoints.mcps %}
<h3 class="ep-h3">MCP — streamable HTTP</h3>
<table class="ep-table" data-required-scope="mcp">
<thead><tr><th>Namespace</th><th>Kind</th><th>URL</th><th class="ep-actions-th"></th></tr></thead>
<tbody>
{% for ep in endpoints.mcps %}
<tr class="ep-row" data-kind="mcp" data-namespace="{{ ep.namespace }}" data-url="{{ ep.url }}">
<td>
<code>{{ ep.namespace }}</code>
<span class="ep-scope-warn" hidden>scope mismatch</span>
</td>
<td><span class="pill">{{ ep.kind }}</span></td>
<td><code class="ep-url">{{ ep.url }}</code></td>
<td class="ep-actions">
<button type="button" data-action="copy-url">Copy URL</button>
<button type="button" data-action="copy-url-token">Copy URL+token</button>
<button type="button" data-action="copy-curl">Copy curl</button>
<button type="button" data-action="toggle-curl" aria-expanded="false">▸ curl</button>
</td>
</tr>
<tr class="ep-curl-row" hidden><td colspan="4"><pre class="ep-curl"></pre></td></tr>
{% endfor %}
</tbody>
</table>
{% elif endpoints.mcp_base is none and mcps %}
<p class="muted">
MCP servers are declared but no <code>McpServerFrontend</code> is configured —
add one to <code>Gateway(frontends=[...])</code> to expose them over HTTP.
</p>
{% endif %}
{% else %}
<p class="muted">
Nothing exposed yet — declare agents / MCPs and the matching frontends
(<code>AnthropicMessagesFrontend</code>, <code>McpServerFrontend</code>) in your config.
</p>
{% endif %}
</div>
<h2>Recent activity</h2>
<div class="card">
{% if audit %}
<table>
<thead><tr><th>Time</th><th>Actor</th><th>Kind</th><th>Agent</th><th>Detail</th></tr></thead>
<tbody>
{% for row in audit %}
<tr>
<td><code>{{ row.ts | fmt_dt }}</code></td>
<td>{{ row.actor }}</td>
<td><span class="pill">{{ row.kind }}</span></td>
<td>{{ row.agent_name or "—" }}</td>
<td><code>{{ row.detail_json | fmt_detail }}</code></td>
</tr>
{% endfor %}
</tbody>
</table>
<p class="muted" style="margin-top:0.85rem;"><a href="/audit">Full log →</a></p>
{% else %}
<p class="muted">Nothing logged yet.</p>
{% endif %}
</div>
<style>
.endpoints .ep-controls {
display: grid;
grid-template-columns: minmax(220px, 1fr) 2fr auto;
gap: 0.85rem;
align-items: end;
margin-bottom: 0.5rem;
}
.endpoints .ep-controls label > span {
display: block;
font-size: 0.8em; color: var(--muted);
text-transform: uppercase; letter-spacing: 0.05em;
margin-bottom: 0.3rem;
}
.endpoints .ep-show {
display: flex; align-items: center; gap: 0.4rem;
padding-bottom: 0.6rem; color: var(--muted); font-size: 0.9em;
}
.endpoints .ep-note {
font-size: 0.85em; margin: 0.25rem 0 1.2rem;
}
.endpoints .ep-h3 {
font-size: 0.95rem; font-weight: 600;
margin: 1.5rem 0 0.55rem;
}
.endpoints .ep-table { table-layout: auto; }
.endpoints .ep-table th.ep-actions-th { width: 1%; }
.endpoints .ep-url {
word-break: break-all;
}
.endpoints .ep-actions {
white-space: nowrap;
text-align: right;
}
.endpoints .ep-actions button {
padding: 0.32rem 0.7rem;
font-size: 0.85em;
margin-left: 0.3rem;
}
.endpoints .ep-scope-warn {
display: inline-block;
margin-left: 0.5rem;
padding: 0.05rem 0.45rem;
background: #fff7e6;
border: 1px solid #fadf9c;
color: #8a5a00;
border-radius: 999px;
font-size: 0.72em;
}
.endpoints .ep-row.scope-mismatch td:not(.ep-actions) { opacity: 0.55; }
.endpoints .ep-row.scope-mismatch .ep-actions button { opacity: 0.7; }
.endpoints .ep-curl-row td { padding-top: 0; padding-bottom: 0; }
.endpoints .ep-curl {
margin: 0.4rem 0 1rem;
font-size: 0.82em;
max-height: 280px;
}
.endpoints button[data-copied="1"] {
background: #ecfdf3 !important;
border-color: #b6e6cb !important;
color: #027a48;
}
@media (max-width: 760px) {
.endpoints .ep-controls { grid-template-columns: 1fr; }
.endpoints .ep-actions { text-align: left; padding-top: 0.4rem; }
}
</style>
<script>
(() => {
const card = document.querySelector(".endpoints");
if (!card) return;
const sel = card.querySelector("#ep-token-select");
const secret = card.querySelector("#ep-token-secret");
const showCb = card.querySelector("#ep-show-secret");
if (!sel || !secret || !showCb) return;
const PLACEHOLDER = "<YOUR_TOKEN>";
function currentToken() {
const v = secret.value;
return v && v.length > 0 ? v : null;
}
function selectedScope() {
const o = sel.options[sel.selectedIndex];
return o ? (o.getAttribute("data-scope") || "") : "";
}
function scopeCovers(have, need) {
if (!have) return true; // no token selected — don't grey anything out
if (have === "*") return true;
return have === need;
}
function escSh(s) {
// single-quote shell escape: close, escape, reopen.
return "'" + String(s).replace(/'/g, "'\\''") + "'";
}
function buildCurl(row) {
const kind = row.getAttribute("data-kind");
const url = row.getAttribute("data-url");
const tok = currentToken() || PLACEHOLDER;
if (kind === "messages") {
const agent = row.getAttribute("data-agent");
const body = JSON.stringify({
model: agent,
messages: [{ role: "user", content: "hello" }],
stream: true,
});
return [
"curl -N \\",
" -H " + escSh("x-api-key: " + tok) + " \\",
" -H 'content-type: application/json' \\",
" -d " + escSh(body) + " \\",
" " + url,
].join("\n");
}
if (kind === "mcp") {
// Streamable-HTTP MCP wants `initialize` first — the response
// carries the `Mcp-Session-Id` header you must echo back on
// every subsequent call (tools/list, tools/call, ...). We use
// `-i` so the session-id is visible in the response, and ship
// the proper handshake body so a fresh paste actually works.
const body = JSON.stringify({
jsonrpc: "2.0",
id: 1,
method: "initialize",
params: {
protocolVersion: "2024-11-05",
capabilities: {},
clientInfo: { name: "beaver-admin-curl", version: "0" },
},
});
return [
"# 1) initialize — grab Mcp-Session-Id from the response headers",
"curl -N -i \\",
" -H " + escSh("Authorization: Bearer " + tok) + " \\",
" -H 'content-type: application/json' \\",
" -H 'accept: application/json, text/event-stream' \\",
" -d " + escSh(body) + " \\",
" " + url,
"",
"# 2) reuse that id on follow-up calls, e.g. tools/list:",
"# curl -N \\",
"# -H " + escSh("Authorization: Bearer " + tok) + " \\",
"# -H 'Mcp-Session-Id: <paste-from-step-1>' \\",
"# -H 'content-type: application/json' \\",
"# -H 'accept: application/json, text/event-stream' \\",
"# -d '{\"jsonrpc\":\"2.0\",\"id\":2,\"method\":\"tools/list\"}' \\",
"# " + url,
].join("\n");
}
return "";
}
function refreshScopeMarks() {
const have = selectedScope();
card.querySelectorAll(".ep-table").forEach((tbl) => {
const need = tbl.getAttribute("data-required-scope") || "";
const ok = scopeCovers(have, need);
tbl.querySelectorAll("tr.ep-row").forEach((row) => {
const warn = row.querySelector(".ep-scope-warn");
if (!ok) {
row.classList.add("scope-mismatch");
if (warn) warn.hidden = false;
} else {
row.classList.remove("scope-mismatch");
if (warn) warn.hidden = true;
}
});
});
}
function refreshOpenCurls() {
card.querySelectorAll("tr.ep-curl-row").forEach((cr) => {
if (cr.hidden) return;
const row = cr.previousElementSibling;
const pre = cr.querySelector(".ep-curl");
if (row && pre) pre.textContent = buildCurl(row);
});
}
function copyText(btn, text) {
const done = () => {
btn.setAttribute("data-copied", "1");
const prev = btn.textContent;
btn.textContent = "✓ copied";
setTimeout(() => {
btn.removeAttribute("data-copied");
btn.textContent = prev;
}, 1200);
};
if (navigator.clipboard && navigator.clipboard.writeText) {
navigator.clipboard.writeText(text).then(done, () => {
// Fallback for non-secure contexts.
fallbackCopy(text);
done();
});
} else {
fallbackCopy(text);
done();
}
}
function fallbackCopy(text) {
const ta = document.createElement("textarea");
ta.value = text;
ta.style.position = "fixed"; ta.style.opacity = "0";
document.body.appendChild(ta);
ta.select();
try { document.execCommand("copy"); } catch {}
document.body.removeChild(ta);
}
sel.addEventListener("change", () => {
const name = sel.value;
secret.placeholder = name
? "paste plaintext for '" + name + "' (we only store the hash)"
: "paste plaintext (we only store the Argon2 hash)";
refreshScopeMarks();
});
secret.addEventListener("input", refreshOpenCurls);
showCb.addEventListener("change", () => {
secret.type = showCb.checked ? "text" : "password";
});
card.addEventListener("click", (ev) => {
const btn = ev.target.closest("button[data-action]");
if (!btn) return;
const row = btn.closest("tr.ep-row");
if (!row) return;
const action = btn.getAttribute("data-action");
const url = row.getAttribute("data-url");
const tok = currentToken() || PLACEHOLDER;
if (action === "copy-url") {
copyText(btn, url);
} else if (action === "copy-url-token") {
const sep = url.indexOf("?") >= 0 ? "&" : "?";
copyText(btn, url + sep + "token=" + encodeURIComponent(tok));
} else if (action === "copy-curl") {
copyText(btn, buildCurl(row));
} else if (action === "toggle-curl") {
const curlRow = row.nextElementSibling;
if (!curlRow || !curlRow.classList.contains("ep-curl-row")) return;
const pre = curlRow.querySelector(".ep-curl");
if (curlRow.hidden) {
if (pre) pre.textContent = buildCurl(row);
curlRow.hidden = false;
btn.textContent = "▾ curl";
btn.setAttribute("aria-expanded", "true");
} else {
curlRow.hidden = true;
btn.textContent = "▸ curl";
btn.setAttribute("aria-expanded", "false");
}
}
});
refreshScopeMarks();
})();
</script>
{% endblock %}
@@ -0,0 +1,83 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>beaver-gateway · log in</title>
<style>
html, body { margin: 0; padding: 0; height: 100%; background: #fbfbfd; color: #1d1d1f; }
body {
font-family: -apple-system, "SF Pro Display", system-ui, sans-serif;
display: flex;
align-items: center;
justify-content: center;
min-height: 100vh;
}
.card {
background: #fff;
border: 1px solid #e5e5ea;
border-radius: 16px;
padding: 2rem 2.25rem;
width: min(360px, 92vw);
box-shadow: 0 10px 24px rgba(0,0,0,0.04);
}
h1 {
margin: 0 0 0.25rem;
font-size: 1.25rem;
font-weight: 600;
letter-spacing: -0.02em;
}
p.muted { color: #6e6e73; margin: 0 0 1.4rem; font-size: 0.92em; }
label { display: block; margin: 0.85rem 0 0.3rem; font-size: 0.82em; color: #6e6e73; }
input {
width: 100%;
padding: 0.6rem 0.75rem;
font-family: inherit;
font-size: 0.95em;
border: 1px solid #e5e5ea;
border-radius: 8px;
background: #fff;
box-sizing: border-box;
}
input:focus { outline: 2px solid #0071e3; outline-offset: 1px; }
button {
width: 100%;
margin-top: 1.25rem;
padding: 0.65rem;
font-family: inherit;
font-size: 0.95em;
background: #0071e3;
border: 1px solid #0071e3;
color: white;
border-radius: 8px;
cursor: pointer;
}
button:hover { background: #005bb5; }
.error {
background: #fff0f0;
border: 1px solid #f5c2c5;
color: #d70015;
padding: 0.65rem 0.85rem;
border-radius: 8px;
font-size: 0.9em;
margin-bottom: 1rem;
}
</style>
</head>
<body>
<div class="card">
<h1>beaver-gateway</h1>
<p class="muted">Sign in to manage tokens and view audit logs.</p>
{% if error %}
<div class="error">{{ error }}</div>
{% endif %}
<form method="post" action="/login">
<label for="username">Username</label>
<input id="username" name="username" type="text" autocomplete="username" required autofocus>
<label for="password">Password</label>
<input id="password" name="password" type="password" autocomplete="current-password" required>
<button type="submit">Sign in</button>
</form>
</div>
</body>
</html>
@@ -0,0 +1,64 @@
{% extends "_layout.html" %}
{% set active = "tokens" %}
{% block title %}beaver-gateway · tokens{% endblock %}
{% block content %}
<h2>Create token</h2>
<div class="card">
<p class="muted">Plaintext is shown <strong>once</strong>, immediately after creation. Copy it before you navigate away — the database only ever holds the Argon2 hash.</p>
<div id="token-create-result"></div>
<form
hx-post="/tokens"
hx-target="#token-create-result"
hx-swap="innerHTML"
hx-on::after-request="if(event.detail.successful){this.reset();}"
>
<input type="hidden" name="csrf_token" value="{{ csrf }}">
<div class="form-grid">
<div>
<label for="name">Name</label>
<input id="name" name="name" type="text" placeholder="cursor / claude-desktop / mobile …" required>
</div>
<div>
<label for="scope">Scope</label>
<select id="scope" name="scope">
{% for s in scopes %}
<option value="{{ s }}" {% if s == "*" %}selected{% endif %}>{{ s }}</option>
{% endfor %}
</select>
</div>
<div>
<button class="primary" type="submit">Create</button>
</div>
</div>
</form>
</div>
<h2>
Tokens
<span class="muted" style="font-size:0.8em; font-weight:400; margin-left:0.5rem;">
{% if include_revoked %}
<a href="/tokens">Hide revoked</a>
{% else %}
<a href="/tokens?include_revoked=1">Show revoked</a>
{% endif %}
</span>
</h2>
<div class="card">
<table>
<thead>
<tr>
<th>Name</th><th>Scope</th><th>Created</th><th>Last used</th><th>Revoked</th><th></th>
</tr>
</thead>
{# Render the tbody unconditionally so the HTMX OOB swap on
create has a target even when the table starts empty. #}
<tbody id="tokens-rows">
{% for token in tokens %}
{% include "_token_row.html" %}
{% else %}
<tr id="tokens-empty"><td colspan="6" class="muted">No tokens yet. Create one above.</td></tr>
{% endfor %}
</tbody>
</table>
</div>
{% endblock %}
+52 -16
View File
@@ -40,6 +40,7 @@ from anthropic.types import (
from fastapi import FastAPI, HTTPException, Request, status
from fastapi.responses import JSONResponse, StreamingResponse
from beaver_gateway.core import audit
from beaver_gateway.frontends.base import Frontend
if TYPE_CHECKING:
@@ -58,9 +59,24 @@ __all__ = ["AnthropicMessagesFrontend"]
class AnthropicMessagesFrontend(Frontend):
"""FastAPI app behind ``POST /v1/messages`` + ``GET /v1/models``."""
def __init__(self, *, host: str = "0.0.0.0", port: int = 8000) -> None: # noqa: S104
def __init__(
self,
*,
host: str = "0.0.0.0", # noqa: S104
port: int = 8000,
public_base_url: str | None = None,
) -> None:
self.host = host
self.port = port
# External URL prefix the reverse proxy (Caddy/nginx/Cloudflare/…)
# uses to reach this frontend, e.g. ``https://api.example.com/ai``.
# The frontend's internal paths (``/v1/messages``, ``/v1/models``)
# are appended to it when the admin dashboard renders copy-pastable
# URLs. Trailing slash is stripped so the concatenation is
# idempotent. ``None`` means "advertise raw ``host:port``" (dev /
# no proxy) — the dashboard then derives the base from the
# browser's own request hostname.
self.public_base_url = public_base_url.rstrip("/") if public_base_url else None
self._runtime: GatewayRuntime | None = None
self._app: FastAPI | None = None
@@ -93,7 +109,7 @@ class AnthropicMessagesFrontend(Frontend):
@app.get("/v1/models")
async def list_models(request: Request) -> dict[str, Any]:
_require_token(request, runtime)
await _require_token(request, runtime, scope="messages")
data = [
{
"type": "model",
@@ -107,7 +123,7 @@ class AnthropicMessagesFrontend(Frontend):
@app.post("/v1/messages")
async def create_message(request: Request) -> Any:
token_name = _require_token(request, runtime)
token_name = await _require_token(request, runtime, scope="messages")
try:
body = await request.json()
except json.JSONDecodeError as exc:
@@ -148,6 +164,18 @@ class AnthropicMessagesFrontend(Frontend):
stream_flag,
len(messages),
)
# Record at request acceptance, not at stream completion:
# a long streaming response can be aborted mid-flight by
# the client, and we still want the row in the audit trail.
# Detail stays small — no message bodies, no system prompt.
await audit.log(
runtime,
actor=f"token:{token_name}",
kind="messages",
agent_name=agent.name,
stream=stream_flag,
msgs=len(messages),
)
# Forward per-request knobs the Anthropic body may carry —
# backend adapters layer these over per-agent defaults. Only
@@ -166,37 +194,45 @@ class AnthropicMessagesFrontend(Frontend):
)
if stream_flag:
return StreamingResponse(
_sse(events), media_type="text/event-stream"
)
return StreamingResponse(_sse(events), media_type="text/event-stream")
message = await _accumulate(events, model=model)
return JSONResponse(content=message.model_dump(mode="json"))
return app
def _require_token(request: Request, runtime: GatewayRuntime) -> str:
"""Verify the request's bearer and return the token's audit name.
async def _require_token(
request: Request, runtime: GatewayRuntime, *, scope: str
) -> str:
"""Verify the request's bearer + scope, return the token's audit name.
Accepts both ``X-Api-Key: <token>`` (what the official Anthropic
SDK sends — LibreChat, the CLI, third-party clients) and
``Authorization: Bearer <token>`` (curl, Cursor). Raises 401 on
miss. ``TokenStore`` doesn't know about HTTP, so response shape
is owned here.
``Authorization: Bearer <token>`` (curl, Cursor). 401 on missing /
unknown token; 403 on a known token whose scope doesn't cover
``scope`` (Phase 4.3 — bootstrap tokens get ``"*"`` and pass
everything).
"""
api_key = request.headers.get("x-api-key")
name = (
runtime.token_store.verify(api_key)
identity = (
await runtime.token_store.verify(api_key)
if api_key
else runtime.token_store.verify_bearer(request.headers.get("authorization"))
else await runtime.token_store.verify_bearer(
request.headers.get("authorization")
)
if name is None:
)
if identity is None:
raise HTTPException(
status.HTTP_401_UNAUTHORIZED,
"invalid or missing bearer token",
headers={"WWW-Authenticate": "Bearer"},
)
return name
if not identity.allows(scope):
raise HTTPException(
status.HTTP_403_FORBIDDEN,
f"token scope {identity.scope!r} does not cover {scope!r}",
)
return identity.name
async def _sse(events: AsyncIterator[MessageStreamEvent]) -> AsyncIterator[bytes]:
+21 -1
View File
@@ -15,11 +15,12 @@ from dataclasses import dataclass, field
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from collections.abc import Mapping
from collections.abc import Mapping, Sequence
from beaver_gateway.backends.base import Backend
from beaver_gateway.core.auth import TokenStore
from beaver_gateway.core.registry import AgentRegistry, McpRegistry
from beaver_gateway.storage import Database
@dataclass(frozen=True, slots=True)
@@ -35,13 +36,32 @@ class GatewayRuntime:
declared ``McpServer`` so ``ClaudeCodeBackendAdapter`` (Phase 2.2)
can pass them to ``BackendOptions.mcp_servers`` without re-running
discovery.
``db`` (Phase 4.1) is the shared :class:`Database` handle. Phase 4.2
will switch ``TokenStore`` to read from it; Phase 4.3 admin/audit
write through it. Phase 4.1 only attaches it — existing frontends
ignore it.
"""
agents: AgentRegistry
mcps: McpRegistry
backends: dict[str, Backend]
token_store: TokenStore
db: Database
mcp_internal_urls: Mapping[str, str] = field(default_factory=dict)
# Phase 4.3 — AdminFrontend reads creds + cookie-signing key from
# the runtime so the user's ``config.py`` doesn't have to know
# anything about env wiring. Defaulted to empty so existing tests /
# call sites that don't touch the admin path keep building; the
# admin frontend ``configure()`` itself rejects empty values.
admin_user: str = ""
admin_pass: str = ""
session_secret: str = ""
# The full sibling-frontends list, in declaration order. AdminFrontend
# uses it to advertise concrete bearer-endpoint URLs (host/port) on
# the dashboard so the operator can copy ready-to-use links / curl
# snippets. Other frontends ignore it.
frontends: Sequence[Frontend] = field(default_factory=tuple)
class Frontend(ABC):
+134 -49
View File
@@ -1,11 +1,18 @@
"""External MCP frontend (Phase 3.1).
A streamable-HTTP gateway in front of the internal MCP aggregator
(``beaver_gateway.mcp.internal_app``). The aggregator already hosts
every declared ``McpServer`` (``python_tool``, stdio proxy, HTTP proxy)
(``beaver_gateway.mcp.internal_app``). The aggregator hosts every
declared ``McpServer`` (``python_tool``, stdio proxy, HTTP proxy)
under ``/mcp/<name>`` plus a flat ``/mcp/all`` bundle on
``127.0.0.1:INTERNAL_MCP_PORT``. This frontend re-exposes those URLs to
external clients with three additions:
``127.0.0.1:INTERNAL_MCP_PORT`` — that's the *internal* shape.
This frontend re-exposes those namespaces on its own port directly at
``/<name>/`` (no ``/mcp/`` prefix in the external routes — the port
itself already disambiguates). Caddy / nginx / Cloudflare in front
typically strips a prefix back on: ``domain.com/mcp/* → :8001/*``,
controlled by the operator's reverse-proxy config and surfaced to the
admin dashboard via ``public_base_url``. Three additions on top of the
raw aggregator:
* **Bearer auth** — ``Authorization: Bearer <token>``, ``X-Api-Key``,
or ``?token=<…>`` query string. All three forms verify against the
@@ -39,6 +46,7 @@ from starlette.applications import Starlette
from starlette.responses import HTMLResponse, JSONResponse, StreamingResponse
from starlette.routing import Route
from beaver_gateway.core import audit
from beaver_gateway.frontends.base import Frontend
from beaver_gateway.mcp.internal_app import ALL_NAMESPACE
@@ -86,9 +94,30 @@ __all__ = ["McpServerFrontend"]
class McpServerFrontend(Frontend):
"""Auth + audit + reverse-proxy in front of the internal MCP aggregator."""
def __init__(self, *, host: str = "0.0.0.0", port: int = 8001) -> None: # noqa: S104
def __init__(
self,
*,
host: str = "0.0.0.0", # noqa: S104
port: int = 8001,
public_base_url: str | None = None,
) -> None:
self.host = host
self.port = port
# External URL prefix the reverse proxy uses to reach this
# frontend. Internal routes mount namespaces at the port root
# (``/<ns>/`` and ``/all/``) — Caddy / nginx / Cloudflare in
# front decides what external prefix they sit under. Typical
# symmetric setup:
#
# Caddy: handle_path /mcp/* { reverse_proxy localhost:8001 }
# config: public_base_url -> https://api.example.com/mcp
# dashboard advertises: https://api.example.com/mcp/<ns>/
#
# The frontend's ``/<ns>/`` segment gets appended verbatim. Set
# it to whatever matches your proxy. ``None`` means "advertise
# raw ``host:port`` derived from the inbound request" (dev /
# no proxy).
self.public_base_url = public_base_url.rstrip("/") if public_base_url else None
self._runtime: GatewayRuntime | None = None
self._app: Starlette | None = None
# Single shared aiohttp session, opened once when uvicorn starts
@@ -126,7 +155,7 @@ class McpServerFrontend(Frontend):
# Long sock_read because MCP tool calls can take a while
# (a claude tool over HTTP can easily stretch beyond 30s
# on a real tool).
timeout=aiohttp.ClientTimeout(total=None, sock_read=600),
timeout=aiohttp.ClientTimeout(total=None, sock_read=600)
)
try:
yield
@@ -138,16 +167,21 @@ class McpServerFrontend(Frontend):
routes = [
Route("/", self._discovery, methods=["GET"]),
Route("/healthz", self._healthz, methods=["GET"]),
# Two routes per namespace so both the trailing-slash and
# sub-path forms work (``/mcp/time`` AND ``/mcp/time/foo``).
# Starlette doesn't fold them into one route automatically.
# Namespaces mount at the root of this port — the port
# itself already disambiguates this from any other gateway
# surface. Two routes per namespace so both the
# trailing-slash and sub-path forms work (``/time`` AND
# ``/time/foo``); Starlette doesn't fold them into one
# route automatically. The literal routes above (``/``,
# ``/healthz``) are listed first and win the match, so
# they're not eaten by ``/{namespace}``.
Route(
"/mcp/{namespace}",
"/{namespace}",
self._proxy_endpoint,
methods=["GET", "POST", "DELETE", "OPTIONS"],
),
Route(
"/mcp/{namespace}/{path:path}",
"/{namespace}/{path:path}",
self._proxy_endpoint,
methods=["GET", "POST", "DELETE", "OPTIONS"],
),
@@ -159,17 +193,19 @@ class McpServerFrontend(Frontend):
async def _discovery(self, request: Request) -> HTMLResponse | JSONResponse:
runtime = self._require_runtime()
token_name = _verify_request(request, runtime)
if token_name is None:
return _unauthorized()
# Use the request's own scheme+host so the snippets work behind
# reverse proxies / tunnels. Falls back to the configured
# host:port if the client didn't send Host (curl --raw).
base = _external_base_url(request, self.host, self.port)
token_name, err = await _verify_request(request, runtime)
if err is not None:
return err
assert token_name is not None # noqa: S101 — narrow for ty
# ``public_base_url`` wins if configured — it's the operator's
# explicit statement of "this is the URL my reverse proxy puts
# in front of me". Otherwise: use the request's own scheme+host
# so snippets work behind generic reverse proxies / tunnels;
# and fall back to the configured host:port if the client
# didn't send Host (curl --raw).
base = self.public_base_url or _external_base_url(request, self.host, self.port)
html = _render_discovery_page(
base_url=base,
namespaces=list(runtime.mcps),
actor=token_name,
base_url=base, namespaces=list(runtime.mcps), actor=token_name
)
return HTMLResponse(html)
@@ -177,9 +213,10 @@ class McpServerFrontend(Frontend):
self, request: Request
) -> StreamingResponse | JSONResponse:
runtime = self._require_runtime()
token_name = _verify_request(request, runtime)
if token_name is None:
return _unauthorized()
token_name, err = await _verify_request(request, runtime)
if err is not None:
return err
assert token_name is not None # noqa: S101 — narrow for ty
namespace = request.path_params["namespace"]
subpath = request.path_params.get("path", "")
@@ -190,16 +227,21 @@ class McpServerFrontend(Frontend):
token_name,
namespace,
)
await audit.log(
runtime,
actor=f"token:{token_name}",
kind="mcp_call",
namespace=namespace,
method=request.method,
status=404,
)
return JSONResponse(
{"error": "unknown namespace", "namespace": namespace},
status_code=404,
{"error": "unknown namespace", "namespace": namespace}, status_code=404
)
if self._http is None:
# Lifespan hasn't run yet (shouldn't happen with uvicorn).
return JSONResponse(
{"error": "frontend not ready"}, status_code=503
)
return JSONResponse({"error": "frontend not ready"}, status_code=503)
return await _reverse_proxy(
client=self._http,
@@ -207,6 +249,7 @@ class McpServerFrontend(Frontend):
upstream_url=upstream_url,
namespace=namespace,
actor=token_name,
runtime=runtime,
)
def _upstream_url(self, namespace: str, subpath: str) -> str | None:
@@ -233,23 +276,38 @@ class McpServerFrontend(Frontend):
return self._runtime
def _verify_request(request: Request, runtime: GatewayRuntime) -> str | None:
_MCP_SCOPE = "mcp"
async def _verify_request(
request: Request, runtime: GatewayRuntime
) -> tuple[str | None, JSONResponse | None]:
"""Accept ``Authorization: Bearer``, ``X-Api-Key``, or ``?token=``.
The third form is the escape hatch for clients that can only put
secrets in the URL (claude.ai's MCP config historically did this).
All three roads end at the same :class:`TokenStore`.
All three roads end at the same :class:`TokenStore`. Returns
``(actor_name, None)`` on success, ``(None, 401|403)`` otherwise
— the caller forwards the response as-is. Splitting auth vs scope
failures matters: 401 says "send me a token", 403 says "this token
is real but not for this endpoint".
"""
api_key = request.headers.get("x-api-key")
if api_key:
return runtime.token_store.verify(api_key)
identity = await runtime.token_store.verify(api_key)
else:
auth_header = request.headers.get("authorization")
if auth_header:
return runtime.token_store.verify_bearer(auth_header)
identity = await runtime.token_store.verify_bearer(auth_header)
else:
qs_token = request.query_params.get("token")
if qs_token:
return runtime.token_store.verify(qs_token)
return None
identity = await runtime.token_store.verify(qs_token) if qs_token else None
if identity is None:
return None, _unauthorized()
if not identity.allows(_MCP_SCOPE):
return None, _forbidden(identity.scope, _MCP_SCOPE)
return identity.name, None
def _unauthorized() -> JSONResponse:
@@ -260,6 +318,13 @@ def _unauthorized() -> JSONResponse:
)
def _forbidden(scope: str, required: str) -> JSONResponse:
return JSONResponse(
{"error": "insufficient scope", "scope": scope, "required": required},
status_code=403,
)
def _join_subpath(base_url: str, subpath: str) -> str:
"""Concatenate the loopback URL with the proxied sub-path.
@@ -287,6 +352,7 @@ async def _reverse_proxy(
upstream_url: str,
namespace: str,
actor: str,
runtime: GatewayRuntime,
) -> StreamingResponse | JSONResponse:
"""Bidirectionally stream an MCP request between client ↔ internal aggregator.
@@ -325,9 +391,16 @@ async def _reverse_proxy(
namespace,
exc,
)
await audit.log(
runtime,
actor=f"token:{actor}",
kind="mcp_call",
namespace=namespace,
method=request.method,
status=502,
)
return JSONResponse(
{"error": "upstream MCP unreachable", "detail": str(exc)},
status_code=502,
{"error": "upstream MCP unreachable", "detail": str(exc)}, status_code=502
)
_log.info(
@@ -338,6 +411,17 @@ async def _reverse_proxy(
request.url.path,
upstream_resp.status,
)
# Audit at upstream-response time: status reflects the MCP call's
# outcome (200 / tool-error / 4xx). Streaming relay below may be
# cut short by the client, but the row is already in by then.
await audit.log(
runtime,
actor=f"token:{actor}",
kind="mcp_call",
namespace=namespace,
method=request.method,
status=upstream_resp.status,
)
response_headers = _response_headers(upstream_resp.headers)
@@ -397,29 +481,31 @@ def _scrub_query(query: str, *, drop: frozenset[str] | set[str]) -> str:
return urlencode(kept)
def _render_discovery_page(
*, base_url: str, namespaces: list[Any], actor: str
) -> str:
def _render_discovery_page(*, base_url: str, namespaces: list[Any], actor: str) -> str:
"""Render the auth-gated namespace + config-snippet page.
Inline HTML (no Jinja file) — keeps Phase 3 free of template-dir
plumbing that Phase 4's AdminFrontend will own.
"""
name_list = [getattr(ns, "name", str(ns)) for ns in namespaces]
rows = "\n".join(
rows = (
"\n".join(
f""" <tr>
<td><code>{_escape(name)}</code></td>
<td><code>{_escape(f"{base_url}/mcp/{name}/")}</code></td>
<td><code>{_escape(f"{base_url}/{name}/")}</code></td>
</tr>"""
for name in name_list
) or """ <tr><td colspan="2"><em>No MCP servers configured.</em></td></tr>"""
)
or """ \
<tr><td colspan="2"><em>No MCP servers configured.</em></td></tr>"""
)
cursor_snippet = _CURSOR_SNIPPET.format(base_url=base_url)
claude_desktop_snippet = _CLAUDE_DESKTOP_SNIPPET.format(base_url=base_url)
return _DISCOVERY_TEMPLATE.format(
actor=_escape(actor),
base_url=_escape(base_url),
rows=rows,
all_url=_escape(f"{base_url}/mcp/{ALL_NAMESPACE}/"),
all_url=_escape(f"{base_url}/{ALL_NAMESPACE}/"),
cursor_snippet=_escape(cursor_snippet),
claude_desktop_snippet=_escape(claude_desktop_snippet),
)
@@ -443,8 +529,7 @@ _DISCOVERY_TEMPLATE = "\n".join( # noqa: FLY002 — readability beats one-strin
" <title>beaver-gateway · MCP discovery</title>",
" <style>",
" body {{",
" font-family: -apple-system, 'SF Pro Display', system-ui,"
" sans-serif;",
" font-family: -apple-system, 'SF Pro Display', system-ui, sans-serif;",
" max-width: 880px;",
" margin: 3rem auto;",
" padding: 0 1.25rem;",
@@ -502,7 +587,7 @@ _DISCOVERY_TEMPLATE = "\n".join( # noqa: FLY002 — readability beats one-strin
_CURSOR_SNIPPET = """{{
"mcpServers": {{
"beaver-time": {{
"url": "{base_url}/mcp/time/",
"url": "{base_url}/time/",
"headers": {{ "Authorization": "Bearer <YOUR_TOKEN>" }}
}}
}}
@@ -513,7 +598,7 @@ _CLAUDE_DESKTOP_SNIPPET = """{{
"mcpServers": {{
"beaver-time": {{
"type": "http",
"url": "{base_url}/mcp/time/",
"url": "{base_url}/time/",
"headers": {{ "Authorization": "Bearer <YOUR_TOKEN>" }}
}}
}}
+40
View File
@@ -0,0 +1,40 @@
"""SQLModel-backed persistence (Phase 4.1).
The storage layer carries three tables — :class:`Token`, :class:`Session`,
:class:`AuditLog` — and a thin :class:`Database` wrapper around a sync
SQLAlchemy engine. Phase 4.2 (auth migration) and Phase 4.3 (admin UI)
build on this; Phase 4.1 itself only schemas the data and exposes the
``Database`` on :class:`GatewayRuntime` so later phases can reach it.
"""
from beaver_gateway.storage.db import (
Database,
append_audit,
close_session,
create_token,
list_active_tokens,
list_audit_records,
list_tokens,
revoke_token,
touch_session,
touch_token,
upsert_session,
)
from beaver_gateway.storage.models import AuditLog, Session, Token
__all__ = [
"AuditLog",
"Database",
"Session",
"Token",
"append_audit",
"close_session",
"create_token",
"list_active_tokens",
"list_audit_records",
"list_tokens",
"revoke_token",
"touch_session",
"touch_token",
"upsert_session",
]
+237
View File
@@ -0,0 +1,237 @@
"""Async ``Database`` wrapper + the bare-minimum CRUD helpers.
Async to match the rest of the stack (aiohttp, uvicorn, claude-code-api).
psycopg3 has native async support — ``postgresql+psycopg://...`` works
with ``create_async_engine`` directly. SQLite goes through ``aiosqlite``
(``sqlite+aiosqlite://...``); user-facing config still uses the plain
``sqlite:///`` form and we normalise the URL here, so nothing leaks into
``.env`` / docker-compose.
No repository layer (PLAN §4.1 explicitly waives it). Helpers take an
``AsyncSession`` so callers can batch operations into one transaction
(e.g. touch ``last_used_at`` + write an audit line on the same request).
"""
from __future__ import annotations
import json
from datetime import UTC, datetime
from typing import TYPE_CHECKING, Any
from sqlalchemy.ext.asyncio import create_async_engine
from sqlmodel import SQLModel, select
from sqlmodel.ext.asyncio.session import AsyncSession
from beaver_gateway.storage.models import AuditLog, Session, Token
if TYPE_CHECKING:
from collections.abc import Sequence
from sqlalchemy.ext.asyncio import AsyncEngine
def _utcnow() -> datetime:
return datetime.now(UTC)
def _to_async_url(url: str) -> str:
"""Promote the user-facing sync URL to its async driver variant.
Users write ``sqlite:///gw.db`` or ``postgresql://...`` in ``.env``;
we translate to ``sqlite+aiosqlite://`` / ``postgresql+psycopg://``
so they don't have to know which driver we use internally.
"""
if url.startswith(("sqlite+aiosqlite://", "postgresql+psycopg://")):
return url
if url.startswith("sqlite://"):
return "sqlite+aiosqlite://" + url[len("sqlite://") :]
if url.startswith("postgresql://"):
return "postgresql+psycopg://" + url[len("postgresql://") :]
if url.startswith("postgres://"):
return "postgresql+psycopg://" + url[len("postgres://") :]
return url
class Database:
"""Owner of the async SQLAlchemy engine.
Construct once in ``cli`` from ``settings.database_url``, ``await``
:meth:`create_all` at startup, hand the instance to
:class:`GatewayRuntime`. Frontends grab sessions via
:meth:`session` (an async context manager).
``echo=False`` keeps SQL out of INFO logs (admin UI is the
user-facing view); ``connect_args={"check_same_thread": False}``
isn't needed under async sqlite — aiosqlite already runs each
connection on a dedicated thread.
"""
__slots__ = ("_engine",)
def __init__(self, url: str) -> None:
self._engine: AsyncEngine = create_async_engine(_to_async_url(url), echo=False)
async def create_all(self) -> None:
"""Idempotent ``CREATE TABLE IF NOT EXISTS`` for every model."""
async with self._engine.begin() as conn:
await conn.run_sync(SQLModel.metadata.create_all)
def session(self) -> AsyncSession:
"""Open a fresh :class:`AsyncSession` (use as ``async with``).
Callers commit explicitly; we don't auto-commit on exit so a
request that fails mid-flight rolls back by simply not
committing.
"""
return AsyncSession(self._engine, expire_on_commit=False)
async def dispose(self) -> None:
"""Close the engine's connection pool. Idempotent."""
await self._engine.dispose()
# ---- Token CRUD ---------------------------------------------------------
async def list_active_tokens(session: AsyncSession) -> Sequence[Token]:
"""Return every non-revoked token (Phase 4.2 seeds the cache from this)."""
stmt = select(Token).where(Token.revoked_at.is_(None)) # ty: ignore[unresolved-attribute]
result = await session.exec(stmt)
return result.all()
async def list_tokens(
session: AsyncSession, *, include_revoked: bool = False
) -> Sequence[Token]:
"""Return tokens ordered newest-first (Phase 4.3 admin table)."""
stmt = select(Token).order_by(Token.created_at.desc()) # ty: ignore[unresolved-attribute]
if not include_revoked:
stmt = stmt.where(Token.revoked_at.is_(None)) # ty: ignore[unresolved-attribute]
result = await session.exec(stmt)
return result.all()
async def create_token(
session: AsyncSession, *, name: str, scope: str, hashed_value: str
) -> Token:
"""Persist a new token. Caller hashes the plaintext before passing it in."""
row = Token(name=name, scope=scope, hashed_value=hashed_value)
session.add(row)
await session.commit()
await session.refresh(row)
return row
async def revoke_token(session: AsyncSession, *, token_id: int) -> bool:
"""Mark a token revoked. Returns ``False`` if no such row."""
row = await session.get(Token, token_id)
if row is None or row.revoked_at is not None:
return False
row.revoked_at = _utcnow()
session.add(row)
await session.commit()
return True
async def touch_token(session: AsyncSession, *, token_id: int) -> None:
"""Bump ``last_used_at``. Phase 4.2 batches these — not per-request."""
row = await session.get(Token, token_id)
if row is None:
return
row.last_used_at = _utcnow()
session.add(row)
await session.commit()
# ---- Session bookkeeping ------------------------------------------------
async def upsert_session(
session: AsyncSession, *, session_id: str, agent_name: str, fingerprint: str
) -> Session:
"""Insert-or-bump a Session row. Agent/fingerprint never change for an id."""
row = await session.get(Session, session_id)
if row is None:
row = Session(id=session_id, agent_name=agent_name, fingerprint=fingerprint)
else:
row.last_active_at = _utcnow()
session.add(row)
await session.commit()
await session.refresh(row)
return row
async def touch_session(session: AsyncSession, *, session_id: str) -> None:
"""Bump ``last_active_at`` without changing fingerprint/agent."""
row = await session.get(Session, session_id)
if row is None:
return
row.last_active_at = _utcnow()
session.add(row)
await session.commit()
async def close_session(session: AsyncSession, *, session_id: str) -> bool:
"""Mark a session closed. Returns ``False`` if no such row."""
row = await session.get(Session, session_id)
if row is None or row.closed_at is not None:
return False
row.closed_at = _utcnow()
session.add(row)
await session.commit()
return True
# ---- Audit --------------------------------------------------------------
async def append_audit(
session: AsyncSession,
*,
actor: str,
kind: str,
agent_name: str | None = None,
detail: dict[str, Any] | None = None,
) -> AuditLog:
"""Append-only insert. ``detail`` JSON-serialised here, not by callers."""
row = AuditLog(
actor=actor,
kind=kind,
agent_name=agent_name,
detail_json=json.dumps(detail or {}, separators=(",", ":")),
)
session.add(row)
await session.commit()
await session.refresh(row)
return row
async def list_audit_records(
session: AsyncSession, *, limit: int = 50, before_id: int | None = None
) -> Sequence[AuditLog]:
"""Return audit entries newest-first, optionally paginated by id cursor.
``before_id`` is a forward-only cursor: pass the smallest id from the
current page to fetch the next slice. Cheap because ``id`` is the
primary key (ordered insert).
"""
stmt = select(AuditLog).order_by(AuditLog.id.desc()).limit(limit) # ty: ignore[unresolved-attribute]
if before_id is not None:
stmt = stmt.where(AuditLog.id < before_id) # ty: ignore[unsupported-operator]
result = await session.exec(stmt)
return result.all()
__all__ = [
"Database",
"append_audit",
"close_session",
"create_token",
"list_active_tokens",
"list_audit_records",
"list_tokens",
"revoke_token",
"touch_session",
"touch_token",
"upsert_session",
]
+80
View File
@@ -0,0 +1,80 @@
"""SQLModel tables — see PRD §9.
Three tables, all flat, no relationships modelled yet (Phase 4 talks
about ``actor`` and ``agent_name`` as strings — joining audit→token by
name is fine at this volume; we'll introduce FKs when the admin UI
actually demands them).
Datetimes are stored UTC; we set ``default_factory`` rather than relying
on DB defaults so SQLite + Postgres behave identically. Every row that
needs an id uses ``Optional[int]`` so SQLAlchemy can autoincrement.
"""
from __future__ import annotations
from datetime import UTC, datetime
from sqlmodel import Field, SQLModel
def _utcnow() -> datetime:
return datetime.now(UTC)
class Token(SQLModel, table=True):
"""Bearer token issued to an external caller.
``hashed_value`` holds the Argon2 hash (Phase 4.2 — until then,
rows are written by tests / the admin UI, not by ``TokenStore``).
Plaintext is shown to the user **once** at creation and then
discarded.
"""
id: int | None = Field(default=None, primary_key=True)
name: str = Field(index=True, unique=True)
scope: str = Field(default="*")
hashed_value: str
created_at: datetime = Field(default_factory=_utcnow)
last_used_at: datetime | None = Field(default=None)
revoked_at: datetime | None = Field(default=None)
class Session(SQLModel, table=True):
"""Mirror of one live ``claude-code-api`` session.
The id is the ``session_id`` claude itself assigns on the first
turn; we don't generate it. Rows here are for admin observability
(live count, last activity) — the actual pool lives in
``claude_code_api.ClaudeCodeBackend`` and is the source of truth.
"""
id: str = Field(primary_key=True)
agent_name: str = Field(index=True)
fingerprint: str = Field(index=True)
created_at: datetime = Field(default_factory=_utcnow)
last_active_at: datetime = Field(default_factory=_utcnow)
closed_at: datetime | None = Field(default=None)
class AuditLog(SQLModel, table=True):
"""Append-only record of who-did-what.
``actor`` is ``"token:<name>"`` for inbound traffic or
``"admin:<user>"`` for admin-UI actions. ``kind`` is a short tag
(``"messages"`` / ``"mcp_call"`` / ``"token_create"`` / …);
free-form rather than enum so we can add new kinds without a
schema migration. ``detail_json`` is a JSON-encoded blob — keep
it small (paths, method, status), not full bodies.
"""
__tablename__ = "audit_log"
id: int | None = Field(default=None, primary_key=True)
ts: datetime = Field(default_factory=_utcnow, index=True)
actor: str = Field(index=True)
kind: str = Field(index=True)
agent_name: str | None = Field(default=None, index=True)
detail_json: str = Field(default="{}")
__all__ = ["AuditLog", "Session", "Token"]
Generated
+13
View File
@@ -124,6 +124,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" },
]
[[package]]
name = "aiosqlite"
version = "0.22.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4e/8a/64761f4005f17809769d23e518d915db74e6310474e733e3593cfc854ef1/aiosqlite-0.22.1.tar.gz", hash = "sha256:043e0bd78d32888c0a9ca90fc788b38796843360c855a7262a532813133a0650", size = 14821, upload-time = "2025-12-23T19:25:43.997Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/00/b7/e3bf5133d697a08128598c8d0abc5e16377b51465a33756de24fa7dee953/aiosqlite-0.22.1-py3-none-any.whl", hash = "sha256:21c002eb13823fad740196c5a2e9d8e62f6243bd9e7e4a1f87fb5e44ecb4fceb", size = 17405, upload-time = "2025-12-23T19:25:42.139Z" },
]
[[package]]
name = "annotated-doc"
version = "0.0.4"
@@ -253,11 +262,13 @@ version = "0.1.0"
source = { editable = "." }
dependencies = [
{ name = "aiohttp" },
{ name = "aiosqlite" },
{ name = "anthropic" },
{ name = "anyio" },
{ name = "argon2-cffi" },
{ name = "fastapi" },
{ name = "fastmcp" },
{ name = "greenlet" },
{ name = "itsdangerous" },
{ name = "jinja2" },
{ name = "psycopg", extra = ["binary"] },
@@ -290,6 +301,7 @@ dev = [
[package.metadata]
requires-dist = [
{ name = "aiohttp", specifier = ">=3.13.5" },
{ name = "aiosqlite", specifier = ">=0.22.1" },
{ name = "anthropic", specifier = ">=0.103.0" },
{ name = "anyio", specifier = ">=4.13.0" },
{ name = "argon2-cffi", specifier = ">=25.1.0" },
@@ -297,6 +309,7 @@ requires-dist = [
{ name = "claude-code-api", marker = "extra == 'prod'", git = "https://git.kotikot.com/beaver/claude-code-api.git" },
{ name = "fastapi", specifier = ">=0.136.1" },
{ name = "fastmcp", specifier = ">=3.3.1" },
{ name = "greenlet", specifier = ">=3.5.0" },
{ name = "itsdangerous", specifier = ">=2.2.0" },
{ name = "jinja2", specifier = ">=3.1.6" },
{ name = "psycopg", extras = ["binary"], specifier = ">=3.3.4" },