feat: add admin panel

2026-05-20 13:00:08 +02:00
parent 7970d4be9b
commit 0128191ac3
26 changed files with 2985 additions and 115 deletions
@@ -17,3 +17,4 @@ docs/

 # Local env
 .env
+db.db
@@ -12,6 +12,7 @@ from beaver_gateway.agents.base import ExposedMcp
 from beaver_gateway.agents.claude import ClaudeAgent
 from beaver_gateway.agents.raycast import RaycastAgent, RemoteTool, UserPreferences
 from beaver_gateway.core.registry import Gateway
+from beaver_gateway.frontends.admin import AdminFrontend
 from beaver_gateway.frontends.anthropic import AnthropicMessagesFrontend
 from beaver_gateway.frontends.mcp_server import McpServerFrontend
 from beaver_gateway.mcp.types import McpServer
@@ -28,6 +29,7 @@ def current_time() -> str:

    return datetime.now().astimezone().isoformat()

+
 gateway = Gateway(
    agents=[
        # Phase 2.2 — ClaudeCodeBackendAdapter routes this agent's
@@ -98,7 +100,7 @@ gateway = Gateway(
        # ClaudeCode adapter forwards that URL into
        # ``BackendOptions.mcp_servers``. Phase 3's ``McpServerFrontend``
        # reverse-proxies the same internal URL out to external clients.
-        McpServer.python_tool(name="time", tools=[current_time]),
+        McpServer.python_tool(name="time", tools=[current_time])
        # Phase 3 — illustrates the ``lenient`` flag. Real-world stdio MCPs
        # sometimes print "Processing..." or other chatter to stdout before
        # their actual JSON-RPC frames; the default mcp client forwards
@@ -124,12 +126,41 @@ gateway = Gateway(
        # Phase 1.4 — expose the agents as `model=<name>` on an
        # Anthropic-compatible Messages endpoint. Auth comes from
        # `BOOTSTRAP_TOKENS` in the env (`name1:value1,name2:value2`).
+        #
+        # Behind a reverse proxy (Caddy / nginx / Cloudflare) pass
+        # `public_base_url=` so the admin dashboard advertises the
+        # outside URL instead of `host:port`. Caddy strips its own
+        # prefix and the frontend's internal paths (`/v1/messages`,
+        # `/v1/models`) get appended:
+        #   Caddy:  handle_path /ai/*  { reverse_proxy localhost:8000 }
+        #   Config: AnthropicMessagesFrontend(
+        #               port=8000,
+        #               public_base_url="https://domain.com/ai")
+        #   Result: https://domain.com/ai/v1/messages
        AnthropicMessagesFrontend(host="0.0.0.0", port=8000),
        # Phase 3 — re-exposes every declared `McpServer` outside the
-        # gateway with bearer auth + audit log. Per-namespace endpoints
-        # at `/mcp/<name>/`; flat bundle at `/mcp/all/`. Discovery page
-        # (HTML, auth-gated) at `/` with copy-pastable Cursor / Claude
+        # gateway with bearer auth + audit log. Each namespace lives
+        # at `/<name>/` on this port (the port itself disambiguates
+        # MCP traffic — no extra `/mcp` segment in the route); a flat
+        # bundle is published at `/all/`. Discovery page (HTML,
+        # auth-gated) at `/` with copy-pastable Cursor / Claude
        # Desktop snippets. Auth re-uses `BOOTSTRAP_TOKENS`.
+        #
+        # Same `public_base_url=` knob as above. Caddy strips its
+        # prefix; the frontend's `/<name>/` segment gets appended:
+        #   Caddy:  handle_path /mcp/* { reverse_proxy localhost:8001 }
+        #   Config: McpServerFrontend(
+        #               port=8001,
+        #               public_base_url="https://domain.com/mcp")
+        #   Result: https://domain.com/mcp/<name>/  (and /mcp/all/)
        McpServerFrontend(host="0.0.0.0", port=8001),
+        # Phase 4.3 — browser admin UI. Creds come from
+        # `ADMIN_USER`/`ADMIN_PASS`; the session cookie is signed with
+        # `SESSION_SECRET`. Use it to mint tokens (Argon2-hashed in
+        # the DB), revoke them, and watch the audit log. Scope is
+        # enforced on the bearer frontends: tokens minted with scope
+        # `messages` only work on `/v1/messages`; `mcp` only on the
+        # MCP frontend's `/<name>/` routes; `*` works everywhere.
+        AdminFrontend(host="0.0.0.0", port=8002),
    ],
 )
@@ -24,6 +24,11 @@ services:
      # config.py declares one, so set these (or remove the agent)
      # before exposing port 8000.
    ports:
+      # /v1/messages frontend
      - "8000:8000"
+      # MCP server frontend
+      - "8001:8001"
+      # Admin UI (Phase 4.3) — change ADMIN_USER/ADMIN_PASS/SESSION_SECRET
+      - "8002:8002"
    volumes:
      - ./config.py:/config/config.py:ro
@@ -8,11 +8,13 @@ authors = [
 requires-python = ">=3.13"
 dependencies = [
    "aiohttp>=3.13.5",
+    "aiosqlite>=0.22.1",
    "anthropic>=0.103.0",
    "anyio>=4.13.0",
    "argon2-cffi>=25.1.0",
    "fastapi>=0.136.1",
    "fastmcp>=3.3.1",
+    "greenlet>=3.5.0",
    "itsdangerous>=2.2.0",
    "jinja2>=3.1.6",
    "psycopg[binary]>=3.3.4",
@@ -0,0 +1,10 @@
+{
+  "venvPath": ".",
+  "venv": ".venv",
+  "pythonVersion": "3.13",
+  "include": ["src"],
+  "exclude": [".venv", "**/__pycache__", ".pytest_cache"],
+  "extraPaths": ["."],
+  "reportMissingImports": "error",
+  "reportMissingTypeStubs": "none"
+}
@@ -40,6 +40,7 @@ from beaver_gateway.core.registry import AgentRegistry, McpRegistry
 from beaver_gateway.frontends.base import GatewayRuntime
 from beaver_gateway.mcp.internal_app import build_internal_app
 from beaver_gateway.settings import Settings
+from beaver_gateway.storage import Database

 if TYPE_CHECKING:
    from starlette.applications import Starlette
@@ -65,9 +66,24 @@ async def _async_main() -> None:

    agents = AgentRegistry(gateway.agents)
    mcps = McpRegistry(gateway.mcps)
-    token_store = TokenStore.from_env(settings.bootstrap_tokens)
+
+    # Phase 4.1 — open the async DB and run create_all once. Engine
+    # pool is process-wide; ``dispose()`` runs when the exit stack unwinds.
+    db = Database(settings.database_url)
+    await db.create_all()
+
+    # Phase 4.2 — TokenStore now reads from the DB (in-memory cache
+    # primed at start, TTL-refreshed, last_used_at flushed by a
+    # background task). BOOTSTRAP_TOKENS layers on top so first-run /
+    # examples still work without DB writes.
+    token_store = TokenStore(
+        db, bootstrap=TokenStore.parse_bootstrap(settings.bootstrap_tokens)
+    )

    async with AsyncExitStack() as stack:
+        stack.push_async_callback(db.dispose)
+        await token_store.start()
+        stack.push_async_callback(token_store.stop)
        # Internal MCP URLs must exist before we construct any
        # ClaudeCodeBackendAdapter — adapters bake the URLs into their
        # ``BackendOptions.mcp_servers`` at construction time.
@@ -87,7 +103,12 @@ async def _async_main() -> None:
            mcps=mcps,
            backends=backends,
            token_store=token_store,
+            db=db,
            mcp_internal_urls=internal_urls,
+            admin_user=settings.admin_user,
+            admin_pass=settings.admin_pass,
+            session_secret=settings.session_secret,
+            frontends=tuple(gateway.frontends),
        )

        for fe in gateway.frontends:
@@ -115,9 +136,7 @@ async def _async_main() -> None:

        async with asyncio.TaskGroup() as tg:
            if internal_app is not None:
-                tg.create_task(
-                    _serve_internal_mcp(internal_app, settings=settings)
-                )
+                tg.create_task(_serve_internal_mcp(internal_app, settings=settings))
            for fe in gateway.frontends:
                tg.create_task(fe.serve())

@@ -0,0 +1,93 @@
+"""Single entry point for writing :class:`AuditLog` rows.
+
+Every frontend ends up needing the same three-line pattern — open a DB
+session, append a row, swallow failures so the user-visible request
+still succeeds. Phase 4.3 inlined that pattern in the admin frontend
+under a private ``_audit()`` helper; Phase 4.4 lifts it here so the
+Messages and MCP frontends can call the same function and so the
+swallow-and-log policy lives in one place.
+
+The contract:
+
+* ``log(runtime, actor=..., kind=...)`` is best-effort. It awaits
+  the DB write (so callers can ``await`` it before responding and get
+  ordering), but never raises — if the audit insert fails, the function
+  emits an ``exception`` log line and returns.
+* ``actor`` is a free-form string. By convention: ``"token:<name>"``
+  for bearer-authenticated traffic, ``"admin:<user>"`` for admin-UI
+  actions, ``"anon"`` for failed-auth paths we still want to record.
+* ``kind`` is a short tag — see :data:`KNOWN_KINDS` for the set the
+  current frontends emit; new tags don't need a code change here, the
+  column is free-form.
+* ``**detail`` is JSON-serialised by :func:`append_audit`. Keep it
+  small: paths, methods, status codes — not request bodies. Anything
+  passed here lands in ``AuditLog.detail_json`` verbatim.
+
+Why a thin wrapper rather than ``append_audit`` directly: callers want
+"write if you can, otherwise carry on", and pulling the try/except into
+every frontend was already starting to drift (admin had it, bearer
+frontends would have copy-pasted). One module, one policy.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any
+
+from beaver_gateway.storage import append_audit
+
+if TYPE_CHECKING:
+    from beaver_gateway.frontends.base import GatewayRuntime
+
+
+_log = logging.getLogger("beaver_gateway.audit")
+
+
+# Tags currently emitted by the gateway. The set is informational —
+# ``AuditLog.kind`` is free-form so new code can introduce new tags
+# without touching this list — but listing them here gives the admin UI
+# and any downstream log consumers one canonical reference.
+KNOWN_KINDS: frozenset[str] = frozenset(
+    {
+        "messages",  # POST /v1/messages accepted
+        "mcp_call",  # /mcp/<ns>/... proxied
+        "login_ok",
+        "login_failed",
+        "logout",
+        "token_create",
+        "token_revoke",
+    }
+)
+
+
+async def log(
+    runtime: GatewayRuntime,
+    *,
+    actor: str,
+    kind: str,
+    agent_name: str | None = None,
+    **detail: Any,
+) -> None:
+    """Best-effort audit insert. Never raises.
+
+    Opens its own short-lived :class:`AsyncSession` so callers don't
+    have to thread one through. If the DB hiccups (table missing,
+    disk full, connection drop), we log and move on — the audit trail
+    is observability, not a hard precondition for serving the request.
+    """
+    try:
+        async with runtime.db.session() as session:
+            await append_audit(
+                session,
+                actor=actor,
+                kind=kind,
+                agent_name=agent_name,
+                detail=detail or None,
+            )
+    except Exception:  # noqa: BLE001
+        _log.exception(
+            "audit write failed: actor=%s kind=%s agent=%s", actor, kind, agent_name
+        )
+
+
+__all__ = ["KNOWN_KINDS", "log"]
@@ -1,63 +1,195 @@
-"""Bearer-token verification (Phase 1.3 — in-memory only).
+"""Bearer-token verification (Phase 4.2 — DB-backed with in-memory cache).

-Phase 4 will replace this with a DB-backed store (PRD §8 ``tokens``
-table, Argon2 hashes, scopes, ``last_used_at`` batching). Until then,
-frontends authenticate callers against an in-memory ``{value: name}``
-dict seeded from the ``BOOTSTRAP_TOKENS`` env var.
+The store is fed by two sources:

-Format::
+1. **DB** (``Token`` table from Phase 4.1) — the primary source. Rows
+   carry Argon2id hashes; the admin UI (Phase 4.3) will be the only
+   writer at steady state.
+2. **`BOOTSTRAP_TOKENS`** env — a name→plaintext map kept around for
+   first-run, disaster-recovery, and ``examples/`` smoke tests. These
+   entries live alongside DB rows in the cache and are never persisted.

-    BOOTSTRAP_TOKENS=cursor:s3cret,laptop:hunter2
+Hot path is in-memory: at :meth:`start` we pull every non-revoked DB
+row and stash it in a list; subsequent :meth:`verify` calls re-load
+when the cache is older than ``ttl_seconds``. ``last_used_at`` updates
+are coalesced into a small dict and flushed by a background task every
+``flush_interval`` seconds — one transaction per flush rather than one
+per request.

-The *name* side is for audit lines — :py:meth:`TokenStore.verify`
-returns it on hit so callers can attribute the request without
-exposing the raw secret. ``None`` means "no such token"; callers
-turn that into 401.
+We can't index DB rows by a derived plaintext key because Argon2 salts
+are random — so verify does a linear scan over candidates, calling
+``argon2.PasswordHasher.verify`` on each. N is small by design (single
+operator, ~10 tokens at most); the cost is irrelevant. The scan runs
+through ``asyncio.to_thread`` to keep the event loop free of the ~50ms
+KDF block.

-This module deliberately knows nothing about HTTP frameworks — it
-takes a raw token (or the verbatim ``Authorization`` header value)
-and returns a name-or-None. Frontends own the response shape.
+The module knows nothing about HTTP frameworks. It takes a raw token
+(or a verbatim ``Authorization`` header value) and returns a
+:class:`TokenIdentity` (name + scope + db-id), or ``None`` for a miss.
+Frontends own the 401 response shape.
 """

 from __future__ import annotations

+import asyncio
+import contextlib
+import hmac
+import logging
+import time
+from dataclasses import dataclass
+from datetime import UTC, datetime
 from typing import TYPE_CHECKING

+from argon2 import PasswordHasher
+from argon2.exceptions import InvalidHashError, VerifyMismatchError
+
+from beaver_gateway.storage import list_active_tokens, touch_token
+
 if TYPE_CHECKING:
    from collections.abc import Mapping

+    from beaver_gateway.storage import Database
+
+
+_log = logging.getLogger("beaver_gateway.auth")
+
+_BOOTSTRAP_SCOPE = "*"
+

 class TokenStoreError(ValueError):
-    """Malformed ``BOOTSTRAP_TOKENS`` value."""
+    """Malformed ``BOOTSTRAP_TOKENS`` value or duplicate token."""
+
+
+VALID_SCOPES: frozenset[str] = frozenset({"*", "messages", "mcp", "admin"})
+"""The scopes a ``Token.scope`` may hold (Phase 4.3 admin UI enforces).
+
+* ``*`` — wildcard, may use any frontend
+* ``messages`` — Anthropic Messages frontend only
+* ``mcp`` — MCP server frontend only
+* ``admin`` — reserved for programmatic admin access; the AdminFrontend
+  itself authenticates via session cookies, not bearer tokens, so this
+  scope is unused today and kept for forward compatibility.
+"""
+
+
+@dataclass(frozen=True, slots=True)
+class TokenIdentity:
+    """What :meth:`TokenStore.verify` resolves to on success.
+
+    ``token_id`` is the DB row id for persisted tokens, or ``None`` for
+    an env-bootstrap match (those have no DB row to touch). ``scope``
+    gates which frontend the token may hit (see :data:`VALID_SCOPES`);
+    bootstrap tokens implicitly get ``"*"``.
+    """
+
+    name: str
+    scope: str
+    token_id: int | None
+
+    def allows(self, required: str) -> bool:
+        """``True`` when this identity may access a route gated by ``required``.
+
+        ``"*"`` is the wildcard; an exact match satisfies a single scope.
+        Unknown ``required`` values intentionally fall through to a
+        strict equality check — callers should pass one of
+        :data:`VALID_SCOPES`.
+        """
+        return self.scope in ("*", required)
+
+
+@dataclass(frozen=True, slots=True)
+class _CachedToken:
+    """One non-revoked row, copied out of the DB into the hot-path cache."""
+
+    id: int
+    name: str
+    scope: str
+    hashed_value: str
+
+
+# Default Argon2id parameters from ``argon2-cffi`` are fine for our scope.
+# They target ~50ms on a modern CPU — enough to make a stolen-hash brute
+# force expensive, cheap enough to verify a handful per request.
+_HASHER = PasswordHasher()
+
+
+def hash_token(plaintext: str) -> str:
+    """Return an Argon2id hash for ``plaintext`` (admin / seed-only path).
+
+    The Phase 4.3 admin frontend imports this for token creation; Phase 4.2
+    exposes it so smoke scripts can seed the DB without re-implementing
+    the same line.
+    """
+    return _HASHER.hash(plaintext)


 class TokenStore:
-    """Constant-time-ish bearer verification over a static name→value map.
+    """DB-backed verifier with in-memory cache + TTL + batched touches.

-    The store is keyed by value internally (``verify`` lookup is O(1))
-    but constructed from a name→value mapping because that's how the
-    user thinks about it — one human-readable label per caller.
+    Construct in ``cli.main`` after :class:`Database` is up, then
+    ``await store.start()`` to prime the cache and spin up the flusher
+    task. ``await store.stop()`` on shutdown drains the touch queue.
+
+    Bootstrap entries (from ``BOOTSTRAP_TOKENS``) sit alongside DB rows
+    in the same lookup path; we check them first, in constant time, so
+    they remain usable even if the DB is unreachable. They never appear
+    in ``last_used_at`` flushes because they have no DB row.
    """

-    __slots__ = ("_by_value",)
+    __slots__ = (
+        "_bootstrap_by_value",
+        "_bootstrap_scopes",
+        "_cache",
+        "_db",
+        "_flush_interval",
+        "_flusher_task",
+        "_loaded_at",
+        "_lock",
+        "_touch_queue",
+        "_ttl",
+    )

-    def __init__(self, tokens: Mapping[str, str]) -> None:
+    def __init__(
+        self,
+        db: Database | None = None,
+        *,
+        bootstrap: Mapping[str, str] | None = None,
+        ttl_seconds: float = 30.0,
+        flush_interval: float = 5.0,
+    ) -> None:
+        # Bootstrap is keyed by value internally so duplicate values are
+        # caught here. Each value also keeps its name for audit lines.
        by_value: dict[str, str] = {}
-        for name, value in tokens.items():
+        for name, value in (bootstrap or {}).items():
            if not name or not value:
-                msg = f"empty name or value in token map (name={name!r})"
+                msg = f"empty name or value in bootstrap map (name={name!r})"
                raise TokenStoreError(msg)
            if value in by_value:
                msg = (
-                    f"duplicate token value for names "
+                    f"duplicate bootstrap token value for names "
                    f"{by_value[value]!r} and {name!r}"
                )
                raise TokenStoreError(msg)
            by_value[value] = name
-        self._by_value = by_value
+        self._bootstrap_by_value: dict[str, str] = by_value
+        self._bootstrap_scopes: dict[str, str] = dict.fromkeys(
+            by_value.values(), _BOOTSTRAP_SCOPE
+        )

-    @classmethod
-    def from_env(cls, raw: str) -> TokenStore:
+        self._db = db
+        self._ttl = ttl_seconds
+        self._flush_interval = flush_interval
+
+        self._cache: list[_CachedToken] = []
+        self._loaded_at: float = 0.0
+        self._lock = asyncio.Lock()
+        self._touch_queue: dict[int, datetime] = {}
+        self._flusher_task: asyncio.Task[None] | None = None
+
+    # ---- bootstrap parsing (kept for `cli` / tests) ---------------------
+
+    @staticmethod
+    def parse_bootstrap(raw: str) -> dict[str, str]:
        """Parse ``name1:value1,name2:value2`` (the ``BOOTSTRAP_TOKENS`` form)."""
        tokens: dict[str, str] = {}
        for chunk in raw.split(","):
@@ -73,15 +205,82 @@ class TokenStore:
                msg = f"duplicate token name: {name!r}"
                raise TokenStoreError(msg)
            tokens[name] = value
-        return cls(tokens)
+        return tokens

-    def verify(self, token: str | None) -> str | None:
-        """Return the token's name if known, else ``None``."""
+    @classmethod
+    def from_env(cls, raw: str, db: Database | None = None) -> TokenStore:
+        """Legacy entrypoint: bootstrap-only (or bootstrap + db).
+
+        Phase 1.3 call sites still expect a one-liner; we keep the
+        classmethod so they don't have to learn the new constructor.
+        """
+        return cls(db, bootstrap=cls.parse_bootstrap(raw))
+
+    # ---- lifecycle ------------------------------------------------------
+
+    async def start(self) -> None:
+        """Prime the cache and (if a DB is attached) start the flusher loop."""
+        await self._refresh()
+        if self._db is not None:
+            self._flusher_task = asyncio.create_task(
+                self._flusher_loop(), name="beaver-gateway.token-flusher"
+            )
+
+    async def stop(self) -> None:
+        """Cancel the flusher and run one final drain."""
+        if self._flusher_task is not None:
+            self._flusher_task.cancel()
+            with contextlib.suppress(asyncio.CancelledError):
+                await self._flusher_task
+            self._flusher_task = None
+        await self._flush_now()
+
+    async def invalidate(self) -> None:
+        """Force the next verify to re-read from DB (Phase 4.3 admin hook)."""
+        self._loaded_at = 0.0
+
+    # ---- verify path ----------------------------------------------------
+
+    async def verify(self, token: str | None) -> TokenIdentity | None:
+        """Return the matching identity, or ``None`` for unknown/empty tokens."""
        if not token:
            return None
-        return self._by_value.get(token)

-    def verify_bearer(self, authorization: str | None) -> str | None:
+        # Bootstrap first: constant-time compare per entry, never hits DB.
+        # `compare_digest` is overkill for a value→name lookup but cheap
+        # and removes one timing variable for free.
+        for value, name in self._bootstrap_by_value.items():
+            if hmac.compare_digest(token, value):
+                return TokenIdentity(
+                    name=name,
+                    scope=self._bootstrap_scopes.get(name, _BOOTSTRAP_SCOPE),
+                    token_id=None,
+                )
+
+        if self._db is None:
+            return None
+
+        await self._ensure_fresh()
+
+        # Snapshot the cache reference so a refresh mid-scan doesn't
+        # surprise us. List itself is immutable per refresh (we swap,
+        # not mutate).
+        cache = self._cache
+        for entry in cache:
+            try:
+                await asyncio.to_thread(_HASHER.verify, entry.hashed_value, token)
+            except VerifyMismatchError:
+                continue
+            except InvalidHashError:
+                _log.warning(
+                    "token row %d has an unparseable hash — skipping", entry.id
+                )
+                continue
+            self._touch_queue[entry.id] = datetime.now(UTC)
+            return TokenIdentity(name=entry.name, scope=entry.scope, token_id=entry.id)
+        return None
+
+    async def verify_bearer(self, authorization: str | None) -> TokenIdentity | None:
        """Strip the ``Bearer`` prefix (case-insensitive) then verify.

        Accepts a bare token too — Cursor's MCP transport sometimes
@@ -93,13 +292,83 @@ class TokenStore:
            return None
        head, sep, rest = authorization.partition(" ")
        token = rest.strip() if sep and head.lower() == "bearer" else authorization
-        return self.verify(token)
+        return await self.verify(token)

    def __len__(self) -> int:
-        return len(self._by_value)
+        return len(self._cache) + len(self._bootstrap_by_value)

    def __bool__(self) -> bool:
-        return bool(self._by_value)
+        return bool(self._cache) or bool(self._bootstrap_by_value)
+
+    # ---- internals ------------------------------------------------------
+
+    async def _ensure_fresh(self) -> None:
+        if self._db is None:
+            return
+        now = time.monotonic()
+        if now - self._loaded_at <= self._ttl:
+            return
+        async with self._lock:
+            # Re-check under the lock — first arrival reloaded, others
+            # should fall through.
+            now = time.monotonic()
+            if now - self._loaded_at <= self._ttl:
+                return
+            await self._refresh()
+
+    async def _refresh(self) -> None:
+        if self._db is None:
+            self._loaded_at = time.monotonic()
+            return
+        async with self._db.session() as session:
+            rows = await list_active_tokens(session)
+        next_cache: list[_CachedToken] = []
+        for row in rows:
+            if row.id is None:
+                # Defensive: SQLModel will assign an id on insert; a
+                # None here would mean someone handed us an unsaved row.
+                continue
+            next_cache.append(
+                _CachedToken(
+                    id=row.id,
+                    name=row.name,
+                    scope=row.scope,
+                    hashed_value=row.hashed_value,
+                )
+            )
+        self._cache = next_cache
+        self._loaded_at = time.monotonic()
+        _log.debug("token cache refreshed: %d active row(s)", len(next_cache))
+
+    async def _flusher_loop(self) -> None:
+        try:
+            while True:
+                await asyncio.sleep(self._flush_interval)
+                await self._flush_now()
+        except asyncio.CancelledError:
+            raise
+        except Exception:  # noqa: BLE001 — never let the flusher die silently
+            _log.exception("token flusher crashed; touches will stop")
+
+    async def _flush_now(self) -> None:
+        if self._db is None or not self._touch_queue:
+            return
+        # Detach the queue so concurrent verify() writes don't bleed
+        # into the in-flight transaction.
+        pending, self._touch_queue = self._touch_queue, {}
+        async with self._db.session() as session:
+            for token_id in pending:
+                # We don't pass the timestamp through — `touch_token`
+                # stamps `now` itself, and we'd rather have one source
+                # of truth than reconcile clocks.
+                await touch_token(session, token_id=token_id)
+        _log.debug("flushed %d token touch(es)", len(pending))


-__all__ = ["TokenStore", "TokenStoreError"]
+__all__ = [
+    "VALID_SCOPES",
+    "TokenIdentity",
+    "TokenStore",
+    "TokenStoreError",
+    "hash_token",
+]
@@ -0,0 +1,11 @@
+"""Admin UI (Phase 4.3).
+
+Browser-facing console: login, dashboard, token CRUD, audit viewer.
+Templates live in ``./templates``; the package loader picks them up via
+``importlib.resources`` so the layout works editable and inside the
+Docker image without copy hacks.
+"""
+
+from beaver_gateway.frontends.admin.frontend import AdminFrontend
+
+__all__ = ["AdminFrontend"]
@@ -0,0 +1,707 @@
+"""Single-operator admin console.
+
+Jinja2 + HTMX, no SPA framework. Session cookies signed with
+``SESSION_SECRET`` via ``itsdangerous`` (8h TTL). CSRF is double-submit:
+the signed session payload carries a random token that must echo back
+on every state-changing request — as a ``csrf_token`` form field, or as
+an ``X-CSRF-Token`` header on the JSON ``POST /chat/send`` endpoint.
+
+What lives in here:
+
+* ``GET /login`` / ``POST /login`` — env-cred check (``ADMIN_USER`` /
+  ``ADMIN_PASS`` compared with :func:`hmac.compare_digest`).
+* ``POST /logout`` — clears the cookie.
+* ``GET /`` — dashboard: declared agents + MCP namespaces + the last
+  audit slice.
+* ``GET /tokens`` + ``POST /tokens`` + ``POST /tokens/{id}/revoke`` —
+  bearer-token CRUD. Plaintext is rendered exactly once at creation; the
+  DB only ever holds the Argon2 hash. HTMX fragments swap rows in place.
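+* ``GET /audit`` — paginated audit list (id-cursor).
+* ``GET /chat`` + ``POST /chat/send`` — in-process playground that
+  streams a configured agent's reply back as server-sent events.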
+
+Things the admin frontend is *not* responsible for: HTTP-token verify
+on ``/v1/messages`` and ``/mcp`` — that lives on those frontends and
+uses :class:`TokenStore`. Admin's only authentication path is the
+cookie session.
+"""
+
+from __future__ import annotations
+
+import hmac
+import json
+import logging
+import secrets
+import time
+from collections import deque
+from typing import TYPE_CHECKING, Annotated, Any
+
+import itsdangerous
+from fastapi import FastAPI, Form, HTTPException, Request, status
+from fastapi.responses import (
+    HTMLResponse,
+    RedirectResponse,
+    Response,
+    StreamingResponse,
+)
+from jinja2 import Environment, PackageLoader, select_autoescape
+
+from beaver_gateway.core import audit
+from beaver_gateway.core.auth import VALID_SCOPES, hash_token
+from beaver_gateway.frontends.base import Frontend
+from beaver_gateway.storage import (
+    create_token,
+    list_audit_records,
+    list_tokens,
+    revoke_token,
+)
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator
+    from datetime import datetime
+
+    from beaver_gateway.core.events import MessageStreamEvent
+    from beaver_gateway.frontends.base import GatewayRuntime
+
+
+_log = logging.getLogger("beaver_gateway.frontends.admin")
+
+# Cookie name is namespaced to this app so it doesn't collide with other
+# services' cookies on the same host. The ``.v1`` suffix on the salt lets
+# us bump the payload schema later: changing the salt invalidates every
+# previously issued cookie.
+SESSION_COOKIE = "beaver_admin_session"
+SESSION_MAX_AGE = 8 * 3600
+SESSION_SALT = "beaver-gateway.admin.session.v1"
+
+# Scopes the admin is allowed to mint. ``admin`` is reserved (no bearer
+# route consumes it yet, but listing it keeps future programmatic
+# admin access first-class) — see ``core.auth.VALID_SCOPES``.
+CREATABLE_SCOPES = ("*", "messages", "mcp", "admin")
+
+# Audit-log page size — also used by the dashboard's "recent activity"
+# panel. Small enough to keep the dashboard cheap, big enough to be
+# useful at a glance.
+AUDIT_PAGE_SIZE = 50
+
+# /login brute-force ceiling: 5 failed attempts per source IP within
+# LOGIN_WINDOW_SECONDS shuts the door with a 429 until the oldest
+# failure ages out. Single-operator admin, so a legit user only ever
+# burns one IP — a forgiving window beats a tight one with a lockout.
+LOGIN_MAX_ATTEMPTS = 5
+LOGIN_WINDOW_SECONDS = 300.0
+
+
+__all__ = ["AdminFrontend"]
+
+
+class AdminFrontend(Frontend):
+    """FastAPI app behind ``/login`` / ``/tokens`` / ``/audit`` / ``/chat`` / ``/``.
+
+    Lifecycle: construct with a bind ``host`` / ``port``, call
+    :meth:`configure` with the gateway runtime (validates the required
+    settings and builds the app), then await :meth:`serve`.
+    """
+
+    def __init__(self, *, host: str = "0.0.0.0", port: int = 8002) -> None:  # noqa: S104
+        self.host = host
+        self.port = port
+        self._runtime: GatewayRuntime | None = None
+        self._app: FastAPI | None = None
+
+    def configure(self, runtime: GatewayRuntime) -> None:
+        """Bind the frontend to ``runtime`` and build the FastAPI app.
+
+        Raises:
+            RuntimeError: if the runtime carries no session secret or no
+                admin username / password.
+        """
+        # Refuse to wire up if the env didn't carry the bits we need —
+        # an admin UI with empty creds is a footgun, and an unsigned
+        # session cookie is no session at all.
+        if not runtime.session_secret:
+            msg = "AdminFrontend requires SESSION_SECRET in env"
+            raise RuntimeError(msg)
+        if not runtime.admin_user or not runtime.admin_pass:
+            msg = "AdminFrontend requires ADMIN_USER and ADMIN_PASS in env"
+            raise RuntimeError(msg)
+        self._runtime = runtime
+        self._app = self._build_app(runtime)
+
+    async def serve(self) -> None:
+        """Run the configured app under uvicorn until the server exits.
+
+        Raises:
+            RuntimeError: if :meth:`configure` has not been called.
+        """
+        import uvicorn
+
+        if self._app is None:
+            msg = "configure() must be called before serve()"
+            raise RuntimeError(msg)
+        config = uvicorn.Config(
+            self._app, host=self.host, port=self.port, log_level="info"
+        )
+        await uvicorn.Server(config).serve()
+
+    # ---- app construction ----------------------------------------------
+
+    def _build_app(self, runtime: GatewayRuntime) -> FastAPI:  # noqa: PLR0915
+        """Build the FastAPI app with every admin route bound to ``runtime``."""
+        # Routes are declared inline so they close over ``runtime`` /
+        # ``signer`` / ``templates`` instead of threading them through
+        # self-state. Splitting the function would just trade size for
+        # bookkeeping — same total surface, harder to follow.
+        templates = _build_template_env()
+        signer = itsdangerous.URLSafeTimedSerializer(
+            runtime.session_secret, salt=SESSION_SALT
+        )
+        login_limit = _LoginRateLimit(
+            max_attempts=LOGIN_MAX_ATTEMPTS, window=LOGIN_WINDOW_SECONDS
+        )
+        app = FastAPI(title="beaver-gateway / admin", docs_url=None, redoc_url=None)
+
+        def render(name: str, **ctx: Any) -> str:
+            return templates.get_template(name).render(**ctx)
+
+        def secrets_match(submitted: str, expected: str) -> bool:
+            # ``hmac.compare_digest`` raises TypeError for ``str`` inputs
+            # that contain non-ASCII characters, which would turn a bogus
+            # form or header value into a 500. Comparing the UTF-8 bytes
+            # keeps the constant-time comparison and accepts any text.
+            return hmac.compare_digest(
+                submitted.encode("utf-8"), expected.encode("utf-8")
+            )
+
+        # ---- login ----
+
+        @app.get("/login", response_class=HTMLResponse)
+        async def login_page(request: Request) -> Response:
+            # Already-signed-in users skip the form. Login flash is
+            # carried in a one-shot query param so a failed POST can
+            # redirect back here without leaking creds in the URL.
+            if _current_user(request, signer):
+                return RedirectResponse("/", status_code=status.HTTP_303_SEE_OTHER)
+            error = request.query_params.get("error")
+            return HTMLResponse(render("login.html", error=error))
+
+        @app.post("/login", response_class=HTMLResponse)
+        async def login_submit(
+            request: Request,
+            username: Annotated[str, Form(...)],
+            password: Annotated[str, Form(...)],
+        ) -> Response:
+            ip = _client_ip(request)
+            if not login_limit.check(ip):
+                _log.warning("admin login rate-limited: ip=%s user=%r", ip, username)
+                await audit.log(
+                    runtime,
+                    actor=f"admin:{username}",
+                    kind="login_failed",
+                    reason="rate_limited",
+                    ip=ip,
+                )
+                # 429 carries the rendered form back inline so the user
+                # sees the same page with an error banner — no redirect
+                # roundtrip, and an explicit Retry-After for any well-
+                # behaved client (or Caddy doing its own bookkeeping).
+                return HTMLResponse(
+                    render(
+                        "login.html",
+                        error="too many attempts; try again in a few minutes",
+                    ),
+                    status_code=status.HTTP_429_TOO_MANY_REQUESTS,
+                    headers={"Retry-After": str(int(LOGIN_WINDOW_SECONDS))},
+                )
+            # Evaluate both comparisons unconditionally so the response
+            # time doesn't reveal whether the username alone matched.
+            user_ok = secrets_match(username, runtime.admin_user)
+            pass_ok = secrets_match(password, runtime.admin_pass)
+            if not (user_ok and pass_ok):
+                login_limit.record_failure(ip)
+                _log.info("admin login failed: user=%r ip=%s", username, ip)
+                await audit.log(
+                    runtime,
+                    actor=f"admin:{username}",
+                    kind="login_failed",
+                    reason="bad_credentials",
+                    ip=ip,
+                )
+                # 303-redirect after POST so the browser doesn't replay
+                # the form on refresh. Status-303 forces GET on the
+                # follow-up regardless of the original method.
+                return RedirectResponse(
+                    "/login?error=invalid+credentials",
+                    status_code=status.HTTP_303_SEE_OTHER,
+                )
+            login_limit.clear(ip)
+            csrf = secrets.token_urlsafe(24)
+            cookie = signer.dumps({"user": username, "csrf": csrf})
+            response = RedirectResponse("/", status_code=status.HTTP_303_SEE_OTHER)
+            _set_session_cookie(response, cookie)
+            _log.info("admin login ok: user=%s ip=%s", username, ip)
+            await audit.log(runtime, actor=f"admin:{username}", kind="login_ok", ip=ip)
+            return response
+
+        @app.post("/logout")
+        async def logout(request: Request) -> Response:
+            session = _require_session(request, signer)
+            await _require_csrf(request, session)
+            response = RedirectResponse("/login", status_code=status.HTTP_303_SEE_OTHER)
+            response.delete_cookie(SESSION_COOKIE)
+            await audit.log(runtime, actor=f"admin:{session['user']}", kind="logout")
+            return response
+
+        # ---- dashboard ----
+
+        @app.get("/", response_class=HTMLResponse)
+        async def dashboard(request: Request) -> Response:
+            session = _require_session(request, signer)
+            async with runtime.db.session() as db_session:
+                # Named ``recent_audit`` so the local doesn't shadow the
+                # ``audit`` module the other handlers log through.
+                recent_audit = await list_audit_records(
+                    db_session, limit=AUDIT_PAGE_SIZE
+                )
+                tokens = await list_tokens(db_session, include_revoked=False)
+            endpoints = _build_endpoint_catalog(request, runtime)
+            return HTMLResponse(
+                render(
+                    "dashboard.html",
+                    user=session["user"],
+                    csrf=session["csrf"],
+                    agents=list(runtime.agents),
+                    mcps=list(runtime.mcps),
+                    audit=recent_audit,
+                    tokens=tokens,
+                    endpoints=endpoints,
+                )
+            )
+
+        # ---- tokens ----
+
+        @app.get("/tokens", response_class=HTMLResponse)
+        async def tokens_page(request: Request) -> Response:
+            session = _require_session(request, signer)
+            include_revoked = request.query_params.get("include_revoked") == "1"
+            async with runtime.db.session() as db_session:
+                tokens = await list_tokens(db_session, include_revoked=include_revoked)
+            return HTMLResponse(
+                render(
+                    "tokens.html",
+                    user=session["user"],
+                    csrf=session["csrf"],
+                    tokens=tokens,
+                    scopes=CREATABLE_SCOPES,
+                    include_revoked=include_revoked,
+                )
+            )
+
+        @app.post("/tokens", response_class=HTMLResponse)
+        async def tokens_create(
+            request: Request,
+            name: Annotated[str, Form(...)],
+            scope: Annotated[str, Form(...)],
+        ) -> Response:
+            session = _require_session(request, signer)
+            await _require_csrf(request, session)
+            name = name.strip()
+            if not name:
+                raise HTTPException(
+                    status.HTTP_400_BAD_REQUEST, "token name is required"
+                )
+            # The form only offers CREATABLE_SCOPES, so enforce that same
+            # list server-side; VALID_SCOPES stays as the backstop for
+            # what the token store can actually verify.
+            if scope not in CREATABLE_SCOPES or scope not in VALID_SCOPES:
+                raise HTTPException(
+                    status.HTTP_400_BAD_REQUEST, f"invalid scope: {scope!r}"
+                )
+            # Generate the plaintext server-side — clients never pick
+            # their own. URL-safe so it copies cleanly into ``.env``.
+            plaintext = secrets.token_urlsafe(32)
+            hashed = hash_token(plaintext)
+            async with runtime.db.session() as db_session:
+                try:
+                    row = await create_token(
+                        db_session, name=name, scope=scope, hashed_value=hashed
+                    )
+                except Exception as exc:  # noqa: BLE001
+                    # Likely UNIQUE(name) violation; the column is
+                    # indexed so collisions are common. We surface a
+                    # readable HTMX-friendly response rather than 500.
+                    _log.warning("token create failed: %s", exc)
+                    return HTMLResponse(
+                        render(
+                            "_token_error.html",
+                            message=f"could not create token {name!r}: {exc}",
+                        ),
+                        status_code=status.HTTP_409_CONFLICT,
+                    )
+            await runtime.token_store.invalidate()
+            await audit.log(
+                runtime,
+                actor=f"admin:{session['user']}",
+                kind="token_create",
+                name=name,
+                scope=scope,
+                token_id=row.id,
+            )
+            return HTMLResponse(
+                render(
+                    "_token_created.html",
+                    token=row,
+                    plaintext=plaintext,
+                    csrf=session["csrf"],
+                )
+            )
+
+        @app.post("/tokens/{token_id}/revoke", response_class=HTMLResponse)
+        async def tokens_revoke(request: Request, token_id: int) -> Response:
+            session = _require_session(request, signer)
+            await _require_csrf(request, session)
+            async with runtime.db.session() as db_session:
+                ok = await revoke_token(db_session, token_id=token_id)
+            if not ok:
+                raise HTTPException(
+                    status.HTTP_404_NOT_FOUND, f"no active token with id {token_id}"
+                )
+            await runtime.token_store.invalidate()
+            await audit.log(
+                runtime,
+                actor=f"admin:{session['user']}",
+                kind="token_revoke",
+                token_id=token_id,
+            )
+            # Re-fetch so the row reflects the just-stamped revoked_at.
+            async with runtime.db.session() as db_session:
+                rows = await list_tokens(db_session, include_revoked=True)
+            row = next((r for r in rows if r.id == token_id), None)
+            if row is None:
+                # Shouldn't happen — revoke_token said ok — but stay
+                # honest: respond empty so the row disappears from the
+                # table.
+                return HTMLResponse("")
+            return HTMLResponse(
+                render("_token_row.html", token=row, csrf=session["csrf"])
+            )
+
+        # ---- audit ----
+
+        @app.get("/audit", response_class=HTMLResponse)
+        async def audit_page(request: Request) -> Response:
+            session = _require_session(request, signer)
+            before_raw = request.query_params.get("before")
+            before_id: int | None = None
+            if before_raw and before_raw.isdigit():
+                before_id = int(before_raw)
+            async with runtime.db.session() as db_session:
+                rows = await list_audit_records(
+                    db_session, limit=AUDIT_PAGE_SIZE, before_id=before_id
+                )
+            # Cursor for the "next page" link — the smallest id on this
+            # page; ``None`` if we've run out of rows.
+            next_before = rows[-1].id if rows and len(rows) == AUDIT_PAGE_SIZE else None
+            return HTMLResponse(
+                render(
+                    "audit.html",
+                    user=session["user"],
+                    csrf=session["csrf"],
+                    audit=rows,
+                    next_before=next_before,
+                )
+            )
+
+        # ---- chat ----
+
+        # In-process playground: lets the operator drive any configured
+        # agent without minting a bearer token. Auth comes from the
+        # admin cookie; CSRF rides an ``X-CSRF-Token`` header because
+        # the body is JSON (no form to read). We call the backend
+        # directly — no HTTP hop to ``/v1/messages`` — so a chat turn
+        # bypasses the token store but is still audited as
+        # ``kind="messages"`` with ``actor="admin:<user>"`` and
+        # ``source="admin_chat"`` in the detail.
+        @app.get("/chat", response_class=HTMLResponse)
+        async def chat_page(request: Request) -> Response:
+            session = _require_session(request, signer)
+            available = [
+                a for a in runtime.agents if runtime.backends.get(a.name) is not None
+            ]
+            return HTMLResponse(
+                render(
+                    "chat.html",
+                    user=session["user"],
+                    csrf=session["csrf"],
+                    agents=available,
+                )
+            )
+
+        @app.post("/chat/send")
+        async def chat_send(request: Request) -> Response:
+            session = _require_session(request, signer)
+            submitted = request.headers.get("x-csrf-token")
+            if not isinstance(submitted, str) or not secrets_match(
+                submitted, session["csrf"]
+            ):
+                raise HTTPException(status.HTTP_403_FORBIDDEN, "csrf check failed")
+            try:
+                body = await request.json()
+            except (json.JSONDecodeError, UnicodeDecodeError) as exc:
+                raise HTTPException(
+                    status.HTTP_400_BAD_REQUEST, f"invalid JSON: {exc}"
+                ) from exc
+            # Valid JSON can still be a list / string / number; the
+            # ``.get`` lookups below need an object.
+            if not isinstance(body, dict):
+                raise HTTPException(
+                    status.HTTP_400_BAD_REQUEST, "request body must be a JSON object"
+                )
+            model = body.get("model")
+            if not isinstance(model, str):
+                raise HTTPException(
+                    status.HTTP_400_BAD_REQUEST, "missing or non-string `model`"
+                )
+            agent = runtime.agents.get(model)
+            if agent is None:
+                raise HTTPException(
+                    status.HTTP_404_NOT_FOUND, f"unknown agent: {model!r}"
+                )
+            backend = runtime.backends.get(agent.name)
+            if backend is None:
+                raise HTTPException(
+                    status.HTTP_503_SERVICE_UNAVAILABLE,
+                    f"no backend configured for agent {agent.name!r}",
+                )
+            messages = body.get("messages") or []
+            if not isinstance(messages, list):
+                raise HTTPException(
+                    status.HTTP_400_BAD_REQUEST, "`messages` must be a list"
+                )
+            system = body.get("system")
+            await audit.log(
+                runtime,
+                actor=f"admin:{session['user']}",
+                kind="messages",
+                agent_name=agent.name,
+                source="admin_chat",
+                msgs=len(messages),
+            )
+            events = backend.complete(
+                agent=agent,
+                messages=messages,
+                system=system if isinstance(system, str) else None,
+            )
+            return StreamingResponse(
+                _sse_events(events), media_type="text/event-stream"
+            )
+
+        return app
+
+
+# ---- helpers ------------------------------------------------------------
+
+
+def _build_template_env() -> Environment:
+    """Create the Jinja2 environment that renders the admin templates.
+
+    Templates load from the ``templates`` directory of the
+    ``beaver_gateway.frontends.admin`` package, autoescaping is enabled
+    for ``html`` templates, and the ``fmt_dt`` / ``fmt_detail`` filters
+    are registered.
+    """
+    template_env = Environment(
+        loader=PackageLoader("beaver_gateway.frontends.admin", "templates"),
+        autoescape=select_autoescape(["html"]),
+        trim_blocks=True,
+        lstrip_blocks=True,
+    )
+    template_env.filters.update(fmt_dt=_fmt_dt, fmt_detail=_fmt_detail)
+    return template_env
+
+
+def _fmt_dt(value: datetime | None) -> str:
+    """Render ``value`` as a second-resolution ISO-8601 string.
+
+    ``None`` becomes an em dash. A ``+00:00`` offset is shown as ``Z``,
+    which is easier to scan in a table.
+    """
+    if value is not None:
+        iso_text = value.replace(microsecond=0).isoformat()
+        return iso_text.replace("+00:00", "Z")
+    return "—"
+
+
+def _fmt_detail(raw: str) -> str:
+    """Normalise the audit detail JSON blob for display.
+
+    Empty input and the empty object ``{}`` render as an empty string;
+    text that is not valid JSON is returned unchanged.
+    """
+    if not raw or raw == "{}":
+        return ""
+    try:
+        parsed = json.loads(raw)
+    except json.JSONDecodeError:
+        return raw
+    return json.dumps(parsed, separators=(", ", ": "))
+
+
+def _build_endpoint_catalog(
+    request: Request, runtime: GatewayRuntime
+) -> dict[str, Any]:
+    """Collect copy-pastable URLs for sibling bearer frontends.
+
+    Precedence per frontend:
+
+    1. ``frontend.public_base_url`` if set — the operator's explicit
+       statement of "this is the URL my reverse proxy (Caddy / nginx /
+       Cloudflare / …) puts in front of me". Used as given, minus any
+       trailing ``/``, with the internal path (``/v1/messages``,
+       ``/<ns>/`` for MCP) appended.
+    2. ``{scheme}://{request_hostname}:{frontend_port}`` — derived from
+       the browser's own request so dev / no-proxy setups Just Work.
+       Scheme honours ``X-Forwarded-Proto`` (first entry when a proxy
+       chain sends a comma-separated list) so a TLS terminator in front
+       of the admin gets the right protocol. IPv6 literal hosts are
+       wrapped in ``[...]`` so the port suffix stays unambiguous.
+
+    Only the first frontend of each kind found in ``runtime.frontends``
+    is advertised.
+
+    Imports happen inside the function to avoid a hard dep from
+    ``admin.frontend`` on the other frontend modules — they're optional
+    and may have non-trivial transitive deps (aiohttp etc.).
+
+    Returns:
+        A dict with ``anthropic_base`` / ``mcp_base`` (``None`` when no
+        such frontend is configured) plus the ``agents`` and ``mcps``
+        row lists the dashboard renders.
+    """
+    from beaver_gateway.frontends.anthropic import AnthropicMessagesFrontend
+    from beaver_gateway.frontends.mcp_server import McpServerFrontend
+
+    forwarded_proto = request.headers.get("x-forwarded-proto") or ""
+    scheme = forwarded_proto.split(",", 1)[0].strip() or request.url.scheme
+    hostname = request.url.hostname or "localhost"
+    if ":" in hostname and not hostname.startswith("["):
+        # A colon in a bare hostname means an IPv6 literal; without
+        # brackets ``host:port`` would be unparseable.
+        hostname = f"[{hostname}]"
+
+    def _base_for(fe: AnthropicMessagesFrontend | McpServerFrontend) -> str:
+        if fe.public_base_url:
+            # Paths below start with "/", so drop a trailing slash to
+            # avoid advertising ``https://host//v1/messages``.
+            return fe.public_base_url.rstrip("/")
+        return f"{scheme}://{hostname}:{fe.port}"
+
+    anthropic_base: str | None = None
+    mcp_base: str | None = None
+    for fe in runtime.frontends:
+        if isinstance(fe, AnthropicMessagesFrontend) and anthropic_base is None:
+            anthropic_base = _base_for(fe)
+        elif isinstance(fe, McpServerFrontend) and mcp_base is None:
+            mcp_base = _base_for(fe)
+
+    agent_rows: list[dict[str, Any]] = []
+    if anthropic_base is not None:
+        messages_url = f"{anthropic_base}/v1/messages"
+        agent_rows.extend(
+            {
+                "agent": a.name,
+                "model": a.model,
+                "agent_type": a.__class__.__name__,
+                "url": messages_url,
+            }
+            for a in runtime.agents
+        )
+
+    mcp_rows: list[dict[str, Any]] = []
+    if mcp_base is not None:
+        mcp_rows.extend(
+            {"namespace": m.name, "kind": m.kind, "url": f"{mcp_base}/{m.name}/"}
+            for m in runtime.mcps
+        )
+        # ``all`` is synthesised by the aggregator whenever at least
+        # one MCP is configured — see McpServerFrontend._upstream_url.
+        if runtime.mcps:
+            mcp_rows.append(
+                {"namespace": "all", "kind": "bundle", "url": f"{mcp_base}/all/"}
+            )
+
+    return {
+        "anthropic_base": anthropic_base,
+        "mcp_base": mcp_base,
+        "agents": agent_rows,
+        "mcps": mcp_rows,
+    }
+
+
+async def _sse_events(
+    events: AsyncIterator[MessageStreamEvent],
+) -> AsyncIterator[bytes]:
+    r"""Encode a backend event stream as ``text/event-stream`` frames.
+
+    Each event becomes one ``event: <type>`` / ``data: <json>`` frame.
+    The framing is kept local rather than imported from
+    :mod:`beaver_gateway.frontends.anthropic` so this module does not
+    lean on that module's private helpers. If the backend raises
+    mid-stream, the failure is logged and a final in-band ``error``
+    frame is emitted so the chat UI has something to render instead of
+    a dangling connection.
+    """
+    try:
+        async for event in events:
+            data = event.model_dump_json()
+            frame = "event: {}\ndata: {}\n\n".format(event.type, data)
+            yield frame.encode()
+    except Exception as exc:  # noqa: BLE001
+        _log.exception("admin chat backend stream failed")
+        error_body = {
+            "type": "error",
+            "error": {"type": "api_error", "message": str(exc)},
+        }
+        yield "event: error\ndata: {}\n\n".format(json.dumps(error_body)).encode()
+
+
+def _set_session_cookie(response: Response, value: str) -> None:
+    """Attach the signed session cookie ``value`` to ``response``."""
+    # ``samesite=lax`` keeps the cookie off cross-site POSTs while still
+    # following top-level navigation, and ``httponly`` hides it from JS.
+    # ``Secure`` is deliberately not set here so localhost dev works
+    # over plain HTTP; production deployments behind a TLS terminator
+    # should add ``Secure`` at the proxy.
+    cookie_options: dict[str, Any] = {
+        "max_age": SESSION_MAX_AGE,
+        "httponly": True,
+        "samesite": "lax",
+        "path": "/",
+    }
+    response.set_cookie(SESSION_COOKIE, value, **cookie_options)
+
+
+def _current_user(
+    request: Request, signer: itsdangerous.URLSafeTimedSerializer
+) -> dict[str, Any] | None:
+    """Decode the session cookie into ``{"user", "csrf"}``, if valid.
+
+    Returns ``None`` when the cookie is absent, its signature is bad or
+    older than ``SESSION_MAX_AGE``, or the payload is not a dict with
+    string ``user`` and ``csrf`` entries.
+    """
+    token = request.cookies.get(SESSION_COOKIE)
+    if not token:
+        return None
+    try:
+        data = signer.loads(token, max_age=SESSION_MAX_AGE)
+    except itsdangerous.BadSignature:
+        return None
+    if isinstance(data, dict):
+        user, csrf = data.get("user"), data.get("csrf")
+        if isinstance(user, str) and isinstance(csrf, str):
+            # Rebuild the dict so extra payload keys never leak through.
+            return {"user": user, "csrf": csrf}
+    return None
+
+
+def _require_session(
+    request: Request, signer: itsdangerous.URLSafeTimedSerializer
+) -> dict[str, Any]:
+    """Return the caller's validated session or bounce them to ``/login``.
+
+    Raises:
+        HTTPException: 303 with ``Location: /login`` when the request
+            carries no valid session cookie.
+    """
+    session = _current_user(request, signer)
+    if session is not None:
+        return session
+    # GET endpoints want a redirect (so the browser walks the user to
+    # the login form), not a JSON 401. Mutating endpoints get the same
+    # redirect: raising here aborts the handler before it does any work.
+    raise HTTPException(status.HTTP_303_SEE_OTHER, headers={"Location": "/login"})
+
+
+def _client_ip(request: Request) -> str:
+    """Resolve the caller's source IP on a best-effort basis.
+
+    Sources are tried in this order:
+
+    1. ``Cf-Connecting-IP`` — written by Cloudflare's edge, which strips
+       any inbound copy, so when present it is treated as authoritative.
+    2. The leftmost ``X-Forwarded-For`` entry — what Caddy / nginx set
+       when they are the only proxy. Trusted because the deploy plan
+       puts Caddy directly in front; with untrusted hops in the chain
+       this header is spoofable from the public side.
+    3. The socket peer — direct Tailscale / localhost hits.
+
+    Falls back to ``"unknown"`` when none of these is available.
+    """
+    headers = request.headers
+    cloudflare_ip = headers.get("cf-connecting-ip")
+    if cloudflare_ip:
+        return cloudflare_ip.strip()
+    forwarded_for = headers.get("x-forwarded-for")
+    if forwarded_for:
+        leftmost = forwarded_for.partition(",")[0].strip()
+        if leftmost:
+            return leftmost
+    peer = request.client
+    return peer.host if peer else "unknown"
+
+
+class _LoginRateLimit:
+    """In-memory sliding-window failure counter for ``POST /login``.
+
+    Keyed by source IP (see :func:`_client_ip`). Each failure appends a
+    monotonic timestamp; :meth:`check` drops timestamps older than
+    ``window`` and refuses once ``max_attempts`` remain in the bucket.
+    A successful login calls :meth:`clear` to wipe the IP's bucket.
+
+    Buckets for IPs that never come back are evicted by a sweep that
+    :meth:`record_failure` runs at most once per ``window``, so the
+    table cannot grow without bound when failures arrive from many
+    distinct (possibly spoofed) addresses.
+
+    No external store: a single-operator admin only needs to survive
+    process lifetime. Every method is synchronous and never ``await``s,
+    so no lock is taken.
+    """
+
+    __slots__ = ("_failures", "_last_sweep", "_max_attempts", "_window")
+
+    def __init__(self, *, max_attempts: int, window: float) -> None:
+        self._failures: dict[str, deque[float]] = {}
+        self._max_attempts = max_attempts
+        self._window = window
+        self._last_sweep = time.monotonic()
+
+    def check(self, ip: str) -> bool:
+        """Return ``True`` if ``ip`` may attempt another login."""
+        bucket = self._failures.get(ip)
+        if bucket is None:
+            return True
+        self._expire(bucket, time.monotonic() - self._window)
+        if not bucket:
+            del self._failures[ip]
+            return True
+        return len(bucket) < self._max_attempts
+
+    def record_failure(self, ip: str) -> None:
+        """Record one failed attempt for ``ip`` at the current time."""
+        now = time.monotonic()
+        cutoff = now - self._window
+        if now - self._last_sweep >= self._window:
+            # Amortised cleanup: one pass over the table per window.
+            self._sweep(cutoff)
+            self._last_sweep = now
+        bucket = self._failures.setdefault(ip, deque())
+        self._expire(bucket, cutoff)
+        bucket.append(now)
+
+    def clear(self, ip: str) -> None:
+        """Forget every recorded failure for ``ip``."""
+        self._failures.pop(ip, None)
+
+    @staticmethod
+    def _expire(bucket: deque[float], cutoff: float) -> None:
+        """Drop timestamps older than ``cutoff`` from the left of ``bucket``."""
+        while bucket and bucket[0] < cutoff:
+            bucket.popleft()
+
+    def _sweep(self, cutoff: float) -> None:
+        """Remove every bucket whose newest failure is older than ``cutoff``."""
+        stale = [
+            ip
+            for ip, bucket in self._failures.items()
+            if not bucket or bucket[-1] < cutoff
+        ]
+        for ip in stale:
+            del self._failures[ip]
+
+
+async def _require_csrf(request: Request, session: dict[str, Any]) -> None:
+    """Reject the request unless its form echoes the session's CSRF token.
+
+    Reads the ``csrf_token`` form field and compares it, in constant
+    time, with ``session["csrf"]``.
+
+    Raises:
+        HTTPException: 403 when the field is missing, is not text (e.g.
+            a file upload), or does not match.
+    """
+    form = await request.form()
+    submitted = form.get("csrf_token")
+    # ``hmac.compare_digest`` raises TypeError for ``str`` inputs with
+    # non-ASCII characters, which would turn a garbage token into a 500;
+    # comparing the UTF-8 bytes makes it a plain mismatch (403).
+    if not isinstance(submitted, str) or not hmac.compare_digest(
+        submitted.encode("utf-8"), session["csrf"].encode("utf-8")
+    ):
+        raise HTTPException(status.HTTP_403_FORBIDDEN, "csrf check failed")
@@ -0,0 +1,205 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>{% block title %}beaver-gateway · admin{% endblock %}</title>
+  <script src="https://unpkg.com/htmx.org@2.0.4" defer></script>
+  <style>
+    :root {
+      --bg: #fbfbfd;
+      --fg: #1d1d1f;
+      --muted: #6e6e73;
+      --line: #e5e5ea;
+      --accent: #0071e3;
+      --danger: #d70015;
+      --surface: #ffffff;
+      --code-bg: #f5f5f7;
+    }
+    * { box-sizing: border-box; }
+    html, body { margin: 0; padding: 0; background: var(--bg); color: var(--fg); }
+    body {
+      font-family: -apple-system, "SF Pro Display", "SF Pro Text",
+        BlinkMacSystemFont, system-ui, sans-serif;
+      font-size: 15px;
+      line-height: 1.55;
+      letter-spacing: -0.005em;
+    }
+    a { color: var(--accent); text-decoration: none; }
+    a:hover { text-decoration: underline; }
+    code, pre, kbd, samp {
+      font-family: ui-monospace, "SF Mono", Menlo, Consolas, monospace;
+      font-size: 0.92em;
+    }
+    pre {
+      background: var(--code-bg);
+      padding: 1rem 1.25rem;
+      border-radius: 12px;
+      overflow-x: auto;
+    }
+    header.top {
+      border-bottom: 1px solid var(--line);
+      background: var(--surface);
+    }
+    header.top .inner {
+      max-width: 1080px;
+      margin: 0 auto;
+      padding: 1rem 1.5rem;
+      display: flex;
+      align-items: center;
+      gap: 1.25rem;
+    }
+    header.top h1 {
+      font-size: 1.05rem;
+      font-weight: 600;
+      letter-spacing: -0.01em;
+      margin: 0;
+    }
+    nav.tabs { display: flex; gap: 1.1rem; flex: 1; }
+    nav.tabs a {
+      color: var(--fg);
+      padding: 0.25rem 0.1rem;
+      border-bottom: 2px solid transparent;
+    }
+    nav.tabs a.active {
+      border-bottom-color: var(--fg);
+    }
+    header.top .actor {
+      color: var(--muted);
+      font-size: 0.9em;
+    }
+    main {
+      max-width: 1080px;
+      margin: 0 auto;
+      padding: 2.25rem 1.5rem 4rem;
+    }
+    h2 {
+      font-weight: 600;
+      letter-spacing: -0.015em;
+      margin: 2rem 0 0.75rem;
+    }
+    h2:first-of-type { margin-top: 0; }
+    .muted { color: var(--muted); }
+    .card {
+      background: var(--surface);
+      border: 1px solid var(--line);
+      border-radius: 14px;
+      padding: 1.25rem 1.5rem;
+      margin-bottom: 1.25rem;
+    }
+    table { width: 100%; border-collapse: collapse; }
+    th, td {
+      text-align: left;
+      padding: 0.6rem 0.85rem;
+      border-bottom: 1px solid var(--line);
+      vertical-align: top;
+    }
+    th {
+      font-weight: 500;
+      color: var(--muted);
+      font-size: 0.85em;
+      text-transform: uppercase;
+      letter-spacing: 0.04em;
+    }
+    tr:last-child td { border-bottom: none; }
+    .revoked td { color: var(--muted); }
+    button, .btn {
+      font-family: inherit;
+      font-size: 0.92em;
+      padding: 0.45rem 0.95rem;
+      border-radius: 8px;
+      border: 1px solid var(--line);
+      background: var(--surface);
+      color: var(--fg);
+      cursor: pointer;
+      transition: background 120ms ease;
+    }
+    button:hover, .btn:hover { background: var(--code-bg); }
+    button.primary, .btn.primary {
+      background: var(--accent);
+      border-color: var(--accent);
+      color: white;
+    }
+    button.primary:hover, .btn.primary:hover {
+      background: #005bb5;
+    }
+    button.danger { color: var(--danger); border-color: #f0c5c9; }
+    button.danger:hover { background: #fdeff1; }
+    form.inline { display: inline; margin: 0; }
+    .form-grid {
+      display: grid;
+      grid-template-columns: minmax(180px, 1fr) 160px auto;
+      gap: 0.75rem;
+      align-items: end;
+    }
+    .form-grid label {
+      display: block;
+      font-size: 0.8em;
+      color: var(--muted);
+      text-transform: uppercase;
+      letter-spacing: 0.05em;
+      margin-bottom: 0.3rem;
+    }
+    input[type="text"], input[type="password"], select {
+      width: 100%;
+      padding: 0.55rem 0.7rem;
+      border: 1px solid var(--line);
+      border-radius: 8px;
+      font-family: inherit;
+      font-size: 0.95em;
+      background: var(--surface);
+      color: var(--fg);
+    }
+    input:focus, select:focus { outline: 2px solid var(--accent); outline-offset: 1px; }
+    .banner {
+      padding: 1rem 1.25rem;
+      border-radius: 12px;
+      background: #f0f9ff;
+      border: 1px solid #cfe8ff;
+      margin: 0 0 1.25rem;
+    }
+    .banner.error {
+      background: #fff0f0;
+      border-color: #f5c2c5;
+      color: var(--danger);
+    }
+    .banner.warn {
+      background: #fff7e6;
+      border-color: #fadf9c;
+    }
+    .pill {
+      display: inline-block;
+      padding: 0.1rem 0.55rem;
+      border-radius: 999px;
+      background: var(--code-bg);
+      font-size: 0.78em;
+      color: var(--muted);
+    }
+    .pill.scope-wild { background: #ecfdf3; color: #027a48; }
+    .pill.scope-admin { background: #fff4e6; color: #b54708; }
+  </style>
+</head>
+<body>
+  {% block header %}
+  <header class="top">
+    <div class="inner">
+      <h1>beaver-gateway</h1>
+      <nav class="tabs">
+        <a href="/" class="{% if active == 'dashboard' %}active{% endif %}">Dashboard</a>
+        <a href="/chat" class="{% if active == 'chat' %}active{% endif %}">Chat</a>
+        <a href="/tokens" class="{% if active == 'tokens' %}active{% endif %}">Tokens</a>
+        <a href="/audit" class="{% if active == 'audit' %}active{% endif %}">Audit</a>
+      </nav>
+      <span class="actor">Signed in as <strong>{{ user }}</strong></span>
+      <form class="inline" method="post" action="/logout">
+        <input type="hidden" name="csrf_token" value="{{ csrf }}">
+        <button type="submit">Log out</button>
+      </form>
+    </div>
+  </header>
+  {% endblock %}
+  <main>
+    {% block content %}{% endblock %}
+  </main>
+</body>
+</html>
@@ -0,0 +1,12 @@
+{# Renders into #token-create-result (hx-target on the form). The
+   two OOB swaps reach the rest of the page: prepend the new row,
+   and erase the empty-state placeholder if it's still in the DOM. #}
+<div class="banner warn">
+  <strong>Token created: {{ token.name }}</strong>
+  <p class="muted" style="margin:0.25rem 0 0.5rem;">Copy it now — this is the only time it will be shown.</p>
+  <pre style="margin:0;">{{ plaintext }}</pre>
+</div>
+<tbody id="tokens-rows" hx-swap-oob="afterbegin">
+  {% include "_token_row.html" %}
+</tbody>
+<tr id="tokens-empty" hx-swap-oob="delete"></tr>
@@ -0,0 +1 @@
+<div class="banner error">{{ message }}</div>
@@ -0,0 +1,25 @@
+<tr id="token-row-{{ token.id }}" {% if token.revoked_at %}class="revoked"{% endif %}>
+  <td><strong>{{ token.name }}</strong></td>
+  <td>
+    <span class="pill {% if token.scope == '*' %}scope-wild{% elif token.scope == 'admin' %}scope-admin{% endif %}">{{ token.scope }}</span>
+  </td>
+  <td><code>{{ token.created_at | fmt_dt }}</code></td>
+  <td><code>{{ token.last_used_at | fmt_dt }}</code></td>
+  <td><code>{{ token.revoked_at | fmt_dt }}</code></td>
+  <td style="text-align:right;">
+    {% if not token.revoked_at %}
+      <form
+        class="inline"
+        hx-post="/tokens/{{ token.id }}/revoke"
+        hx-target="#token-row-{{ token.id }}"
+        hx-swap="outerHTML"
+        hx-confirm="Revoke token {{ token.name }}? This cannot be undone."
+      >
+        <input type="hidden" name="csrf_token" value="{{ csrf }}">
+        <button class="danger" type="submit">Revoke</button>
+      </form>
+    {% else %}
+      <span class="muted">revoked</span>
+    {% endif %}
+  </td>
+</tr>
@@ -0,0 +1,37 @@
+{% extends "_layout.html" %}
+{% set active = "audit" %}
+{% block title %}beaver-gateway · audit{% endblock %}
+{% block content %}
+  <h2>Audit log</h2>
+  <div class="card">
+    {% if audit %}
+    <table>
+      <thead>
+        <tr>
+          <th>Time</th><th>Actor</th><th>Kind</th><th>Agent</th><th>Detail</th>
+        </tr>
+      </thead>
+      <tbody>
+        {% for row in audit %}
+        <tr>
+          <td><code>{{ row.ts | fmt_dt }}</code></td>
+          <td>{{ row.actor }}</td>
+          <td><span class="pill">{{ row.kind }}</span></td>
+          <td>{{ row.agent_name or "—" }}</td>
+          <td><code>{{ row.detail_json | fmt_detail }}</code></td>
+        </tr>
+        {% endfor %}
+      </tbody>
+    </table>
+    {# Pagination footer: link to the next (older) page when the view supplies
+       a `next_before` cursor, otherwise mark the end of the log. The cursor is
+       URL-encoded so characters such as "+", "&" or spaces survive the round
+       trip through the query string. #}
+    <p class="muted" style="margin-top:1rem;">
+      {% if next_before %}
+        <a href="/audit?before={{ next_before | urlencode }}">Older entries →</a>
+      {% else %}
+        End of log.
+      {% endif %}
+    </p>
+    {% else %}
+      <p class="muted">Nothing logged yet.</p>
+    {% endif %}
+  </div>
+{% endblock %}
@@ -0,0 +1,354 @@
+{% extends "_layout.html" %}
+{% set active = "chat" %}
+{% block title %}beaver-gateway · chat{% endblock %}
+{% block content %}
+<div class="chat-wrap">
+  <div class="chat-toolbar">
+    <label>
+      <span>Agent</span>
+      <select id="agent-select">
+        {% for a in agents %}
+        <option value="{{ a.name }}">{{ a.name }} · {{ a.model }}</option>
+        {% else %}
+        <option disabled>no agents with a backend</option>
+        {% endfor %}
+      </select>
+    </label>
+    <div class="spacer"></div>
+    <button id="new-chat-btn" type="button">New chat</button>
+  </div>
+  <div id="messages" class="chat-messages" aria-live="polite"></div>
+  <form id="chat-form" class="chat-input">
+    <textarea id="chat-text" rows="3" placeholder="Message… (⌘/Ctrl+Enter to send)" required></textarea>
+    <button type="submit" class="primary" id="send-btn">Send</button>
+  </form>
+</div>
+
+<style>
+  .chat-wrap {
+    display: flex; flex-direction: column;
+    height: calc(100vh - 200px); min-height: 480px;
+  }
+  .chat-toolbar {
+    display: flex; gap: 0.85rem; align-items: end;
+    margin-bottom: 1rem;
+  }
+  .chat-toolbar .spacer { flex: 1; }
+  .chat-toolbar label {
+    display: flex; flex-direction: column; gap: 0.3rem;
+    min-width: 260px;
+  }
+  .chat-toolbar label > span {
+    font-size: 0.8em; color: var(--muted);
+    text-transform: uppercase; letter-spacing: 0.05em;
+  }
+  .chat-messages {
+    flex: 1; overflow-y: auto;
+    background: var(--surface); border: 1px solid var(--line);
+    border-radius: 14px; padding: 1.25rem;
+    display: flex; flex-direction: column; gap: 1rem;
+  }
+  .chat-empty { color: var(--muted); margin: auto; }
+  .msg { display: flex; }
+  .msg.user { justify-content: flex-end; }
+  .msg.user .bubble {
+    max-width: 78%; padding: 0.7rem 0.95rem; border-radius: 14px;
+    background: var(--accent); color: white;
+    white-space: pre-wrap; word-wrap: break-word;
+  }
+  .msg.assistant .blocks {
+    display: flex; flex-direction: column; gap: 0.5rem;
+    max-width: 88%;
+  }
+  .block-text {
+    background: var(--code-bg); padding: 0.7rem 0.95rem;
+    border-radius: 14px;
+    white-space: pre-wrap; word-wrap: break-word;
+  }
+  details.tool-call, details.thinking-block {
+    border: 1px solid var(--line); border-radius: 10px;
+    background: var(--surface); padding: 0 0.85rem;
+    font-size: 0.9em;
+  }
+  details.tool-call > summary,
+  details.thinking-block > summary {
+    cursor: pointer; padding: 0.55rem 0;
+    list-style: none;
+    display: flex; align-items: center; gap: 0.55rem;
+  }
+  details > summary::-webkit-details-marker { display: none; }
+  details.tool-call > summary::before,
+  details.thinking-block > summary::before {
+    content: "▸"; color: var(--muted); font-size: 0.78em;
+  }
+  details.tool-call[open] > summary::before,
+  details.thinking-block[open] > summary::before { content: "▾"; }
+  .tool-name {
+    font-family: ui-monospace, "SF Mono", Menlo, monospace;
+    font-weight: 500;
+  }
+  .tool-id, .tool-label {
+    color: var(--muted); font-size: 0.78em;
+    font-family: ui-monospace, "SF Mono", Menlo, monospace;
+  }
+  details.tool-call pre, details.thinking-block pre {
+    margin: 0 0 0.6rem; padding: 0.7rem 0.85rem;
+    background: var(--code-bg); border-radius: 8px;
+    font-size: 0.85em; max-height: 360px; overflow: auto;
+  }
+  .tool-label { display: block; margin: 0.15rem 0 0.25rem; }
+  .chat-input { display: flex; gap: 0.75rem; margin-top: 1rem; }
+  .chat-input textarea {
+    flex: 1; padding: 0.7rem 0.9rem;
+    border: 1px solid var(--line); border-radius: 12px;
+    font-family: inherit; font-size: 0.95em;
+    background: var(--surface); color: var(--fg); resize: vertical;
+  }
+  .chat-input textarea:focus { outline: 2px solid var(--accent); outline-offset: 1px; }
+  .chat-input button { align-self: stretch; padding-left: 1.4rem; padding-right: 1.4rem; }
+  .chat-error {
+    color: var(--danger); font-size: 0.85em;
+    padding: 0.55rem 0.75rem;
+    background: #fff0f0; border-radius: 10px;
+    border: 1px solid #f5c2c5;
+  }
+  .chat-input button:disabled { opacity: 0.55; cursor: progress; }
+</style>
+
+<script>
+(() => {
+  const CSRF = {{ csrf | tojson }};
+  const agentSelect = document.getElementById("agent-select");
+  const messagesEl = document.getElementById("messages");
+  const form = document.getElementById("chat-form");
+  const textEl = document.getElementById("chat-text");
+  const sendBtn = document.getElementById("send-btn");
+  const newBtn = document.getElementById("new-chat-btn");
+
+  // Anthropic-style history sent to the backend. We keep assistant
+  // content text-only — tool_use blocks can't round-trip without
+  // matching tool_result, and backends (claude-code) run tools
+  // internally anyway.
+  let apiMessages = [];
+
+  function renderEmpty() {
+    messagesEl.innerHTML = '<div class="chat-empty">No messages yet — say something.</div>';
+  }
+  renderEmpty();
+
+  newBtn.addEventListener("click", () => {
+    apiMessages = [];
+    renderEmpty();
+  });
+
+  function clearEmpty() {
+    const e = messagesEl.querySelector(".chat-empty");
+    if (e) e.remove();
+  }
+  function scrollDown() { messagesEl.scrollTop = messagesEl.scrollHeight; }
+
+  function appendUser(text) {
+    clearEmpty();
+    const row = document.createElement("div");
+    row.className = "msg user";
+    const bub = document.createElement("div");
+    bub.className = "bubble";
+    bub.textContent = text;
+    row.appendChild(bub);
+    messagesEl.appendChild(row);
+    scrollDown();
+  }
+  function appendAssistant() {
+    clearEmpty();
+    const row = document.createElement("div");
+    row.className = "msg assistant";
+    const blocks = document.createElement("div");
+    blocks.className = "blocks";
+    row.appendChild(blocks);
+    messagesEl.appendChild(row);
+    scrollDown();
+    return blocks;
+  }
+  function appendError(text) {
+    clearEmpty();
+    const e = document.createElement("div");
+    e.className = "chat-error";
+    e.textContent = text;
+    messagesEl.appendChild(e);
+    scrollDown();
+  }
+
+  // Return the render state for content block `index`, creating its DOM on
+  // first use.
+  //
+  // state — per-reply object holding `container` (the assistant row's block
+  //         list) and `blocks` (index -> block record).
+  // index — key under which the block is stored in `state.blocks`.
+  // type  — "text", "thinking" or "tool_use"; any other value yields a
+  //         record with only `type` set and no DOM element.
+  // extra — for "tool_use": `name`, `id` and `input` taken from the start
+  //         event; unused for the other types.
+  //
+  // The returned record's `el` is the element that receives streamed text:
+  // the bubble <div> for text, the <pre> inside the <details> for thinking
+  // and tool calls.
+  function ensureBlock(state, index, type, extra) {
+    if (state.blocks[index]) return state.blocks[index];
+    const block = { type };
+    if (type === "text") {
+      const el = document.createElement("div");
+      el.className = "block-text";
+      state.container.appendChild(el);
+      block.el = el;
+    } else if (type === "thinking") {
+      const det = document.createElement("details");
+      det.className = "thinking-block";
+      const sum = document.createElement("summary");
+      sum.textContent = "Thinking";
+      det.appendChild(sum);
+      const pre = document.createElement("pre");
+      det.appendChild(pre);
+      state.container.appendChild(det);
+      block.el = pre;
+    } else if (type === "tool_use") {
+      const det = document.createElement("details");
+      det.className = "tool-call";
+      const sum = document.createElement("summary");
+      const name = document.createElement("span");
+      name.className = "tool-name";
+      name.textContent = "🔧 " + (extra.name || "tool");
+      sum.appendChild(name);
+      if (extra.id) {
+        const idEl = document.createElement("span");
+        idEl.className = "tool-id";
+        idEl.textContent = extra.id;
+        sum.appendChild(idEl);
+      }
+      det.appendChild(sum);
+      const label = document.createElement("span");
+      label.className = "tool-label";
+      label.textContent = "input";
+      det.appendChild(label);
+      const pre = document.createElement("pre");
+      det.appendChild(pre);
+      state.container.appendChild(det);
+      block.el = pre;
+      // Streamed input JSON fragments accumulate here until the block stops.
+      block.jsonBuf = "";
+      // Input carried by the start event; used when no fragments arrive.
+      block.seedInput = extra.input;
+    }
+    state.blocks[index] = block;
+    return block;
+  }
+
+  // Apply one parsed stream event to the reply being rendered.
+  //
+  // Handles `content_block_start` (create the block's DOM), 
+  // `content_block_delta` (append text / thinking / tool-input fragments),
+  // `content_block_stop` (render a tool call's final input) and `error`
+  // (show the message in the transcript). Every other event type is ignored.
+  function applyEvent(state, ev) {
+    const t = ev.type;
+    if (t === "content_block_start") {
+      const cb = ev.content_block || {};
+      if (cb.type === "text") {
+        ensureBlock(state, ev.index, "text", {});
+      } else if (cb.type === "thinking") {
+        ensureBlock(state, ev.index, "thinking", {});
+      } else if (cb.type === "tool_use") {
+        ensureBlock(state, ev.index, "tool_use",
+          { name: cb.name, id: cb.id, input: cb.input });
+      }
+    } else if (t === "content_block_delta") {
+      const d = ev.delta || {};
+      const b = state.blocks[ev.index];
+      // Deltas for a block that was never started are dropped.
+      if (!b) return;
+      if (d.type === "text_delta") {
+        b.el.textContent += d.text || "";
+        // Only plain text is accumulated for the history sent on later turns.
+        state.assistantText += d.text || "";
+        scrollDown();
+      } else if (d.type === "thinking_delta") {
+        b.el.textContent += d.thinking || "";
+        scrollDown();
+      } else if (d.type === "input_json_delta") {
+        // Fragments are not valid JSON on their own; buffer until stop.
+        b.jsonBuf += d.partial_json || "";
+      }
+    } else if (t === "content_block_stop") {
+      const b = state.blocks[ev.index];
+      if (!b) return;
+      if (b.type === "tool_use") {
+        // Preference order: streamed JSON (raw text if it does not parse),
+        // then the input from the start event, then a "(no input)" marker.
+        let input = null;
+        if (b.jsonBuf && b.jsonBuf.trim()) {
+          try { input = JSON.parse(b.jsonBuf); }
+          catch { input = b.jsonBuf; }
+        } else if (b.seedInput !== undefined && b.seedInput !== null) {
+          input = b.seedInput;
+        }
+        b.el.textContent = input === null
+          ? "(no input)"
+          : (typeof input === "string"
+            ? input
+            : JSON.stringify(input, null, 2));
+      }
+    } else if (t === "error") {
+      const msg = (ev.error && ev.error.message) || "stream error";
+      appendError(msg);
+    }
+  }
+
+  async function send() {
+    const text = textEl.value.trim();
+    if (!text) return;
+    const model = agentSelect.value;
+    if (!model) { appendError("no agent selected"); return; }
+
+    apiMessages.push({ role: "user", content: text });
+    appendUser(text);
+    textEl.value = "";
+    sendBtn.disabled = true;
+
+    const container = appendAssistant();
+    const state = { container, blocks: {}, assistantText: "" };
+
+    let resp;
+    try {
+      resp = await fetch("/chat/send", {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          "X-CSRF-Token": CSRF,
+        },
+        body: JSON.stringify({ model, messages: apiMessages }),
+      });
+    } catch (e) {
+      appendError("network error: " + e.message);
+      sendBtn.disabled = false;
+      return;
+    }
+    if (!resp.ok || !resp.body) {
+      let msg = resp.status + " " + resp.statusText;
+      try { const body = await resp.text(); if (body) msg = body; } catch {}
+      appendError(msg);
+      sendBtn.disabled = false;
+      return;
+    }
+
+    const reader = resp.body.getReader();
+    const dec = new TextDecoder();
+    let buf = "";
+    while (true) {
+      const { value, done } = await reader.read();
+      if (done) break;
+      buf += dec.decode(value, { stream: true });
+      let i;
+      while ((i = buf.indexOf("\n\n")) >= 0) {
+        const raw = buf.slice(0, i);
+        buf = buf.slice(i + 2);
+        if (!raw) continue;
+        let dataLine = "";
+        for (const line of raw.split("\n")) {
+          if (line.startsWith("data:")) dataLine += line.slice(5).trimStart();
+        }
+        if (!dataLine) continue;
+        let payload;
+        try { payload = JSON.parse(dataLine); } catch { continue; }
+        applyEvent(state, payload);
+      }
+    }
+
+    if (state.assistantText) {
+      apiMessages.push({ role: "assistant", content: state.assistantText });
+    }
+    sendBtn.disabled = false;
+    textEl.focus();
+  }
+
+  form.addEventListener("submit", (e) => { e.preventDefault(); send(); });
+  textEl.addEventListener("keydown", (e) => {
+    if ((e.metaKey || e.ctrlKey) && e.key === "Enter") {
+      e.preventDefault();
+      send();
+    }
+  });
+})();
+</script>
+{% endblock %}
@@ -0,0 +1,430 @@
+{% extends "_layout.html" %}
+{% set active = "dashboard" %}
+{% block title %}beaver-gateway · dashboard{% endblock %}
+{% block content %}
+  <h2>Agents</h2>
+  <div class="card">
+    {% if agents %}
+    <table>
+      <thead><tr><th>Name</th><th>Type</th><th>Model</th><th>Exposed MCPs</th></tr></thead>
+      <tbody>
+        {% for a in agents %}
+        <tr>
+          <td><code>{{ a.name }}</code></td>
+          <td><span class="pill">{{ a.__class__.__name__ }}</span></td>
+          <td><code>{{ a.model }}</code></td>
+          <td>
+            {% for em in a.expose_mcps %}<code>{{ em.name }}</code>{% if not loop.last %}, {% endif %}{% else %}<span class="muted">—</span>{% endfor %}
+          </td>
+        </tr>
+        {% endfor %}
+      </tbody>
+    </table>
+    {% else %}
+      <p class="muted">No agents configured.</p>
+    {% endif %}
+  </div>
+
+  <h2>MCP namespaces</h2>
+  <div class="card">
+    {% if mcps %}
+    <table>
+      <thead><tr><th>Name</th><th>Kind</th></tr></thead>
+      <tbody>
+        {% for m in mcps %}
+        <tr>
+          <td><code>{{ m.name }}</code></td>
+          <td><span class="pill">{{ m.kind }}</span></td>
+        </tr>
+        {% endfor %}
+      </tbody>
+    </table>
+    {% else %}
+      <p class="muted">No MCP servers configured.</p>
+    {% endif %}
+  </div>
+
+  <h2>Endpoints</h2>
+  <div class="card endpoints">
+    {% if endpoints.agents or endpoints.mcps %}
+    <div class="ep-controls">
+      <label class="ep-token">
+        <span>Token (hint)</span>
+        <select id="ep-token-select">
+          <option value="" data-scope="">— select a known token —</option>
+          {% for t in tokens %}
+          <option value="{{ t.name }}" data-scope="{{ t.scope }}">{{ t.name }} · scope {{ t.scope }}</option>
+          {% endfor %}
+        </select>
+      </label>
+      <label class="ep-secret">
+        <span>Bearer secret</span>
+        <input type="password" id="ep-token-secret" autocomplete="off" spellcheck="false"
+               placeholder="paste plaintext (we only store the Argon2 hash)">
+      </label>
+      <label class="ep-show">
+        <input type="checkbox" id="ep-show-secret"> <span>show</span>
+      </label>
+    </div>
+    <p class="muted ep-note">
+      Beaver stores only an Argon2 hash of each token, so the plaintext can't be reconstructed.
+      Paste the value you saved at creation; if you've lost it, <a href="/tokens">mint a new one</a>.
+      Below: pick a token to see which endpoints its scope covers, paste the secret to fill it
+      into URL / curl, then click Copy.
+    </p>
+
+    {% if endpoints.agents %}
+    <h3 class="ep-h3">Agents — <code>POST /v1/messages</code></h3>
+    <table class="ep-table" data-required-scope="messages">
+      <thead><tr><th>Agent</th><th>Model</th><th>URL</th><th class="ep-actions-th"></th></tr></thead>
+      <tbody>
+        {% for ep in endpoints.agents %}
+        <tr class="ep-row" data-kind="messages" data-agent="{{ ep.agent }}" data-url="{{ ep.url }}">
+          <td>
+            <code>{{ ep.agent }}</code>
+            <span class="pill">{{ ep.agent_type }}</span>
+            <span class="ep-scope-warn" hidden>scope mismatch</span>
+          </td>
+          <td><code>{{ ep.model }}</code></td>
+          <td><code class="ep-url">{{ ep.url }}</code></td>
+          <td class="ep-actions">
+            <button type="button" data-action="copy-url">Copy URL</button>
+            <button type="button" data-action="copy-curl">Copy curl</button>
+            <button type="button" data-action="toggle-curl" aria-expanded="false">▸ curl</button>
+          </td>
+        </tr>
+        <tr class="ep-curl-row" hidden><td colspan="4"><pre class="ep-curl"></pre></td></tr>
+        {% endfor %}
+      </tbody>
+    </table>
+    {% elif endpoints.anthropic_base is none and agents %}
+    <p class="muted">
+      Agents are declared but no <code>AnthropicMessagesFrontend</code> is configured —
+      add one to <code>Gateway(frontends=[...])</code> to expose them over HTTP.
+    </p>
+    {% endif %}
+
+    {% if endpoints.mcps %}
+    <h3 class="ep-h3">MCP — streamable HTTP</h3>
+    <table class="ep-table" data-required-scope="mcp">
+      <thead><tr><th>Namespace</th><th>Kind</th><th>URL</th><th class="ep-actions-th"></th></tr></thead>
+      <tbody>
+        {% for ep in endpoints.mcps %}
+        <tr class="ep-row" data-kind="mcp" data-namespace="{{ ep.namespace }}" data-url="{{ ep.url }}">
+          <td>
+            <code>{{ ep.namespace }}</code>
+            <span class="ep-scope-warn" hidden>scope mismatch</span>
+          </td>
+          <td><span class="pill">{{ ep.kind }}</span></td>
+          <td><code class="ep-url">{{ ep.url }}</code></td>
+          <td class="ep-actions">
+            <button type="button" data-action="copy-url">Copy URL</button>
+            <button type="button" data-action="copy-url-token">Copy URL+token</button>
+            <button type="button" data-action="copy-curl">Copy curl</button>
+            <button type="button" data-action="toggle-curl" aria-expanded="false">▸ curl</button>
+          </td>
+        </tr>
+        <tr class="ep-curl-row" hidden><td colspan="4"><pre class="ep-curl"></pre></td></tr>
+        {% endfor %}
+      </tbody>
+    </table>
+    {% elif endpoints.mcp_base is none and mcps %}
+    <p class="muted">
+      MCP servers are declared but no <code>McpServerFrontend</code> is configured —
+      add one to <code>Gateway(frontends=[...])</code> to expose them over HTTP.
+    </p>
+    {% endif %}
+    {% else %}
+    <p class="muted">
+      Nothing exposed yet — declare agents / MCPs and the matching frontends
+      (<code>AnthropicMessagesFrontend</code>, <code>McpServerFrontend</code>) in your config.
+    </p>
+    {% endif %}
+  </div>
+
+  <h2>Recent activity</h2>
+  <div class="card">
+    {% if audit %}
+    <table>
+      <thead><tr><th>Time</th><th>Actor</th><th>Kind</th><th>Agent</th><th>Detail</th></tr></thead>
+      <tbody>
+        {% for row in audit %}
+        <tr>
+          <td><code>{{ row.ts | fmt_dt }}</code></td>
+          <td>{{ row.actor }}</td>
+          <td><span class="pill">{{ row.kind }}</span></td>
+          <td>{{ row.agent_name or "—" }}</td>
+          <td><code>{{ row.detail_json | fmt_detail }}</code></td>
+        </tr>
+        {% endfor %}
+      </tbody>
+    </table>
+    <p class="muted" style="margin-top:0.85rem;"><a href="/audit">Full log →</a></p>
+    {% else %}
+      <p class="muted">Nothing logged yet.</p>
+    {% endif %}
+  </div>
+
+<style>
+  .endpoints .ep-controls {
+    display: grid;
+    grid-template-columns: minmax(220px, 1fr) 2fr auto;
+    gap: 0.85rem;
+    align-items: end;
+    margin-bottom: 0.5rem;
+  }
+  .endpoints .ep-controls label > span {
+    display: block;
+    font-size: 0.8em; color: var(--muted);
+    text-transform: uppercase; letter-spacing: 0.05em;
+    margin-bottom: 0.3rem;
+  }
+  .endpoints .ep-show {
+    display: flex; align-items: center; gap: 0.4rem;
+    padding-bottom: 0.6rem; color: var(--muted); font-size: 0.9em;
+  }
+  .endpoints .ep-note {
+    font-size: 0.85em; margin: 0.25rem 0 1.2rem;
+  }
+  .endpoints .ep-h3 {
+    font-size: 0.95rem; font-weight: 600;
+    margin: 1.5rem 0 0.55rem;
+  }
+  .endpoints .ep-table { table-layout: auto; }
+  .endpoints .ep-table th.ep-actions-th { width: 1%; }
+  .endpoints .ep-url {
+    word-break: break-all;
+  }
+  .endpoints .ep-actions {
+    white-space: nowrap;
+    text-align: right;
+  }
+  .endpoints .ep-actions button {
+    padding: 0.32rem 0.7rem;
+    font-size: 0.85em;
+    margin-left: 0.3rem;
+  }
+  .endpoints .ep-scope-warn {
+    display: inline-block;
+    margin-left: 0.5rem;
+    padding: 0.05rem 0.45rem;
+    background: #fff7e6;
+    border: 1px solid #fadf9c;
+    color: #8a5a00;
+    border-radius: 999px;
+    font-size: 0.72em;
+  }
+  .endpoints .ep-row.scope-mismatch td:not(.ep-actions) { opacity: 0.55; }
+  .endpoints .ep-row.scope-mismatch .ep-actions button { opacity: 0.7; }
+  .endpoints .ep-curl-row td { padding-top: 0; padding-bottom: 0; }
+  .endpoints .ep-curl {
+    margin: 0.4rem 0 1rem;
+    font-size: 0.82em;
+    max-height: 280px;
+  }
+  .endpoints button[data-copied="1"] {
+    background: #ecfdf3 !important;
+    border-color: #b6e6cb !important;
+    color: #027a48;
+  }
+  @media (max-width: 760px) {
+    .endpoints .ep-controls { grid-template-columns: 1fr; }
+    .endpoints .ep-actions { text-align: left; padding-top: 0.4rem; }
+  }
+</style>
+
+<script>
+(() => {
+  const card = document.querySelector(".endpoints");
+  if (!card) return;
+  const sel = card.querySelector("#ep-token-select");
+  const secret = card.querySelector("#ep-token-secret");
+  const showCb = card.querySelector("#ep-show-secret");
+  if (!sel || !secret || !showCb) return;
+
+  const PLACEHOLDER = "<YOUR_TOKEN>";
+
+  function currentToken() {
+    const v = secret.value;
+    return v && v.length > 0 ? v : null;
+  }
+
+  function selectedScope() {
+    const o = sel.options[sel.selectedIndex];
+    return o ? (o.getAttribute("data-scope") || "") : "";
+  }
+
+  // Decide whether a token scope `have` grants access to a table whose
+  // required scope is `need`. An empty `have` (no token picked) and the
+  // wildcard "*" cover everything; any other scope must match exactly.
+  function scopeCovers(have, need) {
+    if (!have) return true;          // no token selected — don't grey anything out
+    if (have === "*") return true;
+    return have === need;
+  }
+
+  function escSh(s) {
+    // single-quote shell escape: close, escape, reopen.
+    return "'" + String(s).replace(/'/g, "'\\''") + "'";
+  }
+
+  function buildCurl(row) {
+    const kind = row.getAttribute("data-kind");
+    const url = row.getAttribute("data-url");
+    const tok = currentToken() || PLACEHOLDER;
+    if (kind === "messages") {
+      const agent = row.getAttribute("data-agent");
+      const body = JSON.stringify({
+        model: agent,
+        messages: [{ role: "user", content: "hello" }],
+        stream: true,
+      });
+      return [
+        "curl -N \\",
+        "  -H " + escSh("x-api-key: " + tok) + " \\",
+        "  -H 'content-type: application/json' \\",
+        "  -d " + escSh(body) + " \\",
+        "  " + url,
+      ].join("\n");
+    }
+    if (kind === "mcp") {
+      // Streamable-HTTP MCP wants `initialize` first — the response
+      // carries the `Mcp-Session-Id` header you must echo back on
+      // every subsequent call (tools/list, tools/call, ...). We use
+      // `-i` so the session-id is visible in the response, and ship
+      // the proper handshake body so a fresh paste actually works.
+      const body = JSON.stringify({
+        jsonrpc: "2.0",
+        id: 1,
+        method: "initialize",
+        params: {
+          protocolVersion: "2024-11-05",
+          capabilities: {},
+          clientInfo: { name: "beaver-admin-curl", version: "0" },
+        },
+      });
+      return [
+        "# 1) initialize — grab Mcp-Session-Id from the response headers",
+        "curl -N -i \\",
+        "  -H " + escSh("Authorization: Bearer " + tok) + " \\",
+        "  -H 'content-type: application/json' \\",
+        "  -H 'accept: application/json, text/event-stream' \\",
+        "  -d " + escSh(body) + " \\",
+        "  " + url,
+        "",
+        "# 2) reuse that id on follow-up calls, e.g. tools/list:",
+        "# curl -N \\",
+        "#   -H " + escSh("Authorization: Bearer " + tok) + " \\",
+        "#   -H 'Mcp-Session-Id: <paste-from-step-1>' \\",
+        "#   -H 'content-type: application/json' \\",
+        "#   -H 'accept: application/json, text/event-stream' \\",
+        "#   -d '{\"jsonrpc\":\"2.0\",\"id\":2,\"method\":\"tools/list\"}' \\",
+        "#   " + url,
+      ].join("\n");
+    }
+    return "";
+  }
+
+  // Re-evaluate every endpoint table against the scope of the token chosen
+  // in the dropdown. Rows in a table the scope does not cover get the
+  // `scope-mismatch` class and their "scope mismatch" badge un-hidden; rows
+  // in covered tables have both cleared.
+  function refreshScopeMarks() {
+    const have = selectedScope();
+    card.querySelectorAll(".ep-table").forEach((tbl) => {
+      // The required scope is declared once per table, not per row.
+      const need = tbl.getAttribute("data-required-scope") || "";
+      const ok = scopeCovers(have, need);
+      tbl.querySelectorAll("tr.ep-row").forEach((row) => {
+        const warn = row.querySelector(".ep-scope-warn");
+        if (!ok) {
+          row.classList.add("scope-mismatch");
+          if (warn) warn.hidden = false;
+        } else {
+          row.classList.remove("scope-mismatch");
+          if (warn) warn.hidden = true;
+        }
+      });
+    });
+  }
+
+  // Regenerate the curl text of every currently expanded curl row; collapsed
+  // rows are skipped (they are rebuilt when toggled open).
+  function refreshOpenCurls() {
+    card.querySelectorAll("tr.ep-curl-row").forEach((cr) => {
+      if (cr.hidden) return;
+      // Each curl row directly follows the endpoint row it belongs to.
+      const row = cr.previousElementSibling;
+      const pre = cr.querySelector(".ep-curl");
+      if (row && pre) pre.textContent = buildCurl(row);
+    });
+  }
+
+  function copyText(btn, text) {
+    const done = () => {
+      btn.setAttribute("data-copied", "1");
+      const prev = btn.textContent;
+      btn.textContent = "✓ copied";
+      setTimeout(() => {
+        btn.removeAttribute("data-copied");
+        btn.textContent = prev;
+      }, 1200);
+    };
+    if (navigator.clipboard && navigator.clipboard.writeText) {
+      navigator.clipboard.writeText(text).then(done, () => {
+        // Fallback for non-secure contexts.
+        fallbackCopy(text);
+        done();
+      });
+    } else {
+      fallbackCopy(text);
+      done();
+    }
+  }
+  function fallbackCopy(text) {
+    const ta = document.createElement("textarea");
+    ta.value = text;
+    ta.style.position = "fixed"; ta.style.opacity = "0";
+    document.body.appendChild(ta);
+    ta.select();
+    try { document.execCommand("copy"); } catch {}
+    document.body.removeChild(ta);
+  }
+
+  sel.addEventListener("change", () => {
+    const name = sel.value;
+    secret.placeholder = name
+      ? "paste plaintext for '" + name + "' (we only store the hash)"
+      : "paste plaintext (we only store the Argon2 hash)";
+    refreshScopeMarks();
+  });
+  secret.addEventListener("input", refreshOpenCurls);
+  showCb.addEventListener("change", () => {
+    secret.type = showCb.checked ? "text" : "password";
+  });
+
+  card.addEventListener("click", (ev) => {
+    const btn = ev.target.closest("button[data-action]");
+    if (!btn) return;
+    const row = btn.closest("tr.ep-row");
+    if (!row) return;
+    const action = btn.getAttribute("data-action");
+    const url = row.getAttribute("data-url");
+    const tok = currentToken() || PLACEHOLDER;
+    if (action === "copy-url") {
+      copyText(btn, url);
+    } else if (action === "copy-url-token") {
+      const sep = url.indexOf("?") >= 0 ? "&" : "?";
+      copyText(btn, url + sep + "token=" + encodeURIComponent(tok));
+    } else if (action === "copy-curl") {
+      copyText(btn, buildCurl(row));
+    } else if (action === "toggle-curl") {
+      const curlRow = row.nextElementSibling;
+      if (!curlRow || !curlRow.classList.contains("ep-curl-row")) return;
+      const pre = curlRow.querySelector(".ep-curl");
+      if (curlRow.hidden) {
+        if (pre) pre.textContent = buildCurl(row);
+        curlRow.hidden = false;
+        btn.textContent = "▾ curl";
+        btn.setAttribute("aria-expanded", "true");
+      } else {
+        curlRow.hidden = true;
+        btn.textContent = "▸ curl";
+        btn.setAttribute("aria-expanded", "false");
+      }
+    }
+  });
+
+  refreshScopeMarks();
+})();
+</script>
+{% endblock %}
@@ -0,0 +1,83 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>beaver-gateway · log in</title>
+  <style>
+    html, body { margin: 0; padding: 0; height: 100%; background: #fbfbfd; color: #1d1d1f; }
+    body {
+      font-family: -apple-system, "SF Pro Display", system-ui, sans-serif;
+      display: flex;
+      align-items: center;
+      justify-content: center;
+      min-height: 100vh;
+    }
+    .card {
+      background: #fff;
+      border: 1px solid #e5e5ea;
+      border-radius: 16px;
+      padding: 2rem 2.25rem;
+      width: min(360px, 92vw);
+      box-shadow: 0 10px 24px rgba(0,0,0,0.04);
+    }
+    h1 {
+      margin: 0 0 0.25rem;
+      font-size: 1.25rem;
+      font-weight: 600;
+      letter-spacing: -0.02em;
+    }
+    p.muted { color: #6e6e73; margin: 0 0 1.4rem; font-size: 0.92em; }
+    label { display: block; margin: 0.85rem 0 0.3rem; font-size: 0.82em; color: #6e6e73; }
+    input {
+      width: 100%;
+      padding: 0.6rem 0.75rem;
+      font-family: inherit;
+      font-size: 0.95em;
+      border: 1px solid #e5e5ea;
+      border-radius: 8px;
+      background: #fff;
+      box-sizing: border-box;
+    }
+    input:focus { outline: 2px solid #0071e3; outline-offset: 1px; }
+    button {
+      width: 100%;
+      margin-top: 1.25rem;
+      padding: 0.65rem;
+      font-family: inherit;
+      font-size: 0.95em;
+      background: #0071e3;
+      border: 1px solid #0071e3;
+      color: white;
+      border-radius: 8px;
+      cursor: pointer;
+    }
+    button:hover { background: #005bb5; }
+    .error {
+      background: #fff0f0;
+      border: 1px solid #f5c2c5;
+      color: #d70015;
+      padding: 0.65rem 0.85rem;
+      border-radius: 8px;
+      font-size: 0.9em;
+      margin-bottom: 1rem;
+    }
+  </style>
+</head>
+<body>
+  <div class="card">
+    <h1>beaver-gateway</h1>
+    <p class="muted">Sign in to manage tokens and view audit logs.</p>
+    {% if error %}
+      <div class="error">{{ error }}</div>
+    {% endif %}
+    <form method="post" action="/login">
+      <label for="username">Username</label>
+      <input id="username" name="username" type="text" autocomplete="username" required autofocus>
+      <label for="password">Password</label>
+      <input id="password" name="password" type="password" autocomplete="current-password" required>
+      <button type="submit">Sign in</button>
+    </form>
+  </div>
+</body>
+</html>
@@ -0,0 +1,64 @@
+{% extends "_layout.html" %}
+{% set active = "tokens" %}
+{% block title %}beaver-gateway · tokens{% endblock %}
+{% block content %}
+  <h2>Create token</h2>
+  <div class="card">
+    <p class="muted">Plaintext is shown <strong>once</strong>, immediately after creation. Copy it before you navigate away — the database only ever holds the Argon2 hash.</p>
+    <div id="token-create-result"></div>
+    <form
+      hx-post="/tokens"
+      hx-target="#token-create-result"
+      hx-swap="innerHTML"
+      hx-on::after-request="if(event.detail.successful){this.reset();}"
+    >
+      <input type="hidden" name="csrf_token" value="{{ csrf }}">
+      <div class="form-grid">
+        <div>
+          <label for="name">Name</label>
+          <input id="name" name="name" type="text" placeholder="cursor / claude-desktop / mobile …" required>
+        </div>
+        <div>
+          <label for="scope">Scope</label>
+          <select id="scope" name="scope">
+            {% for s in scopes %}
+              <option value="{{ s }}" {% if s == "*" %}selected{% endif %}>{{ s }}</option>
+            {% endfor %}
+          </select>
+        </div>
+        <div>
+          <button class="primary" type="submit">Create</button>
+        </div>
+      </div>
+    </form>
+  </div>
+
+  <h2>
+    Tokens
+    <span class="muted" style="font-size:0.8em; font-weight:400; margin-left:0.5rem;">
+      {% if include_revoked %}
+        <a href="/tokens">Hide revoked</a>
+      {% else %}
+        <a href="/tokens?include_revoked=1">Show revoked</a>
+      {% endif %}
+    </span>
+  </h2>
+  <div class="card">
+    <table>
+      <thead>
+        <tr>
+          <th>Name</th><th>Scope</th><th>Created</th><th>Last used</th><th>Revoked</th><th></th>
+        </tr>
+      </thead>
+      {# Render the tbody unconditionally so the HTMX OOB swap on
+         create has a target even when the table starts empty. #}
+      <tbody id="tokens-rows">
+        {% for token in tokens %}
+          {% include "_token_row.html" %}
+        {% else %}
+          <tr id="tokens-empty"><td colspan="6" class="muted">No tokens yet. Create one above.</td></tr>
+        {% endfor %}
+      </tbody>
+    </table>
+  </div>
+{% endblock %}
@@ -40,6 +40,7 @@ from anthropic.types import (
 from fastapi import FastAPI, HTTPException, Request, status
 from fastapi.responses import JSONResponse, StreamingResponse

+from beaver_gateway.core import audit
 from beaver_gateway.frontends.base import Frontend

 if TYPE_CHECKING:
@@ -58,9 +59,24 @@ __all__ = ["AnthropicMessagesFrontend"]
 class AnthropicMessagesFrontend(Frontend):
    """FastAPI app behind ``POST /v1/messages`` + ``GET /v1/models``."""

-    def __init__(self, *, host: str = "0.0.0.0", port: int = 8000) -> None:  # noqa: S104
+    def __init__(
+        self,
+        *,
+        host: str = "0.0.0.0",  # noqa: S104
+        port: int = 8000,
+        public_base_url: str | None = None,
+    ) -> None:
        self.host = host
        self.port = port
+        # External URL prefix the reverse proxy (Caddy/nginx/Cloudflare/…)
+        # uses to reach this frontend, e.g. ``https://api.example.com/ai``.
+        # The frontend's internal paths (``/v1/messages``, ``/v1/models``)
+        # are appended to it when the admin dashboard renders copy-pastable
+        # URLs. Trailing slashes are stripped so appending a path never
+        # doubles the ``/``. ``None`` means "advertise raw ``host:port``" (dev /
+        # no proxy) — the dashboard then derives the base from the
+        # browser's own request hostname.
+        self.public_base_url = public_base_url.rstrip("/") if public_base_url else None
        self._runtime: GatewayRuntime | None = None
        self._app: FastAPI | None = None

@@ -93,7 +109,7 @@ class AnthropicMessagesFrontend(Frontend):

        @app.get("/v1/models")
        async def list_models(request: Request) -> dict[str, Any]:
-            _require_token(request, runtime)
+            await _require_token(request, runtime, scope="messages")
            data = [
                {
                    "type": "model",
@@ -107,7 +123,7 @@ class AnthropicMessagesFrontend(Frontend):

        @app.post("/v1/messages")
        async def create_message(request: Request) -> Any:
-            token_name = _require_token(request, runtime)
+            token_name = await _require_token(request, runtime, scope="messages")
            try:
                body = await request.json()
            except json.JSONDecodeError as exc:
@@ -148,6 +164,18 @@ class AnthropicMessagesFrontend(Frontend):
                stream_flag,
                len(messages),
            )
+            # Record at request acceptance, not at stream completion:
+            # a long streaming response can be aborted mid-flight by
+            # the client, and we still want the row in the audit trail.
+            # Detail stays small — no message bodies, no system prompt.
+            await audit.log(
+                runtime,
+                actor=f"token:{token_name}",
+                kind="messages",
+                agent_name=agent.name,
+                stream=stream_flag,
+                msgs=len(messages),
+            )

            # Forward per-request knobs the Anthropic body may carry —
            # backend adapters layer these over per-agent defaults. Only
@@ -166,37 +194,45 @@ class AnthropicMessagesFrontend(Frontend):
            )

            if stream_flag:
-                return StreamingResponse(
-                    _sse(events), media_type="text/event-stream"
-                )
+                return StreamingResponse(_sse(events), media_type="text/event-stream")
            message = await _accumulate(events, model=model)
            return JSONResponse(content=message.model_dump(mode="json"))

        return app


-def _require_token(request: Request, runtime: GatewayRuntime) -> str:
-    """Verify the request's bearer and return the token's audit name.
+async def _require_token(
+    request: Request, runtime: GatewayRuntime, *, scope: str
+) -> str:
+    """Verify the request's bearer + scope, return the token's audit name.

    Accepts both ``X-Api-Key: <token>`` (what the official Anthropic
    SDK sends — LibreChat, the CLI, third-party clients) and
-    ``Authorization: Bearer <token>`` (curl, Cursor). Raises 401 on
-    miss. ``TokenStore`` doesn't know about HTTP, so response shape
-    is owned here.
+    ``Authorization: Bearer <token>`` (curl, Cursor). 401 on missing /
+    unknown token; 403 on a known token whose scope doesn't cover
+    ``scope`` (Phase 4.3 — bootstrap tokens get ``"*"`` and pass
+    everything).
    """
    api_key = request.headers.get("x-api-key")
-    name = (
-        runtime.token_store.verify(api_key)
+    identity = (
+        await runtime.token_store.verify(api_key)
        if api_key
-        else runtime.token_store.verify_bearer(request.headers.get("authorization"))
+        else await runtime.token_store.verify_bearer(
+            request.headers.get("authorization")
        )
-    if name is None:
+    )
+    if identity is None:
        raise HTTPException(
            status.HTTP_401_UNAUTHORIZED,
            "invalid or missing bearer token",
            headers={"WWW-Authenticate": "Bearer"},
        )
-    return name
+    if not identity.allows(scope):
+        raise HTTPException(
+            status.HTTP_403_FORBIDDEN,
+            f"token scope {identity.scope!r} does not cover {scope!r}",
+        )
+    return identity.name


 async def _sse(events: AsyncIterator[MessageStreamEvent]) -> AsyncIterator[bytes]:
@@ -15,11 +15,12 @@ from dataclasses import dataclass, field
 from typing import TYPE_CHECKING

 if TYPE_CHECKING:
-    from collections.abc import Mapping
+    from collections.abc import Mapping, Sequence

    from beaver_gateway.backends.base import Backend
    from beaver_gateway.core.auth import TokenStore
    from beaver_gateway.core.registry import AgentRegistry, McpRegistry
+    from beaver_gateway.storage import Database


@dataclass(frozen=True, slots=True)
@@ -35,13 +36,32 @@ class GatewayRuntime:
    declared ``McpServer`` so ``ClaudeCodeBackendAdapter`` (Phase 2.2)
    can pass them to ``BackendOptions.mcp_servers`` without re-running
    discovery.
+
+    ``db`` (Phase 4.1) is the shared :class:`Database` handle. Phase 4.2
+    will switch ``TokenStore`` to read from it; Phase 4.3 admin/audit
+    write through it. Phase 4.1 only attaches it — existing frontends
+    ignore it.
    """

    agents: AgentRegistry
    mcps: McpRegistry
    backends: dict[str, Backend]
    token_store: TokenStore
+    db: Database
    mcp_internal_urls: Mapping[str, str] = field(default_factory=dict)
+    # Phase 4.3 — AdminFrontend reads creds + cookie-signing key from
+    # the runtime so the user's ``config.py`` doesn't have to know
+    # anything about env wiring. Defaulted to empty so existing tests /
+    # call sites that don't touch the admin path keep building; the
+    # admin frontend ``configure()`` itself rejects empty values.
+    admin_user: str = ""
+    admin_pass: str = ""
+    session_secret: str = ""
+    # The full sibling-frontends list, in declaration order. AdminFrontend
+    # uses it to advertise concrete bearer-endpoint URLs (host/port) on
+    # the dashboard so the operator can copy ready-to-use links / curl
+    # snippets. Other frontends ignore it.
+    frontends: Sequence[Frontend] = field(default_factory=tuple)


 class Frontend(ABC):
@@ -1,11 +1,18 @@
 """External MCP frontend (Phase 3.1).

 A streamable-HTTP gateway in front of the internal MCP aggregator
-(``beaver_gateway.mcp.internal_app``). The aggregator already hosts
-every declared ``McpServer`` (``python_tool``, stdio proxy, HTTP proxy)
+(``beaver_gateway.mcp.internal_app``). The aggregator hosts every
+declared ``McpServer`` (``python_tool``, stdio proxy, HTTP proxy)
 under ``/mcp/<name>`` plus a flat ``/mcp/all`` bundle on
-``127.0.0.1:INTERNAL_MCP_PORT``. This frontend re-exposes those URLs to
-external clients with three additions:
+``127.0.0.1:INTERNAL_MCP_PORT`` — that's the *internal* shape.
+
+This frontend re-exposes those namespaces on its own port directly at
+``/<name>/`` (no ``/mcp/`` prefix in the external routes — the port
+itself already disambiguates). A reverse proxy (Caddy / nginx /
+Cloudflare) in front typically mounts this port under a prefix and
+strips it when forwarding: ``domain.com/mcp/* → :8001/*``. The operator
+sets that in the proxy config and tells the admin dashboard via
+``public_base_url``. Three additions on top of the raw aggregator:

 * **Bearer auth** — ``Authorization: Bearer <token>``, ``X-Api-Key``,
  or ``?token=<…>`` query string. All three forms verify against the
@@ -39,6 +46,7 @@ from starlette.applications import Starlette
 from starlette.responses import HTMLResponse, JSONResponse, StreamingResponse
 from starlette.routing import Route

+from beaver_gateway.core import audit
 from beaver_gateway.frontends.base import Frontend
 from beaver_gateway.mcp.internal_app import ALL_NAMESPACE

@@ -86,9 +94,30 @@ __all__ = ["McpServerFrontend"]
 class McpServerFrontend(Frontend):
    """Auth + audit + reverse-proxy in front of the internal MCP aggregator."""

-    def __init__(self, *, host: str = "0.0.0.0", port: int = 8001) -> None:  # noqa: S104
+    def __init__(
+        self,
+        *,
+        host: str = "0.0.0.0",  # noqa: S104
+        port: int = 8001,
+        public_base_url: str | None = None,
+    ) -> None:
        self.host = host
        self.port = port
+        # External URL prefix the reverse proxy uses to reach this
+        # frontend. Internal routes mount namespaces at the port root
+        # (``/<ns>/`` and ``/all/``) — Caddy / nginx / Cloudflare in
+        # front decides what external prefix they sit under. Typical
+        # symmetric setup:
+        #
+        #     Caddy: handle_path /mcp/* { reverse_proxy localhost:8001 }
+        #     config: public_base_url -> https://api.example.com/mcp
+        #     dashboard advertises: https://api.example.com/mcp/<ns>/
+        #
+        # The frontend's ``/<ns>/`` segment gets appended verbatim. Set
+        # it to whatever matches your proxy. ``None`` means "advertise
+        # raw ``host:port`` derived from the inbound request" (dev /
+        # no proxy).
+        self.public_base_url = public_base_url.rstrip("/") if public_base_url else None
        self._runtime: GatewayRuntime | None = None
        self._app: Starlette | None = None
        # Single shared aiohttp session, opened once when uvicorn starts
@@ -126,7 +155,7 @@ class McpServerFrontend(Frontend):
                # Long sock_read because MCP tool calls can take a while
                # (a claude tool over HTTP can easily stretch beyond 30s
                # on a real tool).
-                timeout=aiohttp.ClientTimeout(total=None, sock_read=600),
+                timeout=aiohttp.ClientTimeout(total=None, sock_read=600)
            )
            try:
                yield
@@ -138,16 +167,21 @@ class McpServerFrontend(Frontend):
        routes = [
            Route("/", self._discovery, methods=["GET"]),
            Route("/healthz", self._healthz, methods=["GET"]),
-            # Two routes per namespace so both the trailing-slash and
-            # sub-path forms work (``/mcp/time`` AND ``/mcp/time/foo``).
-            # Starlette doesn't fold them into one route automatically.
+            # Namespaces mount at the root of this port — the port
+            # itself already disambiguates this from any other gateway
+            # surface. Two routes per namespace so both the
+            # trailing-slash and sub-path forms work (``/time`` AND
+            # ``/time/foo``); Starlette doesn't fold them into one
+            # route automatically. The literal routes above (``/``,
+            # ``/healthz``) are listed first and win the match, so
+            # they're not eaten by ``/{namespace}``.
            Route(
-                "/mcp/{namespace}",
+                "/{namespace}",
                self._proxy_endpoint,
                methods=["GET", "POST", "DELETE", "OPTIONS"],
            ),
            Route(
-                "/mcp/{namespace}/{path:path}",
+                "/{namespace}/{path:path}",
                self._proxy_endpoint,
                methods=["GET", "POST", "DELETE", "OPTIONS"],
            ),
@@ -159,17 +193,19 @@ class McpServerFrontend(Frontend):

    async def _discovery(self, request: Request) -> HTMLResponse | JSONResponse:
        runtime = self._require_runtime()
-        token_name = _verify_request(request, runtime)
-        if token_name is None:
-            return _unauthorized()
-        # Use the request's own scheme+host so the snippets work behind
-        # reverse proxies / tunnels. Falls back to the configured
-        # host:port if the client didn't send Host (curl --raw).
-        base = _external_base_url(request, self.host, self.port)
+        token_name, err = await _verify_request(request, runtime)
+        if err is not None:
+            return err
+        assert token_name is not None  # noqa: S101 — narrow for ty
+        # ``public_base_url`` wins if configured — it's the operator's
+        # explicit statement of "this is the URL my reverse proxy puts
+        # in front of me". Otherwise: use the request's own scheme+host
+        # so snippets work behind generic reverse proxies / tunnels;
+        # and fall back to the configured host:port if the client
+        # didn't send Host (curl --raw).
+        base = self.public_base_url or _external_base_url(request, self.host, self.port)
        html = _render_discovery_page(
-            base_url=base,
-            namespaces=list(runtime.mcps),
-            actor=token_name,
+            base_url=base, namespaces=list(runtime.mcps), actor=token_name
        )
        return HTMLResponse(html)

@@ -177,9 +213,10 @@ class McpServerFrontend(Frontend):
        self, request: Request
    ) -> StreamingResponse | JSONResponse:
        runtime = self._require_runtime()
-        token_name = _verify_request(request, runtime)
-        if token_name is None:
-            return _unauthorized()
+        token_name, err = await _verify_request(request, runtime)
+        if err is not None:
+            return err
+        assert token_name is not None  # noqa: S101 — narrow for ty

        namespace = request.path_params["namespace"]
        subpath = request.path_params.get("path", "")
@@ -190,16 +227,21 @@ class McpServerFrontend(Frontend):
                token_name,
                namespace,
            )
+            await audit.log(
+                runtime,
+                actor=f"token:{token_name}",
+                kind="mcp_call",
+                namespace=namespace,
+                method=request.method,
+                status=404,
+            )
            return JSONResponse(
-                {"error": "unknown namespace", "namespace": namespace},
-                status_code=404,
+                {"error": "unknown namespace", "namespace": namespace}, status_code=404
            )

        if self._http is None:
            # Lifespan hasn't run yet (shouldn't happen with uvicorn).
-            return JSONResponse(
-                {"error": "frontend not ready"}, status_code=503
-            )
+            return JSONResponse({"error": "frontend not ready"}, status_code=503)

        return await _reverse_proxy(
            client=self._http,
@@ -207,6 +249,7 @@ class McpServerFrontend(Frontend):
            upstream_url=upstream_url,
            namespace=namespace,
            actor=token_name,
+            runtime=runtime,
        )

    def _upstream_url(self, namespace: str, subpath: str) -> str | None:
@@ -233,23 +276,38 @@ class McpServerFrontend(Frontend):
        return self._runtime


-def _verify_request(request: Request, runtime: GatewayRuntime) -> str | None:
+_MCP_SCOPE = "mcp"
+
+
+async def _verify_request(
+    request: Request, runtime: GatewayRuntime
+) -> tuple[str | None, JSONResponse | None]:
    """Accept ``Authorization: Bearer``, ``X-Api-Key``, or ``?token=``.

    The third form is the escape hatch for clients that can only put
    secrets in the URL (claude.ai's MCP config historically did this).
-    All three roads end at the same :class:`TokenStore`.
+    All three roads end at the same :class:`TokenStore`. Returns
+    ``(actor_name, None)`` on success, ``(None, 401|403)`` otherwise
+    — the caller forwards the response as-is. Splitting auth vs scope
+    failures matters: 401 says "send me a token", 403 says "this token
+    is real but not for this endpoint".
    """
    api_key = request.headers.get("x-api-key")
    if api_key:
-        return runtime.token_store.verify(api_key)
+        identity = await runtime.token_store.verify(api_key)
+    else:
        auth_header = request.headers.get("authorization")
        if auth_header:
-        return runtime.token_store.verify_bearer(auth_header)
+            identity = await runtime.token_store.verify_bearer(auth_header)
+        else:
            qs_token = request.query_params.get("token")
-    if qs_token:
-        return runtime.token_store.verify(qs_token)
-    return None
+            identity = await runtime.token_store.verify(qs_token) if qs_token else None
+
+    if identity is None:
+        return None, _unauthorized()
+    if not identity.allows(_MCP_SCOPE):
+        return None, _forbidden(identity.scope, _MCP_SCOPE)
+    return identity.name, None


 def _unauthorized() -> JSONResponse:
@@ -260,6 +318,13 @@ def _unauthorized() -> JSONResponse:
    )


+def _forbidden(scope: str, required: str) -> JSONResponse:
+    return JSONResponse(
+        {"error": "insufficient scope", "scope": scope, "required": required},
+        status_code=403,
+    )
+
+
 def _join_subpath(base_url: str, subpath: str) -> str:
    """Concatenate the loopback URL with the proxied sub-path.

@@ -287,6 +352,7 @@ async def _reverse_proxy(
    upstream_url: str,
    namespace: str,
    actor: str,
+    runtime: GatewayRuntime,
 ) -> StreamingResponse | JSONResponse:
    """Bidirectionally stream an MCP request between client ↔ internal aggregator.

@@ -325,9 +391,16 @@ async def _reverse_proxy(
            namespace,
            exc,
        )
+        await audit.log(
+            runtime,
+            actor=f"token:{actor}",
+            kind="mcp_call",
+            namespace=namespace,
+            method=request.method,
+            status=502,
+        )
        return JSONResponse(
-            {"error": "upstream MCP unreachable", "detail": str(exc)},
-            status_code=502,
+            {"error": "upstream MCP unreachable", "detail": str(exc)}, status_code=502
        )

    _log.info(
@@ -338,6 +411,17 @@ async def _reverse_proxy(
        request.url.path,
        upstream_resp.status,
    )
+    # Audit at upstream-response time: status reflects the MCP call's
+    # outcome (200 / tool-error / 4xx). Streaming relay below may be
+    # cut short by the client, but the row is already in by then.
+    await audit.log(
+        runtime,
+        actor=f"token:{actor}",
+        kind="mcp_call",
+        namespace=namespace,
+        method=request.method,
+        status=upstream_resp.status,
+    )

    response_headers = _response_headers(upstream_resp.headers)

@@ -397,29 +481,31 @@ def _scrub_query(query: str, *, drop: frozenset[str] | set[str]) -> str:
    return urlencode(kept)


-def _render_discovery_page(
-    *, base_url: str, namespaces: list[Any], actor: str
-) -> str:
+def _render_discovery_page(*, base_url: str, namespaces: list[Any], actor: str) -> str:
    """Render the auth-gated namespace + config-snippet page.

    Inline HTML (no Jinja file) — keeps Phase 3 free of template-dir
    plumbing that Phase 4's AdminFrontend will own.
    """
    name_list = [getattr(ns, "name", str(ns)) for ns in namespaces]
-    rows = "\n".join(
+    rows = (
+        "\n".join(
            f"""        <tr>
          <td><code>{_escape(name)}</code></td>
-          <td><code>{_escape(f"{base_url}/mcp/{name}/")}</code></td>
+          <td><code>{_escape(f"{base_url}/{name}/")}</code></td>
        </tr>"""
            for name in name_list
-    ) or """        <tr><td colspan="2"><em>No MCP servers configured.</em></td></tr>"""
+        )
+        or """        \
+<tr><td colspan="2"><em>No MCP servers configured.</em></td></tr>"""
+    )
    cursor_snippet = _CURSOR_SNIPPET.format(base_url=base_url)
    claude_desktop_snippet = _CLAUDE_DESKTOP_SNIPPET.format(base_url=base_url)
    return _DISCOVERY_TEMPLATE.format(
        actor=_escape(actor),
        base_url=_escape(base_url),
        rows=rows,
-        all_url=_escape(f"{base_url}/mcp/{ALL_NAMESPACE}/"),
+        all_url=_escape(f"{base_url}/{ALL_NAMESPACE}/"),
        cursor_snippet=_escape(cursor_snippet),
        claude_desktop_snippet=_escape(claude_desktop_snippet),
    )
@@ -443,8 +529,7 @@ _DISCOVERY_TEMPLATE = "\n".join(  # noqa: FLY002 — readability beats one-strin
        "  <title>beaver-gateway · MCP discovery</title>",
        "  <style>",
        "    body {{",
-        "      font-family: -apple-system, 'SF Pro Display', system-ui,"
-        " sans-serif;",
+        "      font-family: -apple-system, 'SF Pro Display', system-ui, sans-serif;",
        "      max-width: 880px;",
        "      margin: 3rem auto;",
        "      padding: 0 1.25rem;",
@@ -502,7 +587,7 @@ _DISCOVERY_TEMPLATE = "\n".join(  # noqa: FLY002 — readability beats one-strin
 _CURSOR_SNIPPET = """{{
  "mcpServers": {{
    "beaver-time": {{
-      "url": "{base_url}/mcp/time/",
+      "url": "{base_url}/time/",
      "headers": {{ "Authorization": "Bearer <YOUR_TOKEN>" }}
    }}
  }}
@@ -513,7 +598,7 @@ _CLAUDE_DESKTOP_SNIPPET = """{{
  "mcpServers": {{
    "beaver-time": {{
      "type": "http",
-      "url": "{base_url}/mcp/time/",
+      "url": "{base_url}/time/",
      "headers": {{ "Authorization": "Bearer <YOUR_TOKEN>" }}
    }}
  }}
@@ -0,0 +1,40 @@
+"""SQLModel-backed persistence (Phase 4.1).
+
+The storage layer carries three tables — :class:`Token`, :class:`Session`,
+:class:`AuditLog` — and a thin :class:`Database` wrapper around an async
+SQLAlchemy engine. Phase 4.2 (auth migration) and Phase 4.3 (admin UI)
+build on this; Phase 4.1 itself only schemas the data and exposes the
+``Database`` on :class:`GatewayRuntime` so later phases can reach it.
+"""
+
+from beaver_gateway.storage.db import (
+    Database,
+    append_audit,
+    close_session,
+    create_token,
+    list_active_tokens,
+    list_audit_records,
+    list_tokens,
+    revoke_token,
+    touch_session,
+    touch_token,
+    upsert_session,
+)
+from beaver_gateway.storage.models import AuditLog, Session, Token
+
+__all__ = [
+    "AuditLog",
+    "Database",
+    "Session",
+    "Token",
+    "append_audit",
+    "close_session",
+    "create_token",
+    "list_active_tokens",
+    "list_audit_records",
+    "list_tokens",
+    "revoke_token",
+    "touch_session",
+    "touch_token",
+    "upsert_session",
+]
@@ -0,0 +1,237 @@
+"""Async ``Database`` wrapper + the bare-minimum CRUD helpers.
+
+Async to match the rest of the stack (aiohttp, uvicorn, claude-code-api).
+psycopg3 has native async support — ``postgresql+psycopg://...`` works
+with ``create_async_engine`` directly. SQLite goes through ``aiosqlite``
+(``sqlite+aiosqlite://...``); user-facing config still uses the plain
+``sqlite:///`` form and we normalise the URL here, so nothing leaks into
+``.env`` / docker-compose.
+
+No repository layer (PLAN §4.1 explicitly waives it). Helpers take an
+``AsyncSession`` so one session can serve several operations per request
+(e.g. touch ``last_used_at`` + audit line); every mutating helper commits on its own.
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING, Any
+
+from sqlalchemy.ext.asyncio import create_async_engine
+from sqlmodel import SQLModel, select
+from sqlmodel.ext.asyncio.session import AsyncSession
+
+from beaver_gateway.storage.models import AuditLog, Session, Token
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from sqlalchemy.ext.asyncio import AsyncEngine
+
+
+def _utcnow() -> datetime:
+    return datetime.now(UTC)
+
+
+def _to_async_url(url: str) -> str:
+    """Promote the user-facing sync URL to its async driver variant.
+
+    Users write ``sqlite:///gw.db``, ``postgresql://...`` or ``postgres://...``
+    in ``.env``; we rewrite the scheme to ``sqlite+aiosqlite://`` /
+    ``postgresql+psycopg://``. Already-async and unknown URLs pass through.
+    """
+    if url.startswith(("sqlite+aiosqlite://", "postgresql+psycopg://")):
+        return url
+    if url.startswith("sqlite://"):
+        return "sqlite+aiosqlite://" + url[len("sqlite://") :]
+    if url.startswith("postgresql://"):
+        return "postgresql+psycopg://" + url[len("postgresql://") :]
+    if url.startswith("postgres://"):
+        return "postgresql+psycopg://" + url[len("postgres://") :]
+    return url
+
+
+class Database:
+    """Owner of the async SQLAlchemy engine.
+
+    Construct once in ``cli`` from ``settings.database_url``, ``await``
+    :meth:`create_all` at startup, hand the instance to
+    :class:`GatewayRuntime`. Frontends grab sessions via
+    :meth:`session` (an async context manager).
+
+    ``echo=False`` keeps SQL out of INFO logs (admin UI is the
+    user-facing view); ``connect_args={"check_same_thread": False}``
+    isn't needed under async sqlite — aiosqlite already runs each
+    connection on a dedicated thread.
+    """
+
+    __slots__ = ("_engine",)
+
+    def __init__(self, url: str) -> None:
+        self._engine: AsyncEngine = create_async_engine(_to_async_url(url), echo=False)
+
+    async def create_all(self) -> None:
+        """Idempotent ``CREATE TABLE IF NOT EXISTS`` for every model."""
+        async with self._engine.begin() as conn:
+            await conn.run_sync(SQLModel.metadata.create_all)
+
+    def session(self) -> AsyncSession:
+        """Open a fresh :class:`AsyncSession` (use as ``async with``).
+
+        Callers commit explicitly; we don't auto-commit on exit so a
+        request that fails mid-flight rolls back by simply not
+        committing.
+        """
+        return AsyncSession(self._engine, expire_on_commit=False)
+
+    async def dispose(self) -> None:
+        """Close the engine's connection pool. Idempotent."""
+        await self._engine.dispose()
+
+
+# ---- Token CRUD ---------------------------------------------------------
+
+
+async def list_active_tokens(session: AsyncSession) -> Sequence[Token]:
+    """Return every non-revoked token (Phase 4.2 seeds the cache from this)."""
+    stmt = select(Token).where(Token.revoked_at.is_(None))  # ty: ignore[unresolved-attribute]
+    result = await session.exec(stmt)
+    return result.all()
+
+
+async def list_tokens(
+    session: AsyncSession, *, include_revoked: bool = False
+) -> Sequence[Token]:
+    """Return tokens ordered newest-first (Phase 4.3 admin table)."""
+    stmt = select(Token).order_by(Token.created_at.desc())  # ty: ignore[unresolved-attribute]
+    if not include_revoked:
+        stmt = stmt.where(Token.revoked_at.is_(None))  # ty: ignore[unresolved-attribute]
+    result = await session.exec(stmt)
+    return result.all()
+
+
+async def create_token(
+    session: AsyncSession, *, name: str, scope: str, hashed_value: str
+) -> Token:
+    """Persist a new token. Caller hashes the plaintext before passing it in."""
+    row = Token(name=name, scope=scope, hashed_value=hashed_value)
+    session.add(row)
+    await session.commit()
+    await session.refresh(row)
+    return row
+
+
+async def revoke_token(session: AsyncSession, *, token_id: int) -> bool:
+    """Mark a token revoked. Returns ``False`` if missing or already revoked."""
+    row = await session.get(Token, token_id)
+    if row is None or row.revoked_at is not None:
+        return False
+    row.revoked_at = _utcnow()
+    session.add(row)
+    await session.commit()
+    return True
+
+
+async def touch_token(session: AsyncSession, *, token_id: int) -> None:
+    """Bump ``last_used_at``. Phase 4.2 batches these — not per-request."""
+    row = await session.get(Token, token_id)
+    if row is None:
+        return
+    row.last_used_at = _utcnow()
+    session.add(row)
+    await session.commit()
+
+
+# ---- Session bookkeeping ------------------------------------------------
+
+
+async def upsert_session(
+    session: AsyncSession, *, session_id: str, agent_name: str, fingerprint: str
+) -> Session:
+    """Insert-or-bump a Session row. Agent/fingerprint never change for an id."""
+    row = await session.get(Session, session_id)
+    if row is None:
+        row = Session(id=session_id, agent_name=agent_name, fingerprint=fingerprint)
+    else:
+        row.last_active_at = _utcnow()
+    session.add(row)
+    await session.commit()
+    await session.refresh(row)
+    return row
+
+
+async def touch_session(session: AsyncSession, *, session_id: str) -> None:
+    """Bump ``last_active_at`` without changing fingerprint/agent."""
+    row = await session.get(Session, session_id)
+    if row is None:
+        return
+    row.last_active_at = _utcnow()
+    session.add(row)
+    await session.commit()
+
+
+async def close_session(session: AsyncSession, *, session_id: str) -> bool:
+    """Mark a session closed. Returns ``False`` if missing or already closed."""
+    row = await session.get(Session, session_id)
+    if row is None or row.closed_at is not None:
+        return False
+    row.closed_at = _utcnow()
+    session.add(row)
+    await session.commit()
+    return True
+
+
+# ---- Audit --------------------------------------------------------------
+
+
+async def append_audit(
+    session: AsyncSession,
+    *,
+    actor: str,
+    kind: str,
+    agent_name: str | None = None,
+    detail: dict[str, Any] | None = None,
+) -> AuditLog:
+    """Append-only insert. ``detail`` JSON-serialised here, not by callers."""
+    row = AuditLog(
+        actor=actor,
+        kind=kind,
+        agent_name=agent_name,
+        detail_json=json.dumps(detail or {}, separators=(",", ":")),
+    )
+    session.add(row)
+    await session.commit()
+    await session.refresh(row)
+    return row
+
+
+async def list_audit_records(
+    session: AsyncSession, *, limit: int = 50, before_id: int | None = None
+) -> Sequence[AuditLog]:
+    """Return audit entries newest-first, optionally paginated by id cursor.
+
+    ``before_id`` is a forward-only cursor: pass the smallest id from the
+    current page to fetch the next slice. Cheap because ``id`` is the
+    primary key (ordered insert).
+    """
+    stmt = select(AuditLog).order_by(AuditLog.id.desc()).limit(limit)  # ty: ignore[unresolved-attribute]
+    if before_id is not None:
+        stmt = stmt.where(AuditLog.id < before_id)  # ty: ignore[unsupported-operator]
+    result = await session.exec(stmt)
+    return result.all()
+
+
+__all__ = [
+    "Database",
+    "append_audit",
+    "close_session",
+    "create_token",
+    "list_active_tokens",
+    "list_audit_records",
+    "list_tokens",
+    "revoke_token",
+    "touch_session",
+    "touch_token",
+    "upsert_session",
+]
@@ -0,0 +1,80 @@
+"""SQLModel tables — see PRD §9.
+
+Three tables, all flat, no relationships modelled yet (Phase 4 talks
+about ``actor`` and ``agent_name`` as strings — joining audit→token by
+name is fine at this volume; we'll introduce FKs when the admin UI
+actually demands them).
+
+Datetimes are stored in UTC; we set ``default_factory`` rather than relying
+on DB defaults so SQLite + Postgres behave identically. Every table with a
+database-generated id types it as ``int | None`` (``None`` until the row is
+inserted) so SQLAlchemy can autoincrement.
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+from sqlmodel import Field, SQLModel
+
+
+def _utcnow() -> datetime:
+    return datetime.now(UTC)
+
+
+class Token(SQLModel, table=True):
+    """Bearer token issued to an external caller.
+
+    ``hashed_value`` holds the Argon2 hash (Phase 4.2 — until then,
+    rows are written by tests / the admin UI, not by ``TokenStore``).
+    Plaintext is shown to the user **once** at creation and then
+    discarded.
+    """
+
+    id: int | None = Field(default=None, primary_key=True)  # PK; None before insert
+    name: str = Field(index=True, unique=True)  # indexed, must be unique
+    scope: str = Field(default="*")  # NOTE(review): "*" presumably = all — confirm
+    hashed_value: str  # required; hash only, never the plaintext
+    created_at: datetime = Field(default_factory=_utcnow)  # UTC, set in Python
+    last_used_at: datetime | None = Field(default=None)  # None by default
+    revoked_at: datetime | None = Field(default=None)  # presumably None while active
+
+
+class Session(SQLModel, table=True):
+    """Mirror of one live ``claude-code-api`` session.
+
+    The id is the ``session_id`` claude itself assigns on the first
+    turn; we don't generate it. Rows here are for admin observability
+    (live count, last activity) — the actual pool lives in
+    ``claude_code_api.ClaudeCodeBackend`` and is the source of truth.
+    """
+
+    id: str = Field(primary_key=True)  # externally assigned; no default
+    agent_name: str = Field(index=True)  # indexed
+    fingerprint: str = Field(index=True)  # indexed; NOTE(review): semantics not shown here
+    created_at: datetime = Field(default_factory=_utcnow)  # UTC, set in Python
+    last_active_at: datetime = Field(default_factory=_utcnow)  # defaults to now (UTC)
+    closed_at: datetime | None = Field(default=None)  # None while the session is open
+
+
+class AuditLog(SQLModel, table=True):
+    """Append-only record of who-did-what.
+
+    ``actor`` is ``"token:<name>"`` for inbound traffic or
+    ``"admin:<user>"`` for admin-UI actions. ``kind`` is a short tag
+    (``"messages"`` / ``"mcp_call"`` / ``"token_create"`` / …);
+    free-form rather than enum so we can add new kinds without a
+    schema migration. ``detail_json`` is a JSON-encoded blob — keep
+    it small (paths, method, status), not full bodies.
+    """
+
+    __tablename__ = "audit_log"  # explicit table name
+
+    id: int | None = Field(default=None, primary_key=True)  # PK; None before insert
+    ts: datetime = Field(default_factory=_utcnow, index=True)  # UTC, set in Python; indexed
+    actor: str = Field(index=True)  # "token:<name>" or "admin:<user>"; indexed
+    kind: str = Field(index=True)  # free-form short tag; indexed
+    agent_name: str | None = Field(default=None, index=True)  # optional; indexed
+    detail_json: str = Field(default="{}")  # JSON text; empty object by default
+
+
+__all__ = ["AuditLog", "Session", "Token"]  # public API: the three table models
@@ -124,6 +124,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" },
 ]

+[[package]]
+name = "aiosqlite"
+version = "0.22.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4e/8a/64761f4005f17809769d23e518d915db74e6310474e733e3593cfc854ef1/aiosqlite-0.22.1.tar.gz", hash = "sha256:043e0bd78d32888c0a9ca90fc788b38796843360c855a7262a532813133a0650", size = 14821, upload-time = "2025-12-23T19:25:43.997Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/00/b7/e3bf5133d697a08128598c8d0abc5e16377b51465a33756de24fa7dee953/aiosqlite-0.22.1-py3-none-any.whl", hash = "sha256:21c002eb13823fad740196c5a2e9d8e62f6243bd9e7e4a1f87fb5e44ecb4fceb", size = 17405, upload-time = "2025-12-23T19:25:42.139Z" },
+]
+
 [[package]]
 name = "annotated-doc"
 version = "0.0.4"
@@ -253,11 +262,13 @@ version = "0.1.0"
 source = { editable = "." }
 dependencies = [
    { name = "aiohttp" },
+    { name = "aiosqlite" },
    { name = "anthropic" },
    { name = "anyio" },
    { name = "argon2-cffi" },
    { name = "fastapi" },
    { name = "fastmcp" },
+    { name = "greenlet" },
    { name = "itsdangerous" },
    { name = "jinja2" },
    { name = "psycopg", extra = ["binary"] },
@@ -290,6 +301,7 @@ dev = [
 [package.metadata]
 requires-dist = [
    { name = "aiohttp", specifier = ">=3.13.5" },
+    { name = "aiosqlite", specifier = ">=0.22.1" },
    { name = "anthropic", specifier = ">=0.103.0" },
    { name = "anyio", specifier = ">=4.13.0" },
    { name = "argon2-cffi", specifier = ">=25.1.0" },
@@ -297,6 +309,7 @@ requires-dist = [
    { name = "claude-code-api", marker = "extra == 'prod'", git = "https://git.kotikot.com/beaver/claude-code-api.git" },
    { name = "fastapi", specifier = ">=0.136.1" },
    { name = "fastmcp", specifier = ">=3.3.1" },
+    { name = "greenlet", specifier = ">=3.5.0" },
    { name = "itsdangerous", specifier = ">=2.2.0" },
    { name = "jinja2", specifier = ">=3.1.6" },
    { name = "psycopg", extras = ["binary"], specifier = ">=3.3.4" },
				`@@ -0,0 +1 @@`
				`<div class="banner error">{{ message }}</div>`