feat: add streaming to markdown, fix raycast mcps exposing

This commit is contained in:
h
2026-05-21 13:52:48 +02:00
parent 7fc0c9c0b1
commit 11f061070f
6 changed files with 557 additions and 99 deletions
@@ -29,10 +29,12 @@ import json
import logging
import os
import tempfile
import time
from pathlib import Path
from typing import TYPE_CHECKING, Any
import aiofile
from anthropic.types import RawContentBlockStopEvent
from fastapi import FastAPI, HTTPException, Request, status
from fastapi.responses import JSONResponse
@@ -46,7 +48,7 @@ from beaver_gateway.core.conversation_store import (
rewrite_messages,
)
from beaver_gateway.core.turn_record import TurnRecord
from beaver_gateway.frontends._accumulate import accumulate
from beaver_gateway.frontends._accumulate import StreamAccumulator
from beaver_gateway.frontends._auth import require_token
from beaver_gateway.frontends.base import Frontend
from beaver_gateway.frontends.markdown import parser, renderer
@@ -69,6 +71,14 @@ _log = logging.getLogger("beaver_gateway.frontends.markdown")
__all__ = ["MarkdownFrontend"]
# Minimum interval, in seconds, between timer-driven re-renders of the
# assistant turn into the .md file while the backend stream is still
# open. The value is compared against ``time.monotonic()`` deltas in
# ``_stream_to_file``. It trades responsiveness (faster updates to
# Obsidian sync / Raycast tailers) against write amplification. Each
# ``RawContentBlockStopEvent`` also forces a flush regardless of the
# timer, so block boundaries always land in the file.
_STREAM_FLUSH_DEBOUNCE = 0.4
class MarkdownFrontend(Frontend):
"""FastAPI app behind ``POST /chat`` driven by Obsidian-vault files."""
@@ -285,21 +295,23 @@ class MarkdownFrontend(Frontend):
TurnCapture() if isinstance(backend, ClaudeCodeBackendAdapter) else None
)
kwargs: dict[str, Any] = {}
if capture is not None:
kwargs["capture"] = capture
events = backend.complete(
agent=agent, messages=outcome.messages, system=None, **kwargs
)
try:
kwargs: dict[str, Any] = {}
if capture is not None:
kwargs["capture"] = capture
events = backend.complete(
agent=agent, messages=outcome.messages, system=None, **kwargs
message = await self._stream_to_file(
events=events,
file_path=file_path,
parsed=parsed,
model=agent.model or agent.name,
filename=filename,
)
message = await accumulate(events, model=agent.model or agent.name)
except HTTPException:
raise
except Exception as exc:
_log.exception("backend failed for %s", filename)
error_block = _render_error_block(exc)
new_body = renderer.append_to_body(parsed.body, error_block)
await _write_atomic(
file_path, _reattach_frontmatter(parsed.metadata, new_body)
)
raise HTTPException(
status.HTTP_500_INTERNAL_SERVER_ERROR, f"backend error: {exc}"
) from exc
@@ -346,6 +358,67 @@ class MarkdownFrontend(Frontend):
# ---- helpers -------------------------------------------------------
async def _stream_to_file(
    self,
    *,
    events: Any,
    file_path: Path,
    parsed: parser.ParsedFile,
    model: str,
    filename: str,
) -> Any:
    """Drain ``events`` into a ``Message``, flushing partials to disk.

    Flushes happen on each ``RawContentBlockStopEvent`` (natural
    block boundary, content is markdown-consistent) and whenever at
    least ``_STREAM_FLUSH_DEBOUNCE`` seconds have passed since the
    previous flush *finished*. The partial write keeps the as-parsed
    frontmatter; the post-stream final write in
    ``_write_assistant_reply`` is what stamps the refreshed
    fingerprint / agent / conversation_id.

    On backend exception we still flush the last partial and append
    an error callout, so the human sees both what arrived and why it
    stopped. The exception propagates so ``_handle_chat`` can map it
    to a 500.

    Args:
        events: Async iterable of backend stream events; each one is
            fed to the accumulator in arrival order.
        file_path: Markdown file that is rewritten on every flush.
        parsed: The file as parsed before the turn started; its
            ``body`` and ``metadata`` are the base of every write.
        model: Model label passed through to ``finalize``.
        filename: Name used only in the failure log line.

    Returns:
        Whatever ``StreamAccumulator.finalize`` returns once the
        stream is exhausted.

    Raises:
        Exception: Any error raised while iterating or flushing is
            re-raised after the error callout has been written.
    """
    acc = StreamAccumulator()

    async def write_body(new_body: Any) -> None:
        # Every write re-attaches the as-parsed frontmatter.
        await _write_atomic(
            file_path, _reattach_frontmatter(parsed.metadata, new_body)
        )

    async def flush_partial() -> None:
        # NOTE(review): relies on ``finalize`` being safe to call
        # repeatedly mid-stream -- confirm against StreamAccumulator.
        partial = acc.finalize(model=model)
        if not partial.content:
            # Nothing renderable yet; leave the file untouched.
            return
        rendered = renderer.render_assistant_message(partial)
        await write_body(renderer.append_to_body(parsed.body, rendered))

    try:
        last_flush = time.monotonic()
        async for ev in events:
            acc.feed(ev)
            block_closed = isinstance(ev, RawContentBlockStopEvent)
            debounce_elapsed = (
                time.monotonic() - last_flush
            ) >= _STREAM_FLUSH_DEBOUNCE
            if block_closed or debounce_elapsed:
                await flush_partial()
                # Re-read the clock *after* the write. Reusing the
                # pre-write timestamp would count the write's own
                # duration against the debounce window, so a slow
                # disk would make every following event flush.
                last_flush = time.monotonic()
    except Exception as exc:
        _log.exception("backend failed for %s", filename)
        partial = acc.finalize(model=model)
        new_body = parsed.body
        if partial.content:
            new_body = renderer.append_to_body(
                new_body, renderer.render_assistant_message(partial)
            )
        new_body = renderer.append_to_body(new_body, _render_error_block(exc))
        await write_body(new_body)
        raise
    return acc.finalize(model=model)
async def _write_assistant_reply(
self,
*,