feat: add streaming to the markdown frontend, fix exposure of Raycast MCPs
This commit is contained in:
@@ -29,10 +29,12 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import aiofile
|
||||
from anthropic.types import RawContentBlockStopEvent
|
||||
from fastapi import FastAPI, HTTPException, Request, status
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
@@ -46,7 +48,7 @@ from beaver_gateway.core.conversation_store import (
|
||||
rewrite_messages,
|
||||
)
|
||||
from beaver_gateway.core.turn_record import TurnRecord
|
||||
from beaver_gateway.frontends._accumulate import accumulate
|
||||
from beaver_gateway.frontends._accumulate import StreamAccumulator
|
||||
from beaver_gateway.frontends._auth import require_token
|
||||
from beaver_gateway.frontends.base import Frontend
|
||||
from beaver_gateway.frontends.markdown import parser, renderer
|
||||
@@ -69,6 +71,14 @@ _log = logging.getLogger("beaver_gateway.frontends.markdown")
|
||||
__all__ = ["MarkdownFrontend"]
|
||||
|
||||
|
||||
# Minimum interval, in seconds, between re-renders of the assistant turn into the .md file while the
|
||||
# backend stream is still open. Trades responsiveness (faster updates to
|
||||
# Obsidian sync / Raycast tailers) against write amplification. Each
|
||||
# ``RawContentBlockStopEvent`` also forces a flush regardless of the
|
||||
# timer, so block boundaries always land in the file.
|
||||
_STREAM_FLUSH_DEBOUNCE = 0.4
|
||||
|
||||
|
||||
class MarkdownFrontend(Frontend):
|
||||
"""FastAPI app behind ``POST /chat`` driven by Obsidian-vault files."""
|
||||
|
||||
@@ -285,21 +295,23 @@ class MarkdownFrontend(Frontend):
|
||||
TurnCapture() if isinstance(backend, ClaudeCodeBackendAdapter) else None
|
||||
)
|
||||
|
||||
kwargs: dict[str, Any] = {}
|
||||
if capture is not None:
|
||||
kwargs["capture"] = capture
|
||||
events = backend.complete(
|
||||
agent=agent, messages=outcome.messages, system=None, **kwargs
|
||||
)
|
||||
try:
|
||||
kwargs: dict[str, Any] = {}
|
||||
if capture is not None:
|
||||
kwargs["capture"] = capture
|
||||
events = backend.complete(
|
||||
agent=agent, messages=outcome.messages, system=None, **kwargs
|
||||
message = await self._stream_to_file(
|
||||
events=events,
|
||||
file_path=file_path,
|
||||
parsed=parsed,
|
||||
model=agent.model or agent.name,
|
||||
filename=filename,
|
||||
)
|
||||
message = await accumulate(events, model=agent.model or agent.name)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as exc:
|
||||
_log.exception("backend failed for %s", filename)
|
||||
error_block = _render_error_block(exc)
|
||||
new_body = renderer.append_to_body(parsed.body, error_block)
|
||||
await _write_atomic(
|
||||
file_path, _reattach_frontmatter(parsed.metadata, new_body)
|
||||
)
|
||||
raise HTTPException(
|
||||
status.HTTP_500_INTERNAL_SERVER_ERROR, f"backend error: {exc}"
|
||||
) from exc
|
||||
@@ -346,6 +358,67 @@ class MarkdownFrontend(Frontend):
|
||||
|
||||
# ---- helpers -------------------------------------------------------
|
||||
|
||||
async def _stream_to_file(
    self,
    *,
    events: Any,
    file_path: Path,
    parsed: parser.ParsedFile,
    model: str,
    filename: str,
) -> Any:
    """Drain ``events`` into a ``Message``, flushing partials to disk.

    After each event is fed to the accumulator, the partial message is
    re-rendered into ``file_path`` when either:

    * the event is a ``RawContentBlockStopEvent`` (natural block
      boundary), or
    * at least ``_STREAM_FLUSH_DEBOUNCE`` seconds have passed since the
      previous flush *finished*.

    There is no background timer: the debounce is only evaluated when
    an event arrives, so a stalled stream produces no writes. The
    partial write keeps the as-parsed frontmatter; the post-stream
    final write in ``_write_assistant_reply`` is what stamps the
    refreshed fingerprint / agent / conversation_id.

    On backend exception the last partial (if any) plus an error
    callout is written, so the human sees both what arrived and why it
    stopped. The exception then propagates so ``_handle_chat`` can map
    it to a 500.

    Args:
        events: Async iterable of backend stream events.
        file_path: Markdown file that receives the partial renders.
        parsed: Parsed form of the file as read before streaming; its
            ``body`` and ``metadata`` are the base of every write.
        model: Model name passed to ``StreamAccumulator.finalize``.
        filename: Name used in log messages only.

    Returns:
        The result of ``acc.finalize(model=model)`` once the stream is
        exhausted.

    Raises:
        Exception: whatever the event stream (or a partial flush)
            raised, re-raised unchanged after the error callout write
            has been attempted.
    """
    acc = StreamAccumulator()

    async def flush_partial() -> None:
        # NOTE(review): finalize() is called on every flush and again at
        # the end, so it is assumed to be non-destructive -- confirm
        # against StreamAccumulator.
        partial = acc.finalize(model=model)
        if not partial.content:
            # Nothing renderable yet; leave the file untouched.
            return
        rendered = renderer.render_assistant_message(partial)
        # Always rebuild from the original body so successive partials
        # replace each other instead of stacking up.
        new_body = renderer.append_to_body(parsed.body, rendered)
        await _write_atomic(
            file_path, _reattach_frontmatter(parsed.metadata, new_body)
        )

    try:
        last_flush = time.monotonic()
        async for ev in events:
            acc.feed(ev)
            if (
                isinstance(ev, RawContentBlockStopEvent)
                or (time.monotonic() - last_flush) >= _STREAM_FLUSH_DEBOUNCE
            ):
                await flush_partial()
                # Restart the debounce window once the write has
                # completed. Using a timestamp taken before the awaited
                # flush would count the flush's own duration against
                # the window, so a slow write would make every
                # following event flush again.
                last_flush = time.monotonic()
    except Exception as exc:
        # NOTE(review): HTTPException is an Exception subclass, so it
        # also gets an error callout here before the caller re-raises
        # it -- confirm that is intended.
        _log.exception("backend failed for %s", filename)
        partial = acc.finalize(model=model)
        new_body = parsed.body
        if partial.content:
            new_body = renderer.append_to_body(
                new_body, renderer.render_assistant_message(partial)
            )
        new_body = renderer.append_to_body(new_body, _render_error_block(exc))
        try:
            await _write_atomic(
                file_path, _reattach_frontmatter(parsed.metadata, new_body)
            )
        except Exception:
            # The callout is best-effort: a failing write must not
            # replace the original error that the caller reports.
            _log.exception("failed to write error callout for %s", filename)
        raise
    return acc.finalize(model=model)
|
||||
|
||||
async def _write_assistant_reply(
|
||||
self,
|
||||
*,
|
||||
|
||||
Reference in New Issue
Block a user