feat: add stateful conversation storage

2026-05-21 12:27:11 +02:00
parent 4a405faf25
commit a83bec709d
6 changed files with 994 additions and 94 deletions
@@ -28,6 +28,7 @@ from __future__ import annotations
 import json
 import uuid
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Self
 from claude_code_api import (
@@ -38,6 +39,7 @@ from claude_code_api import (
    TextBlock,
    ThinkingBlock,
    ToolUseBlock,
    synthesize_turn_messages,
 )
 from beaver_gateway.agents.claude import ClaudeAgent
@@ -65,7 +67,27 @@ if TYPE_CHECKING:
    from beaver_gateway.core.events import MessageStreamEvent
-__all__ = ["ClaudeCodeBackendAdapter"]
+__all__ = ["ClaudeCodeBackendAdapter", "TurnCapture"]
@dataclass
 class TurnCapture:
    """Side-channel sink for per-turn metadata.
    Pass an instance via ``ClaudeCodeBackendAdapter.complete(capture=...)``.
    After the stream finishes, :attr:`synthesized_messages` holds the
    full assistant↔tool-result cycle (from
    :func:`claude_code_api.synthesize_turn_messages`) — i.e. the exact
    list of canonical Anthropic-shape messages claude-code-api stashed
    the live session under. The markdown frontend uses this to write the
    conversation history to its DB so a subsequent turn's prefix
    fingerprint hits the same session.
    Other backends (anthropic, raycast) ignore the kwarg — it lands in
    their ``**options`` and is silently dropped.
    """
    # Starts as an empty list (fresh per instance via ``default_factory``).
    # ``ClaudeCodeBackendAdapter.complete`` replaces it wholesale with the
    # result of ``synthesize_turn_messages(raw_events)`` once its event
    # loop over the backend stream has ended.
    synthesized_messages: list[dict[str, Any]] = field(default_factory=list)
 _CLAUDE_TO_ANTHROPIC_STOP: dict[str, StopReason] = {
@@ -185,10 +207,7 @@ class ClaudeCodeBackendAdapter:
    """
    def __init__(
-        self,
+        self, *, agent: ClaudeAgent, mcp_internal_urls: Mapping[str, str]
        *,
        agent: ClaudeAgent,
        mcp_internal_urls: Mapping[str, str],
    ) -> None:
        self._agent = agent
        self._backend = ClaudeCodeBackend(
@@ -207,9 +226,7 @@ class ClaudeCodeBackendAdapter:
        await self._backend.__aenter__()
        return self
-    async def __aexit__(
+    async def __aexit__(self, exc_type: object, exc: object, tb: object) -> None:
        self, exc_type: object, exc: object, tb: object
    ) -> None:
        await self._backend.__aexit__(exc_type, exc, tb)
    async def aclose(self) -> None:
@@ -221,6 +238,7 @@ class ClaudeCodeBackendAdapter:
        agent: BaseAgent,
        messages: Iterable[MessageParam],
        system: str | None = None,  # noqa: ARG002 — see module docstring
        capture: TurnCapture | None = None,
        **options: Any,  # noqa: ARG002 — no per-request knobs for claude-code yet
    ) -> AsyncIterator[MessageStreamEvent]:
        if not isinstance(agent, ClaudeAgent):
@@ -245,27 +263,43 @@ class ClaudeCodeBackendAdapter:
        next_index = 0
        stop_reason: str | None = None
        usage: Mapping[str, Any] | None = None
        # We keep raw events so we can hand them to
        # ``synthesize_turn_messages`` after the stream closes — the
        # markdown frontend stores the result in its conversation
        # history so the next turn's prefix matches the backend's
        # session-pool fingerprint. UserMessage (tool_result) events
        # are silently discarded from the wire but kept here.
        raw_events: list[Any] = []
        async for event in self._backend.complete(list(messages)):
            raw_events.append(event)
            if isinstance(event, AssistantMessage):
                for block in event.content:
                    for ev in _emit_block(block, next_index):
                        yield ev
                    next_index += 1
            elif isinstance(event, ResultMessage):
                # ResultMessage is the terminal event from TurnManager
                # — we capture its stop_reason / usage for the envelope
                # below. We DO NOT break here: an early break would
                # raise GeneratorExit inside claude-code-api's
                # ``complete`` coroutine before it gets a chance to
                # stash the live session under the post-turn
                # fingerprint, so every continuation would miss the
                # cache and reseed. Let the inner generator exit
                # naturally instead.
                stop_reason = event.stop_reason
                usage = event.usage
                # ResultMessage is always last (TurnManager synthesizes
                # it as the terminal event), so we break after emitting
                # the envelope close.
                break
            # UserMessage (tool_result records) and SystemMessage
            # (turn_duration heartbeats) carry no content for the
-            # /v1/messages caller — skip silently.
+            # /v1/messages caller — skip silently on the wire, but they
            # ARE retained in ``raw_events`` for synthesis below.
        if capture is not None:
            capture.synthesized_messages = synthesize_turn_messages(raw_events)
        yield build_message_delta(
-            stop_reason=_map_stop_reason(stop_reason),
+            stop_reason=_map_stop_reason(stop_reason), usage=_normalize_usage(usage)
            usage=_normalize_usage(usage),
        )
        yield build_message_stop()
@@ -292,9 +326,7 @@ def _emit_block(
            build_content_block_stop(index),
        )
    if isinstance(block, ToolUseBlock):
-        partial = json.dumps(
+        partial = json.dumps(block.input, separators=(",", ":"), ensure_ascii=False)
            block.input, separators=(",", ":"), ensure_ascii=False
        )
        return (
            build_tool_use_block_start(index, tool_use_id=block.id, name=block.name),
            build_input_json_delta(index, partial),
@@ -0,0 +1,523 @@
 """Stateful conversation history for the markdown frontend.
 The gateway used to be stateless about identity: claude-code-api's
 in-memory session pool was keyed by a fingerprint of the messages the
 gateway forwarded, and on a fingerprint miss the same fingerprint was
 used to seed a fresh PTY's JSONL transcript. That worked as long as
 the frontend could round-trip the *exact* content blocks the live
 session had observed. The markdown frontend can't — the parser strips
 ``[!tool]-`` callouts because the human is allowed to edit the prose,
 and the rendered tool callouts don't carry the canonical ``tool_use``
 block fields anyway. So a continuation hit was *only* reliable for
 turns that never used a tool; once tools entered the picture, every
 subsequent turn missed the cache and reseeded from a tool-less
 transcript, leading to "assistant doesn't remember the tool calls it
 just made."
 This module makes the gateway stateful for the markdown frontend (and
 any other frontend that wants in). The DB stores the full
 Anthropic-shape message list — text blocks, ``tool_use`` blocks,
 ``tool_result`` blocks, thinking signatures — exactly as
 claude-code-api would have seen on the wire. Before each turn we
 align the file the user is editing against the stored history:
 * If the user just appended a new user turn at the bottom, we feed
  the backend our stored-plus-new history and the fingerprint hits.
 * If the user edited the *text* inside an assistant turn but left the
  tool callouts alone, we splice the new text into the stored
  ``tool_use`` blocks and feed *that* — the fingerprint misses (text
  differs), claude-code-api reseeds with a full transcript (tools and
  all), the new live session has memory of the prior tool calls.
 * If the user changed the *structure* (added/removed/reordered a tool
  callout, edited an old user turn, etc.) we fork: take stored history
  up to the divergence, take incoming text-only past the divergence.
  The fingerprint misses; claude-code-api reseeds with a clean
  truncated history; downstream turns continue from there.
 "Divergence point" is found by walking the file's turns and the
 stored display turns in lockstep. See :func:`diff_and_fork`.
 """
 from __future__ import annotations
 import json
 import uuid
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, cast
 from sqlmodel import select
 from beaver_gateway.storage.models import Conversation, ConversationMessage
 if TYPE_CHECKING:
    from anthropic.types import MessageParam
    from sqlmodel.ext.asyncio.session import AsyncSession
    from beaver_gateway.frontends.markdown.parser import ParsedTurn
 __all__ = [
    "ForkOutcome",
    "diff_and_fork",
    "load_conversation",
    "load_messages",
    "mint_conversation",
    "rewrite_messages",
 ]
 # ---- types --------------------------------------------------------------
@dataclass(frozen=True, slots=True)
 class ForkOutcome:
    """Result of aligning the incoming file against stored history.
    ``messages`` is what the gateway feeds to the backend (already
    includes the new user prompt at the tail). ``persist_messages``
    is the canonical conversation state the gateway should hold in
    the DB *up to but not including* the new assistant reply — the
    caller appends the synthesized turn from the backend onto this
    and writes the result back. ``divergence_index`` is the
    display-turn index at which incoming first disagreed with stored
    (``None`` if everything matched; the new tail is appended cleanly).
    """
    # Backend input: aligned history followed by the new user prompt.
    messages: list[MessageParam]
    # Aligned history only — the new user prompt is NOT part of this list.
    persist_messages: list[dict[str, Any]]
    # Index into the display-turn sequence, or ``None`` when nothing diverged.
    divergence_index: int | None
 # ---- public store API ---------------------------------------------------
 async def load_conversation(
    session: AsyncSession, *, frontend: str, external_id: str
 ) -> Conversation | None:
    """Look up a conversation by its ``(frontend, external_id)`` pair.
    Returns the first matching row, or ``None`` when no row matches.
    """
    # Both conditions are passed to one ``where`` call, which ANDs them.
    query = select(Conversation).where(
        Conversation.frontend == frontend,
        Conversation.external_id == external_id,
    )
    rows = await session.exec(query)
    return rows.first()
 async def mint_conversation(
    session: AsyncSession, *, frontend: str, agent_name: str
 ) -> Conversation:
    """Insert and return a new conversation row keyed by a random UUID.
    The generated ``external_id`` is the only handle a frontend has on
    the row, so the caller must store it on its own side (frontmatter,
    response header, …) to be able to find the conversation again on
    later requests.
    """
    external_id = str(uuid.uuid4())
    conversation = Conversation(
        frontend=frontend, external_id=external_id, agent_name=agent_name
    )
    session.add(conversation)
    await session.commit()
    # Reload the row's attributes after the commit before handing it back.
    await session.refresh(conversation)
    return conversation
 async def load_messages(
    session: AsyncSession, *, conversation_id: int
 ) -> list[dict[str, Any]]:
    """Load a conversation's stored messages in ascending ``seq`` order.
    Every returned dict has a ``role`` and a ``content`` key; ``content``
    is the JSON-decoded ``content_json`` column (a string or a list of
    block dicts in the canonical Anthropic ``MessageParam`` shape). This
    is the same shape that is fed to the backend on continuation.
    """
    ordered = (
        select(ConversationMessage)
        .where(ConversationMessage.conversation_id == conversation_id)
        .order_by(ConversationMessage.seq.asc())  # ty: ignore[unresolved-attribute]
    )
    fetched = await session.exec(ordered)
    messages: list[dict[str, Any]] = []
    for row in fetched.all():
        messages.append({"role": row.role, "content": json.loads(row.content_json)})
    return messages
 async def rewrite_messages(
    session: AsyncSession, *, conversation_id: int, messages: list[dict[str, Any]]
 ) -> None:
    """Replace the conversation's stored messages with ``messages``.
    The user said no branch history — we overwrite on fork. Cheap at
    our volume; if it ever matters we can switch to soft-delete +
    branch pointers.
    Each entry of ``messages`` must provide ``role`` and ``content``.
    ``content`` is serialized with :func:`json.dumps` and the entry's
    position in the list becomes the row's ``seq``. The deletes, the
    inserts and the ``updated_at`` bump are committed together at the
    end, so a failure before the commit leaves nothing committed by
    this call.
    """
    from datetime import UTC, datetime
    # Delete existing rows for this conversation.
    existing_stmt = select(ConversationMessage).where(
        ConversationMessage.conversation_id == conversation_id
    )
    result = await session.exec(existing_stmt)
    for row in result.all():
        await session.delete(row)
    # Emit the DELETEs before queuing the replacements. Within a single
    # flush SQLAlchemy's unit of work issues INSERTs ahead of DELETEs, so
    # without this the new rows would reach the database while the old
    # ones still exist and would collide on any uniqueness rule covering
    # (conversation_id, seq). NOTE(review): the model's constraints are
    # not visible here; the flush is harmless if there is none.
    await session.flush()
    # Insert the new sequence.
    for seq, m in enumerate(messages):
        session.add(
            ConversationMessage(
                conversation_id=conversation_id,
                seq=seq,
                role=str(m["role"]),
                content_json=json.dumps(m["content"], separators=(",", ":")),
            )
        )
    # Bump conversation.updated_at (skipped when the row does not exist).
    conv = await session.get(Conversation, conversation_id)
    if conv is not None:
        conv.updated_at = datetime.now(UTC)
        session.add(conv)
    await session.commit()
 # ---- alignment ----------------------------------------------------------
@dataclass(frozen=True, slots=True)
 class _StoredDisplayTurn:
    """A "display turn" reconstructed from stored raw messages.
    ``role`` is ``"user"`` (single user-prompt message) or
    ``"assistant"`` (one or more assistant messages, optionally
    interleaved with user-only-tool_result messages). ``messages`` is
    the slice of stored raw messages this display turn covers, in
    order. ``spoken_text`` and ``skeleton`` are the
    parser-equivalents for diff purposes; ``text_segment_count`` lets
    us refuse a splice when the user edited across a tool boundary in
    a way we can't safely undo.
    """
    # "user" or "assistant".
    role: str
    # Raw stored messages covered by this turn, in stored order.
    messages: tuple[dict[str, Any], ...]
    # Visible text of the turn; compared against the parsed turn's ``text``.
    spoken_text: str
    # Tool names in order of appearance; always ``()`` for user turns.
    skeleton: tuple[str, ...]
    # Number of text segments in the turn; always ``0`` for user turns.
    text_segment_count: int
 def _group_display_turns(stored: list[dict[str, Any]]) -> list[_StoredDisplayTurn]:
    """Fold the raw stored messages into the turns a reader of the file sees.
    A user message that qualifies as a prompt (string content, or list
    content without ``tool_result`` blocks) becomes a user display turn
    of its own. Every other message — assistant messages and
    tool-result follow-ups — is collected, together with the
    non-prompt messages that directly follow it, into one assistant
    display turn.
    """
    turns: list[_StoredDisplayTurn] = []
    total = len(stored)
    cursor = 0
    while cursor < total:
        head = stored[cursor]
        if head["role"] == "user" and _is_user_prompt(head.get("content")):
            user_turn = _StoredDisplayTurn(
                role="user",
                messages=(head,),
                spoken_text=_user_prompt_text(head.get("content")),
                skeleton=(),
                text_segment_count=0,
            )
            turns.append(user_turn)
            cursor += 1
            continue
        # Assistant display turn: advance past every message up to (but
        # not including) the next user prompt. ``head`` itself is not a
        # prompt, so the run always holds at least one message.
        start = cursor
        while cursor < total:
            candidate = stored[cursor]
            if candidate["role"] == "user" and _is_user_prompt(
                candidate.get("content")
            ):
                break
            cursor += 1
        members = stored[start:cursor]
        spoken, tool_names, text_count = _summarize_assistant_group(members)
        assistant_turn = _StoredDisplayTurn(
            role="assistant",
            messages=tuple(members),
            spoken_text=spoken,
            skeleton=tuple(tool_names),
            text_segment_count=text_count,
        )
        turns.append(assistant_turn)
    return turns
 def _is_user_prompt(content: Any) -> bool:
    """A user message is a *prompt* unless its content carries tool_result blocks."""
    if isinstance(content, str):
        return True
    if isinstance(content, list):
        return not any(
            isinstance(b, dict) and b.get("type") == "tool_result" for b in content
        )
    # Unknown shape — be conservative, treat as prompt.
    return True
 def _user_prompt_text(content: Any) -> str:
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        chunks = [
            str(b.get("text", ""))
            for b in content
            if isinstance(b, dict) and b.get("type") == "text"
        ]
        return "\n\n".join(c for c in chunks if c)
    return ""
 def _summarize_assistant_group(
    group: list[dict[str, Any]],
 ) -> tuple[str, list[str], int]:
    """Reduce an assistant display group to ``(spoken_text, skeleton, text_count)``.
    The result is meant to mirror what
    ``parser.parse_assistant_structure`` yields for the rendered form of
    the same group: adjacent text blocks merge into one text segment
    (also across consecutive assistant messages), each ``tool_use``
    block contributes its name to the skeleton, and tool-result
    messages and thinking blocks contribute nothing visible.
    """
    # Deferred import: loading the parser at module import time would
    # create an import cycle (explained in ``diff_and_fork``).
    from beaver_gateway.frontends.markdown.parser import TextSegment, ToolSegment
    segments: list[TextSegment | ToolSegment] = []
    buffered: list[str] = []
    def _close_text_run() -> None:
        # Turn the buffered pieces into one text segment and reset the
        # buffer. Pieces are stored stripped and non-empty.
        if buffered:
            merged = "\n\n".join(buffered).strip()
            buffered.clear()
            if merged:
                segments.append(TextSegment(text=merged))
    for message in group:
        if message["role"] == "user":
            # A tool_result message ends the current text run but adds
            # no segment of its own.
            _close_text_run()
            continue
        blocks = message.get("content")
        if not isinstance(blocks, list):
            continue
        for block in blocks:
            if not isinstance(block, dict):
                continue
            kind = block.get("type")
            if kind == "tool_use":
                _close_text_run()
                segments.append(ToolSegment(name=str(block.get("name", ""))))
            elif kind == "text":
                stripped = str(block.get("text", "")).strip()
                if stripped:
                    buffered.append(stripped)
            # Any other block type (e.g. thinking) is skipped.
    _close_text_run()
    texts: list[str] = []
    tool_names: list[str] = []
    for segment in segments:
        if isinstance(segment, TextSegment):
            texts.append(segment.text)
        if isinstance(segment, ToolSegment):
            tool_names.append(segment.name)
    spoken = "\n\n".join(t for t in texts if t).strip()
    return spoken, tool_names, len(texts)
 # ---- the core algorithm -------------------------------------------------
 def diff_and_fork(
    *, stored: list[dict[str, Any]], incoming: list[ParsedTurn]
 ) -> ForkOutcome:
    """Reconcile the parsed file with the stored history for one request.
    ``stored`` holds the raw Anthropic-shape messages from the DB, one
    per ``ConversationMessage`` row. ``incoming`` holds the user-visible
    turns produced by the markdown parser; its final entry is the new
    prompt and therefore has to be a user turn.
    The returned :class:`ForkOutcome` carries ``messages`` (what the
    backend runs on) and ``persist_messages`` (the canonical history to
    store once the backend's synthesized cycle has been appended).
    Raises :class:`ValueError` when ``incoming`` is empty or does not
    end with a user turn.
    """
    # ``parser`` lives under ``frontends/markdown/`` whose ``__init__``
    # eagerly loads ``frontend.py``, which in turn imports this module
    # — importing the parser at module-import time would be circular.
    # The helpers called here import the segment classes lazily inside
    # their own bodies instead.
    ends_with_user = bool(incoming) and incoming[-1].role == "user"
    if not ends_with_user:
        found = incoming[-1].role if incoming else "empty"
        msg = (
            "diff_and_fork expects incoming to end with a user turn "
            "(the new prompt); got "
            f"{found}"
        )
        raise ValueError(msg)
    stored_groups = _group_display_turns(stored)
    *prior_incoming, new_user_turn = incoming
    spliced_groups, divergence = _walk_prefix(prior_incoming, stored_groups)
    if divergence is None and len(stored_groups) > len(prior_incoming):
        # Everything in the file matched, but the store holds more turns:
        # the user deleted past turns, so cut the stored history there.
        divergence = len(prior_incoming)
    backend_msgs, persist_msgs = _assemble_tail(
        spliced_groups=spliced_groups,
        prior_incoming=prior_incoming,
        divergence=divergence,
        new_user_turn=new_user_turn,
    )
    return ForkOutcome(
        messages=backend_msgs,
        persist_messages=persist_msgs,
        divergence_index=divergence,
    )
 def _walk_prefix(
    prior_incoming: list[ParsedTurn], stored_groups: list[_StoredDisplayTurn]
 ) -> tuple[list[list[dict[str, Any]]], int | None]:
    """Compare incoming turns with stored display turns, position by position.
    Returns ``(matched, divergence)``. ``matched`` has one entry per
    turn that lined up, each entry being the raw messages to feed the
    backend for that turn (stored messages as-is, or a spliced rebuild
    when only the assistant text changed). ``divergence`` is the index
    of the first turn that did not line up, or ``None`` when all of
    ``prior_incoming`` matched.
    """
    # Deferred import: loading the parser at module import time would
    # create an import cycle (explained in ``diff_and_fork``).
    from beaver_gateway.frontends.markdown.parser import TextSegment, ToolSegment
    matched: list[list[dict[str, Any]]] = []
    stored_count = len(stored_groups)
    for position, turn in enumerate(prior_incoming):
        if position >= stored_count:
            # The file has more prior turns than the store knows about.
            return matched, position
        stored_turn = stored_groups[position]
        if turn.role != stored_turn.role:
            return matched, position
        if turn.role == "user":
            if turn.text != stored_turn.spoken_text:
                return matched, position
            matched.append(list(stored_turn.messages))
            continue
        # Assistant turn: the ordered tool names must agree first.
        tool_names = tuple(
            seg.name for seg in turn.structure if isinstance(seg, ToolSegment)
        )
        if tool_names != stored_turn.skeleton:
            return matched, position
        if turn.text == stored_turn.spoken_text:
            matched.append(list(stored_turn.messages))
            continue
        # Text was edited; a splice is attempted only when the number of
        # text segments is unchanged.
        text_segments = sum(
            1 for seg in turn.structure if isinstance(seg, TextSegment)
        )
        if text_segments != stored_turn.text_segment_count:
            return matched, position
        rebuilt = _splice_assistant_group(stored_group=stored_turn, incoming=turn)
        if rebuilt is None:
            return matched, position
        matched.append(rebuilt)
    return matched, None
 def _assemble_tail(
    *,
    spliced_groups: list[list[dict[str, Any]]],
    prior_incoming: list[ParsedTurn],
    divergence: int | None,
    new_user_turn: ParsedTurn,
 ) -> tuple[list[MessageParam], list[dict[str, Any]]]:
    """Build the (backend, persist) lists from aligned + post-divergence tail."""
    backend_msgs: list[MessageParam] = []
    persist_msgs: list[dict[str, Any]] = []
    for spliced in spliced_groups:
        for m in spliced:
            entry: dict[str, Any] = {"role": m["role"], "content": m["content"]}
            backend_msgs.append(cast("MessageParam", entry))
            persist_msgs.append(entry)
    if divergence is not None:
        for inc in prior_incoming[divergence:]:
            if not inc.text:
                continue
            entry = {"role": inc.role, "content": inc.text}
            backend_msgs.append(cast("MessageParam", entry))
            persist_msgs.append(entry)
    backend_msgs.append({"role": "user", "content": new_user_turn.text})
    return backend_msgs, persist_msgs
 def _splice_assistant_group(
    *, stored_group: _StoredDisplayTurn, incoming: ParsedTurn
 ) -> list[dict[str, Any]] | None:
    """Rebuild an assistant display turn with new text + stored tool_use blocks.
    Walks the incoming structure; for each ``TextSegment`` emits a
    text block into the current assistant message; for each
    ``ToolSegment`` consumes the next stored ``tool_use`` block (by
    position), closes the current assistant message, emits the
    matching ``tool_result`` user message, and opens a new assistant
    message. Final ``TextSegment`` closes the last assistant message.
    Returns ``None`` if we can't find a matching tool_result for some
    tool_use (stored history is malformed) — caller falls back to
    fork. ``None`` is also returned when the incoming structure holds
    more tool segments than there are stored ``tool_use`` blocks, or
    when the walk produces no messages at all.
    Only incoming text plus the stored ``tool_use`` / ``tool_result``
    blocks end up in the result; stored text and any other stored block
    types are not copied over.
    """
    # See ``diff_and_fork`` for why this import is deferred.
    from beaver_gateway.frontends.markdown.parser import TextSegment
    tool_uses, tool_results_by_id = _harvest_tool_blocks(stored_group)
    # Finished messages, in output order.
    spliced: list[dict[str, Any]] = []
    # Content blocks of the assistant message currently being built.
    current_asst: list[dict[str, Any]] = []
    # Position of the next unconsumed stored tool_use block.
    next_tool = 0
    for seg in incoming.structure:
        if isinstance(seg, TextSegment):
            # Empty text segments add nothing to the message.
            if seg.text:
                current_asst.append({"type": "text", "text": seg.text})
            continue
        # Every segment that is not a TextSegment is handled as a tool call.
        if next_tool >= len(tool_uses):
            return None
        tu = tool_uses[next_tool]
        next_tool += 1
        # The tool_use block ends the current assistant message.
        current_asst.append(tu)
        spliced.append({"role": "assistant", "content": current_asst})
        current_asst = []
        # Results are looked up by the tool_use block's ``id``.
        tr = tool_results_by_id.get(str(tu.get("id", "")))
        if tr is None:
            return None
        # Each tool_result gets a user message of its own.
        spliced.append({"role": "user", "content": [tr]})
    # Flush text that came after the last tool call (or the only text).
    if current_asst:
        spliced.append({"role": "assistant", "content": current_asst})
    elif not spliced:
        # Defensive: assistant turn with no text and no tools makes no
        # sense; caller will treat as fork.
        return None
    return spliced
 def _harvest_tool_blocks(
    stored_group: _StoredDisplayTurn,
 ) -> tuple[list[dict[str, Any]], dict[str, dict[str, Any]]]:
    """Pull stored ``tool_use`` blocks (ordered) and ``tool_result`` blocks (by id)."""
    tool_uses: list[dict[str, Any]] = []
    tool_results_by_id: dict[str, dict[str, Any]] = {}
    for msg in stored_group.messages:
        content = msg.get("content")
        if not isinstance(content, list):
            continue
        if msg["role"] == "assistant":
            tool_uses.extend(
                blk
                for blk in content
                if isinstance(blk, dict) and blk.get("type") == "tool_use"
            )
            continue
        for blk in content:
            if not isinstance(blk, dict) or blk.get("type") != "tool_result":
                continue
            tid = blk.get("tool_use_id")
            if isinstance(tid, str):
                tool_results_by_id[tid] = blk
    return tool_uses, tool_results_by_id
@@ -36,7 +36,15 @@ import aiofile
 from fastapi import FastAPI, HTTPException, Request, status
 from fastapi.responses import JSONResponse
 from beaver_gateway.backends.claude_code import ClaudeCodeBackendAdapter, TurnCapture
 from beaver_gateway.core import audit
 from beaver_gateway.core.conversation_store import (
    diff_and_fork,
    load_conversation,
    load_messages,
    mint_conversation,
    rewrite_messages,
 )
 from beaver_gateway.core.turn_record import TurnRecord
 from beaver_gateway.frontends._accumulate import accumulate
 from beaver_gateway.frontends._auth import require_token
@@ -264,9 +272,25 @@ class MarkdownFrontend(Frontend):
            msgs=len(parsed.messages),
        )
        # Resolve / mint the conversation row, align incoming against
        # stored history, and feed the aligned messages to the backend
        # — see ``core/conversation_store.py`` for the full rationale.
        # If the backend isn't claude-code (no ``TurnCapture`` support)
        # the aligned messages are still sent; only the ``capture``
        # kwarg is left out of the call.
        conv, conv_external_id, stored_msgs = await self._resolve_conversation(
            runtime=runtime, metadata=parsed.metadata, agent_name=agent.name
        )
        outcome = diff_and_fork(stored=stored_msgs, incoming=parsed.turns)
        capture: TurnCapture | None = (
            TurnCapture() if isinstance(backend, ClaudeCodeBackendAdapter) else None
        )
        try:
            kwargs: dict[str, Any] = {}
            if capture is not None:
                kwargs["capture"] = capture
            events = backend.complete(
-                agent=agent, messages=parsed.messages, system=None
+                agent=agent, messages=outcome.messages, system=None, **kwargs
            )
            message = await accumulate(events, model=agent.model or agent.name)
        except Exception as exc:
@@ -280,22 +304,22 @@ class MarkdownFrontend(Frontend):
                status.HTTP_500_INTERNAL_SERVER_ERROR, f"backend error: {exc}"
            ) from exc
-        rendered = renderer.render_assistant_message(message)
+        new_content = await self._write_assistant_reply(
-        new_body = renderer.append_to_body(parsed.body, rendered)
+            file_path=file_path,
-        new_body = renderer.append_to_body(new_body, renderer.USER_SCAFFOLD)
+            parsed=parsed,
-        # Recompute fingerprint so a future cross-frontend hit on this
+            message=message,
-        # same conversation can find it. Stored as hex string in
+            agent_name=agent.name,
-        # frontmatter — only the markdown frontend reads it.
+            conv_external_id=conv_external_id,
-        assistant_param: MessageParam = {
+        )
-            "role": "assistant",
+
-            "content": _flatten_assistant_text(message),
+        await self._persist_canonical_history(
-        }
+            runtime=runtime,
-        updated_messages: list[MessageParam] = [*parsed.messages, assistant_param]
+            conversation_id=conv.id,
-        updated_metadata = dict(parsed.metadata)
+            persist_messages=outcome.persist_messages,
-        updated_metadata["agent"] = agent.name
+            new_user_text=parsed.turns[-1].text,
-        updated_metadata["fingerprint"] = fingerprint_messages(updated_messages)
+            capture=capture,
-        new_content = _reattach_frontmatter(updated_metadata, new_body)
+            message=message,
-        await _write_atomic(file_path, new_content)
+        )
        # Broadcast our own turn so other handlers (none today, but the
        # symmetry is worth keeping) see what happened. ``source`` marks
@@ -322,6 +346,94 @@ class MarkdownFrontend(Frontend):
    # ---- helpers -------------------------------------------------------
    async def _write_assistant_reply(
        self,
        *,
        file_path: Path,
        parsed: parser.ParsedFile,
        message: Any,
        agent_name: str,
        conv_external_id: str,
    ) -> str:
        """Append the rendered reply to the file and rewrite its frontmatter.
        The body gains the rendered assistant message followed by the
        user scaffold; the frontmatter gets ``agent``,
        ``conversation_id`` and a recomputed ``fingerprint``. The new
        document is written to ``file_path`` and also returned.
        """
        reply_markdown = renderer.render_assistant_message(message)
        body = renderer.append_to_body(parsed.body, reply_markdown)
        body = renderer.append_to_body(body, renderer.USER_SCAFFOLD)
        # The fingerprint is recomputed over the parsed messages plus the
        # flattened reply so a later cross-frontend lookup can find this
        # conversation. It is kept as a hex string in the frontmatter and
        # only the markdown frontend reads it.
        reply_text = _flatten_assistant_text(message)
        fingerprint_input: list[MessageParam] = [
            *parsed.messages,
            {"role": "assistant", "content": reply_text},
        ]
        frontmatter = {
            **parsed.metadata,
            "agent": agent_name,
            "conversation_id": conv_external_id,
            "fingerprint": fingerprint_messages(fingerprint_input),
        }
        document = _reattach_frontmatter(frontmatter, body)
        await _write_atomic(file_path, document)
        return document
    async def _resolve_conversation(
        self, *, runtime: GatewayRuntime, metadata: dict[str, Any], agent_name: str
    ) -> tuple[Any, str, list[dict[str, Any]]]:
        """Find or create the conversation row and load its stored messages.
        A non-empty string under the frontmatter key ``conversation_id``
        is used to look the row up; when the key is absent, unusable, or
        matches no row, a fresh row is minted. Returns
        ``(conv, external_id, stored_messages)``.
        Raises :class:`RuntimeError` if the row has no primary key, so
        the rest of the handler can rely on ``conv.id`` being an ``int``.
        """
        candidate = metadata.get("conversation_id")
        external_id = candidate if isinstance(candidate, str) and candidate else None
        async with runtime.db.session() as session:
            conv = (
                await load_conversation(
                    session, frontend="markdown", external_id=external_id
                )
                if external_id is not None
                else None
            )
            if conv is None:
                conv = await mint_conversation(
                    session, frontend="markdown", agent_name=agent_name
                )
            if conv.id is None:
                msg = "conversation row missing primary key after commit"
                raise RuntimeError(msg)
            stored = await load_messages(session, conversation_id=conv.id)
        return conv, conv.external_id, stored
    async def _persist_canonical_history(
        self,
        *,
        runtime: GatewayRuntime,
        conversation_id: int,
        persist_messages: list[dict[str, Any]],
        new_user_text: str,
        capture: TurnCapture | None,
        message: Any,
    ) -> None:
        """Write the post-turn canonical history for the conversation to the DB.
        The stored sequence is: the aligned prior history, the new user
        prompt, then the assistant reply — the captured
        assistant↔tool cycle when a ``TurnCapture`` was used, otherwise a
        fallback built from the accumulated ``message``.
        """
        if capture is None:
            reply_messages = _fallback_synthesized(message)
        else:
            reply_messages = capture.synthesized_messages
        canonical: list[dict[str, Any]] = list(persist_messages)
        canonical.append({"role": "user", "content": new_user_text})
        canonical.extend(reply_messages)
        async with runtime.db.session() as session:
            await rewrite_messages(
                session, conversation_id=conversation_id, messages=canonical
            )
    def _resolve_path(self, filename: str) -> Path:
        """Resolve ``filename`` under the vault; reject escapes."""
        # ``filename`` may be relative or absolute; we always anchor
@@ -399,6 +511,43 @@ def _reattach_frontmatter(metadata: dict[str, Any], body: str) -> str:
    return _fm.dumps(post) + "\n"
 def _fallback_synthesized(message: Any) -> list[dict[str, Any]]:
    """Build a single-assistant ``synthesized_messages`` list from a raw ``Message``.
    For backends that don't populate a :class:`TurnCapture` (anthropic
    HTTP, raycast, …) we don't have access to per-tool-cycle
    granularity, so the assistant reply lands in the DB as one
    canonical-block message. Tool memory across cache misses would
    degrade in that case, but those backends don't have the cache-miss
    re-seed problem to begin with — they manage history client-side.
    """
    content: list[dict[str, Any]] = []
    for block in getattr(message, "content", ()):
        btype = getattr(block, "type", None)
        if btype == "text":
            content.append({"type": "text", "text": getattr(block, "text", "") or ""})
        elif btype == "tool_use":
            content.append(
                {
                    "type": "tool_use",
                    "id": getattr(block, "id", ""),
                    "name": getattr(block, "name", ""),
                    "input": getattr(block, "input", {}),
                }
            )
        elif btype == "thinking":
            content.append(
                {
                    "type": "thinking",
                    "thinking": getattr(block, "thinking", "") or "",
                    "signature": getattr(block, "signature", "") or "",
                }
            )
    if not content:
        return []
    return [{"role": "assistant", "content": content}]
 def _flatten_assistant_text(message: Any) -> str:
    """Pull all text blocks from an assistant ``Message`` and join them.
@@ -27,7 +27,41 @@ if TYPE_CHECKING:
    from anthropic.types import MessageParam
-__all__ = ["ParsedFile", "last_role", "parse", "resolve_agent"]
+__all__ = [
    "AssistantSegment",
    "ParsedFile",
    "ParsedTurn",
    "TextSegment",
    "ToolSegment",
    "last_role",
    "parse",
    "parse_assistant_structure",
    "resolve_agent",
 ]
@dataclass(frozen=True, slots=True)
 class TextSegment:
    """A run of plain text inside an assistant turn (between callouts).

    As produced by ``parse_assistant_structure`` the text is trimmed on
    both ends and is never empty; whitespace-only runs are dropped
    instead of being emitted as a segment.
    """
    # The visible text of this run.
    text: str
@dataclass(frozen=True, slots=True)
 class ToolSegment:
    """A ``> [!tool]- <name>`` callout placeholder.

    Only the tool ``name`` is captured — the " · summary" suffix on the
    callout title and the JSON body inside the quote block are
    decorative for the human reader; the canonical tool_use block lives
    in the DB and is keyed by *position+name* against the structure
    parsed here.

    ``parse_assistant_structure`` sets ``name`` to ``""`` when the
    callout title carries no name or does not match the title pattern.
    """
    # Bare tool name taken from the callout title (summary suffix removed).
    name: str
 # Either kind of segment that can appear in an assistant turn's structure.
 AssistantSegment = TextSegment | ToolSegment
 # Turn marker — must be exactly ``### User:`` or ``### Assistant:`` on
@@ -42,6 +76,35 @@ _TURN_RE = re.compile(r"^###\s+(User|Assistant):\s*$", re.MULTILINE)
 # need to drop the whole quoted block.
 _CALLOUT_START_RE = re.compile(r"^>\s+\[!(thinking|tool)\]")
 # Tool-callout title line: ``> [!tool]- <name>`` or ``> [!tool]- <name> · <summary>``.
 # We only need the ``<name>`` part for skeleton matching; the summary is
 # decorative (built by ``renderer.summarize_tool_input`` from inputs the
 # user can edit visually without semantic consequence).
 _TOOL_TITLE_RE = re.compile(r"^>\s+\[!tool\]-\s*(.*?)\s*$")
 # Renderer joins name + summary with " · " (U+00B7) — see
 # ``renderer.summarize_tool_input``. We split on it to recover the
 # bare tool name.
 _TOOL_TITLE_SEP = " · "
@dataclass(frozen=True, slots=True)
 class ParsedTurn:
    """One turn extracted from the chat file.

    ``role`` is ``"user"`` or ``"assistant"``. ``text`` is the spoken
    content with callouts stripped and HRs dropped — used both as the
    backend's ``MessageParam.content`` (back-compat with the existing
    parser shape) and as the diff key against stored turns. For an
    assistant turn that consists only of tool callouts ``text`` is
    ``""``, while ``parse`` stores a single-space placeholder in the
    matching ``messages`` entry.

    ``structure`` is non-empty only for assistant turns: an ordered
    list of ``TextSegment`` / ``ToolSegment`` reflecting the visible
    layout of the assistant block, used by the conversation store to
    align with the canonical tool_use blocks held in DB.
    """
    # "user" or "assistant".
    role: str
    # Spoken text of the turn (callouts and HR lines removed).
    text: str
    # Ordered text/tool layout; left as the empty tuple for user turns.
    structure: tuple[TextSegment | ToolSegment, ...] = ()
@dataclass(frozen=True, slots=True)
 class ParsedFile:
@@ -49,48 +112,159 @@ class ParsedFile:
    ``metadata`` is the YAML frontmatter as a plain dict (empty if the
    file has none). ``messages`` is the conversation history shaped for
-    ``Backend.complete`` — assistant turns are text-only. ``body`` is the
+    ``Backend.complete`` — assistant turns are text-only. ``turns`` is
-    raw markdown content *after* the frontmatter is stripped; the
+    1:1 with ``messages`` and carries the per-turn structure (for
-    renderer needs it when it appends a new assistant turn so it can
+    assistant turns) that the conversation store needs to detect
-    preserve whatever the human typed verbatim (including any callouts
+    text-only edits vs. structural forks. ``body`` is the raw markdown
-    or HRs they added).
+    content *after* the frontmatter is stripped; the renderer needs it
    when it appends a new assistant turn so it can preserve whatever
    the human typed verbatim (including any callouts or HRs they
    added).
    """
    metadata: dict[str, Any]
    body: str
    messages: list[MessageParam]
    turns: list[ParsedTurn]
 def parse(text: str) -> ParsedFile:
    """Parse a chat ``.md`` into ``(metadata, body, messages, turns)``.

    A file with no turn markers but non-empty body is treated as a
    single user turn — the friendly path for "user types into a new
    file and hits send" before any turn markers exist.

    ``messages`` and ``turns`` are appended in lockstep, so they stay
    1:1. A turn whose spoken text is empty is dropped from both lists,
    with one exception: an assistant turn that has *only* tool callouts
    is kept, because its structure carries the tool-segment information
    the conversation store needs for skeleton matching. Such a turn gets
    ``ParsedTurn.text == ""`` and a single-space placeholder as its
    ``MessageParam.content``.

    Args:
        text: Full file contents, including any YAML frontmatter.

    Returns:
        A :class:`ParsedFile` with the frontmatter metadata, the raw
        body after the frontmatter, and the parallel ``messages`` /
        ``turns`` lists.
    """
    parsed = frontmatter.loads(text)
    metadata = dict(parsed.metadata)
    body = parsed.content
    messages: list[MessageParam] = []
    parsed_turns: list[ParsedTurn] = []

    raw_turns = _split_turns(body)
    if not raw_turns:
        # No turn markers at all: the whole body is one user prompt.
        stripped = body.strip()
        if stripped:
            messages.append({"role": "user", "content": stripped})
            parsed_turns.append(ParsedTurn(role="user", text=stripped))
        return ParsedFile(
            metadata=metadata, body=body, messages=messages, turns=parsed_turns
        )

    for role, raw in raw_turns:
        if role == "user":
            text_content = _strip_hrs(raw).strip()
            if text_content:
                messages.append({"role": "user", "content": text_content})
                parsed_turns.append(ParsedTurn(role="user", text=text_content))
        else:
            structure = parse_assistant_structure(raw)
            text_content = _segments_to_spoken_text(structure)
            has_tools = any(isinstance(s, ToolSegment) for s in structure)
            if text_content:
                messages.append({"role": "assistant", "content": text_content})
                parsed_turns.append(
                    ParsedTurn(
                        role="assistant", text=text_content, structure=tuple(structure)
                    )
                )
            elif has_tools:
                # Tool-only assistant turn: nothing to feed the backend
                # as ``content`` (it'd reject an empty string), but the
                # structure must survive so the store can align it
                # against stored tool_use blocks. We synthesize a
                # single-space text content for backend round-trip; the
                # conversation store is expected to replace this payload
                # with the canonical stored blocks before the backend
                # sees it on a continuation.
                messages.append({"role": "assistant", "content": " "})
                parsed_turns.append(
                    ParsedTurn(role="assistant", text="", structure=tuple(structure))
                )
    return ParsedFile(
        metadata=metadata, body=body, messages=messages, turns=parsed_turns
    )
 def parse_assistant_structure(raw: str) -> list[TextSegment | ToolSegment]:
    """Split an assistant turn body into its ordered text/tool segments.

    The body is scanned line by line:

    * A ``[!tool]`` callout yields a :class:`ToolSegment` holding only
      the tool name — everything after the first ``" · "`` in the title
      and the whole quoted body are ignored.
    * A ``[!thinking]`` callout yields nothing, but still closes the
      text run that precedes it.
    * Lines that are exactly ``---`` (ignoring surrounding whitespace)
      are dropped.
    * All remaining lines accumulate into text runs. Each run is
      trimmed and emitted as a :class:`TextSegment`; runs that end up
      empty are discarded, so whitespace between adjacent callouts never
      produces a segment.

    A callout extends over every following line whose first
    non-whitespace character is ``>``.
    """
    result: list[TextSegment | ToolSegment] = []
    buffer: list[str] = []
    inside_callout = False

    def _emit_buffered_text() -> None:
        # Three or more consecutive newlines (i.e. two or more blank
        # lines) fold into a single blank line so the text is stable
        # against a re-render.
        body = re.sub(r"\n{3,}", "\n\n", "\n".join(buffer)).strip()
        buffer.clear()
        if body:
            result.append(TextSegment(text=body))

    for line in raw.splitlines():
        if inside_callout:
            if line.lstrip().startswith(">"):
                # Still inside the quote block of the current callout.
                continue
            inside_callout = False

        opener = _CALLOUT_START_RE.match(line)
        if opener is not None:
            _emit_buffered_text()
            if opener.group(1) == "tool":
                titled = _TOOL_TITLE_RE.match(line)
                title = "" if titled is None else titled.group(1)
                tool_name, _, _ = title.partition(_TOOL_TITLE_SEP)
                result.append(ToolSegment(name=tool_name.strip()))
            inside_callout = True
            continue

        if line.strip() != "---":
            buffer.append(line)

    _emit_buffered_text()
    return result
 def _segments_to_spoken_text(segments: list[TextSegment | ToolSegment]) -> str:
    r"""Collapse a structure list into the spoken text the backend sees.

    Keeps the text of every non-empty :class:`TextSegment`, in order,
    joined by ``\n\n``; :class:`ToolSegment` entries contribute nothing.
    The joined result is stripped. This is meant to reproduce the
    output of the pre-conversation-store parser so existing
    fingerprints (frontmatter ``fingerprint`` field) stay valid.
    """
    spoken: list[str] = []
    for segment in segments:
        if isinstance(segment, TextSegment) and segment.text:
            spoken.append(segment.text)
    return "\n\n".join(spoken).strip()
 def last_role(messages: list[MessageParam]) -> str | None:
@@ -148,40 +322,3 @@ def _strip_hrs(raw: str) -> str:
    lines = raw.splitlines()
    kept = [ln for ln in lines if ln.strip() != "---"]
    return "\n".join(kept)
 def _extract_assistant_text(raw: str) -> str:
    """Return the spoken text of an assistant turn, callouts removed.

    A line that opens a thinking/tool callout starts a skipped region
    that extends over every following line whose first non-whitespace
    character is ``>``; the first line that does not start with ``>``
    ends the region and is processed normally. Lines that are exactly
    ``---`` (ignoring surrounding whitespace) are dropped. The surviving
    lines are joined, runs of three or more newlines are reduced to two,
    and the result is stripped.
    """
    spoken: list[str] = []
    in_callout = False
    for line in raw.splitlines():
        if in_callout:
            if line.lstrip().startswith(">"):
                continue
            in_callout = False
        if _CALLOUT_START_RE.match(line):
            in_callout = True
            continue
        if line.strip() == "---":
            continue
        spoken.append(line)
    # Removing a callout leaves its surrounding blank lines adjacent;
    # fold them into a single blank line.
    return re.sub(r"\n{3,}", "\n\n", "\n".join(spoken)).strip()
@@ -1,15 +1,19 @@
 """SQLModel tables.
-Two tables, both flat, no relationships modelled yet (``actor`` and
+Four tables, all flat, no FK relationships modelled (``actor`` and
 ``agent_name`` are stored as strings — joining audit→token by name is
 fine at this volume; we'll introduce FKs when the admin UI actually
 demands them).
-A ``Session`` table originally lived here for live-session
+The ``Conversation`` + ``ConversationMessage`` pair persists chat
-observability. It was dropped after we decided the gateway stays
+history per frontend so we can survive cache misses without losing
-stateless about identity (claude-code-api's in-memory fingerprint pool
+tool-call memory. The gateway is now stateful about conversation
-is the source of truth) and that conversation persistence belongs in a
+content (we keep the raw Anthropic-shape message list including
-future Obsidian-sync frontend, not a sessions table.
+``tool_use`` / ``tool_result`` blocks); the live ``claude-code-api``
 session pool stays the source of truth for *fingerprints*, and the DB
 mirrors what we'd want to re-seed if a session evicts. See
 ``core/conversation_store.py`` for the diff-and-fork logic and
 ``frontends/markdown/frontend.py`` for the integration point.
 Datetimes are stored UTC; we set ``default_factory`` rather than relying
 on DB defaults so SQLite + Postgres behave identically. Every row that
@@ -20,6 +24,7 @@ from __future__ import annotations
 from datetime import UTC, datetime
 from sqlalchemy import UniqueConstraint
 from sqlmodel import Field, SQLModel
@@ -66,4 +71,58 @@ class AuditLog(SQLModel, table=True):
    detail_json: str = Field(default="{}")
-__all__ = ["AuditLog", "Token"]
class Conversation(SQLModel, table=True):
    """One chat thread, scoped to a frontend.

    ``external_id`` is the identifier the frontend uses to find this
    thread again on the next request — for the markdown frontend it's a
    uuid we mint and persist into the file's frontmatter, for the
    anthropic frontend it'd be the same metadata.conversation_id the
    client passes. Unique per ``(frontend, external_id)`` because two
    frontends sharing a uuid is fine; the same frontend reusing one is
    a bug.
    """

    __tablename__ = "conversations"
    __table_args__ = (
        UniqueConstraint("frontend", "external_id", name="uq_conv_frontend_extid"),
    )

    # Primary key; ``None`` until the row has been inserted.
    id: int | None = Field(default=None, primary_key=True)
    # Name of the owning frontend (e.g. "markdown").
    frontend: str = Field(index=True)
    # Frontend-chosen lookup key; unique together with ``frontend``.
    external_id: str = Field(index=True)
    # Agent the thread was created for; stored as a plain string.
    agent_name: str = Field(index=True)
    created_at: datetime = Field(default_factory=_utcnow)
    # NOTE(review): only a creation-time default is declared here; nothing
    # in this model bumps the value on update — confirm the store does.
    updated_at: datetime = Field(default_factory=_utcnow)
 class ConversationMessage(SQLModel, table=True):
    """One raw Anthropic-shape message in a conversation's transcript.

    A single user/assistant exchange visible in Obsidian can occupy
    multiple rows when claude ran a tool cycle: ``assistant``
    (tool_use), ``user`` (tool_result), ``assistant`` (final text) all
    live as separate rows with the same conversation_id and monotonic
    ``seq``. ``content_json`` is the canonical Anthropic content payload
    (string or list-of-blocks) — exactly what we'll feed back to the
    backend so its session-pool fingerprint matches.

    ``seq`` is per-conversation 0-based monotonic; the unique
    constraint catches the trivial bug of two writers racing on the
    same conversation.
    """

    __tablename__ = "conversation_messages"
    __table_args__ = (
        UniqueConstraint("conversation_id", "seq", name="uq_msg_conv_seq"),
    )

    # Primary key; ``None`` until the row has been inserted.
    id: int | None = Field(default=None, primary_key=True)
    # Plain integer reference to the owning conversation; indexed, but no
    # foreign-key constraint is declared on it.
    conversation_id: int = Field(index=True)
    # Position within the conversation; unique together with
    # ``conversation_id``.
    seq: int
    # Message role, stored as a plain string.
    role: str
    # Content payload stored as text — presumably JSON-encoded; confirm
    # against the writer in the conversation store.
    content_json: str
    created_at: datetime = Field(default_factory=_utcnow)
 __all__ = ["AuditLog", "Conversation", "ConversationMessage", "Token"]
@@ -287,7 +287,7 @@ local = [
    { name = "raycast-api", version = "0.1.0", source = { editable = "../raycast-api" } },
 ]
 prod = [
-    { name = "claude-code-api", version = "0.1.0", source = { git = "https://git.kotikot.com/beaver/claude-code-api.git#bf6116dc8b7f3708685c5a6e27061859e73eb4c9" } },
+    { name = "claude-code-api", version = "0.1.0", source = { git = "https://git.kotikot.com/beaver/claude-code-api.git#1f20cef7d49d290f2b620ebb8a7aca92cdbd0e2a" } },
    { name = "raycast-api", version = "0.1.0", source = { git = "https://git.kotikot.com/beaver/raycast-api.git#e73894c8e435da5c0709f92f69f11bcd0dab9afe" } },
 ]
@@ -419,7 +419,7 @@ wheels = [
 [[package]]
 name = "claude-code-api"
 version = "0.1.0"
-source = { git = "https://git.kotikot.com/beaver/claude-code-api.git#bf6116dc8b7f3708685c5a6e27061859e73eb4c9" }
+source = { git = "https://git.kotikot.com/beaver/claude-code-api.git#1f20cef7d49d290f2b620ebb8a7aca92cdbd0e2a" }
 resolution-markers = [
    "python_full_version >= '3.14'",
    "python_full_version < '3.14'",