"""Parse a markdown chat file into Anthropic ``MessageParam`` history.

The file format is documented in ``frontends/markdown/__init__.py``:
``### User:`` / ``### Assistant:`` H3 headers split turns, optional
``---`` HRs between turns are visual-only, ``> [!thinking]-`` and
``> [!tool]- `` callouts mark structured assistant content.

For backend consumption we strip thinking and tool_use callouts —
assistant turns become text-only. Rationale: history replay through
claude-code's JSONL injection only needs the *narrated* answer (the
thinking signatures expire and the original tool_results aren't
captured in the renderer's output, so a faithful tool_use round-trip
isn't possible today). The renderer keeps callouts in the file because
they're informational for the human reader; the parser drops them when
shaping the backend's input.
"""

from __future__ import annotations

import re
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

import frontmatter

if TYPE_CHECKING:
    from anthropic.types import MessageParam

__all__ = ["ParsedFile", "last_role", "parse", "resolve_agent"]

# Turn marker — must be exactly ``### User:`` or ``### Assistant:`` on
# its own line. Trailing whitespace tolerated; nothing after the colon
# on the same line (any inline content would mean the user typed
# something that just happens to look like a header, and we'd rather
# misparse than silently fold inline content into a turn).
#
# ``[^\S\n]`` is "whitespace except newline". Plain ``\s`` also matches
# ``\n``, which would let a bare ``###`` line followed by a ``User:``
# line count as a marker. ``\r`` is still accepted so CRLF files parse.
_TURN_RE = re.compile(r"^###[^\S\n]+(User|Assistant):[^\S\n]*$", re.MULTILINE)

# Callout-start lines we strip from assistant turns when extracting
# text. We don't try to parse the contents — for backend input we just
# need to drop the whole quoted block.
_CALLOUT_START_RE = re.compile(r"^>\s+\[!(thinking|tool)\]")

# Fenced-code delimiter: up to three spaces of indent, then a run of at
# least three backticks or tildes. Group 1 is the run, group 2 is the
# rest of the line (info string on an opener, must be blank on a closer).
_FENCE_RE = re.compile(r"^ {0,3}(`{3,}|~{3,})(.*)$")


@dataclass(frozen=True, slots=True)
class ParsedFile:
    """Result of parsing a single chat ``.md``.

    ``metadata`` is the YAML frontmatter as a plain dict (empty if the
    file has none).

    ``messages`` is the conversation history shaped for
    ``Backend.complete`` — assistant turns are text-only.

    ``body`` is the raw markdown content *after* the frontmatter is
    stripped; the renderer needs it when it appends a new assistant turn
    so it can preserve whatever the human typed verbatim (including any
    callouts or HRs they added).
    """

    metadata: dict[str, Any]
    body: str
    messages: list[MessageParam]


def parse(text: str) -> ParsedFile:
    """Parse a chat ``.md`` into ``(metadata, body, messages)``.

    A file with no turn markers but non-empty body is treated as a
    single user turn — the friendly path for "user types into a new file
    and hits send" before any turn markers exist.

    Turns whose text is empty after stripping are omitted from
    ``messages``. User turns lose decorative ``---`` lines; assistant
    turns additionally lose thinking/tool callouts.
    """
    parsed = frontmatter.loads(text)
    metadata = dict(parsed.metadata)
    body = parsed.content
    messages: list[MessageParam] = []

    turns = _split_turns(body)
    if not turns:
        # No markers at all: the whole body is one user message.
        stripped = body.strip()
        if stripped:
            messages.append({"role": "user", "content": stripped})
        return ParsedFile(metadata=metadata, body=body, messages=messages)

    for role, raw in turns:
        if role == "user":
            text_content = _strip_hrs(raw).strip()
            if text_content:
                messages.append({"role": "user", "content": text_content})
        else:
            text_content = _extract_assistant_text(raw)
            if text_content:
                messages.append({"role": "assistant", "content": text_content})

    return ParsedFile(metadata=metadata, body=body, messages=messages)


def last_role(messages: list[MessageParam]) -> str | None:
    """Return ``"user"`` / ``"assistant"`` / ``None`` for an empty list."""
    if not messages:
        return None
    return messages[-1]["role"]


def resolve_agent(
    *, metadata: dict[str, Any], request_override: str | None, default: str | None
) -> str | None:
    """Resolve the agent for this chat.

    Precedence: request body override > frontmatter > frontend default.
    A frontmatter ``agent`` value is only honoured when it is a
    non-empty string. Returns ``None`` if none match — caller responds
    with 400.
    """
    if request_override:
        return request_override
    fm_agent = metadata.get("agent")
    if isinstance(fm_agent, str) and fm_agent:
        return fm_agent
    return default


# ---- internals ---------------------------------------------------------


def _split_turns(body: str) -> list[tuple[str, str]]:
    """Walk turn markers, return ``[(role_lc, raw_body), ...]``.

    Body for each turn is everything between this marker and the next
    (or EOF). Leading marker line itself is dropped. Any text before the
    first marker is not part of any turn. We don't trim whitespace
    here — that's per-role.
    """
    matches = list(_TURN_RE.finditer(body))
    if not matches:
        return []
    out: list[tuple[str, str]] = []
    for i, m in enumerate(matches):
        role = m.group(1).lower()
        start = m.end()
        end = matches[i + 1].start() if i + 1 < len(matches) else len(body)
        out.append((role, body[start:end]))
    return out


def _code_fence_mask(lines: list[str]) -> list[bool]:
    """Flag which lines belong to a fenced code block (fences included).

    A fence opens on a line of three or more backticks or tildes
    (indented at most three spaces) and closes on a later line using the
    same character, at least as many of them, and nothing else but
    whitespace. A backtick run followed by more backticks on the same
    line is inline code, not an opener.

    If a fence is never closed, trailing blank / ``---`` lines are
    released from the mask. That way the separator the renderer puts
    after a turn is still treated as decoration even when the turn's
    text was cut off in the middle of a code block.
    """
    mask: list[bool] = []
    opener: str | None = None
    for line in lines:
        m = _FENCE_RE.match(line)
        if opener is None:
            if m and not (m.group(1)[0] == "`" and "`" in m.group(2)):
                opener = m.group(1)
            mask.append(opener is not None)
            continue
        mask.append(True)
        if (
            m
            and m.group(1)[0] == opener[0]
            and len(m.group(1)) >= len(opener)
            and not m.group(2).strip()
        ):
            opener = None

    if opener is not None:
        tail = len(lines) - 1
        while tail >= 0 and mask[tail] and lines[tail].strip() in ("", "---"):
            mask[tail] = False
            tail -= 1
    return mask


def _strip_hrs(raw: str) -> str:
    """Drop decorative ``---`` separator lines (whole-line HRs only).

    A ``---`` mid-paragraph (rare, but possible) stays. Only lines that
    are *exactly* the HR after optional surrounding whitespace are
    removed — those are the ones the renderer emits between turns.
    Lines inside a fenced code block are never removed, so pasted
    content such as YAML document separators survives intact.
    """
    lines = raw.splitlines()
    in_code = _code_fence_mask(lines)
    kept = [
        ln for ln, fenced in zip(lines, in_code) if fenced or ln.strip() != "---"
    ]
    return "\n".join(kept)


def _extract_assistant_text(raw: str) -> str:
    """Strip thinking/tool callouts from an assistant turn, return spoken text.

    Walks line by line. When we see a callout-start line
    (``> [!thinking]-`` or ``> [!tool]- ...``), we skip the entire
    contiguous quote block: every following line that starts with ``>``
    (after optional indentation). The first line that doesn't — a blank
    line included — ends the callout.

    HR lines (``---``) are dropped and runs of empty lines are folded
    into one, since removing a callout leaves a gap behind. Fenced code
    blocks are passed through untouched: no callout detection, no HR
    removal, no blank-line folding inside them. The result is trimmed.
    """
    lines = raw.splitlines()
    in_code = _code_fence_mask(lines)
    out_lines: list[str] = []
    # True when the last kept line was an empty line outside code; used
    # to fold runs of empty lines down to a single one.
    prev_blank = False
    i = 0
    while i < len(lines):
        line = lines[i]
        if in_code[i]:
            out_lines.append(line)
            prev_blank = False
            i += 1
            continue
        if _CALLOUT_START_RE.match(line):
            # Skip the whole quote block (consecutive lines starting
            # with ``>``). Stop at the first non-``>`` line, leaving it
            # for the next iteration. The start line itself begins with
            # ``>``, so this always advances at least once.
            while i < len(lines) and lines[i].lstrip().startswith(">"):
                i += 1
            continue
        if line.strip() == "---":
            i += 1
            continue
        if line == "":
            if prev_blank:
                i += 1
                continue
            prev_blank = True
        else:
            prev_blank = False
        out_lines.append(line)
        i += 1
    return "\n".join(out_lines).strip()