feat(*): first mvp
This commit is contained in:
21
backend/src/bot/modules/ai/__init__.py
Normal file
21
backend/src/bot/modules/ai/__init__.py
Normal file
@@ -0,0 +1,21 @@
|
||||
"""Public API of the AI module: agent factories, streaming helpers, and prompt presets."""

from .agent import (
    ImageData,
    StreamCallback,
    create_follow_up_agent,
    create_text_agent,
    get_follow_ups,
    stream_response,
)
from .prompts import DEFAULT_FOLLOW_UP, PRESETS, SUMMARIZE_PROMPT

# Explicit export list: uppercase prompt constants first, then types and
# functions, each group alphabetized.
__all__ = [
    "DEFAULT_FOLLOW_UP",
    "PRESETS",
    "SUMMARIZE_PROMPT",
    "ImageData",
    "StreamCallback",
    "create_follow_up_agent",
    "create_text_agent",
    "get_follow_ups",
    "stream_response",
]
|
||||
115
backend/src/bot/modules/ai/agent.py
Normal file
115
backend/src/bot/modules/ai/agent.py
Normal file
@@ -0,0 +1,115 @@
|
||||
from collections.abc import Awaitable, Callable
|
||||
from dataclasses import dataclass
|
||||
|
||||
from pydantic_ai import (
|
||||
Agent,
|
||||
BinaryContent,
|
||||
ModelMessage,
|
||||
ModelRequest,
|
||||
ModelResponse,
|
||||
TextPart,
|
||||
UserPromptPart,
|
||||
)
|
||||
from pydantic_ai.models.google import GoogleModel
|
||||
from pydantic_ai.providers.google import GoogleProvider
|
||||
|
||||
from .models import FollowUpOptions
|
||||
from .prompts import DEFAULT_FOLLOW_UP
|
||||
|
||||
# Async callback awaited with streamed text as it arrives (see stream_response).
StreamCallback = Callable[[str], Awaitable[None]]
|
||||
|
||||
|
||||
@dataclass
class ImageData:
    """An image attachment: raw bytes plus the MIME type describing them."""

    # Raw image bytes, forwarded verbatim as BinaryContent data.
    data: bytes
    # MIME type of the bytes (presumably e.g. "image/png"); passed through as
    # BinaryContent media_type.
    media_type: str
|
||||
|
||||
|
||||
# Appended to every text agent's system prompt so math output uses LaTeX
# delimiters the client can render.
LATEX_INSTRUCTION = "For math, use LaTeX: $...$ inline, $$...$$ display."

# Fallback system prompt used when the caller does not supply one.
DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful AI assistant. Provide clear, concise answers."
)
|
||||
|
||||
|
||||
def create_text_agent(
    api_key: str,
    model_name: str = "gemini-3-pro-preview",
    system_prompt: str | None = None,
) -> Agent[None, str]:
    """Build a plain-text agent backed by a Google model.

    Args:
        api_key: Google API key used to authenticate the provider.
        model_name: Name of the Google model to run.
        system_prompt: Custom system prompt; when omitted the default
            assistant prompt is used.

    Returns:
        An agent producing text answers, with the LaTeX formatting
        instruction appended to its system prompt.
    """
    google_model = GoogleModel(
        model_name, provider=GoogleProvider(api_key=api_key)
    )
    effective_prompt = " ".join(
        (system_prompt or DEFAULT_SYSTEM_PROMPT, LATEX_INSTRUCTION)
    )
    return Agent(google_model, system_prompt=effective_prompt)
|
||||
|
||||
|
||||
def create_follow_up_agent(
    api_key: str,
    model_name: str = "gemini-2.5-flash-lite",
    system_prompt: str | None = None,
) -> Agent[None, FollowUpOptions]:
    """Build an agent that suggests follow-up options for a conversation.

    Args:
        api_key: Google API key used to authenticate the provider.
        model_name: Name of the Google model to run.
        system_prompt: Custom system prompt; when omitted the default
            follow-up prompt is used.

    Returns:
        An agent whose structured output is a FollowUpOptions dict.
    """
    google_model = GoogleModel(
        model_name, provider=GoogleProvider(api_key=api_key)
    )
    return Agent(
        google_model,
        output_type=FollowUpOptions,
        system_prompt=system_prompt or DEFAULT_FOLLOW_UP,
    )
|
||||
|
||||
|
||||
def build_message_history(history: list[dict[str, str]]) -> list[ModelMessage]:
    """Convert role/content dicts into pydantic-ai model messages.

    Entries whose role is "user" become ModelRequest user prompts; every
    other role is treated as a model (assistant) response.
    """

    def to_message(entry: dict[str, str]) -> ModelMessage:
        content = entry["content"]
        if entry["role"] == "user":
            return ModelRequest(parts=[UserPromptPart(content=content)])
        return ModelResponse(parts=[TextPart(content=content)])

    return [to_message(entry) for entry in history]
|
||||
|
||||
|
||||
async def stream_response(  # noqa: PLR0913
    text_agent: Agent[None, str],
    message: str,
    history: list[dict[str, str]] | None = None,
    on_chunk: StreamCallback | None = None,
    image: ImageData | None = None,
    images: list[ImageData] | None = None,
) -> str:
    """Run the text agent on a message, streaming partial text to on_chunk.

    Args:
        text_agent: Agent used to generate the reply.
        message: The user's message text.
        history: Prior turns as {"role", "content"} dicts, converted to
            model messages when non-empty.
        on_chunk: Optional async callback awaited for each streamed text
            update.
        image: A single image attachment; ignored when images is given.
        images: Image attachments; takes precedence over image.

    Returns:
        The complete generated reply text.
    """
    message_history = build_message_history(history) if history else None

    # A non-empty list in `images` wins; otherwise fall back to `image`.
    attachments = images if images else ([image] if image else [])

    prompt: str | list[str | BinaryContent]
    if attachments:
        prompt = [message]
        prompt += [
            BinaryContent(data=item.data, media_type=item.media_type)
            for item in attachments
        ]
    else:
        prompt = message

    async with text_agent.run_stream(
        prompt, message_history=message_history
    ) as result:
        async for text in result.stream_text():
            if on_chunk is not None:
                await on_chunk(text)
        return await result.get_output()
|
||||
|
||||
|
||||
async def get_follow_ups(
    follow_up_agent: Agent[None, FollowUpOptions],
    history: list[dict[str, str]],
    image: ImageData | None = None,
    images: list[ImageData] | None = None,
) -> list[str]:
    """Ask the follow-up agent for suggested next questions.

    Args:
        follow_up_agent: Agent whose structured output is FollowUpOptions.
        history: Conversation turns as {"role", "content"} dicts, converted
            to model messages when non-empty.
        image: A single image to consider; ignored when images is given.
        images: Images to consider; takes precedence over image. Added for
            consistency with stream_response, which already accepts both.

    Returns:
        The suggested follow-up strings.
    """
    message_history = build_message_history(history) if history else None

    # Same precedence rule as stream_response: a non-empty `images` list
    # wins, otherwise fall back to the single `image`.
    all_images = images or ([image] if image else [])

    prompt: str | list[str | BinaryContent]
    if all_images:
        prompt = [
            "Suggest follow-up options based on this conversation and image.",
            *(
                BinaryContent(data=img.data, media_type=img.media_type)
                for img in all_images
            ),
        ]
    else:
        prompt = "Suggest follow-up questions based on this conversation."

    result = await follow_up_agent.run(prompt, message_history=message_history)
    return result.output["options"]
|
||||
10
backend/src/bot/modules/ai/models.py
Normal file
10
backend/src/bot/modules/ai/models.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from typing import TypedDict
|
||||
|
||||
|
||||
class AIResponse(TypedDict):
    """A complete AI reply: the answer text plus suggested follow-ups."""

    # The generated answer text.
    answer: str
    # Short follow-up question suggestions for the user.
    follow_up_options: list[str]
|
||||
|
||||
|
||||
class FollowUpOptions(TypedDict):
    """Structured output of the follow-up agent: a list of option strings."""

    # Suggested follow-up options (questions or problem identifiers).
    options: list[str]
|
||||
37
backend/src/bot/modules/ai/prompts.py
Normal file
37
backend/src/bot/modules/ai/prompts.py
Normal file
@@ -0,0 +1,37 @@
|
||||
# System prompt for the "exam" preset: hint mode for uploaded problem images,
# full-solution mode when a specific problem is requested. The wording is
# runtime behavior — keep edits deliberate.
EXAM_SYSTEM = """You help solve problem sets and exams.

When you receive an IMAGE with problems:
- Give HINTS in Russian for each problem
- Focus on key insights and potential difficulties,
give all formulas that will be helpful
- Be quite concise, but include all needed hints - this will be viewed on Apple Watch
- Format: info needed to solve each problem or "unstuck" while solving

When asked for DETAILS on a specific problem (or a problem number):
- Provide full structured solution in English
- Academic style, as it would be written in a notebook
- Step by step, clean, no fluff"""

# Follow-up prompt for the "exam" preset: enumerate the problem identifiers
# visible in the image, then optional conversation follow-ups.
EXAM_FOLLOW_UP = """You see a problem set image. List available problem numbers.
Output only the numbers that exist in the image, like: 1, 2, 3, 4, 5
If problems have letters (a, b, c), list them as: 1a, 1b, 2a, etc.
Keep it minimal - just the identifiers.
Then, if applicable, output some possible followups of conversation"""

# Default follow-up system prompt used when no preset overrides it.
DEFAULT_FOLLOW_UP = (
    "Based on the conversation, suggest 3 short follow-up questions "
    "the user might want to ask. Be concise, each under 50 chars."
)

# Prompt for condensing a session into a very short, watch-sized summary.
SUMMARIZE_PROMPT = """You are summarize agent. You may receive:
1. Images
2. Conversation history showing what was discussed/solved

Summarize VERY briefly:
- Which problems were solved
- Key results or answers found
- What's left to do

Max 2-3 sentences. This is for Apple Watch display."""

# Preset name -> (system prompt, follow-up prompt) pairs.
PRESETS: dict[str, tuple[str, str]] = {"exam": (EXAM_SYSTEM, EXAM_FOLLOW_UP)}
|
||||
Reference in New Issue
Block a user