fix(*): images do work

2026-01-21 02:33:01 +01:00
parent 592aa5bc6b
commit ae9013536b
6 changed files with 85 additions and 113 deletions
@@ -1,4 +1,5 @@
 import asyncio
+import base64
 import contextlib
 import io
 import time
@@ -26,6 +27,14 @@ EDIT_THROTTLE_SECONDS = 1.0
 TELEGRAM_MAX_LENGTH = 4096


+async def fetch_chat_images(chat_id: str) -> list[ImageData]:
+    chat_images = await convex.query("messages:getChatImages", {"chatId": chat_id})
+    return [
+        ImageData(data=base64.b64decode(img["base64"]), media_type=img["mediaType"])
+        for img in (chat_images or [])
+    ]
+
+
 def make_follow_up_keyboard(options: list[str]) -> ReplyKeyboardMarkup:
    buttons = [[KeyboardButton(text=opt)] for opt in options]
    return ReplyKeyboardMarkup(
@@ -114,7 +123,7 @@ async def send_long_message(
        )


-async def process_message_from_web(  # noqa: C901, PLR0915
+async def process_message_from_web(  # noqa: C901, PLR0912, PLR0915
    convex_user_id: str, text: str, bot: Bot, convex_chat_id: str
 ) -> None:
    user = await convex.query("users:getById", {"userId": convex_user_id})
@@ -178,7 +187,11 @@ async def process_message_from_web(  # noqa: C901, PLR0915
            prompt_text = text
            hist = history[:-1]

-        final_answer = await stream_response(text_agent, prompt_text, hist, on_chunk)
+        chat_images = await fetch_chat_images(convex_chat_id)
+
+        final_answer = await stream_response(
+            text_agent, prompt_text, hist, on_chunk, images=chat_images
+        )

        if state:
            await state.flush()
@@ -204,6 +217,21 @@ async def process_message_from_web(  # noqa: C901, PLR0915
            },
        )

+        if is_summarize:
+            await convex.mutation(
+                "chats:clear", {"chatId": convex_chat_id, "preserveImages": True}
+            )
+            await convex.mutation(
+                "messages:create",
+                {
+                    "chatId": convex_chat_id,
+                    "role": "assistant",
+                    "content": final_answer,
+                    "source": "web",
+                    "followUpOptions": follow_ups,
+                },
+            )
+
        if tg_chat_id and processing_msg:
            with contextlib.suppress(Exception):
                await processing_msg.delete()
@@ -229,7 +257,7 @@ async def process_message_from_web(  # noqa: C901, PLR0915


 async def process_message(
-    user_id: int, text: str, bot: Bot, chat_id: int, image: ImageData | None = None
+    user_id: int, text: str, bot: Bot, chat_id: int, *, skip_user_message: bool = False
 ) -> None:
    user = await convex.query(
        "users:getByTelegramId", {"telegramId": ConvexInt64(user_id)}
@@ -251,15 +279,16 @@ async def process_message(
    api_key = user["geminiApiKey"]
    model_name = user.get("model", "gemini-3-pro-preview")

-    await convex.mutation(
-        "messages:create",
-        {
-            "chatId": active_chat_id,
-            "role": "user",
-            "content": text,
-            "source": "telegram",
-        },
-    )
+    if not skip_user_message:
+        await convex.mutation(
+            "messages:create",
+            {
+                "chatId": active_chat_id,
+                "role": "user",
+                "content": text,
+                "source": "telegram",
+            },
+        )

    assistant_message_id = await convex.mutation(
        "messages:create",
@@ -293,8 +322,10 @@ async def process_message(
                {"messageId": assistant_message_id, "content": content},
            )

+        chat_images = await fetch_chat_images(active_chat_id)
+
        final_answer = await stream_response(
-            text_agent, text, history[:-2], on_chunk, image=image
+            text_agent, text, history[:-2], on_chunk, images=chat_images
        )

        await state.flush()
@@ -305,7 +336,7 @@ async def process_message(
        follow_up_agent = create_follow_up_agent(
            api_key=api_key, model_name=follow_up_model, system_prompt=follow_up_prompt
        )
-        follow_ups = await get_follow_ups(follow_up_agent, full_history, image=image)
+        follow_ups = await get_follow_ups(follow_up_agent, full_history)

        await state.stop_typing()

@@ -380,6 +411,14 @@ async def on_photo_message(message: types.Message, bot: Bot) -> None:
        },
    )

+    user = await convex.query(
+        "users:getByTelegramId", {"telegramId": ConvexInt64(message.from_user.id)}
+    )
+
+    if not user or not user.get("activeChatId"):
+        await message.answer("Use /new first to create a chat.")
+        return
+
    caption = message.caption or "Process the image according to your task"
    photo = message.photo[-1]

@@ -391,11 +430,24 @@ async def on_photo_message(message: types.Message, bot: Bot) -> None:
    buffer = io.BytesIO()
    await bot.download_file(file.file_path, buffer)
    image_bytes = buffer.getvalue()
+    image_base64 = base64.b64encode(image_bytes).decode()

    ext = file.file_path.rsplit(".", 1)[-1].lower()
    media_type = f"image/{ext}" if ext in ("png", "gif", "webp") else "image/jpeg"
-    image = ImageData(data=image_bytes, media_type=media_type)
+
+    active_chat_id = user["activeChatId"]
+    await convex.mutation(
+        "messages:create",
+        {
+            "chatId": active_chat_id,
+            "role": "user",
+            "content": caption,
+            "source": "telegram",
+            "imageBase64": image_base64,
+            "imageMediaType": media_type,
+        },
+    )

    await process_message(
-        message.from_user.id, caption, bot, message.chat.id, image=image
+        message.from_user.id, caption, bot, message.chat.id, skip_user_message=True
    )