diff --git a/backend/src/bot/handlers/message/handler.py b/backend/src/bot/handlers/message/handler.py index 9ab5719..9558093 100644 --- a/backend/src/bot/handlers/message/handler.py +++ b/backend/src/bot/handlers/message/handler.py @@ -1,4 +1,5 @@ import asyncio +import base64 import contextlib import io import time @@ -26,6 +27,14 @@ EDIT_THROTTLE_SECONDS = 1.0 TELEGRAM_MAX_LENGTH = 4096 +async def fetch_chat_images(chat_id: str) -> list[ImageData]: + chat_images = await convex.query("messages:getChatImages", {"chatId": chat_id}) + return [ + ImageData(data=base64.b64decode(img["base64"]), media_type=img["mediaType"]) + for img in (chat_images or []) + ] + + def make_follow_up_keyboard(options: list[str]) -> ReplyKeyboardMarkup: buttons = [[KeyboardButton(text=opt)] for opt in options] return ReplyKeyboardMarkup( @@ -114,7 +123,7 @@ async def send_long_message( ) -async def process_message_from_web( # noqa: C901, PLR0915 +async def process_message_from_web( # noqa: C901, PLR0912, PLR0915 convex_user_id: str, text: str, bot: Bot, convex_chat_id: str ) -> None: user = await convex.query("users:getById", {"userId": convex_user_id}) @@ -178,7 +187,11 @@ async def process_message_from_web( # noqa: C901, PLR0915 prompt_text = text hist = history[:-1] - final_answer = await stream_response(text_agent, prompt_text, hist, on_chunk) + chat_images = await fetch_chat_images(convex_chat_id) + + final_answer = await stream_response( + text_agent, prompt_text, hist, on_chunk, images=chat_images + ) if state: await state.flush() @@ -204,6 +217,21 @@ async def process_message_from_web( # noqa: C901, PLR0915 }, ) + if is_summarize: + await convex.mutation( + "chats:clear", {"chatId": convex_chat_id, "preserveImages": True} + ) + await convex.mutation( + "messages:create", + { + "chatId": convex_chat_id, + "role": "assistant", + "content": final_answer, + "source": "web", + "followUpOptions": follow_ups, + }, + ) + if tg_chat_id and processing_msg: with contextlib.suppress(Exception): await processing_msg.delete() @@ -229,7 +257,7 @@ async def process_message_from_web( # noqa: C901, PLR0915 async def process_message( - user_id: int, text: str, bot: Bot, chat_id: int, image: ImageData | None = None + user_id: int, text: str, bot: Bot, chat_id: int, *, skip_user_message: bool = False ) -> None: user = await convex.query( "users:getByTelegramId", {"telegramId": ConvexInt64(user_id)} @@ -251,15 +279,16 @@ async def process_message( api_key = user["geminiApiKey"] model_name = user.get("model", "gemini-3-pro-preview") - await convex.mutation( - "messages:create", - { - "chatId": active_chat_id, - "role": "user", - "content": text, - "source": "telegram", - }, - ) + if not skip_user_message: + await convex.mutation( + "messages:create", + { + "chatId": active_chat_id, + "role": "user", + "content": text, + "source": "telegram", + }, + ) assistant_message_id = await convex.mutation( "messages:create", @@ -293,8 +322,10 @@ async def process_message( {"messageId": assistant_message_id, "content": content}, ) + chat_images = await fetch_chat_images(active_chat_id) + final_answer = await stream_response( - text_agent, text, history[:-2], on_chunk, image=image + text_agent, text, history[:-2], on_chunk, images=chat_images ) await state.flush() @@ -305,7 +336,7 @@ async def process_message( follow_up_agent = create_follow_up_agent( api_key=api_key, model_name=follow_up_model, system_prompt=follow_up_prompt ) - follow_ups = await get_follow_ups(follow_up_agent, full_history, image=image) + follow_ups = await get_follow_ups(follow_up_agent, full_history) await state.stop_typing() @@ -380,6 +411,14 @@ async def on_photo_message(message: types.Message, bot: Bot) -> None: }, ) + user = await convex.query( + "users:getByTelegramId", {"telegramId": ConvexInt64(message.from_user.id)} + ) + + if not user or not user.get("activeChatId"): + await message.answer("Use /new first to create a chat.") + return + caption = message.caption or "Process the image according to your task" photo = message.photo[-1] @@ -391,11 +430,24 @@ async def on_photo_message(message: types.Message, bot: Bot) -> None: buffer = io.BytesIO() await bot.download_file(file.file_path, buffer) image_bytes = buffer.getvalue() + image_base64 = base64.b64encode(image_bytes).decode() ext = file.file_path.rsplit(".", 1)[-1].lower() media_type = f"image/{ext}" if ext in ("png", "gif", "webp") else "image/jpeg" - image = ImageData(data=image_bytes, media_type=media_type) + + active_chat_id = user["activeChatId"] + await convex.mutation( + "messages:create", + { + "chatId": active_chat_id, + "role": "user", + "content": caption, + "source": "telegram", + "imageBase64": image_base64, + "imageMediaType": media_type, + }, + ) await process_message( - message.from_user.id, caption, bot, message.chat.id, image=image + message.from_user.id, caption, bot, message.chat.id, skip_user_message=True ) diff --git a/frontend/src/lib/convex/_generated/api.d.ts b/frontend/src/lib/convex/_generated/api.d.ts index f223d27..1f51849 100644 --- a/frontend/src/lib/convex/_generated/api.d.ts +++ b/frontend/src/lib/convex/_generated/api.d.ts @@ -9,7 +9,6 @@ */ import type * as chats from "../chats.js"; -import type * as http from "../http.js"; import type * as messages from "../messages.js"; import type * as pendingGenerations from "../pendingGenerations.js"; import type * as users from "../users.js"; @@ -22,7 +21,6 @@ import type { declare const fullApi: ApiFromModules<{ chats: typeof chats; - http: typeof http; messages: typeof messages; pendingGenerations: typeof pendingGenerations; users: typeof users; diff --git a/frontend/src/lib/convex/chats.ts b/frontend/src/lib/convex/chats.ts index f08a543..51430ca 100644 --- a/frontend/src/lib/convex/chats.ts +++ b/frontend/src/lib/convex/chats.ts @@ -43,7 +43,7 @@ export const clear = mutation({ .collect(); for (const message of messages) { - if (args.preserveImages && message.imageStorageId) { + if (args.preserveImages && message.imageBase64) { continue; } await ctx.db.delete(message._id); diff --git a/frontend/src/lib/convex/http.ts b/frontend/src/lib/convex/http.ts deleted file mode 100644 index a5f1fdc..0000000 --- a/frontend/src/lib/convex/http.ts +++ /dev/null @@ -1,40 +0,0 @@ -import { httpRouter } from 'convex/server'; -import { httpAction } from './_generated/server'; -import { internal } from './_generated/api'; -import type { Id } from './_generated/dataModel'; - -const http = httpRouter(); - -http.route({ - path: '/upload-image', - method: 'POST', - handler: httpAction(async (ctx, req) => { - const chatId = req.headers.get('X-Chat-Id'); - const mediaType = req.headers.get('Content-Type') || 'image/jpeg'; - const caption = req.headers.get('X-Caption') || ''; - - if (!chatId) { - return new Response(JSON.stringify({ error: 'Missing X-Chat-Id header' }), { - status: 400, - headers: { 'Content-Type': 'application/json' } - }); - } - - const blob = await req.blob(); - const storageId = await ctx.storage.store(blob); - - await ctx.runMutation(internal.messages.createWithImage, { - chatId: chatId as Id<'chats'>, - content: caption, - imageStorageId: storageId, - imageMediaType: mediaType - }); - - return new Response(JSON.stringify({ storageId }), { - status: 200, - headers: { 'Content-Type': 'application/json' } - }); - }) -}); - -export default http; diff --git a/frontend/src/lib/convex/messages.ts b/frontend/src/lib/convex/messages.ts index 33e31f1..6ab3ac9 100644 --- a/frontend/src/lib/convex/messages.ts +++ b/frontend/src/lib/convex/messages.ts @@ -1,5 +1,5 @@ import { v } from 'convex/values'; -import { internalMutation, mutation, query } from './_generated/server'; +import { mutation, query } from './_generated/server'; export const listByChat = query({ args: { chatId: v.id('chats') }, @@ -10,7 +10,7 @@ export const listByChat = query({ chatId: v.id('chats'), role: v.union(v.literal('user'), v.literal('assistant')), content: v.string(), - imageStorageId: v.optional(v.id('_storage')), + imageBase64: v.optional(v.string()), imageMediaType: v.optional(v.string()), followUpOptions: v.optional(v.array(v.string())), source: v.union(v.literal('telegram'), v.literal('web')), @@ -33,7 +33,7 @@ export const create = mutation({ role: v.union(v.literal('user'), v.literal('assistant')), content: v.string(), source: v.union(v.literal('telegram'), v.literal('web')), - imageStorageId: v.optional(v.id('_storage')), + imageBase64: v.optional(v.string()), imageMediaType: v.optional(v.string()), followUpOptions: v.optional(v.array(v.string())), isStreaming: v.optional(v.boolean()) @@ -45,7 +45,7 @@ export const create = mutation({ role: args.role, content: args.content, source: args.source, - imageStorageId: args.imageStorageId, + imageBase64: args.imageBase64, imageMediaType: args.imageMediaType, followUpOptions: args.followUpOptions, createdAt: Date.now(), @@ -132,7 +132,7 @@ export const getLastAssistantMessage = query({ chatId: v.id('chats'), role: v.union(v.literal('user'), v.literal('assistant')), content: v.string(), - imageStorageId: v.optional(v.id('_storage')), + imageBase64: v.optional(v.string()), imageMediaType: v.optional(v.string()), followUpOptions: v.optional(v.array(v.string())), source: v.union(v.literal('telegram'), v.literal('web')), @@ -152,21 +152,12 @@ export const getLastAssistantMessage = query({ } }); -export const generateUploadUrl = mutation({ - args: {}, - returns: v.string(), - handler: async (ctx) => { - return await ctx.storage.generateUploadUrl(); - } -}); - -export const getImageUrls = query({ +export const getChatImages = query({ args: { chatId: v.id('chats') }, returns: v.array( v.object({ - storageId: v.id('_storage'), - mediaType: v.string(), - url: v.union(v.string(), v.null()) + base64: v.string(), + mediaType: v.string() }) ), handler: async (ctx, args) => { @@ -175,41 +166,11 @@ export const getImageUrls = query({ .withIndex('by_chat_id', (q) => q.eq('chatId', args.chatId)) .collect(); - const imageMessages = messages.filter((m) => m.imageStorageId && m.imageMediaType); - const results = []; - - for (const msg of imageMessages) { - if (msg.imageStorageId && msg.imageMediaType) { - const url = await ctx.storage.getUrl(msg.imageStorageId); - results.push({ - storageId: msg.imageStorageId, - mediaType: msg.imageMediaType, - url - }); - } - } - - return results; - } -}); - -export const createWithImage = internalMutation({ - args: { - chatId: v.id('chats'), - content: v.string(), - imageStorageId: v.id('_storage'), - imageMediaType: v.string() - }, - returns: v.id('messages'), - handler: async (ctx, args) => { - return await ctx.db.insert('messages', { - chatId: args.chatId, - role: 'user' as const, - content: args.content, - source: 'telegram' as const, - imageStorageId: args.imageStorageId, - imageMediaType: args.imageMediaType, - createdAt: Date.now() - }); + return messages + .filter((m) => m.imageBase64 && m.imageMediaType) + .map((m) => ({ + base64: m.imageBase64!, + mediaType: m.imageMediaType! + })); } }); diff --git a/frontend/src/lib/convex/schema.ts b/frontend/src/lib/convex/schema.ts index 29a0a02..64c9114 100644 --- a/frontend/src/lib/convex/schema.ts +++ b/frontend/src/lib/convex/schema.ts @@ -23,8 +23,9 @@ export default defineSchema({ chatId: v.id('chats'), role: v.union(v.literal('user'), v.literal('assistant')), content: v.string(), - imageStorageId: v.optional(v.id('_storage')), + imageBase64: v.optional(v.string()), imageMediaType: v.optional(v.string()), + imageStorageId: v.optional(v.id('_storage')), followUpOptions: v.optional(v.array(v.string())), source: v.union(v.literal('telegram'), v.literal('web')), createdAt: v.number(),