fix(*): images do work

This commit is contained in:
h
2026-01-21 02:33:01 +01:00
parent 69ddb3173f
commit 6ccc06f90f
6 changed files with 85 additions and 113 deletions

View File

@@ -1,4 +1,5 @@
import asyncio import asyncio
import base64
import contextlib import contextlib
import io import io
import time import time
@@ -26,6 +27,14 @@ EDIT_THROTTLE_SECONDS = 1.0
TELEGRAM_MAX_LENGTH = 4096 TELEGRAM_MAX_LENGTH = 4096
async def fetch_chat_images(chat_id: str) -> list[ImageData]:
    """Load the images stored on a chat and decode them into ``ImageData``.

    Runs the ``messages:getChatImages`` Convex query for ``chat_id`` and
    base64-decodes the ``base64`` field of each returned record, pairing it
    with the record's ``mediaType``. A falsy query result (e.g. ``None``)
    produces an empty list.
    """
    records = await convex.query("messages:getChatImages", {"chatId": chat_id})

    images: list[ImageData] = []
    for record in records or []:
        raw_bytes = base64.b64decode(record["base64"])
        images.append(ImageData(data=raw_bytes, media_type=record["mediaType"]))
    return images
def make_follow_up_keyboard(options: list[str]) -> ReplyKeyboardMarkup: def make_follow_up_keyboard(options: list[str]) -> ReplyKeyboardMarkup:
buttons = [[KeyboardButton(text=opt)] for opt in options] buttons = [[KeyboardButton(text=opt)] for opt in options]
return ReplyKeyboardMarkup( return ReplyKeyboardMarkup(
@@ -114,7 +123,7 @@ async def send_long_message(
) )
async def process_message_from_web( # noqa: C901, PLR0915 async def process_message_from_web( # noqa: C901, PLR0912, PLR0915
convex_user_id: str, text: str, bot: Bot, convex_chat_id: str convex_user_id: str, text: str, bot: Bot, convex_chat_id: str
) -> None: ) -> None:
user = await convex.query("users:getById", {"userId": convex_user_id}) user = await convex.query("users:getById", {"userId": convex_user_id})
@@ -178,7 +187,11 @@ async def process_message_from_web( # noqa: C901, PLR0915
prompt_text = text prompt_text = text
hist = history[:-1] hist = history[:-1]
final_answer = await stream_response(text_agent, prompt_text, hist, on_chunk) chat_images = await fetch_chat_images(convex_chat_id)
final_answer = await stream_response(
text_agent, prompt_text, hist, on_chunk, images=chat_images
)
if state: if state:
await state.flush() await state.flush()
@@ -204,6 +217,21 @@ async def process_message_from_web( # noqa: C901, PLR0915
}, },
) )
if is_summarize:
await convex.mutation(
"chats:clear", {"chatId": convex_chat_id, "preserveImages": True}
)
await convex.mutation(
"messages:create",
{
"chatId": convex_chat_id,
"role": "assistant",
"content": final_answer,
"source": "web",
"followUpOptions": follow_ups,
},
)
if tg_chat_id and processing_msg: if tg_chat_id and processing_msg:
with contextlib.suppress(Exception): with contextlib.suppress(Exception):
await processing_msg.delete() await processing_msg.delete()
@@ -229,7 +257,7 @@ async def process_message_from_web( # noqa: C901, PLR0915
async def process_message( async def process_message(
user_id: int, text: str, bot: Bot, chat_id: int, image: ImageData | None = None user_id: int, text: str, bot: Bot, chat_id: int, *, skip_user_message: bool = False
) -> None: ) -> None:
user = await convex.query( user = await convex.query(
"users:getByTelegramId", {"telegramId": ConvexInt64(user_id)} "users:getByTelegramId", {"telegramId": ConvexInt64(user_id)}
@@ -251,15 +279,16 @@ async def process_message(
api_key = user["geminiApiKey"] api_key = user["geminiApiKey"]
model_name = user.get("model", "gemini-3-pro-preview") model_name = user.get("model", "gemini-3-pro-preview")
await convex.mutation( if not skip_user_message:
"messages:create", await convex.mutation(
{ "messages:create",
"chatId": active_chat_id, {
"role": "user", "chatId": active_chat_id,
"content": text, "role": "user",
"source": "telegram", "content": text,
}, "source": "telegram",
) },
)
assistant_message_id = await convex.mutation( assistant_message_id = await convex.mutation(
"messages:create", "messages:create",
@@ -293,8 +322,10 @@ async def process_message(
{"messageId": assistant_message_id, "content": content}, {"messageId": assistant_message_id, "content": content},
) )
chat_images = await fetch_chat_images(active_chat_id)
final_answer = await stream_response( final_answer = await stream_response(
text_agent, text, history[:-2], on_chunk, image=image text_agent, text, history[:-2], on_chunk, images=chat_images
) )
await state.flush() await state.flush()
@@ -305,7 +336,7 @@ async def process_message(
follow_up_agent = create_follow_up_agent( follow_up_agent = create_follow_up_agent(
api_key=api_key, model_name=follow_up_model, system_prompt=follow_up_prompt api_key=api_key, model_name=follow_up_model, system_prompt=follow_up_prompt
) )
follow_ups = await get_follow_ups(follow_up_agent, full_history, image=image) follow_ups = await get_follow_ups(follow_up_agent, full_history)
await state.stop_typing() await state.stop_typing()
@@ -380,6 +411,14 @@ async def on_photo_message(message: types.Message, bot: Bot) -> None:
}, },
) )
user = await convex.query(
"users:getByTelegramId", {"telegramId": ConvexInt64(message.from_user.id)}
)
if not user or not user.get("activeChatId"):
await message.answer("Use /new first to create a chat.")
return
caption = message.caption or "Process the image according to your task" caption = message.caption or "Process the image according to your task"
photo = message.photo[-1] photo = message.photo[-1]
@@ -391,11 +430,24 @@ async def on_photo_message(message: types.Message, bot: Bot) -> None:
buffer = io.BytesIO() buffer = io.BytesIO()
await bot.download_file(file.file_path, buffer) await bot.download_file(file.file_path, buffer)
image_bytes = buffer.getvalue() image_bytes = buffer.getvalue()
image_base64 = base64.b64encode(image_bytes).decode()
ext = file.file_path.rsplit(".", 1)[-1].lower() ext = file.file_path.rsplit(".", 1)[-1].lower()
media_type = f"image/{ext}" if ext in ("png", "gif", "webp") else "image/jpeg" media_type = f"image/{ext}" if ext in ("png", "gif", "webp") else "image/jpeg"
image = ImageData(data=image_bytes, media_type=media_type)
active_chat_id = user["activeChatId"]
await convex.mutation(
"messages:create",
{
"chatId": active_chat_id,
"role": "user",
"content": caption,
"source": "telegram",
"imageBase64": image_base64,
"imageMediaType": media_type,
},
)
await process_message( await process_message(
message.from_user.id, caption, bot, message.chat.id, image=image message.from_user.id, caption, bot, message.chat.id, skip_user_message=True
) )

View File

@@ -9,7 +9,6 @@
*/ */
import type * as chats from "../chats.js"; import type * as chats from "../chats.js";
import type * as http from "../http.js";
import type * as messages from "../messages.js"; import type * as messages from "../messages.js";
import type * as pendingGenerations from "../pendingGenerations.js"; import type * as pendingGenerations from "../pendingGenerations.js";
import type * as users from "../users.js"; import type * as users from "../users.js";
@@ -22,7 +21,6 @@ import type {
declare const fullApi: ApiFromModules<{ declare const fullApi: ApiFromModules<{
chats: typeof chats; chats: typeof chats;
http: typeof http;
messages: typeof messages; messages: typeof messages;
pendingGenerations: typeof pendingGenerations; pendingGenerations: typeof pendingGenerations;
users: typeof users; users: typeof users;

View File

@@ -43,7 +43,7 @@ export const clear = mutation({
.collect(); .collect();
for (const message of messages) { for (const message of messages) {
if (args.preserveImages && message.imageStorageId) { if (args.preserveImages && message.imageBase64) {
continue; continue;
} }
await ctx.db.delete(message._id); await ctx.db.delete(message._id);

View File

@@ -1,40 +0,0 @@
import { httpRouter } from 'convex/server';
import { httpAction } from './_generated/server';
import { internal } from './_generated/api';
import type { Id } from './_generated/dataModel';
// HTTP router for this Convex deployment; exported as the module default below.
const http = httpRouter();
// POST /upload-image
// Stores the request body as a blob in Convex file storage and creates a
// message that references it via `internal.messages.createWithImage`.
// Inputs are read from request headers:
//   X-Chat-Id    - required; target chat id
//   Content-Type - media type recorded for the image (falls back to 'image/jpeg')
//   X-Caption    - optional message content (falls back to '')
// Responds with JSON: `{ storageId }` (200) or `{ error }` (400).
http.route({
path: '/upload-image',
method: 'POST',
handler: httpAction(async (ctx, req) => {
const chatId = req.headers.get('X-Chat-Id');
const mediaType = req.headers.get('Content-Type') || 'image/jpeg';
const caption = req.headers.get('X-Caption') || '';
// Reject early, before anything is written to storage, when no chat is named.
if (!chatId) {
return new Response(JSON.stringify({ error: 'Missing X-Chat-Id header' }), {
status: 400,
headers: { 'Content-Type': 'application/json' }
});
}
// Persist the raw request body; the returned storage id is what the message stores.
const blob = await req.blob();
const storageId = await ctx.storage.store(blob);
// NOTE(review): `chatId as Id<'chats'>` is a compile-time cast only; the header
// value is not checked here. Presumably the mutation's argument validator
// rejects malformed ids - confirm.
await ctx.runMutation(internal.messages.createWithImage, {
chatId: chatId as Id<'chats'>,
content: caption,
imageStorageId: storageId,
imageMediaType: mediaType
});
// Return the storage id to the caller.
return new Response(JSON.stringify({ storageId }), {
status: 200,
headers: { 'Content-Type': 'application/json' }
});
})
});
export default http;

View File

@@ -1,5 +1,5 @@
import { v } from 'convex/values'; import { v } from 'convex/values';
import { internalMutation, mutation, query } from './_generated/server'; import { mutation, query } from './_generated/server';
export const listByChat = query({ export const listByChat = query({
args: { chatId: v.id('chats') }, args: { chatId: v.id('chats') },
@@ -10,7 +10,7 @@ export const listByChat = query({
chatId: v.id('chats'), chatId: v.id('chats'),
role: v.union(v.literal('user'), v.literal('assistant')), role: v.union(v.literal('user'), v.literal('assistant')),
content: v.string(), content: v.string(),
imageStorageId: v.optional(v.id('_storage')), imageBase64: v.optional(v.string()),
imageMediaType: v.optional(v.string()), imageMediaType: v.optional(v.string()),
followUpOptions: v.optional(v.array(v.string())), followUpOptions: v.optional(v.array(v.string())),
source: v.union(v.literal('telegram'), v.literal('web')), source: v.union(v.literal('telegram'), v.literal('web')),
@@ -33,7 +33,7 @@ export const create = mutation({
role: v.union(v.literal('user'), v.literal('assistant')), role: v.union(v.literal('user'), v.literal('assistant')),
content: v.string(), content: v.string(),
source: v.union(v.literal('telegram'), v.literal('web')), source: v.union(v.literal('telegram'), v.literal('web')),
imageStorageId: v.optional(v.id('_storage')), imageBase64: v.optional(v.string()),
imageMediaType: v.optional(v.string()), imageMediaType: v.optional(v.string()),
followUpOptions: v.optional(v.array(v.string())), followUpOptions: v.optional(v.array(v.string())),
isStreaming: v.optional(v.boolean()) isStreaming: v.optional(v.boolean())
@@ -45,7 +45,7 @@ export const create = mutation({
role: args.role, role: args.role,
content: args.content, content: args.content,
source: args.source, source: args.source,
imageStorageId: args.imageStorageId, imageBase64: args.imageBase64,
imageMediaType: args.imageMediaType, imageMediaType: args.imageMediaType,
followUpOptions: args.followUpOptions, followUpOptions: args.followUpOptions,
createdAt: Date.now(), createdAt: Date.now(),
@@ -132,7 +132,7 @@ export const getLastAssistantMessage = query({
chatId: v.id('chats'), chatId: v.id('chats'),
role: v.union(v.literal('user'), v.literal('assistant')), role: v.union(v.literal('user'), v.literal('assistant')),
content: v.string(), content: v.string(),
imageStorageId: v.optional(v.id('_storage')), imageBase64: v.optional(v.string()),
imageMediaType: v.optional(v.string()), imageMediaType: v.optional(v.string()),
followUpOptions: v.optional(v.array(v.string())), followUpOptions: v.optional(v.array(v.string())),
source: v.union(v.literal('telegram'), v.literal('web')), source: v.union(v.literal('telegram'), v.literal('web')),
@@ -152,21 +152,12 @@ export const getLastAssistantMessage = query({
} }
}); });
export const generateUploadUrl = mutation({ export const getChatImages = query({
args: {},
returns: v.string(),
handler: async (ctx) => {
return await ctx.storage.generateUploadUrl();
}
});
export const getImageUrls = query({
args: { chatId: v.id('chats') }, args: { chatId: v.id('chats') },
returns: v.array( returns: v.array(
v.object({ v.object({
storageId: v.id('_storage'), base64: v.string(),
mediaType: v.string(), mediaType: v.string()
url: v.union(v.string(), v.null())
}) })
), ),
handler: async (ctx, args) => { handler: async (ctx, args) => {
@@ -175,41 +166,11 @@ export const getImageUrls = query({
.withIndex('by_chat_id', (q) => q.eq('chatId', args.chatId)) .withIndex('by_chat_id', (q) => q.eq('chatId', args.chatId))
.collect(); .collect();
const imageMessages = messages.filter((m) => m.imageStorageId && m.imageMediaType); return messages
const results = []; .filter((m) => m.imageBase64 && m.imageMediaType)
.map((m) => ({
for (const msg of imageMessages) { base64: m.imageBase64!,
if (msg.imageStorageId && msg.imageMediaType) { mediaType: m.imageMediaType!
const url = await ctx.storage.getUrl(msg.imageStorageId); }));
results.push({
storageId: msg.imageStorageId,
mediaType: msg.imageMediaType,
url
});
}
}
return results;
}
});
export const createWithImage = internalMutation({
args: {
chatId: v.id('chats'),
content: v.string(),
imageStorageId: v.id('_storage'),
imageMediaType: v.string()
},
returns: v.id('messages'),
handler: async (ctx, args) => {
return await ctx.db.insert('messages', {
chatId: args.chatId,
role: 'user' as const,
content: args.content,
source: 'telegram' as const,
imageStorageId: args.imageStorageId,
imageMediaType: args.imageMediaType,
createdAt: Date.now()
});
} }
}); });

View File

@@ -23,8 +23,9 @@ export default defineSchema({
chatId: v.id('chats'), chatId: v.id('chats'),
role: v.union(v.literal('user'), v.literal('assistant')), role: v.union(v.literal('user'), v.literal('assistant')),
content: v.string(), content: v.string(),
imageStorageId: v.optional(v.id('_storage')), imageBase64: v.optional(v.string()),
imageMediaType: v.optional(v.string()), imageMediaType: v.optional(v.string()),
imageStorageId: v.optional(v.id('_storage')),
followUpOptions: v.optional(v.array(v.string())), followUpOptions: v.optional(v.array(v.string())),
source: v.union(v.literal('telegram'), v.literal('web')), source: v.union(v.literal('telegram'), v.literal('web')),
createdAt: v.number(), createdAt: v.number(),