From bfd16ab02c57798060b6988d9ea930a2cc5dad87 Mon Sep 17 00:00:00 2001 From: h Date: Fri, 29 May 2026 20:45:02 +0200 Subject: [PATCH] feat: add SST --- backend/src/userbot/handlers/messages.py | 8 ++++ .../src/userbot/handlers/raw/dispatcher.py | 10 ++++- .../src/userbot/handlers/raw/read_contents.py | 34 ++++++++++++++ .../src/userbot/handlers/raw/transcribed.py | 22 +++++++++ .../src/userbot/modules/capture/chat_meta.py | 8 ++++ .../userbot/modules/jobs/handlers/__init__.py | 4 +- .../modules/jobs/handlers/transcribe.py | 16 +++++++ backend/src/userbot/modules/stt/__init__.py | 3 ++ backend/src/userbot/modules/stt/gate.py | 23 ++++++++++ backend/src/userbot/modules/stt/repository.py | 45 +++++++++++++++++++ backend/src/userbot/modules/stt/service.py | 27 +++++++++++ 11 files changed, 196 insertions(+), 4 deletions(-) create mode 100644 backend/src/userbot/handlers/raw/read_contents.py create mode 100644 backend/src/userbot/handlers/raw/transcribed.py create mode 100644 backend/src/userbot/modules/jobs/handlers/transcribe.py create mode 100644 backend/src/userbot/modules/stt/__init__.py create mode 100644 backend/src/userbot/modules/stt/gate.py create mode 100644 backend/src/userbot/modules/stt/repository.py create mode 100644 backend/src/userbot/modules/stt/service.py diff --git a/backend/src/userbot/handlers/messages.py b/backend/src/userbot/handlers/messages.py index 879f669..31acee0 100644 --- a/backend/src/userbot/handlers/messages.py +++ b/backend/src/userbot/handlers/messages.py @@ -3,6 +3,8 @@ from pyrogram.types import Message from userbot import PyroClient from userbot.modules.capture import capture_message from userbot.modules.capture.chat_meta import meta_from_chat +from userbot.modules.stt import is_transcribable +from userbot.modules.stt.gate import safe_transcribe @PyroClient.on_message() @@ -15,6 +17,12 @@ async def on_message(client: PyroClient, message: Message) -> None: if not toggles.messages: return await capture_message(client, message, ctx, toggles) + if ( + toggles.stt + and is_transcribable(message) + and (message.outgoing or message.unread_media is False) + ): + await safe_transcribe(client, ctx, meta.chat_id, message.id) handlers = on_message.handlers diff --git a/backend/src/userbot/handlers/raw/dispatcher.py b/backend/src/userbot/handlers/raw/dispatcher.py index 44bf7cd..bd95811 100644 --- a/backend/src/userbot/handlers/raw/dispatcher.py +++ b/backend/src/userbot/handlers/raw/dispatcher.py @@ -3,11 +3,17 @@ from collections.abc import Awaitable, Callable from pyrogram import raw from userbot import PyroClient -from userbot.handlers.raw import contacts, dialog_filters, reactions +from userbot.handlers.raw import ( + contacts, + dialog_filters, + reactions, + read_contents, + transcribed, +) RawHandler = Callable[..., Awaitable[None]] -_MODULES = (contacts, dialog_filters, reactions) +_MODULES = (contacts, dialog_filters, reactions, read_contents, transcribed) _REGISTRY: dict[type, RawHandler] = { update_type: module.handle for module in _MODULES for update_type in module.HANDLES } diff --git a/backend/src/userbot/handlers/raw/read_contents.py b/backend/src/userbot/handlers/raw/read_contents.py new file mode 100644 index 0000000..7a199d5 --- /dev/null +++ b/backend/src/userbot/handlers/raw/read_contents.py @@ -0,0 +1,34 @@ +from pyrogram import raw, utils + +from userbot import PyroClient +from userbot.modules.capture.chat_meta import meta_from_chat_id +from userbot.modules.stt import repository +from userbot.modules.stt.gate import safe_transcribe + +HANDLES = ( + raw.types.UpdateReadMessagesContents, + raw.types.UpdateChannelReadMessagesContents, +) + + +async def handle( + client: PyroClient, update: raw.base.Update, _users: dict, _chats: dict +) -> None: + ctx = client.capture + if ctx is None: + return + if isinstance(update, raw.types.UpdateChannelReadMessagesContents): + chat_id = utils.get_peer_id(raw.types.PeerChannel(channel_id=update.channel_id)) + candidates = await repository.pending_voice_reads( + ctx.pool, ctx.account_id, update.messages, chat_id=chat_id + ) + elif isinstance(update, raw.types.UpdateReadMessagesContents): + candidates = await repository.pending_voice_reads( + ctx.pool, ctx.account_id, update.messages + ) + else: + return + for cand_chat_id, message_id in candidates: + meta = meta_from_chat_id(cand_chat_id, ctx.contacts.ids) + if ctx.resolve(meta).stt: + await safe_transcribe(client, ctx, cand_chat_id, message_id) diff --git a/backend/src/userbot/handlers/raw/transcribed.py b/backend/src/userbot/handlers/raw/transcribed.py new file mode 100644 index 0000000..a2bcaf4 --- /dev/null +++ b/backend/src/userbot/handlers/raw/transcribed.py @@ -0,0 +1,22 @@ +from pyrogram import raw + +from userbot import PyroClient +from userbot.modules.capture.chat_meta import meta_from_peer +from userbot.modules.stt import repository + +HANDLES = (raw.types.UpdateTranscribedAudio,) + + +async def handle( + client: PyroClient, + update: raw.types.UpdateTranscribedAudio, + users: dict, + chats: dict, +) -> None: + ctx = client.capture + if ctx is None or not update.text: + return + meta = meta_from_peer(update.peer, chats, users, ctx.contacts.ids) + await repository.set_extracted_text( + ctx.pool, ctx.account_id, meta.chat_id, update.msg_id, update.text + ) diff --git a/backend/src/userbot/modules/capture/chat_meta.py b/backend/src/userbot/modules/capture/chat_meta.py index 6da537d..01b5ba7 100644 --- a/backend/src/userbot/modules/capture/chat_meta.py +++ b/backend/src/userbot/modules/capture/chat_meta.py @@ -25,6 +25,14 @@ def meta_from_chat(chat: Chat, contacts: set[int]) -> ChatMeta: return ChatMeta(chat_id=chat_id, kind=kind, is_bot=is_bot, is_contact=is_contact) +def meta_from_chat_id(chat_id: int, contacts: set[int]) -> ChatMeta: + if chat_id > 0: + return ChatMeta( + chat_id=chat_id, kind=ChatKind.DM, is_contact=chat_id in contacts + ) + return ChatMeta(chat_id=chat_id, kind=ChatKind.GROUP) + + def meta_from_peer( peer: raw.base.Peer, chats: dict, users: dict, contacts: set[int] ) -> ChatMeta: diff --git a/backend/src/userbot/modules/jobs/handlers/__init__.py b/backend/src/userbot/modules/jobs/handlers/__init__.py index 76bdaba..dc36fd6 100644 --- a/backend/src/userbot/modules/jobs/handlers/__init__.py +++ b/backend/src/userbot/modules/jobs/handlers/__init__.py @@ -1,3 +1,3 @@ -from userbot.modules.jobs.handlers import backfill, fetch_media +from userbot.modules.jobs.handlers import backfill, fetch_media, transcribe -__all__ = ["backfill", "fetch_media"] +__all__ = ["backfill", "fetch_media", "transcribe"] diff --git a/backend/src/userbot/modules/jobs/handlers/transcribe.py b/backend/src/userbot/modules/jobs/handlers/transcribe.py new file mode 100644 index 0000000..7f5124f --- /dev/null +++ b/backend/src/userbot/modules/jobs/handlers/transcribe.py @@ -0,0 +1,16 @@ +from userbot.modules.jobs.context import JobContext +from userbot.modules.jobs.registry import register +from userbot.modules.stt import transcribe_message + + +@register("transcribe") +async def transcribe(ctx: JobContext) -> None: + client = ctx.client + if client is None: + return + capture = getattr(client, "capture", None) + if capture is None: + return + chat_id = ctx.job.params["chat_id"] + message_id = ctx.job.params["message_id"] + await transcribe_message(client, capture, chat_id, message_id) diff --git a/backend/src/userbot/modules/stt/__init__.py b/backend/src/userbot/modules/stt/__init__.py new file mode 100644 index 0000000..7449d1e --- /dev/null +++ b/backend/src/userbot/modules/stt/__init__.py @@ -0,0 +1,3 @@ +from userbot.modules.stt.service import is_transcribable, transcribe_message + +__all__ = ["is_transcribable", "transcribe_message"] diff --git a/backend/src/userbot/modules/stt/gate.py b/backend/src/userbot/modules/stt/gate.py new file mode 100644 index 0000000..78329bb --- /dev/null +++ b/backend/src/userbot/modules/stt/gate.py @@ -0,0 +1,23 @@ +from pyrogram import Client +from pyrogram.errors import FloodPremiumWait, FloodWait, RPCError + +from userbot.modules.capture.context import CaptureContext +from userbot.modules.jobs.repository import enqueue +from userbot.modules.stt.service import transcribe_message +from utils.logging import logger + + +async def safe_transcribe( + client: Client, ctx: CaptureContext, chat_id: int, message_id: int +) -> None: + try: + await transcribe_message(client, ctx, chat_id, message_id) + except (FloodWait, FloodPremiumWait): + await enqueue( + ctx.pool, + ctx.account_id, + "transcribe", + {"chat_id": chat_id, "message_id": message_id}, + ) + except RPCError as exc: + logger.warning(f"[yellow]STT failed for {chat_id}/{message_id}: {exc}[/]") diff --git a/backend/src/userbot/modules/stt/repository.py b/backend/src/userbot/modules/stt/repository.py new file mode 100644 index 0000000..99e174f --- /dev/null +++ b/backend/src/userbot/modules/stt/repository.py @@ -0,0 +1,45 @@ +import asyncpg + +from userbot.modules.capture.repository import CHANNEL_ID_THRESHOLD + +_VOICE_KINDS = ["voice", "video_note"] + +_SET_EXTRACTED_TEXT = """ +UPDATE media SET extracted_text = $4 +WHERE account_id = $1 AND chat_id = $2 AND message_id = $3 +""" + +_PENDING_BOX = """ +SELECT chat_id, message_id FROM media +WHERE account_id = $1 AND message_id = ANY($2::bigint[]) +AND chat_id > $3 AND kind = ANY($4::text[]) AND extracted_text IS NULL +""" + +_PENDING_CHANNEL = """ +SELECT chat_id, message_id FROM media +WHERE account_id = $1 AND message_id = ANY($2::bigint[]) +AND chat_id = $3 AND kind = ANY($4::text[]) AND extracted_text IS NULL +""" + + +async def set_extracted_text( + pool: asyncpg.Pool, account_id: int, chat_id: int, message_id: int, text: str +) -> None: + await pool.execute(_SET_EXTRACTED_TEXT, account_id, chat_id, message_id, text) + + +async def pending_voice_reads( + pool: asyncpg.Pool, + account_id: int, + message_ids: list[int], + chat_id: int | None = None, +) -> list[tuple[int, int]]: + if chat_id is None: + rows = await pool.fetch( + _PENDING_BOX, account_id, message_ids, CHANNEL_ID_THRESHOLD, _VOICE_KINDS + ) + else: + rows = await pool.fetch( + _PENDING_CHANNEL, account_id, message_ids, chat_id, _VOICE_KINDS + ) + return [(row["chat_id"], row["message_id"]) for row in rows] diff --git a/backend/src/userbot/modules/stt/service.py b/backend/src/userbot/modules/stt/service.py new file mode 100644 index 0000000..6c87787 --- /dev/null +++ b/backend/src/userbot/modules/stt/service.py @@ -0,0 +1,27 @@ +from pyrogram import Client, raw +from pyrogram.types import Message + +from userbot.modules.capture.context import CaptureContext +from userbot.modules.media import self_destruct_ttl +from userbot.modules.stt import repository + + +def is_transcribable(message: Message) -> bool: + if self_destruct_ttl(message) is not None: + return False + return message.voice is not None or message.video_note is not None + + +async def transcribe_message( + client: Client, ctx: CaptureContext, chat_id: int, message_id: int +) -> None: + peer = await client.resolve_peer(chat_id) + if peer is None: + return + result = await client.invoke( + raw.functions.messages.TranscribeAudio(peer=peer, msg_id=message_id) + ) + if not result.pending and result.text: + await repository.set_extracted_text( + ctx.pool, ctx.account_id, chat_id, message_id, result.text + )