class TTSAgent:
    """Turn text into an OGG/Opus voice message using the Gemini TTS model.

    The model's audio payload is treated as raw PCM and re-encoded with
    pydub so the result can be sent as a voice message.
    """

    _MODEL = "gemini-2.5-flash-preview-tts"

    # Layout of the raw PCM payload handed to pydub:
    # 2 bytes per sample (16-bit), 24 kHz, single channel.
    _SAMPLE_WIDTH = 2
    _FRAME_RATE = 24000
    _CHANNELS = 1

    def __init__(self, api_key: str) -> None:
        """Create the async Gemini client.

        Args:
            api_key: Gemini API key used to authenticate requests.
        """
        # TODO: this client construction is duplicated elsewhere in the
        # project; it should be shared instead of repeated.
        self.client = genai.Client(api_key=api_key).aio

    async def generate(self, text: str) -> bytes:
        """Synthesize ``text`` and return it as OGG/Opus bytes.

        Args:
            text: The text to be spoken.

        Returns:
            The encoded audio as an in-memory OGG container with Opus codec.

        Raises:
            RuntimeError: If the model response carries no audio data.
        """
        # Local import keeps this block self-contained; asyncio is stdlib.
        import asyncio

        response = await self.client.models.generate_content(
            model=self._MODEL,
            contents=text,
            config=TTS_CONTENT_CONFIG,
        )
        pcm = self._extract_pcm(response)
        # Encoding is synchronous (pydub delegates to an external encoder),
        # so run it in a worker thread to keep the event loop responsive.
        return await asyncio.to_thread(self._encode_ogg_opus, pcm)

    @staticmethod
    def _extract_pcm(response) -> bytes:
        """Return the first inline audio payload found in ``response``."""
        for candidate in getattr(response, "candidates", None) or ():
            content = getattr(candidate, "content", None)
            for part in getattr(content, "parts", None) or ():
                blob = getattr(part, "inline_data", None)
                data = getattr(blob, "data", None)
                if data:
                    return data
        raise RuntimeError("TTS response contains no audio data")

    def _encode_ogg_opus(self, pcm: bytes) -> bytes:
        """Wrap raw PCM in an AudioSegment and export it as OGG/Opus."""
        audio = AudioSegment(
            pcm,
            sample_width=self._SAMPLE_WIDTH,
            frame_rate=self._FRAME_RATE,
            channels=self._CHANNELS,
        )
        ogg_io = io.BytesIO()
        # Export as .ogg with the Opus codec.
        audio.export(ogg_io, format="ogg", codec="libopus")
        return ogg_io.getvalue()
# Request configuration for the TTS model: audio-only output spoken with the
# prebuilt 'Kore' voice, with safety blocking switched off.
TTS_CONTENT_CONFIG = types.GenerateContentConfig(
    response_modalities=["AUDIO"],
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(
                voice_name='Kore',
            )
        )
    ),
    safety_settings=[
        types.SafetySetting(
            category=category,
            threshold=types.HarmBlockThreshold.OFF,
        )
        # Iterate harm *categories*; the previous code looped over
        # HarmBlockThreshold members and passed them as categories.
        # Listed explicitly so HARM_CATEGORY_UNSPECIFIED and any
        # categories the endpoint does not accept are never sent.
        for category in (
            types.HarmCategory.HARM_CATEGORY_HARASSMENT,
            types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        )
    ],
)
"https://files.pythonhosted.org/packages/a6/53/d78dc063216e62fc55f6b2eebb447f6a4b0a59f55c8406376f76bf959b08/pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6", size = 32327, upload-time = "2021-03-10T02:09:53.503Z" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -654,6 +663,7 @@ dependencies = [ { name = "dishka" }, { name = "google-genai" }, { name = "pydantic-settings" }, + { name = "pydub" }, { name = "rich" }, ] @@ -664,6 +674,7 @@ requires-dist = [ { name = "dishka", specifier = ">=1.6.0" }, { name = "google-genai", specifier = ">=1.23.0" }, { name = "pydantic-settings", specifier = ">=2.10.1" }, + { name = "pydub", specifier = ">=0.25.1" }, { name = "rich", specifier = ">=14.0.0" }, ]