added TTS agent for voice messages

This commit is contained in:
shinrei
2025-07-01 19:50:06 +00:00
parent 955550e3bf
commit 81875620ff
4 changed files with 63 additions and 1 deletions

View File

@@ -12,6 +12,7 @@ dependencies = [
"dishka>=1.6.0", "dishka>=1.6.0",
"google-genai>=1.23.0", "google-genai>=1.23.0",
"pydantic-settings>=2.10.1", "pydantic-settings>=2.10.1",
"pydub>=0.25.1",
"rich>=14.0.0", "rich>=14.0.0",
] ]

View File

@@ -0,0 +1,32 @@
import io
import json
from google import genai
from google.genai import types
from pydub import AudioSegment
from ..content_configs import TTS_CONTENT_CONFIG
class TTSAgent:
    """Generate voice-message audio from text with the Gemini TTS model.

    The model's inline audio payload is interpreted as raw PCM using the
    parameters below and re-encoded to an Ogg container with the Opus codec.
    """

    # Model queried by generate().
    MODEL = "gemini-2.5-flash-preview-tts"
    # Raw PCM layout used to interpret the model output.
    # NOTE(review): assumes the model returns 16-bit mono PCM at 24 kHz --
    # confirm against the Gemini speech-generation documentation.
    SAMPLE_WIDTH = 2
    FRAME_RATE = 24000
    CHANNELS = 1

    def __init__(self, api_key: str) -> None:
        """Create the async Gemini client authenticated with *api_key*."""
        # TODO: client construction is repeated across agents; factor it out.
        self.client = genai.Client(api_key=api_key).aio

    async def generate(self, text: str) -> bytes:
        """Synthesize *text* and return the audio as Ogg/Opus bytes.

        Raises:
            RuntimeError: if the model response contains no audio payload.
        """
        import asyncio

        response = await self.client.models.generate_content(
            model=self.MODEL,
            contents=text,
            config=TTS_CONTENT_CONFIG,
        )
        pcm = self._extract_pcm(response)
        # Encoding shells out to ffmpeg and blocks; run it in a worker thread
        # so the event loop stays responsive.
        return await asyncio.to_thread(self._pcm_to_ogg, pcm)

    @staticmethod
    def _extract_pcm(response) -> bytes:
        """Return the first non-empty inline audio payload found in *response*."""
        for candidate in getattr(response, "candidates", None) or ():
            content = getattr(candidate, "content", None)
            for part in getattr(content, "parts", None) or ():
                inline = getattr(part, "inline_data", None)
                payload = getattr(inline, "data", None)
                if payload:
                    return payload
        raise RuntimeError("TTS response contains no audio data")

    @classmethod
    def _pcm_to_ogg(cls, pcm: bytes) -> bytes:
        """Encode raw PCM bytes as .ogg with the opus codec (blocking)."""
        audio = AudioSegment(
            pcm,
            sample_width=cls.SAMPLE_WIDTH,
            frame_rate=cls.FRAME_RATE,
            channels=cls.CHANNELS,
        )
        ogg_io = io.BytesIO()
        audio.export(ogg_io, format="ogg", codec="libopus")
        return ogg_io.getvalue()

View File

@@ -2,7 +2,7 @@
from google.genai import types from google.genai import types
from .structures import OutputMessage from .structures import OutputMessage
CONTENT_CONFIG = types.GenerateContentConfig( MAIN_CONTENT_CONFIG = types.GenerateContentConfig(
system_instruction="meow meow meow", # надо где-то промпт хранить, в бд наверное хезе system_instruction="meow meow meow", # надо где-то промпт хранить, в бд наверное хезе
thinking_config=types.ThinkingConfig(thinking_budget=0), thinking_config=types.ThinkingConfig(thinking_budget=0),
response_mime_type="application/json", response_mime_type="application/json",
@@ -15,3 +15,21 @@ CONTENT_CONFIG = types.GenerateContentConfig(
for category in types.HarmBlockThreshold for category in types.HarmBlockThreshold
] ]
) )
# Generation config for the TTS model: audio-only output with the prebuilt
# 'Kore' voice and safety blocking switched off.
TTS_CONTENT_CONFIG = types.GenerateContentConfig(
    response_modalities=["AUDIO"],
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(
                voice_name='Kore',
            )
        )
    ),
    # Iterate harm *categories*, not thresholds: the previous comprehension
    # looped over HarmBlockThreshold and passed threshold members as
    # `category`, so no real category was ever configured.
    # NOTE(review): the categories are listed explicitly because
    # HarmCategory presumably also has an UNSPECIFIED placeholder that the
    # API may reject -- confirm and extend the list if needed.
    safety_settings=[
        types.SafetySetting(
            category=category,
            threshold=types.HarmBlockThreshold.OFF,
        )
        for category in (
            types.HarmCategory.HARM_CATEGORY_HARASSMENT,
            types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        )
    ],
)

11
uv.lock generated
View File

@@ -548,6 +548,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/58/f0/427018098906416f580e3cf1366d3b1abfb408a0652e9f31600c24a1903c/pydantic_settings-2.10.1-py3-none-any.whl", hash = "sha256:a60952460b99cf661dc25c29c0ef171721f98bfcb52ef8d9ea4c943d7c8cc796", size = 45235, upload-time = "2025-06-24T13:26:45.485Z" }, { url = "https://files.pythonhosted.org/packages/58/f0/427018098906416f580e3cf1366d3b1abfb408a0652e9f31600c24a1903c/pydantic_settings-2.10.1-py3-none-any.whl", hash = "sha256:a60952460b99cf661dc25c29c0ef171721f98bfcb52ef8d9ea4c943d7c8cc796", size = 45235, upload-time = "2025-06-24T13:26:45.485Z" },
] ]
[[package]]
name = "pydub"
version = "0.25.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/fe/9a/e6bca0eed82db26562c73b5076539a4a08d3cffd19c3cc5913a3e61145fd/pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f", size = 38326, upload-time = "2021-03-10T02:09:54.659Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a6/53/d78dc063216e62fc55f6b2eebb447f6a4b0a59f55c8406376f76bf959b08/pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6", size = 32327, upload-time = "2021-03-10T02:09:53.503Z" },
]
[[package]] [[package]]
name = "pygments" name = "pygments"
version = "2.19.2" version = "2.19.2"
@@ -654,6 +663,7 @@ dependencies = [
{ name = "dishka" }, { name = "dishka" },
{ name = "google-genai" }, { name = "google-genai" },
{ name = "pydantic-settings" }, { name = "pydantic-settings" },
{ name = "pydub" },
{ name = "rich" }, { name = "rich" },
] ]
@@ -664,6 +674,7 @@ requires-dist = [
{ name = "dishka", specifier = ">=1.6.0" }, { name = "dishka", specifier = ">=1.6.0" },
{ name = "google-genai", specifier = ">=1.23.0" }, { name = "google-genai", specifier = ">=1.23.0" },
{ name = "pydantic-settings", specifier = ">=2.10.1" }, { name = "pydantic-settings", specifier = ">=2.10.1" },
{ name = "pydub", specifier = ">=0.25.1" },
{ name = "rich", specifier = ">=14.0.0" }, { name = "rich", specifier = ">=14.0.0" },
] ]