added TTS agent for voice messages
This commit is contained in:
@@ -12,6 +12,7 @@ dependencies = [
|
||||
"dishka>=1.6.0",
|
||||
"google-genai>=1.23.0",
|
||||
"pydantic-settings>=2.10.1",
|
||||
"pydub>=0.25.1",
|
||||
"rich>=14.0.0",
|
||||
]
|
||||
|
||||
|
||||
32
src/bot/modules/solaris/agents/tts.py
Normal file
32
src/bot/modules/solaris/agents/tts.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import io
|
||||
import json
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
from pydub import AudioSegment
|
||||
from ..content_configs import TTS_CONTENT_CONFIG
|
||||
|
||||
class TTSAgent:
    """Agent that converts text into an OGG/Opus voice message via Gemini TTS.

    The agent sends the text to a Gemini text-to-speech model, takes the raw
    audio bytes from the first part of the first candidate and re-encodes
    them as OGG with the Opus codec.
    """

    # Layout used to interpret the raw bytes returned by the model:
    # 2 bytes per sample, 24000 frames per second, a single channel.
    _SAMPLE_WIDTH = 2
    _FRAME_RATE = 24000
    _CHANNELS = 1

    def __init__(
        self,
        api_key: str,
        model: str = "gemini-2.5-flash-preview-tts",
    ) -> None:
        """Create the asynchronous Gemini client.

        Args:
            api_key: API key passed to ``genai.Client``.
            model: Name of the TTS model to call; defaults to the model
                that was previously hard-coded in ``generate``.
        """
        # TODO: this client construction is duplicated elsewhere in the
        # project; factor it out into shared code.
        self.client = genai.Client(api_key=api_key).aio
        self.model = model

    async def generate(self, text: str) -> bytes:
        """Synthesize ``text`` and return it as OGG/Opus encoded bytes.

        Args:
            text: The text to be spoken.

        Returns:
            The encoded audio as ``bytes``.

        Raises:
            ValueError: If the model response carries no inline audio data.
        """
        # Local import keeps this block self-contained without touching the
        # file-level import section.
        import asyncio

        response = await self.client.models.generate_content(
            model=self.model,
            contents=text,
            config=TTS_CONTENT_CONFIG,
        )

        # Any link of this chain may be missing (e.g. no candidates were
        # returned), so fail with an explicit error instead of an opaque
        # TypeError/AttributeError from indexing ``None``.
        try:
            data = response.candidates[0].content.parts[0].inline_data.data
        except (AttributeError, IndexError, TypeError) as exc:
            raise ValueError("TTS response contains no audio data") from exc
        if not data:
            raise ValueError("TTS response contains no audio data")

        # pydub's export shells out to ffmpeg and blocks until it finishes;
        # run it in a worker thread so the event loop stays responsive.
        return await asyncio.to_thread(self._encode_ogg, data)

    @classmethod
    def _encode_ogg(cls, pcm: bytes) -> bytes:
        """Wrap raw PCM bytes in an ``AudioSegment`` and export as OGG/Opus."""
        audio = AudioSegment(
            pcm,
            sample_width=cls._SAMPLE_WIDTH,
            frame_rate=cls._FRAME_RATE,
            channels=cls._CHANNELS,
        )

        # Export as .ogg using the opus codec.
        ogg_io = io.BytesIO()
        audio.export(ogg_io, format="ogg", codec="libopus")
        return ogg_io.getvalue()
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
from google.genai import types
|
||||
from .structures import OutputMessage
|
||||
|
||||
CONTENT_CONFIG = types.GenerateContentConfig(
|
||||
MAIN_CONTENT_CONFIG = types.GenerateContentConfig(
|
||||
system_instruction="meow meow meow", # надо где-то промпт хранить, в бд наверное хезе
|
||||
thinking_config=types.ThinkingConfig(thinking_budget=0),
|
||||
response_mime_type="application/json",
|
||||
@@ -15,3 +15,21 @@ CONTENT_CONFIG = types.GenerateContentConfig(
|
||||
for category in types.HarmBlockThreshold
|
||||
]
|
||||
)
|
||||
|
||||
# Generation config for the text-to-speech model: requests audio output,
# selects the prebuilt "Kore" voice and turns safety blocking off.
TTS_CONTENT_CONFIG = types.GenerateContentConfig(
    response_modalities=["AUDIO"],
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(
                voice_name='Kore',
            )
        )
    ),
    safety_settings=[
        types.SafetySetting(
            category=category,
            threshold=types.HarmBlockThreshold.OFF,
        )
        # Iterate over harm *categories*; the previous code looped over
        # HarmBlockThreshold, which passed threshold values as categories.
        # The categories are listed explicitly so that the UNSPECIFIED
        # placeholder member is never sent to the API.
        for category in (
            types.HarmCategory.HARM_CATEGORY_HARASSMENT,
            types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            types.HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY,
        )
    ],
)
|
||||
11
uv.lock
generated
11
uv.lock
generated
@@ -548,6 +548,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/58/f0/427018098906416f580e3cf1366d3b1abfb408a0652e9f31600c24a1903c/pydantic_settings-2.10.1-py3-none-any.whl", hash = "sha256:a60952460b99cf661dc25c29c0ef171721f98bfcb52ef8d9ea4c943d7c8cc796", size = 45235, upload-time = "2025-06-24T13:26:45.485Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pydub"
|
||||
version = "0.25.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/fe/9a/e6bca0eed82db26562c73b5076539a4a08d3cffd19c3cc5913a3e61145fd/pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f", size = 38326, upload-time = "2021-03-10T02:09:54.659Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a6/53/d78dc063216e62fc55f6b2eebb447f6a4b0a59f55c8406376f76bf959b08/pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6", size = 32327, upload-time = "2021-03-10T02:09:53.503Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pygments"
|
||||
version = "2.19.2"
|
||||
@@ -654,6 +663,7 @@ dependencies = [
|
||||
{ name = "dishka" },
|
||||
{ name = "google-genai" },
|
||||
{ name = "pydantic-settings" },
|
||||
{ name = "pydub" },
|
||||
{ name = "rich" },
|
||||
]
|
||||
|
||||
@@ -664,6 +674,7 @@ requires-dist = [
|
||||
{ name = "dishka", specifier = ">=1.6.0" },
|
||||
{ name = "google-genai", specifier = ">=1.23.0" },
|
||||
{ name = "pydantic-settings", specifier = ">=2.10.1" },
|
||||
{ name = "pydub", specifier = ">=0.25.1" },
|
||||
{ name = "rich", specifier = ">=14.0.0" },
|
||||
]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user