added TTS agent for voice messages
This commit is contained in:
@@ -12,6 +12,7 @@ dependencies = [
|
|||||||
"dishka>=1.6.0",
|
"dishka>=1.6.0",
|
||||||
"google-genai>=1.23.0",
|
"google-genai>=1.23.0",
|
||||||
"pydantic-settings>=2.10.1",
|
"pydantic-settings>=2.10.1",
|
||||||
|
"pydub>=0.25.1",
|
||||||
"rich>=14.0.0",
|
"rich>=14.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
32
src/bot/modules/solaris/agents/tts.py
Normal file
32
src/bot/modules/solaris/agents/tts.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
import io
|
||||||
|
import json
|
||||||
|
from google import genai
|
||||||
|
from google.genai import types
|
||||||
|
from pydub import AudioSegment
|
||||||
|
from ..content_configs import TTS_CONTENT_CONFIG
|
||||||
|
|
||||||
|
class TTSAgent:
    """Text-to-speech agent that turns text into an OGG/Opus voice message.

    The agent asks a Gemini TTS model to synthesize speech for the given
    text, takes the raw PCM audio out of the response and re-encodes it as
    OGG/Opus bytes.
    """

    # Model used for speech synthesis.
    MODEL = "gemini-2.5-flash-preview-tts"

    # Layout of the raw PCM returned by the model: 16-bit samples, 24 kHz,
    # mono (the layout used in the Gemini speech-generation documentation).
    SAMPLE_WIDTH = 2
    FRAME_RATE = 24000
    CHANNELS = 1

    def __init__(self, api_key: str) -> None:
        """Create the async Gemini client used for synthesis.

        Args:
            api_key: Gemini API key passed to ``genai.Client``.
        """
        # NOTE: client construction is duplicated elsewhere; not pretty,
        # worth sharing a single client.
        self.client = genai.Client(api_key=api_key).aio

    async def generate(self, text: str) -> bytes:
        """Synthesize ``text`` and return it as OGG/Opus encoded bytes.

        Args:
            text: The text to be spoken.

        Returns:
            The encoded audio as ``bytes`` (OGG container, Opus codec).

        Raises:
            RuntimeError: If the model response contains no audio data
                (for example, an empty or blocked response).
        """
        # Function-scope import keeps this block self-contained.
        import asyncio

        response = await self.client.models.generate_content(
            model=self.MODEL,
            contents=text,
            config=TTS_CONTENT_CONFIG,
        )
        pcm = self._extract_pcm(response)
        # Encoding shells out to ffmpeg and blocks; run it in a worker
        # thread so the event loop stays responsive.
        return await asyncio.to_thread(self._encode_ogg_opus, pcm)

    @staticmethod
    def _extract_pcm(response) -> bytes:
        """Return the raw PCM payload of the first part of the first candidate."""
        candidates = response.candidates
        if not candidates:
            raise RuntimeError("TTS response contains no candidates")

        content = candidates[0].content
        parts = content.parts if content is not None else None
        if not parts:
            raise RuntimeError("TTS response candidate has no content parts")

        inline_data = parts[0].inline_data
        if inline_data is None or not inline_data.data:
            raise RuntimeError("TTS response part carries no audio data")
        return inline_data.data

    @classmethod
    def _encode_ogg_opus(cls, pcm: bytes) -> bytes:
        """Encode raw PCM bytes as OGG with the Opus codec (blocking)."""
        audio = AudioSegment(
            pcm,
            sample_width=cls.SAMPLE_WIDTH,
            frame_rate=cls.FRAME_RATE,
            channels=cls.CHANNELS,
        )

        # Export as .ogg with the opus codec.
        ogg_io = io.BytesIO()
        audio.export(ogg_io, format="ogg", codec="libopus")
        return ogg_io.getvalue()
|
||||||
|
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
from google.genai import types
|
from google.genai import types
|
||||||
from .structures import OutputMessage
|
from .structures import OutputMessage
|
||||||
|
|
||||||
CONTENT_CONFIG = types.GenerateContentConfig(
|
MAIN_CONTENT_CONFIG = types.GenerateContentConfig(
|
||||||
system_instruction="meow meow meow", # надо где-то промпт хранить, в бд наверное хезе
|
system_instruction="meow meow meow", # надо где-то промпт хранить, в бд наверное хезе
|
||||||
thinking_config=types.ThinkingConfig(thinking_budget=0),
|
thinking_config=types.ThinkingConfig(thinking_budget=0),
|
||||||
response_mime_type="application/json",
|
response_mime_type="application/json",
|
||||||
@@ -15,3 +15,21 @@ CONTENT_CONFIG = types.GenerateContentConfig(
|
|||||||
for category in types.HarmBlockThreshold
|
for category in types.HarmBlockThreshold
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Generation config for the text-to-speech model: audio-only output using the
# prebuilt "Kore" voice, with safety filtering switched off.
TTS_CONTENT_CONFIG = types.GenerateContentConfig(
    response_modalities=["AUDIO"],
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=types.PrebuiltVoiceConfig(
                voice_name="Kore",
            )
        )
    ),
    safety_settings=[
        types.SafetySetting(
            category=category,
            threshold=types.HarmBlockThreshold.OFF,
        )
        # Iterate over harm *categories*; the previous code iterated over
        # HarmBlockThreshold members and passed thresholds as categories.
        # The categories are listed explicitly because the enum also holds
        # HARM_CATEGORY_UNSPECIFIED, which is not a configurable category.
        for category in (
            types.HarmCategory.HARM_CATEGORY_HARASSMENT,
            types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        )
    ],
)
|
||||||
11
uv.lock
generated
11
uv.lock
generated
@@ -548,6 +548,15 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/58/f0/427018098906416f580e3cf1366d3b1abfb408a0652e9f31600c24a1903c/pydantic_settings-2.10.1-py3-none-any.whl", hash = "sha256:a60952460b99cf661dc25c29c0ef171721f98bfcb52ef8d9ea4c943d7c8cc796", size = 45235, upload-time = "2025-06-24T13:26:45.485Z" },
|
{ url = "https://files.pythonhosted.org/packages/58/f0/427018098906416f580e3cf1366d3b1abfb408a0652e9f31600c24a1903c/pydantic_settings-2.10.1-py3-none-any.whl", hash = "sha256:a60952460b99cf661dc25c29c0ef171721f98bfcb52ef8d9ea4c943d7c8cc796", size = 45235, upload-time = "2025-06-24T13:26:45.485Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pydub"
|
||||||
|
version = "0.25.1"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/fe/9a/e6bca0eed82db26562c73b5076539a4a08d3cffd19c3cc5913a3e61145fd/pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f", size = 38326, upload-time = "2021-03-10T02:09:54.659Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a6/53/d78dc063216e62fc55f6b2eebb447f6a4b0a59f55c8406376f76bf959b08/pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6", size = 32327, upload-time = "2021-03-10T02:09:53.503Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pygments"
|
name = "pygments"
|
||||||
version = "2.19.2"
|
version = "2.19.2"
|
||||||
@@ -654,6 +663,7 @@ dependencies = [
|
|||||||
{ name = "dishka" },
|
{ name = "dishka" },
|
||||||
{ name = "google-genai" },
|
{ name = "google-genai" },
|
||||||
{ name = "pydantic-settings" },
|
{ name = "pydantic-settings" },
|
||||||
|
{ name = "pydub" },
|
||||||
{ name = "rich" },
|
{ name = "rich" },
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -664,6 +674,7 @@ requires-dist = [
|
|||||||
{ name = "dishka", specifier = ">=1.6.0" },
|
{ name = "dishka", specifier = ">=1.6.0" },
|
||||||
{ name = "google-genai", specifier = ">=1.23.0" },
|
{ name = "google-genai", specifier = ">=1.23.0" },
|
||||||
{ name = "pydantic-settings", specifier = ">=2.10.1" },
|
{ name = "pydantic-settings", specifier = ">=2.10.1" },
|
||||||
|
{ name = "pydub", specifier = ">=0.25.1" },
|
||||||
{ name = "rich", specifier = ">=14.0.0" },
|
{ name = "rich", specifier = ">=14.0.0" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user