feat(solaris): moving to service system to support multi-chat

2025-07-05 01:13:12 +03:00
parent fd84210a65
commit 41927a1e07
16 changed files with 143 additions and 74 deletions
--- a/src/bot/modules/solaris/agents/tts.py
+++ b/src/bot/modules/solaris/agents/tts.py
@@ -4,7 +4,7 @@ from google import genai
 from google.genai import types
 from pydub import AudioSegment

-from ..content_configs import generate_tts_config
+from ..constants import SAFETY_SETTINGS

 TTS_MODEL = "gemini-2.5-flash-preview-tts"

@@ -12,18 +12,34 @@ TTS_MODEL = "gemini-2.5-flash-preview-tts"
 class TTSAgent:
     def __init__(self, client: genai.client.AsyncClient) -> None:
         self.client = client
-        self.content_config = generate_tts_config()
+        # Audio-only responses, spoken with the prebuilt "Kore" voice.
+        self.content_config = types.GenerateContentConfig(
+            response_modalities=[types.Modality.AUDIO],
+            speech_config=types.SpeechConfig(
+                voice_config=types.VoiceConfig(
+                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
+                        voice_name="Kore",
+                    )
+                )
+            ),
+            safety_settings=SAFETY_SETTINGS,
+        )

     async def generate(self, text: str):
         response = await self.client.models.generate_content(
             model=TTS_MODEL, contents=text, config=self.content_config
         )
+        # Audio bytes are read from the first part of the first candidate.
         data = response.candidates[0].content.parts[0].inline_data.data
         pcm_io = io.BytesIO(data)
+        pcm_io.seek(0)
+        # NOTE(review): seek(0) above looks redundant on a new BytesIO - confirm.
         audio = AudioSegment(
             pcm_io.read(), sample_width=2, frame_rate=24000, channels=1
         )
+        # Export into an in-memory buffer as OGG using the libopus codec.
         ogg_io = io.BytesIO()
         audio.export(ogg_io, format="ogg", codec="libopus")
         ogg_bytes = ogg_io.getvalue()
+
         return ogg_bytes