feat(solaris): moving to service system to support multi-chat

This commit is contained in:
h
2025-07-05 01:13:12 +03:00
parent fd84210a65
commit 41927a1e07
16 changed files with 143 additions and 74 deletions

View File

@@ -4,7 +4,7 @@ from google import genai
from google.genai import types
from pydub import AudioSegment
from ..content_configs import generate_tts_config
from ..constants import SAFETY_SETTINGS
# Model identifier passed as ``model=`` to ``generate_content`` for speech synthesis.
TTS_MODEL = "gemini-2.5-flash-preview-tts"
@@ -12,18 +12,34 @@ TTS_MODEL = "gemini-2.5-flash-preview-tts"
class TTSAgent:
    """Text-to-speech agent that turns text into OGG/Opus audio bytes.

    Speech is requested from the ``TTS_MODEL`` model through the supplied
    async client; the audio payload of the response is then re-encoded
    with pydub.
    """

    # Layout of the raw audio handed to pydub's ``AudioSegment``.
    # NOTE(review): presumably the model returns headerless 16-bit mono PCM
    # at 24 kHz -- confirm against the Gemini speech-generation docs whenever
    # TTS_MODEL changes.
    PCM_SAMPLE_WIDTH = 2
    PCM_FRAME_RATE = 24000
    PCM_CHANNELS = 1

    def __init__(self, client: genai.client.AsyncClient) -> None:
        """Store the client and build the request config reused by every call.

        Args:
            client: Async client used to issue ``models.generate_content``.
        """
        self.client = client
        # Audio-only response spoken with the prebuilt "Kore" voice, using the
        # shared SAFETY_SETTINGS.
        self.content_config = types.GenerateContentConfig(
            response_modalities=[types.Modality.AUDIO],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name="Kore",
                    )
                )
            ),
            safety_settings=SAFETY_SETTINGS,
        )

    async def generate(self, text: str) -> bytes:
        """Synthesize *text* and return the speech as OGG/Opus bytes.

        Args:
            text: The text to be spoken.

        Returns:
            The encoded audio (``format="ogg"``, ``codec="libopus"``).

        Raises:
            RuntimeError: If the response carries no audio payload.
        """
        response = await self.client.models.generate_content(
            model=TTS_MODEL, contents=text, config=self.content_config
        )
        pcm_bytes = self._extract_audio(response)

        audio = AudioSegment(
            pcm_bytes,
            sample_width=self.PCM_SAMPLE_WIDTH,
            frame_rate=self.PCM_FRAME_RATE,
            channels=self.PCM_CHANNELS,
        )

        # NOTE(review): pydub's export is a synchronous call, so the event
        # loop is blocked while encoding -- consider offloading it to a
        # worker thread if latency becomes a problem.
        ogg_io = io.BytesIO()
        audio.export(ogg_io, format="ogg", codec="libopus")
        return ogg_io.getvalue()

    @staticmethod
    def _extract_audio(response) -> bytes:
        """Return the inline audio bytes of the first part of the first candidate.

        Raises:
            RuntimeError: If any level of the response
                (candidates / content / parts / inline data) is missing.
        """
        candidates = response.candidates
        if not candidates:
            raise RuntimeError("TTS response contained no candidates")

        content = candidates[0].content
        parts = content.parts if content is not None else None
        if not parts:
            raise RuntimeError("TTS response candidate contained no content parts")

        inline_data = parts[0].inline_data
        if inline_data is None or inline_data.data is None:
            raise RuntimeError("TTS response contained no inline audio data")
        return inline_data.data