feat(solaris): moving to service system to support multi-chat
This commit is contained in:
@@ -4,7 +4,7 @@ from google import genai
|
||||
from google.genai import types
|
||||
from pydub import AudioSegment
|
||||
|
||||
from ..content_configs import generate_tts_config
|
||||
from ..constants import SAFETY_SETTINGS
|
||||
|
||||
# Model identifier passed as `model=` to `generate_content` for speech synthesis.
TTS_MODEL = "gemini-2.5-flash-preview-tts"
|
||||
|
||||
@@ -12,18 +12,34 @@ TTS_MODEL = "gemini-2.5-flash-preview-tts"
|
||||
class TTSAgent:
    """Text-to-speech agent that returns synthesized speech as Ogg/Opus bytes.

    Attributes:
        client: Async Gemini client used to issue the generation request.
        content_config: Request configuration asking for audio output with
            the prebuilt "Kore" voice and the shared safety settings.
    """

    def __init__(self, client: genai.client.AsyncClient) -> None:
        """Store the client and build the audio generation config once."""
        self.client = client

        self.content_config = types.GenerateContentConfig(
            response_modalities=[types.Modality.AUDIO],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name="Kore",
                    )
                )
            ),
            safety_settings=SAFETY_SETTINGS,
        )

    async def generate(self, text: str) -> bytes:
        """Synthesize *text* and return the audio encoded as Ogg/Opus.

        Args:
            text: The text to speak; sent to the model as the request contents.

        Returns:
            The encoded audio as raw ``bytes`` of an Ogg container (Opus codec).

        Raises:
            IndexError, AttributeError, TypeError: If the response does not
                carry inline audio in its first candidate's first part.
        """
        # Function-scope import keeps this block self-contained.
        import asyncio

        response = await self.client.models.generate_content(
            model=TTS_MODEL, contents=text, config=self.content_config
        )

        # NOTE(review): assumes the audio payload is always the first part of
        # the first candidate; a blocked or empty response will raise here.
        data = response.candidates[0].content.parts[0].inline_data.data

        # pydub's export shells out to ffmpeg and blocks until it finishes;
        # run it in a worker thread so other coroutines keep being served.
        return await asyncio.to_thread(self._pcm_to_ogg, data)

    @staticmethod
    def _pcm_to_ogg(pcm: bytes) -> bytes:
        """Encode raw PCM samples into an Ogg container using libopus."""
        # The samples are interpreted as 16-bit (2-byte), 24 kHz, mono PCM.
        # NOTE(review): presumably this matches the model's output format —
        # confirm against the TTS model documentation.
        audio = AudioSegment(pcm, sample_width=2, frame_rate=24000, channels=1)

        ogg_io = io.BytesIO()
        audio.export(ogg_io, format="ogg", codec="libopus")
        return ogg_io.getvalue()
|
||||
|
||||
Reference in New Issue
Block a user