Files
ai-hell/server/voice_generator.py

74 lines
2.3 KiB
Python

"""XTTS v2 wrapper for voice cloning from non-voice audio samples."""
import os
import random
import tempfile
from pathlib import Path
try:
from TTS.api import TTS
except ImportError:
TTS = None # Tests patch this; real runtime requires the TTS package
from server.config import config
class VoiceGenerator:
"""Generates speech cloned from arbitrary audio samples via XTTS v2."""
def __init__(
self,
device: str | None = None,
model_name: str | None = None,
samples_dir: str | None = None,
):
self.device = device or config.device
self.model_name = model_name or config.models.xtts_model
self.samples_dir = Path(samples_dir or config.samples_dir)
if TTS is None:
raise RuntimeError(
"TTS package is not installed; cannot instantiate VoiceGenerator"
)
self._tts = TTS(model_name=self.model_name)
self._tts.to(self.device)
def generate(self, text: str, speaker_wav: str | None = None) -> bytes:
"""Generate speech as WAV bytes. Uses a random clone source if none specified."""
if speaker_wav is None:
speaker_wav = self.random_clone_source()
if speaker_wav is None:
raise ValueError("No speaker WAV provided and no samples available")
# XTTS writes to file, so use a temp file
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
tmp.close()
try:
self._tts.tts_to_file(
text=text,
speaker_wav=speaker_wav,
language=config.models.xtts_language,
file_path=tmp.name,
)
with open(tmp.name, "rb") as f:
return f.read()
finally:
try:
os.unlink(tmp.name)
except OSError:
pass
def list_clone_sources(self) -> list[str]:
"""List all WAV files in the samples directory."""
if not self.samples_dir.is_dir():
return []
return [
str(p) for p in sorted(self.samples_dir.glob("*.wav"))
]
def random_clone_source(self) -> str | None:
"""Pick a random clone source WAV file."""
sources = self.list_clone_sources()
if not sources:
return None
return random.choice(sources)