"""XTTS v2 wrapper for voice cloning from non-voice audio samples."""

import os
import random
import tempfile
from pathlib import Path

try:
    from TTS.api import TTS
except ImportError:
    TTS = None  # Tests patch this; real runtime requires the TTS package

from server.config import config


class VoiceGenerator:
    """Generates speech cloned from arbitrary audio samples via XTTS v2.

    Attributes are resolved at construction time: explicit arguments win,
    otherwise the corresponding value from ``server.config.config`` is used.
    """

    def __init__(
        self,
        device: str | None = None,
        model_name: str | None = None,
        samples_dir: str | None = None,
    ):
        """Load the TTS model and move it to the target device.

        Args:
            device: Device passed to the model's ``to()``; falls back to
                ``config.device`` when falsy.
            model_name: Model identifier passed to ``TTS``; falls back to
                ``config.models.xtts_model`` when falsy.
            samples_dir: Directory searched for clone-source WAV files;
                falls back to ``config.samples_dir`` when falsy.

        Raises:
            RuntimeError: If the ``TTS`` package could not be imported.
        """
        self.device = device or config.device
        self.model_name = model_name or config.models.xtts_model
        self.samples_dir = Path(samples_dir or config.samples_dir)

        if TTS is None:
            raise RuntimeError(
                "TTS package is not installed; cannot instantiate VoiceGenerator"
            )

        self._tts = TTS(model_name=self.model_name)
        self._tts.to(self.device)

    def generate(self, text: str, speaker_wav: str | None = None) -> bytes:
        """Generate speech as WAV bytes.

        Args:
            text: Text to synthesize.
            speaker_wav: Path of the audio file to clone from. When ``None``,
                a random file from the samples directory is used.

        Returns:
            The raw contents of the file written by ``tts_to_file``.

        Raises:
            ValueError: If no ``speaker_wav`` was given and the samples
                directory yields no clone source.
        """
        if speaker_wav is None:
            speaker_wav = self.random_clone_source()
        if speaker_wav is None:
            raise ValueError("No speaker WAV provided and no samples available")

        # XTTS writes to a file path, so reserve a temp file name. The handle
        # is closed right away so the library can open the path itself.
        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        tmp.close()
        try:
            self._tts.tts_to_file(
                text=text,
                speaker_wav=speaker_wav,
                language=config.models.xtts_language,
                file_path=tmp.name,
            )
            with open(tmp.name, "rb") as f:
                return f.read()
        finally:
            # Best-effort cleanup: a failed delete must not mask the result
            # or the original synthesis error.
            try:
                os.unlink(tmp.name)
            except OSError:
                pass

    def list_clone_sources(self) -> list[str]:
        """List all WAV files in the samples directory.

        Only regular files are returned (a directory or dangling symlink
        named ``*.wav`` is skipped), and the ``.wav`` extension is matched
        case-insensitively so the result does not depend on the platform's
        filename case rules.

        Returns:
            Sorted path strings; an empty list when the samples directory
            does not exist or contains no WAV files.
        """
        if not self.samples_dir.is_dir():
            return []
        return [
            str(p)
            for p in sorted(self.samples_dir.iterdir())
            if p.name.lower().endswith(".wav") and p.is_file()
        ]

    def random_clone_source(self) -> str | None:
        """Pick a random clone source WAV file.

        Returns:
            A path string chosen with ``random.choice``, or ``None`` when
            no clone sources are available.
        """
        sources = self.list_clone_sources()
        if not sources:
            return None
        return random.choice(sources)