feat: add XTTS v2 voice generator with clone source management
This commit is contained in:
@@ -0,0 +1,73 @@
|
||||
"""XTTS v2 wrapper for voice cloning from non-voice audio samples."""
|
||||
|
||||
import os
|
||||
import random
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from TTS.api import TTS
|
||||
except ImportError:
|
||||
TTS = None # Tests patch this; real runtime requires the TTS package
|
||||
|
||||
from server.config import config
|
||||
|
||||
|
||||
class VoiceGenerator:
|
||||
"""Generates speech cloned from arbitrary audio samples via XTTS v2."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
device: str | None = None,
|
||||
model_name: str | None = None,
|
||||
samples_dir: str | None = None,
|
||||
):
|
||||
self.device = device or config.device
|
||||
self.model_name = model_name or config.models.xtts_model
|
||||
self.samples_dir = Path(samples_dir or config.samples_dir)
|
||||
if TTS is None:
|
||||
raise RuntimeError(
|
||||
"TTS package is not installed; cannot instantiate VoiceGenerator"
|
||||
)
|
||||
self._tts = TTS(model_name=self.model_name)
|
||||
self._tts.to(self.device)
|
||||
|
||||
def generate(self, text: str, speaker_wav: str | None = None) -> bytes:
|
||||
"""Generate speech as WAV bytes. Uses a random clone source if none specified."""
|
||||
if speaker_wav is None:
|
||||
speaker_wav = self.random_clone_source()
|
||||
if speaker_wav is None:
|
||||
raise ValueError("No speaker WAV provided and no samples available")
|
||||
|
||||
# XTTS writes to file, so use a temp file
|
||||
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
||||
tmp.close()
|
||||
try:
|
||||
self._tts.tts_to_file(
|
||||
text=text,
|
||||
speaker_wav=speaker_wav,
|
||||
language=config.models.xtts_language,
|
||||
file_path=tmp.name,
|
||||
)
|
||||
with open(tmp.name, "rb") as f:
|
||||
return f.read()
|
||||
finally:
|
||||
try:
|
||||
os.unlink(tmp.name)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def list_clone_sources(self) -> list[str]:
|
||||
"""List all WAV files in the samples directory."""
|
||||
if not self.samples_dir.is_dir():
|
||||
return []
|
||||
return [
|
||||
str(p) for p in sorted(self.samples_dir.glob("*.wav"))
|
||||
]
|
||||
|
||||
def random_clone_source(self) -> str | None:
|
||||
"""Pick a random clone source WAV file."""
|
||||
sources = self.list_clone_sources()
|
||||
if not sources:
|
||||
return None
|
||||
return random.choice(sources)
|
||||
Reference in New Issue
Block a user