Compare commits
No commits in common. "30b6c49461ea9b00b916b3d70678e7fd38154321" and "09569549c925a32098644ee31aca33a02223fd41" have entirely different histories.
30b6c49461
...
09569549c9
1 changed files with 85 additions and 0 deletions
85
tts
Executable file
85
tts
Executable file
|
@ -0,0 +1,85 @@
|
|||
#!/Users/sij/miniforge3/bin/python
|
||||
|
||||
import sys
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
import uuid
|
||||
import hashlib
|
||||
from pydub import AudioSegment
|
||||
import torch
|
||||
from TTS.api import TTS # Adjust with actual import
|
||||
from playsound import playsound
|
||||
|
||||
from TTS.api import TTS
|
||||
|
||||
# Voice sample name used when the caller gives none or the requested one is missing.
DEFAULT_VOICE = "kiel"

# Identifier of the XTTS v2 multilingual model handed to the TTS loader.
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"

# Running on CPU for now; keep trying 'mps', it will eventually be implemented.
device = torch.device("cpu")

# The model is loaded once at import time and reused for every synthesis call.
tts = TTS(model_name=model_name).to(device)
|
||||
|
||||
def select_voice(voice_name: str, voice_dir=None) -> str:
    """Return the path of the reference WAV file for *voice_name*.

    Looks for ``<voice_dir>/<voice_name>.wav``. When that file does not
    exist, a notice is printed and the path for ``DEFAULT_VOICE`` in the same
    directory is returned instead. The fallback path is returned without
    checking that it exists.

    Args:
        voice_name: Base name of the voice sample, without the ``.wav``
            extension.
        voice_dir: Directory containing the voice samples (``str`` or
            ``Path``). When omitted, the original hard-coded location is
            used, so existing callers behave exactly as before.

    Returns:
        The selected voice file path as a string.
    """
    if voice_dir is None:
        voice_dir = '/Users/sij/AI/banana-phone/voices'
    voice_dir = Path(voice_dir)

    voice_file = voice_dir / f"{voice_name}.wav"
    if voice_file.is_file():
        return str(voice_file)

    print(f"Voice file not found for {voice_name}, using default")
    return str(voice_dir / f"{DEFAULT_VOICE}.wav")
|
||||
|
||||
def generate_speech(text, speed, voice_file):
    """Synthesize *text* to a uniquely named WAV file in the temp directory.

    The module-level ``tts`` model writes the audio via ``tts_to_file``,
    using *voice_file* as the single speaker reference and ``"en"`` as the
    language.

    Args:
        text: The text to speak.
        speed: Speed value forwarded unchanged to ``tts_to_file``.
        voice_file: Path of the speaker reference WAV; its stem also
            prefixes the output file name.

    Returns:
        ``Path`` of the generated WAV file. The caller is responsible for
        deleting it.
    """
    tmp_root = Path(tempfile.gettempdir())
    tmp_root.mkdir(exist_ok=True)

    # Voice stem plus a short random suffix keeps concurrent runs from
    # overwriting each other's output.
    suffix = uuid.uuid4().hex[:8]
    wav_path = tmp_root / f"{Path(voice_file).stem}-{suffix}.wav"

    synthesis_args = {
        "text": text,
        "speed": speed,
        "file_path": wav_path,
        "speaker_wav": [voice_file],
        "language": "en",
    }
    tts.tts_to_file(**synthesis_args)

    return wav_path
|
||||
|
||||
def main():
    """Command-line entry point: synthesize speech, play it, then clean up.

    Arguments are read from ``sys.argv``:

    1. ``<text/file>`` (required) - literal text, or the path of a file
       whose contents are read as the text.
    2. ``[voice]`` - voice sample name; defaults to ``DEFAULT_VOICE``.
    3. ``[speed]`` - speed as a float; defaults to ``1.1``.

    Exits with status 1 when the required argument is missing or when the
    speed is not a number. Temporary WAV files are removed even if
    synthesis, export or playback fails.
    """
    if len(sys.argv) < 2:
        print("Usage: python script.py <text/file> [voice] [speed]")
        sys.exit(1)

    text_input = sys.argv[1]
    # A short argument that names an existing file is read as the text
    # source; anything else is spoken literally.
    if len(text_input) < 255 and os.path.isfile(text_input):
        # Explicit encoding so the result does not depend on the locale.
        with open(text_input, 'r', encoding='utf-8') as file:
            text = file.read()
    else:
        text = text_input

    voice = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_VOICE
    try:
        speed = float(sys.argv[3]) if len(sys.argv) > 3 else 1.1
    except ValueError:
        # Fail with a readable message instead of a traceback.
        print(f"Invalid speed {sys.argv[3]!r}: expected a number", file=sys.stderr)
        sys.exit(1)

    voice_file_path = select_voice(voice)

    print(f"Using voice file at {voice_file_path}")

    final_output_path = Path(tempfile.gettempdir()) / "output.wav"
    output_file = None
    try:
        combined_audio = AudioSegment.silent(duration=0)
        output_file = generate_speech(text, speed, voice_file_path)
        combined_audio += AudioSegment.from_wav(str(output_file))

        # Exporting combined audio
        combined_audio.export(str(final_output_path), format="wav")

        # Now playing the generated speech file
        print(f"Playing generated speech from {final_output_path}")
        playsound(str(final_output_path))
    finally:
        # Cleanup runs on every exit path so a failure does not leave
        # temporary files behind; files that were never created are skipped.
        for leftover in (output_file, final_output_path):
            if leftover is None:
                continue
            try:
                os.remove(leftover)
            except FileNotFoundError:
                pass
|
||||
|
||||
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
|
Loading…
Reference in a new issue