#!/Users/sij/miniforge3/bin/python
"""Synthesize speech from text with an XTTS v2 voice clone and play it.

Usage:
    script.py <text-or-file> [voice] [speed]

The first argument is either literal text or the path of a text file to read.
``voice`` names a ``<voice>.wav`` reference sample in the voices directory and
``speed`` is a float passed through to the TTS engine (default 1.1).
"""
import hashlib
import os
import sys
import tempfile
import uuid
from pathlib import Path

import torch
from playsound import playsound
from pydub import AudioSegment
from TTS.api import TTS  # Adjust with actual import

device = torch.device('cpu')  # keep trying 'mps' it will eventually be implemented
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
# NOTE: the model is instantiated at import time, as a module-level side effect.
tts = TTS(model_name=model_name).to(device)

DEFAULT_VOICE = "kiel"


def select_voice(voice_name: str) -> str:
    """Return the path of the reference WAV file for *voice_name*.

    If ``<voice_name>.wav`` does not exist in the voices directory, a notice
    is printed and the path for ``DEFAULT_VOICE`` is returned instead. The
    fallback path itself is not checked for existence.
    """
    voice_dir = Path('/Users/sij/AI/banana-phone/voices')
    voice_file = voice_dir / f"{voice_name}.wav"
    if voice_file.is_file():
        return str(voice_file)

    print(f"Voice file not found for {voice_name}, using default")
    return str(voice_dir / f"{DEFAULT_VOICE}.wav")


def generate_speech(text, speed, voice_file):
    """Synthesize *text* to a uniquely named WAV in the system temp directory.

    Args:
        text: The text to speak.
        speed: Speed value forwarded to ``tts.tts_to_file``.
        voice_file: Path of the speaker reference WAV used for cloning.

    Returns:
        ``pathlib.Path`` of the generated WAV file. The caller is responsible
        for deleting it.
    """
    output_dir = Path(tempfile.gettempdir())
    output_dir.mkdir(exist_ok=True)

    # A short random suffix keeps repeated runs with the same voice from
    # overwriting each other's intermediate file.
    short_uuid = str(uuid.uuid4())[:8]
    output_file = output_dir / f"{Path(voice_file).stem}-{short_uuid}.wav"

    tts.tts_to_file(
        text=text,
        speed=speed,
        file_path=str(output_file),  # the TTS API documents this as a str
        speaker_wav=[voice_file],
        language="en",
    )
    return output_file


def _remove_quietly(path):
    """Delete *path*, ignoring the case where it no longer exists."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def main():
    """Parse command-line arguments, synthesize the speech, and play it.

    Exits with status 1 on missing arguments, a non-numeric speed, or empty
    input text. Temporary audio files are removed even if synthesis, export,
    or playback fails.
    """
    if len(sys.argv) < 2:
        print("Usage: python script.py <text-or-file> [voice] [speed]")
        sys.exit(1)

    text_input = sys.argv[1]
    # Only treat short arguments as candidate file names; long text is
    # assumed to be the literal text to speak.
    if len(text_input) < 255 and os.path.isfile(text_input):
        with open(text_input, 'r', encoding="utf-8") as file:
            text = file.read()
    else:
        text = text_input

    if not text.strip():
        print("No text to speak")
        sys.exit(1)

    voice = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_VOICE
    try:
        speed = float(sys.argv[3]) if len(sys.argv) > 3 else 1.1
    except ValueError:
        print(f"Invalid speed {sys.argv[3]!r}: expected a number")
        sys.exit(1)

    voice_file_path = select_voice(voice)
    print(f"Using voice file at {voice_file_path}")

    # Every temp file is registered here as soon as it may exist, so the
    # finally-clause cleans up exactly what this run created.
    temp_files = []
    try:
        output_file = generate_speech(text, speed, voice_file_path)
        temp_files.append(output_file)

        combined_audio = AudioSegment.silent(duration=0)
        combined_audio += AudioSegment.from_wav(str(output_file))

        # Exporting combined audio
        final_output_path = Path(tempfile.gettempdir()) / "output.wav"
        temp_files.append(final_output_path)
        # export() returns the open output file handle; close it so the
        # handle is not leaked before playback.
        combined_audio.export(str(final_output_path), format="wav").close()

        # Now playing the generated speech file
        print(f"Playing generated speech from {final_output_path}")
        playsound(str(final_output_path))
    finally:
        # Cleanup
        for temp_file in temp_files:
            _remove_quietly(temp_file)


if __name__ == "__main__":
    main()