pathScripts/tts copy

#!/Users/sij/miniforge3/bin/python

import sys
import os
import tempfile
from pathlib import Path
import uuid
import hashlib
from pydub import AudioSegment
import torch
from TTS.api import TTS  # Adjust with actual import
from playsound import playsound

from TTS.api import TTS

# Voice used when the caller names none, or names one with no matching file.
DEFAULT_VOICE = "kiel"

# Identifier of the multilingual XTTS v2 model to load.
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"

# Inference runs on the CPU for now; retry 'mps' from time to time, as it is
# expected to be supported eventually.
device = torch.device('cpu')

# The model is loaded once, at import time, and shared by generate_speech().
tts = TTS(model_name=model_name).to(device)

def select_voice(voice_name: str, voice_dir=None) -> str:
    """Return the path of the reference WAV file for ``voice_name``.

    Looks for ``<voice_name>.wav`` inside the voice directory. When that file
    does not exist, a notice is printed and the path of the default voice
    (``DEFAULT_VOICE``) is returned instead. The default file itself is not
    checked for existence.

    Args:
        voice_name: Base name of the voice, without the ``.wav`` extension.
        voice_dir: Directory holding the voice samples (``str`` or ``Path``).
            When omitted, the script's original fixed location is used, so
            existing callers are unaffected.

    Returns:
        The selected voice file path as a string.
    """
    if voice_dir is None:
        voice_dir = '/Users/sij/AI/banana-phone/voices'
    voice_dir = Path(voice_dir)

    voice_file = voice_dir / f"{voice_name}.wav"
    if voice_file.is_file():
        return str(voice_file)

    print(f"Voice file not found for {voice_name}, using default")
    return str(voice_dir / f"{DEFAULT_VOICE}.wav")

def generate_speech(text, speed, voice_file):
    """Synthesize ``text`` to a WAV file in the system temp directory.

    Args:
        text: The text to speak.
        speed: Speed value forwarded to the TTS model.
        voice_file: Path of the reference voice WAV; its stem is also used
            as the prefix of the output file name.

    Returns:
        ``Path`` of the WAV file the module-level ``tts`` model was asked
        to write.
    """
    tmp_root = Path(tempfile.gettempdir())
    tmp_root.mkdir(exist_ok=True)

    # "<voice stem>-<8 hex chars>.wav": the random suffix distinguishes the
    # outputs of separate calls.
    tag = uuid.uuid4().hex[:8]
    wav_path = tmp_root / f"{Path(voice_file).stem}-{tag}.wav"

    synthesis_args = dict(
        text=text,
        speed=speed,
        file_path=wav_path,
        speaker_wav=[voice_file],
        language="en",
    )
    tts.tts_to_file(**synthesis_args)

    return wav_path

def main():
    """Speak text given on the command line using a reference voice.

    Usage: ``script.py <text/file> [voice] [speed]``.

    The first argument is read as a file when it is shorter than 255
    characters and names an existing file; otherwise it is spoken literally.
    ``voice`` defaults to ``DEFAULT_VOICE`` and ``speed`` to ``1.1``.

    Exits with status 1 when the text argument is missing or when ``speed``
    is not a number. The temporary WAV files are removed even when
    synthesis, export or playback fails.
    """
    if len(sys.argv) < 2:
        print("Usage: python script.py <text/file> [voice] [speed]")
        sys.exit(1)

    text_input = sys.argv[1]
    # Only short arguments are considered as possible file paths.
    if len(text_input) < 255 and os.path.isfile(text_input):
        # Explicit encoding so the result does not depend on the locale.
        with open(text_input, 'r', encoding='utf-8') as file:
            text = file.read()
    else:
        text = text_input

    voice = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_VOICE
    try:
        speed = float(sys.argv[3]) if len(sys.argv) > 3 else 1.1
    except ValueError:
        # Fail with a readable message instead of a traceback.
        print(f"Invalid speed {sys.argv[3]!r}: expected a number", file=sys.stderr)
        sys.exit(1)

    voice_file_path = select_voice(voice)

    print(f"Using voice file at {voice_file_path}")

    final_output_path = Path(tempfile.gettempdir()) / "output.wav"
    # Files this run is responsible for deleting, recorded as they come
    # into existence so a failure midway never removes a file we did not write.
    created_files = []
    try:
        combined_audio = AudioSegment.silent(duration=0)
        output_file = generate_speech(text, speed, voice_file_path)
        created_files.append(output_file)
        combined_audio += AudioSegment.from_wav(str(output_file))

        # Exporting combined audio
        created_files.append(final_output_path)
        combined_audio.export(str(final_output_path), format="wav")

        # Now playing the generated speech file
        print(f"Playing generated speech from {final_output_path}")
        playsound(str(final_output_path))
    finally:
        # Cleanup runs on success and on failure alike.
        for leftover in created_files:
            try:
                os.remove(leftover)
            except FileNotFoundError:
                pass

# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
Auto-update: Tue Jul 30 19:30:24 PDT 2024 2024-07-31 04:30:24 +02:00

			`#!/Users/sij/miniforge3/bin/python`

			`import sys`
			`import os`
			`import tempfile`
			`from pathlib import Path`
			`import uuid`
			`import hashlib`
			`from pydub import AudioSegment`
			`import torch`
			`from TTS.api import TTS # Adjust with actual import`
			`from playsound import playsound`

			`from TTS.api import TTS`

			`device = torch.device('cpu') # keep trying 'mps' it will eventually be implemented`
			`model_name = "tts_models/multilingual/multi-dataset/xtts_v2"`
			`tts = TTS(model_name=model_name).to(device)`
			`DEFAULT_VOICE = "kiel"`

			`def select_voice(voice_name: str) -> str:`
			`voice_dir = Path('/Users/sij/AI/banana-phone/voices')`
			`voice_file = voice_dir / f"{voice_name}.wav"`
			`if voice_file.is_file():`
			`return str(voice_file)`
			`else:`
			`print(f"Voice file not found for {voice_name}, using default")`
			`return str(voice_dir / f"{DEFAULT_VOICE}.wav")`

			`def generate_speech(text, speed, voice_file):`
			`output_dir = Path(tempfile.gettempdir())`
			`output_dir.mkdir(exist_ok=True)`

			`short_uuid = str(uuid.uuid4())[:8]`
			`output_file_name = f"{Path(voice_file).stem}-{short_uuid}.wav"`
			`output_file = output_dir / output_file_name`

			`tts.tts_to_file(`
			`text=text,`
			`speed=speed,`
			`file_path=output_file,`
			`speaker_wav=[voice_file],`
			`language="en"`
			`)`

			`return output_file`

			`def main():`
			`if len(sys.argv) < 2:`
			`print("Usage: python script.py <text/file> [voice] [speed]")`
			`sys.exit(1)`

			`text_input = sys.argv[1]`
			`if len(text_input) < 255 and os.path.isfile(text_input):`
			`with open(text_input, 'r') as file:`
			`text = file.read()`
			`else:`
			`text = text_input`

			`voice = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_VOICE`
			`speed = float(sys.argv[3]) if len(sys.argv) > 3 else 1.1`

			`voice_file_path = select_voice(voice)`

			`print(f"Using voice file at {voice_file_path}")`

			`combined_audio = AudioSegment.silent(duration=0)`
			`output_file = generate_speech(text, speed, voice_file_path)`
			`combined_audio += AudioSegment.from_wav(str(output_file))`

			`# Exporting combined audio`
			`final_output_path = Path(tempfile.gettempdir()) / "output.wav"`
			`combined_audio.export(str(final_output_path), format="wav")`

			`# Now playing the generated speech file`
			`print(f"Playing generated speech from {final_output_path}")`
			`playsound(str(final_output_path))`

			`# Cleanup`
			`os.remove(output_file)`
			`os.remove(final_output_path)`

			`if __name__ == "__main__":`
			`main()`