From fbbb92fe7a8b487aa78fcbe62b2d3e5c0d5b721f Mon Sep 17 00:00:00 2001 From: sanj <67624670+iodrift@users.noreply.github.com> Date: Tue, 30 Jul 2024 19:48:25 -0700 Subject: [PATCH] Auto-update: Tue Jul 30 19:48:25 PDT 2024 --- cf | 0 ddns | 0 tts copy | 85 ---------------------------------------------- txt-line-merge-abc | 0 4 files changed, 85 deletions(-) mode change 100644 => 100755 cf mode change 100644 => 100755 ddns delete mode 100755 tts copy mode change 100644 => 100755 txt-line-merge-abc diff --git a/cf b/cf old mode 100644 new mode 100755 diff --git a/ddns b/ddns old mode 100644 new mode 100755 diff --git a/tts copy b/tts copy deleted file mode 100755 index 4722b93..0000000 --- a/tts copy +++ /dev/null @@ -1,85 +0,0 @@ -#!/Users/sij/miniforge3/bin/python - -import sys -import os -import tempfile -from pathlib import Path -import uuid -import hashlib -from pydub import AudioSegment -import torch -from TTS.api import TTS # Adjust with actual import -from playsound import playsound - -from TTS.api import TTS - -device = torch.device('cpu') # keep trying 'mps' it will eventually be implemented -model_name = "tts_models/multilingual/multi-dataset/xtts_v2" -tts = TTS(model_name=model_name).to(device) -DEFAULT_VOICE = "kiel" - -def select_voice(voice_name: str) -> str: - voice_dir = Path('/Users/sij/AI/banana-phone/voices') - voice_file = voice_dir / f"{voice_name}.wav" - if voice_file.is_file(): - return str(voice_file) - else: - print(f"Voice file not found for {voice_name}, using default") - return str(voice_dir / f"{DEFAULT_VOICE}.wav") - -def generate_speech(text, speed, voice_file): - output_dir = Path(tempfile.gettempdir()) - output_dir.mkdir(exist_ok=True) - - short_uuid = str(uuid.uuid4())[:8] - output_file_name = f"{Path(voice_file).stem}-{short_uuid}.wav" - output_file = output_dir / output_file_name - - tts.tts_to_file( - text=text, - speed=speed, - file_path=output_file, - speaker_wav=[voice_file], - language="en" - ) - - return output_file - -def main(): - if len(sys.argv) < 2: - print("Usage: python script.py [voice] [speed]") - sys.exit(1) - - text_input = sys.argv[1] - if len(text_input) < 255 and os.path.isfile(text_input): - with open(text_input, 'r') as file: - text = file.read() - else: - text = text_input - - voice = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_VOICE - speed = float(sys.argv[3]) if len(sys.argv) > 3 else 1.1 - - voice_file_path = select_voice(voice) - - print(f"Using voice file at {voice_file_path}") - - combined_audio = AudioSegment.silent(duration=0) - output_file = generate_speech(text, speed, voice_file_path) - combined_audio += AudioSegment.from_wav(str(output_file)) - - # Exporting combined audio - final_output_path = Path(tempfile.gettempdir()) / "output.wav" - combined_audio.export(str(final_output_path), format="wav") - - # Now playing the generated speech file - print(f"Playing generated speech from {final_output_path}") - playsound(str(final_output_path)) - - # Cleanup - os.remove(output_file) - os.remove(final_output_path) - -if __name__ == "__main__": - main() - diff --git a/txt-line-merge-abc b/txt-line-merge-abc old mode 100644 new mode 100755