From 772637e95746f60a2d0a6753ce23ff1cc9bacfad Mon Sep 17 00:00:00 2001 From: sanj <67624670+iodrift@users.noreply.github.com> Date: Thu, 8 Aug 2024 21:13:23 -0700 Subject: [PATCH] Auto-update: Thu Aug 8 21:13:23 PDT 2024 --- sijapi/__init__.py | 37 +++++++++++++ sijapi/classes.py | 79 +++++++++++++++++++-------- sijapi/routers/tts.py | 124 +++++++++++++++++++++++------------------- 3 files changed, 162 insertions(+), 78 deletions(-) diff --git a/sijapi/__init__.py b/sijapi/__init__.py index d49668e..87ab67d 100644 --- a/sijapi/__init__.py +++ b/sijapi/__init__.py @@ -1,4 +1,5 @@ # __init__.py + import os from pathlib import Path import ipaddress @@ -18,12 +19,15 @@ os.makedirs(LOGS_DIR, exist_ok=True) L = Logger("Central", LOGS_DIR) # API essentials +print("Loading API configuration...") API = APIConfig.load('api', 'secrets') Dir = DirConfig.load('dirs') + HOST = f"{API.BIND}:{API.PORT}" LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost'] SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255') MAX_CPU_CORES = min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count()/2))), multiprocessing.cpu_count()) + IMG = Configuration.load('img', 'secrets', Dir) Llm = Configuration.load('llm', 'secrets', Dir) News = Configuration.load('news', 'secrets', Dir) @@ -32,6 +36,39 @@ Scrape = Configuration.load('scrape', 'secrets', Dir) Serve = Configuration.load('serve', 'secrets', Dir) Tts = Configuration.load('tts', 'secrets', Dir) +print(f"Tts configuration loaded: {Tts}") +print(f"Tts.elevenlabs: {Tts.elevenlabs}") +print(f"Tts.elevenlabs.key: {Tts.elevenlabs.key}") +print(f"Tts.elevenlabs.voices: {Tts.elevenlabs.voices}") + +# Additional debug logging for Configuration class +print(f"Configuration.resolve_placeholders method: {Configuration.resolve_placeholders}") +print(f"Configuration.resolve_string_placeholders method: {Configuration.resolve_string_placeholders}") + +# Check if secrets are properly loaded +print(f"Secrets in Tts config: {[attr for attr in dir(Tts) if attr.isupper()]}") + +# Verify the structure of Tts.elevenlabs +print(f"Type of Tts.elevenlabs: {type(Tts.elevenlabs)}") +print(f"Attributes of Tts.elevenlabs: {dir(Tts.elevenlabs)}") + +# Check if the ElevenLabs API key is properly resolved +print(f"ElevenLabs API key (masked): {'*' * len(Tts.elevenlabs.key) if hasattr(Tts.elevenlabs, 'key') else 'Not found'}") + +# Verify the structure of Tts.elevenlabs.voices +print(f"Type of Tts.elevenlabs.voices: {type(Tts.elevenlabs.voices)}") +print(f"Attributes of Tts.elevenlabs.voices: {dir(Tts.elevenlabs.voices)}") + +# Check if the default voice is set +print(f"Default voice: {Tts.elevenlabs.default if hasattr(Tts.elevenlabs, 'default') else 'Not found'}") + +# Additional checks +print(f"Is 'get' method available on Tts.elevenlabs.voices? {'get' in dir(Tts.elevenlabs.voices)}") +print(f"Is 'values' method available on Tts.elevenlabs.voices? {'values' in dir(Tts.elevenlabs.voices)}") + +print("Initialization complete") + + # Directories & general paths ROUTER_DIR = BASE_DIR / "routers" DATA_DIR = BASE_DIR / "data" diff --git a/sijapi/classes.py b/sijapi/classes.py index a1c01c6..3265822 100644 --- a/sijapi/classes.py +++ b/sijapi/classes.py @@ -68,6 +68,7 @@ class Logger: def get_module_logger(self, module_name): return self.logger.bind(name=module_name) + L = Logger("classes", "classes") logger = L.get_module_logger("classes") def debug(text: str): logger.debug(text) @@ -93,28 +94,31 @@ class Configuration(BaseModel): yaml_path = cls._resolve_path(yaml_path, 'config') if secrets_path: secrets_path = cls._resolve_path(secrets_path, 'config') - + try: with yaml_path.open('r') as file: config_data = yaml.safe_load(file) - + debug(f"Loaded configuration data from {yaml_path}") if secrets_path: with secrets_path.open('r') as file: secrets_data = yaml.safe_load(file) debug(f"Loaded secrets data from {secrets_path}") + if isinstance(config_data, list): - for item in config_data: - if isinstance(item, dict): - item.update(secrets_data) + config_data = {"configurations": config_data, "SECRETS": secrets_data} + elif isinstance(config_data, dict): + config_data['SECRETS'] = secrets_data else: - config_data.update(secrets_data) - if isinstance(config_data, list): + raise ValueError(f"Unexpected configuration data type: {type(config_data)}") + + if not isinstance(config_data, dict): config_data = {"configurations": config_data} + if config_data.get('HOME') is None: config_data['HOME'] = str(Path.home()) - warn(f"HOME was None in config, set to default: {config_data['HOME']}") - + debug(f"HOME was None in config, set to default: {config_data['HOME']}") + load_dotenv() instance = cls.create_dynamic_model(**config_data) instance._dir_config = dir_config or instance @@ -122,11 +126,12 @@ class Configuration(BaseModel): instance = cls.create_dynamic_model(**resolved_data) instance._dir_config = dir_config or instance return instance - + except Exception as e: err(f"Error loading configuration: {str(e)}") raise + @classmethod def _resolve_path(cls, path: Union[str, Path], default_dir: str) -> Path: base_path = Path(__file__).parent.parent # This will be two levels up from this file @@ -137,6 +142,7 @@ class Configuration(BaseModel): path = base_path / path return path + def resolve_placeholders(self, data: Any) -> Any: if isinstance(data, dict): resolved_data = {k: self.resolve_placeholders(v) for k, v in data.items()} @@ -154,21 +160,13 @@ class Configuration(BaseModel): else: return data + def resolve_string_placeholders(self, value: str) -> Any: - pattern = r'\{\{\s*([^}]+)\s*\}\}' - matches = re.findall(pattern, value) - - for match in matches: - parts = match.split('.') + if isinstance(value, str) and value.startswith('{{') and value.endswith('}}'): + key = value[2:-2].strip() + parts = key.split('.') if len(parts) == 2 and parts[0] == 'SECRET': - replacement = getattr(self, parts[1].strip(), '') - if not replacement: - warn(f"Secret '{parts[1].strip()}' not found in secrets file") - else: - replacement = getattr(self, match, value) - - value = value.replace('{{' + match + '}}', str(replacement)) - + return getattr(self.SECRETS, parts[1], '') return value @@ -187,6 +185,41 @@ class Configuration(BaseModel): **{k: (Any, v) for k, v in data.items()} ) return DynamicModel(**data) + + + def has_key(self, key_path: str) -> bool: + """ + Check if a key exists in the configuration or its nested objects. + + :param key_path: Dot-separated path to the key (e.g., 'elevenlabs.voices.Victoria') + :return: True if the key exists, False otherwise + """ + parts = key_path.split('.') + current = self + for part in parts: + if hasattr(current, part): + current = getattr(current, part) + else: + return False + return True + + + def get_value(self, key_path: str, default=None): + """ + Get the value of a key in the configuration or its nested objects. + + :param key_path: Dot-separated path to the key (e.g., 'elevenlabs.voices.Victoria') + :param default: Default value to return if the key doesn't exist + :return: The value of the key if it exists, otherwise the default value + """ + parts = key_path.split('.') + current = self + for part in parts: + if hasattr(current, part): + current = getattr(current, part) + else: + return default + return current class Config: extra = "allow" diff --git a/sijapi/routers/tts.py b/sijapi/routers/tts.py index 947b6af..f07b420 100644 --- a/sijapi/routers/tts.py +++ b/sijapi/routers/tts.py @@ -63,7 +63,7 @@ async def list_11l_voices(): formatted_list += f"{name}: `{id}`\n" except Exception as e: - err(f"Error determining voice ID: {str(e)}") + err(f"Error determining voice ID: {e}") return PlainTextResponse(formatted_list, status_code=200) @@ -78,13 +78,13 @@ async def select_voice(voice_name: str) -> str: debug(f"Checking {item.name.lower()}") if item.name.lower() == f"{voice_name_lower}.wav": debug(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.") - return str(item) + return item err(f"Voice file not found") raise HTTPException(status_code=404, detail="Voice file not found") except Exception as e: - err(f"Voice file not found: {str(e)}") + err(f"Voice file not found: {e}") return None @@ -119,7 +119,7 @@ async def generate_speech_endpoint( else: return await generate_speech(bg_tasks, text_content, voice, voice_file, model, speed, podcast) except Exception as e: - err(f"Error in TTS: {str(e)}") + err(f"Error in TTS: {e}") err(traceback.format_exc()) raise HTTPException(status_code=666, detail="error in TTS") @@ -127,7 +127,7 @@ async def generate_speech_endpoint( async def generate_speech( bg_tasks: BackgroundTasks, text: str, - voice: str = None, + voice: Optional[str] = None, voice_file: UploadFile = None, model: str = None, speed: float = 1.1, @@ -153,9 +153,9 @@ async def generate_speech( output_path = output_dir / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')} {title}.wav" debug(f"Model: {model}") - debug(f"API.EXTENSIONS.elevenlabs: {getattr(API.EXTENSIONS, 'elevenlabs', None)}") - debug(f"API.EXTENSIONS.xtts: {getattr(API.EXTENSIONS, 'xtts', None)}") - + debug(f"Voice: {voice}") + debug(f"Tts.elevenlabs: {Tts.elevenlabs}") + if model == "eleven_turbo_v2" and getattr(API.EXTENSIONS, 'elevenlabs', False): info("Using ElevenLabs.") audio_file_path = await elevenlabs_tts(model, text, voice, title, output_dir) @@ -176,7 +176,7 @@ async def generate_speech( if podcast: podcast_path = Path(Dir.PODCAST) / Path(audio_file_path).name - shutil.copy(str(audio_file_path), str(podcast_path)) + shutil.copy(audio_file_path, podcast_path) if podcast_path.exists(): info(f"Saved to podcast path: {podcast_path}") else: @@ -184,19 +184,19 @@ async def generate_speech( if podcast_path != audio_file_path: info(f"Podcast mode enabled, so we will remove {audio_file_path}") - bg_tasks.add_task(os.remove, str(audio_file_path)) + bg_tasks.add_task(os.remove, audio_file_path) else: warn(f"Podcast path set to same as audio file path...") - return str(podcast_path) + return podcast_path - return str(audio_file_path) + return audio_file_path except Exception as e: - err(f"Failed to generate speech: {str(e)}") + err(f"Failed to generate speech: {e}") err(f"Traceback: {traceback.format_exc()}") - raise HTTPException(status_code=500, detail=f"Failed to generate speech: {str(e)}") - + raise HTTPException(status_code=500, detail=f"Failed to generate speech: {e}") + async def get_model(voice: str = None, voice_file: UploadFile = None): @@ -215,14 +215,15 @@ async def determine_voice_id(voice_name: str) -> str: debug(f"Searching for voice id for {voice_name}") debug(f"Tts.elevenlabs.voices: {Tts.elevenlabs.voices}") - voices = Tts.elevenlabs.voices - if voice_name in voices: - return voices[voice_name] + # Check if the voice is in the configured voices + if voice_name and Tts.has_key(f'elevenlabs.voices.{voice_name}'): + voice_id = Tts.get_value(f'elevenlabs.voices.{voice_name}') + debug(f"Found voice ID in config - {voice_id}") + return voice_id - debug(f"Requested voice not among the voices specified in config/tts.yaml. Checking with ElevenLabs API.") + debug(f"Requested voice not among the voices specified in config/tts.yaml. Checking with ElevenLabs API using api_key: {Tts.elevenlabs.key}.") url = "https://api.elevenlabs.io/v1/voices" headers = {"xi-api-key": Tts.elevenlabs.key} - debug(f"Using key: {Tts.elevenlabs.key}") async with httpx.AsyncClient() as client: try: response = await client.get(url, headers=headers) @@ -237,42 +238,55 @@ async def determine_voice_id(voice_name: str) -> str: err(f"Failed to get voices from ElevenLabs API. Status code: {response.status_code}") err(f"Response content: {response.text}") except Exception as e: - err(f"Error determining voice ID: {str(e)}") + err(f"Error determining voice ID: {e}") warn(f"Voice '{voice_name}' not found; using the default specified in config/tts.yaml: {Tts.elevenlabs.default}") - return voices.get(Tts.elevenlabs.default, next(iter(voices.values()))) + if Tts.has_key(f'elevenlabs.voices.{Tts.elevenlabs.default}'): + return Tts.get_value(f'elevenlabs.voices.{Tts.elevenlabs.default}') + else: + err(f"Default voice '{Tts.elevenlabs.default}' not found in configuration. Using first available voice.") + first_voice = next(iter(vars(Tts.elevenlabs.voices))) + return Tts.get_value(f'elevenlabs.voices.{first_voice}') async def elevenlabs_tts(model: str, input_text: str, voice: str, title: str = None, output_dir: str = None): - voice_id = await determine_voice_id(voice) + # Debug logging + debug(f"API.EXTENSIONS: {API.EXTENSIONS}") + debug(f"API.EXTENSIONS.elevenlabs: {getattr(API.EXTENSIONS, 'elevenlabs', None)}") + debug(f"Tts config: {Tts}") - url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}" - payload = { - "text": input_text, - "model_id": model - } - headers = {"Content-Type": "application/json", "xi-api-key": Tts.elevenlabs.key} - debug(f"Using ElevenLabs API key: {Tts.elevenlabs.key}") - try: - async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client: # 5 minutes timeout - response = await client.post(url, json=payload, headers=headers) - output_dir = output_dir if output_dir else TTS_OUTPUT_DIR - title = title if title else dt_datetime.now().strftime("%Y%m%d%H%M%S") - filename = f"{sanitize_filename(title)}.mp3" - file_path = Path(output_dir) / filename - if response.status_code == 200: - with open(file_path, "wb") as audio_file: - audio_file.write(response.content) - return file_path - else: - err(f"Error from ElevenLabs API. Status code: {response.status_code}") - err(f"Response content: {response.text}") - raise HTTPException(status_code=response.status_code, detail=f"Error from ElevenLabs API: {response.text}") - - except Exception as e: - err(f"Error from Elevenlabs API: {e}") - raise HTTPException(status_code=500, detail=f"Error from ElevenLabs API: {str(e)}") + if getattr(API.EXTENSIONS, 'elevenlabs', False): + voice_id = await determine_voice_id(voice) + + url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}" + payload = { + "text": input_text, + "model_id": model + } + # Make sure this is the correct way to access the API key + headers = {"Content-Type": "application/json", "xi-api-key": Tts.elevenlabs.key} + try: + async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client: + response = await client.post(url, json=payload, headers=headers) + output_dir = output_dir if output_dir else TTS_OUTPUT_DIR + title = title if title else dt_datetime.now().strftime("%Y%m%d%H%M%S") + filename = f"{sanitize_filename(title)}.mp3" + file_path = Path(output_dir) / filename + if response.status_code == 200: + with open(file_path, "wb") as audio_file: + audio_file.write(response.content) + return file_path + else: + raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API") + + except Exception as e: + err(f"Error from Elevenlabs API: {e}") + raise HTTPException(status_code=500, detail=f"Error from ElevenLabs API: {e}") + + else: + warn(f"elevenlabs_tts called but ElevenLabs module is not enabled in config.") + raise HTTPException(status_code=400, detail="ElevenLabs TTS is not enabled") @@ -304,7 +318,7 @@ async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) existing_checksum = hashlib.md5(f.read()).hexdigest() if checksum == existing_checksum: - return str(existing_file) + return existing_file base_name = existing_file.stem counter = 1 @@ -315,7 +329,7 @@ async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) with open(new_file, 'wb') as f: f.write(content) - return str(new_file) + return new_file else: debug(f"No voice specified or file provided, using default voice: {Tts.xtts.default}") @@ -367,14 +381,14 @@ async def local_tts( XTTS.tts_to_file, text=segment, speed=speed, - file_path=str(segment_file_path), + file_path=segment_file_path, speaker_wav=[voice_file_path], language="en" ) debug(f"Segment file generated: {segment_file_path}") # Load and combine audio in a separate thread - segment_audio = await asyncio.to_thread(AudioSegment.from_wav, str(segment_file_path)) + segment_audio = await asyncio.to_thread(AudioSegment.from_wav, segment_file_path) combined_audio += segment_audio # Delete the segment file @@ -387,7 +401,7 @@ async def local_tts( await asyncio.to_thread(combined_audio.export, file_path, format="wav") - return str(file_path) + return file_path else: warn(f"local_tts called but xtts module disabled!") @@ -501,4 +515,4 @@ def copy_to_podcast_dir(file_path): print(f"Permission denied while copying the file: {file_path}") except Exception as e: print(f"An error occurred while copying the file: {file_path}") - print(f"Error details: {str(e)}") \ No newline at end of file + print(f"Error details: {e}") \ No newline at end of file