Auto-update: Tue Jul 9 16:14:52 PDT 2024

This commit is contained in:
sanj 2024-07-09 16:14:52 -07:00
parent 77a9e35f2b
commit 4973707596
7 changed files with 105 additions and 91 deletions

View file

@ -6,11 +6,11 @@ from dotenv import load_dotenv
from dateutil import tz from dateutil import tz
from pathlib import Path from pathlib import Path
from .logs import Logger from .logs import Logger
from .classes import Database, Geocoder, APIConfig, Configuration, Dir from .classes import Database, Geocoder, APIConfig, Configuration, EmailConfiguration, Dir
### Initial initialization ### Initial initialization
API = APIConfig.load('api', 'secrets') API = APIConfig.load('api', 'secrets')
Dir = Dir.load('dirs') Dir = Dir()
ENV_PATH = Dir.CONFIG / ".env" ENV_PATH = Dir.CONFIG / ".env"
LOGS_DIR = Dir.LOGS LOGS_DIR = Dir.LOGS
L = Logger("Central", LOGS_DIR) L = Logger("Central", LOGS_DIR)
@ -23,9 +23,11 @@ DB = Database.from_yaml('db.yaml')
ASR = Configuration.load('asr') ASR = Configuration.load('asr')
IMG = Configuration.load('img') IMG = Configuration.load('img')
Cal = Configuration.load('cal', 'secrets') Cal = Configuration.load('cal', 'secrets')
Email = Configuration.load('email', 'secrets') print(f"Cal configuration: {Cal.__dict__}")
Email = EmailConfiguration.load('email', 'secrets')
LLM = Configuration.load('llm', 'secrets') LLM = Configuration.load('llm', 'secrets')
News = Configuration.load('news', 'secrets') News = Configuration.load('news', 'secrets')
Obsidian = Configuration.load('obsidian')
TTS = Configuration.load('tts', 'secrets') TTS = Configuration.load('tts', 'secrets')
CourtListener = Configuration.load('courtlistener', 'secrets') CourtListener = Configuration.load('courtlistener', 'secrets')
Tailscale = Configuration.load('tailscale', 'secrets') Tailscale = Configuration.load('tailscale', 'secrets')

View file

@ -207,7 +207,6 @@ class Configuration(BaseModel):
try: try:
with yaml_path.open('r') as file: with yaml_path.open('r') as file:
config_data = yaml.safe_load(file) config_data = yaml.safe_load(file)
print(f"Loaded configuration data from {yaml_path}") print(f"Loaded configuration data from {yaml_path}")
if secrets_path: if secrets_path:
@ -220,7 +219,6 @@ class Configuration(BaseModel):
instance._dir_config = dir_config or instance instance._dir_config = dir_config or instance
resolved_data = instance.resolve_placeholders(config_data) resolved_data = instance.resolve_placeholders(config_data)
return cls._create_nested_config(resolved_data) return cls._create_nested_config(resolved_data)
except Exception as e: except Exception as e:
print(f"Error loading configuration: {str(e)}") print(f"Error loading configuration: {str(e)}")
@ -229,6 +227,8 @@ class Configuration(BaseModel):
@classmethod @classmethod
def _create_nested_config(cls, data): def _create_nested_config(cls, data):
if isinstance(data, dict): if isinstance(data, dict):
print(f"Creating nested config for: {cls.__name__}")
print(f"Data: {data}")
return cls(**{k: cls._create_nested_config(v) for k, v in data.items()}) return cls(**{k: cls._create_nested_config(v) for k, v in data.items()})
elif isinstance(data, list): elif isinstance(data, list):
return [cls._create_nested_config(item) for item in data] return [cls._create_nested_config(item) for item in data]
@ -267,15 +267,7 @@ class Configuration(BaseModel):
for match in matches: for match in matches:
parts = match.split('.') parts = match.split('.')
if len(parts) == 1: # Internal reference replacement = self._resolve_nested_placeholder(parts)
replacement = getattr(self._dir_config, parts[0], str(Path.home() / parts[0].lower()))
elif len(parts) == 2 and parts[0] == 'Dir':
replacement = getattr(self._dir_config, parts[1], str(Path.home() / parts[1].lower()))
elif len(parts) == 2 and parts[0] == 'ENV':
replacement = os.getenv(parts[1], '')
else:
replacement = value
value = value.replace('{{' + match + '}}', str(replacement)) value = value.replace('{{' + match + '}}', str(replacement))
# Convert to Path if it looks like a file path # Convert to Path if it looks like a file path
@ -283,6 +275,17 @@ class Configuration(BaseModel):
return Path(value).expanduser() return Path(value).expanduser()
return value return value
def _resolve_nested_placeholder(self, parts: List[str]) -> Any:
    """Resolve a dotted placeholder path against the directory configuration.

    The walk starts at ``self._dir_config`` and follows one attribute per
    entry in ``parts``.

    Args:
        parts: The placeholder name split on ``.``.

    Returns:
        * ``os.getenv(parts[-1], '')`` as soon as an ``ENV`` segment is
          encountered during the walk;
        * the attribute reached when every segment resolves;
        * ``str(Path.home() / segment.lower())`` for the first segment that
          cannot be resolved.
    """
    current = self._dir_config
    for index, part in enumerate(parts):
        if part == 'ENV':
            return os.getenv(parts[-1], '')
        if hasattr(current, part):
            current = getattr(current, part)
            continue
        if index == 0 and part == 'Dir' and len(parts) > 1:
            # A leading "Dir" names the directory config itself, so
            # placeholders written as ``Dir.NAME`` resolve against it
            # instead of collapsing to the ``~/dir`` fallback.
            continue
        return str(Path.home() / part.lower())
    return current
class APIConfig(BaseModel): class APIConfig(BaseModel):
HOST: str HOST: str
@ -788,6 +791,31 @@ class EmailConfiguration(Configuration):
autoresponders: List[AutoResponder] autoresponders: List[AutoResponder]
accounts: List[EmailAccount] accounts: List[EmailAccount]
@classmethod
def _create_nested_config(cls, data):
    """Turn parsed email configuration data into typed objects.

    A dict that contains an ``imaps`` key is treated as the top-level email
    configuration: its ``imaps``, ``smtps``, ``autoresponders`` and
    ``accounts`` entries are wrapped in their model classes and every other
    key is forwarded to the constructor unchanged.  Any other dict is
    returned as-is, lists are processed element by element, and all other
    values pass through untouched.
    """
    if isinstance(data, list):
        return [cls._create_nested_config(entry) for entry in data]
    if not isinstance(data, dict) or 'imaps' not in data:
        # Scalars and nested dicts are kept exactly as loaded.
        return data

    typed_sections = ['imaps', 'smtps', 'autoresponders', 'accounts']
    extras = {key: value for key, value in data.items() if key not in typed_sections}
    return cls(
        imaps=[IMAPConfig(**imap) for imap in data['imaps']],
        smtps=[SMTPConfig(**smtp) for smtp in data['smtps']],
        autoresponders=[AutoResponder(**ar) for ar in data['autoresponders']],
        accounts=[EmailAccount(**account) for account in data['accounts']],
        **extras,
    )
@classmethod
def load(cls, yaml_path: Union[str, Path], secrets_path: Optional[Union[str, Path]] = None, dir_config: Optional['Configuration'] = None) -> 'EmailConfiguration':
    """Load the email configuration.

    Delegates to the parent class's ``load`` with the same arguments and
    passes whatever it returns through ``cls._create_nested_config``.
    """
    return cls._create_nested_config(
        super().load(yaml_path, secrets_path, dir_config)
    )
# ... (rest of the methods remain the same)
def get_imap(self, username: str) -> Optional[IMAPConfig]: def get_imap(self, username: str) -> Optional[IMAPConfig]:
return next((imap for imap in self.imaps if imap.username == username), None) return next((imap for imap in self.imaps if imap.username == username), None)
@ -800,6 +828,9 @@ class EmailConfiguration(Configuration):
def get_account(self, name: str) -> Optional[EmailAccount]: def get_account(self, name: str) -> Optional[EmailAccount]:
return next((account for account in self.accounts if account.name == name), None) return next((account for account in self.accounts if account.name == name), None)
def get_email_accounts(self) -> List[EmailAccount]:
    """Return the configured email accounts (the ``accounts`` list itself, not a copy)."""
    configured_accounts = self.accounts
    return configured_accounts
class EmailContact(BaseModel): class EmailContact(BaseModel):
email: str email: str
name: Optional[str] = None name: Optional[str] = None

View file

@ -1,16 +0,0 @@
HOME: ~
BASE: '{{ HOME }}/workshop/sijapi'
SIJAPI: '{{ BASE }}/sijapi'
CONFIG: '{{ SIJAPI }}/config'
CONFIG.email: '{{ CONFIG }}/email.yaml'
CONFIG.img: '{{ CONFIG }}/img.yaml'
CONFIG.news: '{{ CONFIG }}/news.yaml'
SECRETS: '{{ CONFIG }}/secrets.yaml'
DATA: '{{ SIJAPI }}/data'
DATA.ALERTS: '{{ DATA }}/alerts'
DATA.ASR: '{{ DATA }}/asr'
DATA.BASE: '{{ DATA }}/db'
DATA.IMG: '{{ DATA }}/img'
DATA.TTS: '{{ DATA }}/tts'
TTS.VOICES: '{{ TTS }}/voices'
LOGS: '{{ SIJAPI }}/logs'

View file

@ -23,7 +23,10 @@ cal = APIRouter()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token") oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token")
timeout = httpx.Timeout(12) timeout = httpx.Timeout(12)
print(f"Configuration MS365: {Cal.MS365}") print(f"Cal object: {Cal}")
print(f"Cal.__dict__: {Cal.__dict__}")
print(f"Cal.MS365: {Cal.MS365}")
if Cal.MS365.toggle == 'on': if Cal.MS365.toggle == 'on':
L.CRIT(f"Visit https://api.sij.ai/MS365/login to obtain your Microsoft 365 authentication token.") L.CRIT(f"Visit https://api.sij.ai/MS365/login to obtain your Microsoft 365 authentication token.")

View file

@ -363,7 +363,7 @@ async def save_processed_uid(filename: Path, account_name: str, uid: str):
async def process_all_accounts(): async def process_all_accounts():
email_accounts = load_email_accounts(EMAIL_CONFIG) email_accounts = Email.get_email_accounts()
summarization_tasks = [asyncio.create_task(process_account_archival(account)) for account in email_accounts] summarization_tasks = [asyncio.create_task(process_account_archival(account)) for account in email_accounts]
autoresponding_tasks = [asyncio.create_task(process_account_autoresponding(account)) for account in email_accounts] autoresponding_tasks = [asyncio.create_task(process_account_autoresponding(account)) for account in email_accounts]
await asyncio.gather(*summarization_tasks, *autoresponding_tasks) await asyncio.gather(*summarization_tasks, *autoresponding_tasks)
@ -371,4 +371,4 @@ async def process_all_accounts():
@email.on_event("startup") @email.on_event("startup")
async def startup_event(): async def startup_event():
await asyncio.sleep(5) await asyncio.sleep(5)
asyncio.create_task(process_all_accounts()) asyncio.create_task(process_all_accounts())

View file

@ -26,7 +26,7 @@ import tempfile
import shutil import shutil
import html2text import html2text
import markdown import markdown
from sijapi import L, Dir, API, LLM, TTS from sijapi import L, Dir, API, LLM, TTS, Obsidian
from sijapi.utilities import convert_to_unix_time, sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension from sijapi.utilities import convert_to_unix_time, sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension
from sijapi.routers import tts from sijapi.routers import tts
from sijapi.routers.asr import transcribe_audio from sijapi.routers.asr import transcribe_audio
@ -49,7 +49,7 @@ def read_markdown_files(folder: Path):
return documents, file_paths return documents, file_paths
# Read markdown files and generate embeddings # Read markdown files and generate embeddings
documents, file_paths = read_markdown_files(DOC_DIR) documents, file_paths = read_markdown_files(Obsidian.docs)
for i, doc in enumerate(documents): for i, doc in enumerate(documents):
response = ollama.embeddings(model="mxbai-embed-large", prompt=doc) response = ollama.embeddings(model="mxbai-embed-large", prompt=doc)
embedding = response["embedding"] embedding = response["embedding"]
@ -83,7 +83,7 @@ async def generate_response(prompt: str):
return {"response": output['response']} return {"response": output['response']}
async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, model: str = DEFAULT_LLM, max_tokens: int = 200): async def query_ollama(usr: str, sys: str = LLM.chat.sys, model: str = LLM.chat.model, max_tokens: int = LLM.chat.max_tokens):
messages = [{"role": "system", "content": sys}, messages = [{"role": "system", "content": sys},
{"role": "user", "content": usr}] {"role": "user", "content": usr}]
LLM = Ollama() LLM = Ollama()
@ -100,8 +100,8 @@ async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, model: str = DEFAULT_LL
async def query_ollama_multishot( async def query_ollama_multishot(
message_list: List[str], message_list: List[str],
sys: str = LLM_SYS_MSG, sys: str = LLM.chat.sys,
model: str = DEFAULT_LLM, model: str = LLM.chat.model,
max_tokens: int = 200 max_tokens: int = 200
): ):
if len(message_list) % 2 == 0: if len(message_list) % 2 == 0:
@ -130,7 +130,7 @@ async def chat_completions(request: Request):
body = await request.json() body = await request.json()
timestamp = dt_datetime.now().strftime("%Y%m%d_%H%M%S%f") timestamp = dt_datetime.now().strftime("%Y%m%d_%H%M%S%f")
filename = REQUESTS_DIR / f"request_{timestamp}.json" filename = Dir.logs.requests / f"request_{timestamp}.json"
async with aiofiles.open(filename, mode='w') as file: async with aiofiles.open(filename, mode='w') as file:
await file.write(json.dumps(body, indent=4)) await file.write(json.dumps(body, indent=4))
@ -227,9 +227,9 @@ async def stream_messages_with_vision(message: dict, model: str, num_predict: in
def get_appropriate_model(requested_model): def get_appropriate_model(requested_model):
if requested_model == "gpt-4-vision-preview": if requested_model == "gpt-4-vision-preview":
return DEFAULT_VISION return LLM.vision.model
elif not is_model_available(requested_model): elif not is_model_available(requested_model):
return DEFAULT_LLM return LLM.chat.model
else: else:
return requested_model return requested_model
@ -310,7 +310,7 @@ async def chat_completions_options(request: Request):
], ],
"created": int(time.time()), "created": int(time.time()),
"id": str(uuid.uuid4()), "id": str(uuid.uuid4()),
"model": DEFAULT_LLM, "model": LLM.chat.model,
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
}, },
status_code=200, status_code=200,
@ -431,7 +431,7 @@ def llava(image_base64, prompt):
return "" if "pass" in response["response"].lower() else response["response"] return "" if "pass" in response["response"].lower() else response["response"]
def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150): def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150):
VISION_LLM = OpenAI(api_key=OPENAI_API_KEY) VISION_LLM = OpenAI(api_key=LLM.OPENAI_API_KEY)
response_1 = VISION_LLM.chat.completions.create( response_1 = VISION_LLM.chat.completions.create(
model="gpt-4-vision-preview", model="gpt-4-vision-preview",
messages=[ messages=[
@ -512,12 +512,12 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
@llm.get("/summarize") @llm.get("/summarize")
async def summarize_get(text: str = Form(None), instruction: str = Form(SUMMARY_INSTRUCT)): async def summarize_get(text: str = Form(None), instruction: str = Form(LLM.summary.instruct)):
summarized_text = await summarize_text(text, instruction) summarized_text = await summarize_text(text, instruction)
return summarized_text return summarized_text
@llm.post("/summarize") @llm.post("/summarize")
async def summarize_post(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), instruction: str = Form(SUMMARY_INSTRUCT)): async def summarize_post(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), instruction: str = Form(LLM.summary.instruct)):
text_content = text if text else await extract_text(file) text_content = text if text else await extract_text(file)
summarized_text = await summarize_text(text_content, instruction) summarized_text = await summarize_text(text_content, instruction)
return summarized_text return summarized_text
@ -526,10 +526,10 @@ async def summarize_post(file: Optional[UploadFile] = File(None), text: Optional
@llm.post("/speaksummary") @llm.post("/speaksummary")
async def summarize_tts_endpoint( async def summarize_tts_endpoint(
bg_tasks: BackgroundTasks, bg_tasks: BackgroundTasks,
instruction: str = Form(SUMMARY_INSTRUCT), instruction: str = Form(LLM.summary.instruct),
file: Optional[UploadFile] = File(None), file: Optional[UploadFile] = File(None),
text: Optional[str] = Form(None), text: Optional[str] = Form(None),
voice: Optional[str] = Form(DEFAULT_VOICE), voice: Optional[str] = Form(TTS.xtts.voice),
speed: Optional[float] = Form(1.2), speed: Optional[float] = Form(1.2),
podcast: Union[bool, str] = Form(False) podcast: Union[bool, str] = Form(False)
): ):
@ -572,8 +572,8 @@ async def summarize_tts_endpoint(
async def summarize_tts( async def summarize_tts(
text: str, text: str,
instruction: str = SUMMARY_INSTRUCT, instruction: str = LLM.summary.instruct,
voice: Optional[str] = DEFAULT_VOICE, voice: Optional[str] = TTS.xtts.voice,
speed: float = 1.1, speed: float = 1.1,
podcast: bool = False, podcast: bool = False,
LLM: Ollama = None LLM: Ollama = None
@ -605,9 +605,9 @@ def split_text_into_chunks(text: str) -> List[str]:
sentences = re.split(r'(?<=[.!?])\s+', text) sentences = re.split(r'(?<=[.!?])\s+', text)
words = text.split() words = text.split()
total_words = len(words) total_words = len(words)
L.DEBUG(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.") L.DEBUG(f"Total words: {total_words}. LLM.summary.chunk_size: {LLM.summary.chunk_size}. LLM.tpw: {LLM.tpw}.")
max_words_per_chunk = int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW) max_words_per_chunk = int(LLM.summary.chunk_size / LLM.tpw)
L.DEBUG(f"Maximum words per chunk: {max_words_per_chunk}") L.DEBUG(f"Maximum words per chunk: {max_words_per_chunk}")
chunks = [] chunks = []
@ -633,8 +633,8 @@ def split_text_into_chunks(text: str) -> List[str]:
def calculate_max_tokens(text: str) -> int: def calculate_max_tokens(text: str) -> int:
tokens_count = max(1, int(len(text.split()) * SUMMARY_TPW)) # Ensure at least 1 tokens_count = max(1, int(len(text.split()) * LLM.tpw)) # Ensure at least 1
return min(tokens_count // 4, SUMMARY_CHUNK_SIZE) return min(tokens_count // 4, LLM.summary.chunk_size)
@ -694,7 +694,7 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
raise ValueError(f"Error extracting text: {str(e)}") raise ValueError(f"Error extracting text: {str(e)}")
async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_override: int = None, length_quotient: float = SUMMARY_LENGTH_RATIO, LLM: Ollama = None): async def summarize_text(text: str, instruction: str = LLM.summary.instruct, length_override: int = None, length_quotient: float = LLM.summary.length_ratio, LLM: Ollama = None):
LLM = LLM if LLM else Ollama() LLM = LLM if LLM else Ollama()
chunked_text = split_text_into_chunks(text) chunked_text = split_text_into_chunks(text)
@ -703,12 +703,12 @@ async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_
total_words_count = sum(len(chunk.split()) for chunk in chunked_text) total_words_count = sum(len(chunk.split()) for chunk in chunked_text)
L.DEBUG(f"Total words count: {total_words_count}") L.DEBUG(f"Total words count: {total_words_count}")
total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW)) total_tokens_count = max(1, int(total_words_count * LLM.tpw))
L.DEBUG(f"Total tokens count: {total_tokens_count}") L.DEBUG(f"Total tokens count: {total_tokens_count}")
total_summary_length = length_override if length_override else total_tokens_count // length_quotient total_summary_length = length_override if length_override else total_tokens_count // length_quotient
L.DEBUG(f"Total summary length: {total_summary_length}") L.DEBUG(f"Total summary length: {total_summary_length}")
corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT) corrected_total_summary_length = min(total_summary_length, LLM.summary.max_tokens)
L.DEBUG(f"Corrected total summary length: {corrected_total_summary_length}") L.DEBUG(f"Corrected total summary length: {corrected_total_summary_length}")
summaries = await asyncio.gather(*[ summaries = await asyncio.gather(*[
@ -738,11 +738,11 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int
LLM = LLM if LLM else Ollama() LLM = LLM if LLM else Ollama()
words_count = len(text.split()) words_count = len(text.split())
tokens_count = max(1, int(words_count * SUMMARY_TPW)) tokens_count = max(1, int(words_count * LLM.tpw))
summary_length_ratio = length_ratio if length_ratio else SUMMARY_LENGTH_RATIO summary_length_ratio = length_ratio if length_ratio else LLM.summary.length_ratio
max_tokens = min(tokens_count // summary_length_ratio, SUMMARY_CHUNK_SIZE) max_tokens = min(tokens_count // summary_length_ratio, LLM.summary.chunk_size)
max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH) max_tokens = max(max_tokens, LLM.summary.min_length)
L.DEBUG(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}") L.DEBUG(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}")
@ -753,7 +753,7 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int
L.DEBUG(f"Starting LLM.generate for part {part} of {total_parts}") L.DEBUG(f"Starting LLM.generate for part {part} of {total_parts}")
response = await LLM.generate( response = await LLM.generate(
model=SUMMARY_MODEL, model=LLM.summary.model,
prompt=prompt, prompt=prompt,
stream=False, stream=False,
options={'num_predict': max_tokens, 'temperature': 0.5} options={'num_predict': max_tokens, 'temperature': 0.5}

View file

@ -12,7 +12,7 @@ import asyncio
from pydantic import BaseModel from pydantic import BaseModel
from typing import Optional, Union, List from typing import Optional, Union, List
from pydub import AudioSegment from pydub import AudioSegment
from TTS.api import TTS from TTS.api import TTS as XTTSv2
from pathlib import Path from pathlib import Path
from datetime import datetime as dt_datetime from datetime import datetime as dt_datetime
from time import time from time import time
@ -25,7 +25,7 @@ import tempfile
import random import random
import re import re
import os import os
from sijapi import L, DEFAULT_VOICE, TTS_SEGMENTS_DIR, VOICE_DIR, PODCAST_DIR, TTS_OUTPUT_DIR, ELEVENLABS_API_KEY from sijapi import L, Dir, API, TTS
from sijapi.utilities import sanitize_filename from sijapi.utilities import sanitize_filename
@ -39,14 +39,14 @@ MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
@tts.get("/tts/local_voices", response_model=List[str]) @tts.get("/tts/local_voices", response_model=List[str])
async def list_wav_files(): async def list_wav_files():
wav_files = [file.split('.')[0] for file in os.listdir(VOICE_DIR) if file.endswith(".wav")] wav_files = [file.split('.')[0] for file in os.listdir(Dir.data.tts.voices) if file.endswith(".wav")]
return wav_files return wav_files
@tts.get("/tts/elevenlabs_voices") @tts.get("/tts/elevenlabs_voices")
async def list_11l_voices(): async def list_11l_voices():
formatted_list = "" formatted_list = ""
url = "https://api.elevenlabs.io/v1/voices" url = "https://api.elevenlabs.io/v1/voices"
headers = {"xi-api-key": ELEVENLABS_API_KEY} headers = {"xi-api-key": TTS.elevenlabs.api_key}
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
try: try:
response = await client.get(url, headers=headers) response = await client.get(url, headers=headers)
@ -71,10 +71,10 @@ async def select_voice(voice_name: str) -> str:
# Case Insensitive comparison # Case Insensitive comparison
voice_name_lower = voice_name.lower() voice_name_lower = voice_name.lower()
L.DEBUG(f"Looking for {voice_name_lower}") L.DEBUG(f"Looking for {voice_name_lower}")
for item in VOICE_DIR.iterdir(): for item in Dir.data.tts.voices.iterdir():
L.DEBUG(f"Checking {item.name.lower()}") L.DEBUG(f"Checking {item.name.lower()}")
if item.name.lower() == f"{voice_name_lower}.wav": if item.name.lower() == f"{voice_name_lower}.wav":
L.DEBUG(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.") L.DEBUG(f"select_voice received query to use voice: {voice_name}. Found {item} inside {Dir.data.tts.voices}.")
return str(item) return str(item)
L.ERR(f"Voice file not found") L.ERR(f"Voice file not found")
@ -131,7 +131,7 @@ async def generate_speech(
title: str = None, title: str = None,
output_dir = None output_dir = None
) -> str: ) -> str:
output_dir = Path(output_dir) if output_dir else TTS_OUTPUT_DIR output_dir = Path(output_dir) if output_dir else TTS.data.tts.outputs
if not output_dir.exists(): if not output_dir.exists():
output_dir.mkdir(parents=True) output_dir.mkdir(parents=True)
@ -149,7 +149,7 @@ async def generate_speech(
# raise HTTPException(status_code=400, detail="Invalid model specified") # raise HTTPException(status_code=400, detail="Invalid model specified")
if podcast == True: if podcast == True:
podcast_path = Path(PODCAST_DIR) / audio_file_path.name podcast_path = TTS.podcast_dir / audio_file_path.name
L.DEBUG(f"Podcast path: {podcast_path}") L.DEBUG(f"Podcast path: {podcast_path}")
shutil.copy(str(audio_file_path), str(podcast_path)) shutil.copy(str(audio_file_path), str(podcast_path))
bg_tasks.add_task(os.remove, str(audio_file_path)) bg_tasks.add_task(os.remove, str(audio_file_path))
@ -196,7 +196,7 @@ async def determine_voice_id(voice_name: str) -> str:
L.DEBUG(f"Requested voice not among the hardcoded options.. checking with 11L next.") L.DEBUG(f"Requested voice not among the hardcoded options.. checking with 11L next.")
url = "https://api.elevenlabs.io/v1/voices" url = "https://api.elevenlabs.io/v1/voices"
headers = {"xi-api-key": ELEVENLABS_API_KEY} headers = {"xi-api-key": TTS.elevenlabs.api_key}
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
try: try:
response = await client.get(url, headers=headers) response = await client.get(url, headers=headers)
@ -222,10 +222,10 @@ async def elevenlabs_tts(model: str, input_text: str, voice: str, title: str = N
"text": input_text, "text": input_text,
"model_id": model "model_id": model
} }
headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY} headers = {"Content-Type": "application/json", "xi-api-key": TTS.elevenlabs.api_key}
async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client: # 5 minutes timeout async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client: # 5 minutes timeout
response = await client.post(url, json=payload, headers=headers) response = await client.post(url, json=payload, headers=headers)
output_dir = output_dir if output_dir else TTS_OUTPUT_DIR output_dir = output_dir if output_dir else TTS.podcast_dir
title = title if title else dt_datetime.now().strftime("%Y%m%d%H%M%S") title = title if title else dt_datetime.now().strftime("%Y%m%d%H%M%S")
filename = f"{sanitize_filename(title)}.mp3" filename = f"{sanitize_filename(title)}.mp3"
file_path = Path(output_dir) / filename file_path = Path(output_dir) / filename
@ -236,9 +236,6 @@ async def elevenlabs_tts(model: str, input_text: str, voice: str, title: str = N
else: else:
raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API") raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")
async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> str: async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> str:
if file: if file:
return (await file.read()).decode("utf-8").strip() return (await file.read()).decode("utf-8").strip()
@ -247,20 +244,17 @@ async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> s
else: else:
raise HTTPException(status_code=400, detail="No text provided") raise HTTPException(status_code=400, detail="No text provided")
async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str: async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str:
if voice: if voice:
L.DEBUG(f"Looking for voice: {voice}") L.DEBUG(f"Looking for voice: {voice}")
selected_voice = await select_voice(voice) selected_voice = await select_voice(voice)
return selected_voice return selected_voice
elif voice_file and isinstance(voice_file, UploadFile): elif voice_file and isinstance(voice_file, UploadFile):
VOICE_DIR.mkdir(exist_ok=True) Dir.data.tts.voices.mkdir(exist_ok=True)
content = await voice_file.read() content = await voice_file.read()
checksum = hashlib.md5(content).hexdigest() checksum = hashlib.md5(content).hexdigest()
existing_file = VOICE_DIR / voice_file.filename existing_file = Dir.data.tts.voices / voice_file.filename
if existing_file.is_file(): if existing_file.is_file():
with open(existing_file, 'rb') as f: with open(existing_file, 'rb') as f:
existing_checksum = hashlib.md5(f.read()).hexdigest() existing_checksum = hashlib.md5(f.read()).hexdigest()
@ -272,7 +266,7 @@ async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None)
counter = 1 counter = 1
new_file = existing_file new_file = existing_file
while new_file.is_file(): while new_file.is_file():
new_file = VOICE_DIR / f"{base_name}{counter:02}.wav" new_file = Dir.data.tts.voices / f"{base_name}{counter:02}.wav"
counter += 1 counter += 1
with open(new_file, 'wb') as f: with open(new_file, 'wb') as f:
@ -280,8 +274,8 @@ async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None)
return str(new_file) return str(new_file)
else: else:
L.DEBUG(f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}") L.DEBUG(f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {TTS.xtts.voice}")
selected_voice = await select_voice(DEFAULT_VOICE) selected_voice = await select_voice(TTS.xtts.voice)
return selected_voice return selected_voice
@ -302,7 +296,7 @@ async def local_tts(
datetime_str = dt_datetime.now().strftime("%Y%m%d%H%M%S") datetime_str = dt_datetime.now().strftime("%Y%m%d%H%M%S")
title = sanitize_filename(title) if title else "Audio" title = sanitize_filename(title) if title else "Audio"
filename = f"{datetime_str}_{title}.wav" filename = f"{datetime_str}_{title}.wav"
file_path = TTS_OUTPUT_DIR / filename file_path = Dir.data.tts.outputs / filename
# Ensure the parent directory exists # Ensure the parent directory exists
file_path.parent.mkdir(parents=True, exist_ok=True) file_path.parent.mkdir(parents=True, exist_ok=True)
@ -310,14 +304,14 @@ async def local_tts(
voice_file_path = await get_voice_file_path(voice, voice_file) voice_file_path = await get_voice_file_path(voice, voice_file)
# Initialize TTS model in a separate thread # Initialize TTS model in a separate thread
XTTS = await asyncio.to_thread(TTS, model_name=MODEL_NAME) XTTS = await asyncio.to_thread(XTTSv2, model_name=MODEL_NAME)
await asyncio.to_thread(XTTS.to, DEVICE) await asyncio.to_thread(XTTS.to, DEVICE)
segments = split_text(text_content) segments = split_text(text_content)
combined_audio = AudioSegment.silent(duration=0) combined_audio = AudioSegment.silent(duration=0)
for i, segment in enumerate(segments): for i, segment in enumerate(segments):
segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav" segment_file_path = Dir.data.tts.segments / f"segment_{i}.wav"
L.DEBUG(f"Segment file path: {segment_file_path}") L.DEBUG(f"Segment file path: {segment_file_path}")
# Run TTS in a separate thread # Run TTS in a separate thread
@ -340,7 +334,7 @@ async def local_tts(
# Export the combined audio in a separate thread # Export the combined audio in a separate thread
if podcast: if podcast:
podcast_file_path = Path(PODCAST_DIR) / file_path.name podcast_file_path = Path(TTS.podcast_dir) / file_path.name
await asyncio.to_thread(combined_audio.export, podcast_file_path, format="wav") await asyncio.to_thread(combined_audio.export, podcast_file_path, format="wav")
await asyncio.to_thread(combined_audio.export, file_path, format="wav") await asyncio.to_thread(combined_audio.export, file_path, format="wav")
@ -368,7 +362,7 @@ async def stream_tts(text_content: str, speed: float, voice: str, voice_file) ->
async def generate_tts(text: str, speed: float, voice_file_path: str) -> str: async def generate_tts(text: str, speed: float, voice_file_path: str) -> str:
output_dir = tempfile.mktemp(suffix=".wav", dir=tempfile.gettempdir()) output_dir = tempfile.mktemp(suffix=".wav", dir=tempfile.gettempdir())
XTTS = TTS(model_name=MODEL_NAME).to(DEVICE) XTTS = XTTSv2(model_name=MODEL_NAME).to(DEVICE)
XTTS.tts_to_file(text=text, speed=speed, file_path=output_dir, speaker_wav=[voice_file_path], language="en") XTTS.tts_to_file(text=text, speed=speed, file_path=output_dir, speaker_wav=[voice_file_path], language="en")
return output_dir return output_dir
@ -381,7 +375,7 @@ async def get_audio_stream(model: str, input_text: str, voice: str):
"text": input_text, "text": input_text,
"model_id": "eleven_turbo_v2" "model_id": "eleven_turbo_v2"
} }
headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY} headers = {"Content-Type": "application/json", "xi-api-key": TTS.elevenlabs.api_key}
response = requests.post(url, json=payload, headers=headers) response = requests.post(url, json=payload, headers=headers)
if response.status_code == 200: if response.status_code == 200:
@ -434,7 +428,7 @@ def copy_to_podcast_dir(file_path):
file_name = Path(file_path).name file_name = Path(file_path).name
# Construct the destination path in the PODCAST_DIR # Construct the destination path in the PODCAST_DIR
destination_path = Path(PODCAST_DIR) / file_name destination_path = TTS.podcast_dir / file_name
# Copy the file to the PODCAST_DIR # Copy the file to the PODCAST_DIR
shutil.copy(file_path, destination_path) shutil.copy(file_path, destination_path)