Auto-update: Sat Jun 29 16:58:00 PDT 2024
This commit is contained in:
parent
88612ab20a
commit
565a576c48
8 changed files with 252 additions and 142 deletions
|
@ -93,13 +93,13 @@ DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "Luna")
|
||||||
DEFAULT_11L_VOICE = os.getenv("DEFAULT_11L_VOICE", "Victoria")
|
DEFAULT_11L_VOICE = os.getenv("DEFAULT_11L_VOICE", "Victoria")
|
||||||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||||||
### Summarization
|
### Summarization
|
||||||
SUMMARY_CHUNK_SIZE = int(os.getenv("SUMMARY_CHUNK_SIZE", 4000)) # measured in tokens
|
SUMMARY_CHUNK_SIZE = int(os.getenv("SUMMARY_CHUNK_SIZE", 16384)) # measured in tokens
|
||||||
SUMMARY_CHUNK_OVERLAP = int(os.getenv("SUMMARY_CHUNK_OVERLAP", 100)) # measured in tokens
|
SUMMARY_CHUNK_OVERLAP = int(os.getenv("SUMMARY_CHUNK_OVERLAP", 256)) # measured in tokens
|
||||||
SUMMARY_TPW = float(os.getenv("SUMMARY_TPW", 1.3)) # measured in tokens
|
SUMMARY_TPW = float(os.getenv("SUMMARY_TPW", 1.3)) # measured in tokens
|
||||||
SUMMARY_LENGTH_RATIO = int(os.getenv("SUMMARY_LENGTH_RATIO", 4)) # measured as original to length ratio
|
SUMMARY_LENGTH_RATIO = int(os.getenv("SUMMARY_LENGTH_RATIO", 4)) # measured as original to length ratio
|
||||||
SUMMARY_MIN_LENGTH = int(os.getenv("SUMMARY_MIN_LENGTH", 150)) # measured in tokens
|
SUMMARY_MIN_LENGTH = int(os.getenv("SUMMARY_MIN_LENGTH", 150)) # measured in tokens
|
||||||
SUMMARY_MODEL = os.getenv("SUMMARY_MODEL", "dolphin-llama3:8b-256k")
|
SUMMARY_MODEL = os.getenv("SUMMARY_MODEL", "dolphin-llama3:8b-256k")
|
||||||
SUMMARY_TOKEN_LIMIT = int(os.getenv("SUMMARY_TOKEN_LIMIT", 4096))
|
SUMMARY_TOKEN_LIMIT = int(os.getenv("SUMMARY_TOKEN_LIMIT", 16384))
|
||||||
SUMMARY_INSTRUCT = os.getenv('SUMMARY_INSTRUCT', "You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.")
|
SUMMARY_INSTRUCT = os.getenv('SUMMARY_INSTRUCT', "You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.")
|
||||||
SUMMARY_INSTRUCT_TTS = os.getenv('SUMMARY_INSTRUCT_TTS', "You are an AI assistant that provides email summaries for Sanjay. Your response will undergo Text-To-Speech conversion and added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following email.")
|
SUMMARY_INSTRUCT_TTS = os.getenv('SUMMARY_INSTRUCT_TTS', "You are an AI assistant that provides email summaries for Sanjay. Your response will undergo Text-To-Speech conversion and added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following email.")
|
||||||
|
|
||||||
|
|
|
@ -338,8 +338,14 @@ class Geocoder:
|
||||||
processed_locations = []
|
processed_locations = []
|
||||||
for loc in locations:
|
for loc in locations:
|
||||||
if isinstance(loc, tuple):
|
if isinstance(loc, tuple):
|
||||||
processed_locations.append(Location(latitude=loc[0], longitude=loc[1]))
|
processed_locations.append(Location(
|
||||||
|
latitude=loc[0],
|
||||||
|
longitude=loc[1],
|
||||||
|
datetime=datetime.now(timezone.utc)
|
||||||
|
))
|
||||||
elif isinstance(loc, Location):
|
elif isinstance(loc, Location):
|
||||||
|
if loc.datetime is None:
|
||||||
|
loc.datetime = datetime.now(timezone.utc)
|
||||||
processed_locations.append(loc)
|
processed_locations.append(loc)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported location type: {type(loc)}")
|
raise ValueError(f"Unsupported location type: {type(loc)}")
|
||||||
|
@ -348,26 +354,39 @@ class Geocoder:
|
||||||
|
|
||||||
geocode_results = await asyncio.gather(*[self.location(lat, lon) for lat, lon in coordinates])
|
geocode_results = await asyncio.gather(*[self.location(lat, lon) for lat, lon in coordinates])
|
||||||
elevations = await asyncio.gather(*[self.elevation(lat, lon) for lat, lon in coordinates])
|
elevations = await asyncio.gather(*[self.elevation(lat, lon) for lat, lon in coordinates])
|
||||||
timezones = await asyncio.gather(*[self.timezone(lat, lon) for lat, lon in coordinates])
|
timezone_results = await asyncio.gather(*[self.timezone(lat, lon) for lat, lon in coordinates])
|
||||||
|
|
||||||
|
|
||||||
|
def create_display_name(override_name, result):
    """Build a comma-separated display label for a geocoded place.

    The label is assembled, in order, from the override name, the geocoder's
    own ``name`` (skipped when it equals the override), ``admin1`` and ``cc``.
    Missing or empty components are left out, so the result never contains
    dangling separators or the literal text ``None``.

    Args:
        override_name: Optional preferred name for the place.
        result: Mapping of geocoder fields; only ``name``, ``admin1`` and
            ``cc`` are read.

    Returns:
        The joined label, or an empty string when no component is available.
    """
    place_name = result.get('name')
    candidates = (
        override_name,
        # Avoid repeating the override when the geocoder returns the same name.
        place_name if place_name != override_name else None,
        result.get('admin1'),
        result.get('cc'),
    )
    return ', '.join(piece for piece in candidates if piece)
|
||||||
|
|
||||||
geocoded_locations = []
|
geocoded_locations = []
|
||||||
for location, result, elevation, timezone in zip(processed_locations, geocode_results, elevations, timezones):
|
for location, result, elevation, tz_result in zip(processed_locations, geocode_results, elevations, timezone_results):
|
||||||
result = result[0] # Unpack the first result
|
result = result[0] # Unpack the first result
|
||||||
override_name = result.get('override_name')
|
override_name = result.get('override_name')
|
||||||
geocoded_location = Location(
|
geocoded_location = Location(
|
||||||
latitude=location.latitude,
|
latitude=location.latitude,
|
||||||
longitude=location.longitude,
|
longitude=location.longitude,
|
||||||
elevation=elevation,
|
elevation=elevation,
|
||||||
datetime=location.datetime or datetime.now(timezone.utc),
|
datetime=location.datetime,
|
||||||
zip=result.get("admin2"),
|
zip=result.get("admin2"),
|
||||||
city=result.get("name"),
|
city=result.get("name"),
|
||||||
state=result.get("admin1"),
|
state=result.get("admin1"),
|
||||||
country=result.get("cc"),
|
country=result.get("cc"),
|
||||||
context=location.context or {},
|
context=location.context or {},
|
||||||
name=override_name or result.get("name"),
|
name=override_name or result.get("name"),
|
||||||
display_name=f"{override_name or result.get('name')}, {result.get('admin1')}, {result.get('cc')}",
|
display_name=create_display_name(override_name, result),
|
||||||
country_code=result.get("cc"),
|
country_code=result.get("cc"),
|
||||||
timezone=timezone
|
timezone=tz_result
|
||||||
)
|
)
|
||||||
|
|
||||||
# Merge original location data with geocoded data
|
# Merge original location data with geocoded data
|
||||||
|
|
|
@ -326,13 +326,13 @@ SYSTEM_MSG=You are a helpful AI assistant.
|
||||||
DEFAULT_LLM=dolphin-mistral
|
DEFAULT_LLM=dolphin-mistral
|
||||||
DEFAULT_VISION=llava-llama3
|
DEFAULT_VISION=llava-llama3
|
||||||
OPENAI_API_KEY=¿SECRET? # <--- not presently implemented for anything
|
OPENAI_API_KEY=¿SECRET? # <--- not presently implemented for anything
|
||||||
SUMMARY_MODEL=dolphin-mistral
|
SUMMARY_MODEL='dolphin-llama3:8b-256k'
|
||||||
SUMMARY_CHUNK_SIZE=4000
|
SUMMARY_CHUNK_SIZE=16384
|
||||||
SUMMARY_CHUNK_OVERLAP=100
|
SUMMARY_CHUNK_OVERLAP=100
|
||||||
SUMMARY_TPW=1.3
|
SUMMARY_TPW=1.3
|
||||||
SUMMARY_LENGTH_RATIO=4
|
SUMMARY_LENGTH_RATIO=4
|
||||||
SUMMARY_MIN_LENGTH=150
|
SUMMARY_MIN_LENGTH=64
|
||||||
SUMMARY_TOKEN_LIMIT=4096
|
SUMMARY_TOKEN_LIMIT=16384
|
||||||
SUMMARY_INSTRUCT='You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.'
|
SUMMARY_INSTRUCT='You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.'
|
||||||
SUMMARY_INSTRUCT_TTS='You are an AI assistant that summarizes emails -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary. Your response will undergo Text-To-Speech conversion and added to Sanjays private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following.'
|
SUMMARY_INSTRUCT_TTS='You are an AI assistant that summarizes emails -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary. Your response will undergo Text-To-Speech conversion and added to Sanjays private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following.'
|
||||||
DEFAULT_VOICE=joanne
|
DEFAULT_VOICE=joanne
|
||||||
|
|
|
@ -201,7 +201,7 @@ async def summarize_single_email(this_email: IncomingEmail, podcast: bool = Fals
|
||||||
md_summary += f'title: {this_email.subject}\n'
|
md_summary += f'title: {this_email.subject}\n'
|
||||||
md_summary += f'{summary}\n'
|
md_summary += f'{summary}\n'
|
||||||
md_summary += f'```\n\n'
|
md_summary += f'```\n\n'
|
||||||
md_summary += f'![[{tts_path}]]\n' if tts_path.exists() else ''
|
md_summary += f'![[{tts_relative}]]\n'# if tts_path.exists() else ''
|
||||||
|
|
||||||
return md_summary
|
return md_summary
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ from typing import List, Dict, Any, Union, Optional
|
||||||
from pydantic import BaseModel, root_validator, ValidationError
|
from pydantic import BaseModel, root_validator, ValidationError
|
||||||
import aiofiles
|
import aiofiles
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import glob
|
import glob
|
||||||
import chromadb
|
import chromadb
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
|
@ -26,7 +27,7 @@ import html2text
|
||||||
import markdown
|
import markdown
|
||||||
from sijapi import L, LLM_SYS_MSG, DEFAULT_LLM, DEFAULT_VISION, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, OPENAI_API_KEY, DEFAULT_VOICE, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MODEL
|
from sijapi import L, LLM_SYS_MSG, DEFAULT_LLM, DEFAULT_VISION, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, OPENAI_API_KEY, DEFAULT_VOICE, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MODEL
|
||||||
from sijapi.utilities import convert_to_unix_time, sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension
|
from sijapi.utilities import convert_to_unix_time, sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension
|
||||||
from sijapi.routers.tts import generate_speech
|
from sijapi.routers import tts
|
||||||
from sijapi.routers.asr import transcribe_audio
|
from sijapi.routers.asr import transcribe_audio
|
||||||
|
|
||||||
|
|
||||||
|
@ -520,13 +521,52 @@ async def summarize_post(file: Optional[UploadFile] = File(None), text: Optional
|
||||||
summarized_text = await summarize_text(text_content, instruction)
|
summarized_text = await summarize_text(text_content, instruction)
|
||||||
return summarized_text
|
return summarized_text
|
||||||
|
|
||||||
@llm.post("/speaksummary")
async def summarize_tts_endpoint(
    bg_tasks: BackgroundTasks,
    instruction: str = Form(SUMMARY_INSTRUCT),
    file: Optional[UploadFile] = File(None),
    text: Optional[str] = Form(None),
    voice: Optional[str] = Form(DEFAULT_VOICE),
    speed: Optional[float] = Form(1.2),
    podcast: Union[bool, str] = Form(False)
):
    """Summarize submitted text or an uploaded file and return it as speech.

    Exactly one source is used: ``text`` wins when it is non-empty, otherwise
    the uploaded ``file`` is spooled to a temporary file and passed to
    ``extract_text``. The extracted text is handed to ``summarize_tts`` and the
    resulting audio file is returned.

    Args:
        bg_tasks: Background task queue; used to delete the temporary upload
            copy and attached to the file response.
        instruction: Summarization instruction forwarded to ``summarize_tts``.
        file: Optional uploaded document.
        text: Optional raw text to summarize.
        voice: Voice name forwarded to ``summarize_tts``.
        speed: Speech speed forwarded to ``summarize_tts``.
        podcast: Boolean or string flag, normalized with ``str_to_bool``.

    Returns:
        A ``FileResponse`` (``audio/wav``) on success, or a ``JSONResponse``
        with status 400 and an ``error`` message when anything fails.
    """
    try:
        podcast = str_to_bool(str(podcast))

        if text:
            text_content = text
        elif file:
            content = await file.read()
            # An upload may arrive without a filename; treat that as "no extension"
            # instead of letting os.path.splitext(None) raise a TypeError.
            file_extension = os.path.splitext(file.filename or "")[1]

            # NamedTemporaryFile(delete=False) creates the file atomically with a
            # unique name, unlike the deprecated and race-prone tempfile.mktemp().
            with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as temp_file:
                temp_file_path = temp_file.name
                # Schedule cleanup before writing so a failed write cannot leak the file.
                bg_tasks.add_task(os.remove, temp_file_path)
                temp_file.write(content)

            # The file is closed at this point, so extract_text can reopen it by path.
            text_content = await extract_text(temp_file_path)
        else:
            raise ValueError("Either text or file must be provided")

        final_output_path = await summarize_tts(text_content, instruction, voice, speed, podcast)

        return FileResponse(
            path=final_output_path,
            filename=os.path.basename(final_output_path),
            media_type='audio/wav',
            background=bg_tasks
        )

    except Exception as e:
        # Endpoint boundary: report every failure to the client as a JSON error.
        L.ERR(f"Error in summarize_tts_endpoint: {str(e)}")
        return JSONResponse(
            status_code=400,
            content={"error": str(e)}
        )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def summarize_tts(
|
async def summarize_tts(
|
||||||
|
@ -539,14 +579,15 @@ async def summarize_tts(
|
||||||
):
|
):
|
||||||
LLM = LLM if LLM else Ollama()
|
LLM = LLM if LLM else Ollama()
|
||||||
summarized_text = await summarize_text(text, instruction, LLM=LLM)
|
summarized_text = await summarize_text(text, instruction, LLM=LLM)
|
||||||
filename = await summarize_text(summarized_text, "Provide a title for this summary no longer than 4 words")
|
filename = await summarize_text(summarized_text, "Provide a title for this summary no longer than 4 words", length_override=10)
|
||||||
filename = sanitize_filename(filename)
|
filename = sanitize_filename(filename)
|
||||||
filename = ' '.join(filename.split()[:5])
|
filename = ' '.join(filename.split()[:5])
|
||||||
timestamp = dt_datetime.now().strftime("%Y%m%d_%H%M%S")
|
timestamp = dt_datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
filename = f"{timestamp}{filename}.wav"
|
filename = f"{timestamp}{filename}.wav"
|
||||||
|
|
||||||
bg_tasks = BackgroundTasks()
|
bg_tasks = BackgroundTasks()
|
||||||
final_output_path = await generate_speech(bg_tasks, summarized_text, voice, "xtts", speed=speed, podcast=podcast, title=filename)
|
model = await tts.get_model(voice)
|
||||||
|
final_output_path = await tts.generate_speech(bg_tasks, summarized_text, voice, model=model, speed=speed, podcast=podcast, title=filename)
|
||||||
L.DEBUG(f"summary_tts completed with final_output_path: {final_output_path}")
|
L.DEBUG(f"summary_tts completed with final_output_path: {final_output_path}")
|
||||||
return final_output_path
|
return final_output_path
|
||||||
|
|
||||||
|
@ -557,18 +598,36 @@ async def get_title(text: str, LLM: Ollama() = None):
|
||||||
title = sanitize_filename(title)
|
title = sanitize_filename(title)
|
||||||
return title
|
return title
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def split_text_into_chunks(text: str) -> List[str]:
    """Split text into sentence-aligned chunks that fit the summary budget.

    Sentences (split after ``.``, ``!`` or ``?`` followed by whitespace) are
    packed greedily into chunks of at most
    ``SUMMARY_CHUNK_SIZE / SUMMARY_TPW`` words. A single sentence that is
    longer than that limit is cut on word boundaries so no chunk exceeds the
    budget. Empty or whitespace-only input yields an empty list rather than a
    single empty chunk.

    Args:
        text: The text to split.

    Returns:
        The list of chunk strings, in document order.
    """
    sentences = re.split(r'(?<=[.!?])\s+', text)
    total_words = len(text.split())
    L.DEBUG(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.")

    # Never allow a zero-word budget, which would make every sentence "too long".
    max_words_per_chunk = max(1, int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW))
    L.DEBUG(f"Maximum words per chunk: {max_words_per_chunk}")

    chunks = []
    current_chunk = []
    current_word_count = 0

    for sentence in sentences:
        sentence_words = sentence.split()
        if not sentence_words:
            # re.split yields '' for empty input or leading/trailing whitespace.
            continue

        if len(sentence_words) > max_words_per_chunk:
            # Flush what we have, then hard-split the oversized sentence by words.
            if current_chunk:
                chunks.append(' '.join(current_chunk))
                current_chunk = []
                current_word_count = 0
            for start in range(0, len(sentence_words), max_words_per_chunk):
                chunks.append(' '.join(sentence_words[start:start + max_words_per_chunk]))
            continue

        if current_word_count + len(sentence_words) > max_words_per_chunk:
            # current_chunk is non-empty here because the sentence alone fits.
            chunks.append(' '.join(current_chunk))
            current_chunk = []
            current_word_count = 0

        current_chunk.append(sentence)
        current_word_count += len(sentence_words)

    if current_chunk:
        chunks.append(' '.join(current_chunk))

    L.DEBUG(f"Split text into {len(chunks)} chunks.")
    return chunks
|
||||||
|
|
||||||
|
|
||||||
|
@ -577,12 +636,19 @@ def calculate_max_tokens(text: str) -> int:
|
||||||
return min(tokens_count // 4, SUMMARY_CHUNK_SIZE)
|
return min(tokens_count // 4, SUMMARY_CHUNK_SIZE)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_tasks: BackgroundTasks = None) -> str:
|
async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_tasks: BackgroundTasks = None) -> str:
|
||||||
|
L.INFO(f"Attempting to extract text from file: {file}")
|
||||||
|
|
||||||
|
try:
|
||||||
if isinstance(file, UploadFile):
|
if isinstance(file, UploadFile):
|
||||||
file_extension = get_extension(file)
|
L.INFO("File is an UploadFile object")
|
||||||
|
file_extension = os.path.splitext(file.filename)[1]
|
||||||
temp_file_path = tempfile.mktemp(suffix=file_extension)
|
temp_file_path = tempfile.mktemp(suffix=file_extension)
|
||||||
with open(temp_file_path, 'wb') as buffer:
|
with open(temp_file_path, 'wb') as buffer:
|
||||||
shutil.copyfileobj(file.file, buffer)
|
content = await file.read()
|
||||||
|
buffer.write(content)
|
||||||
file_path = temp_file_path
|
file_path = temp_file_path
|
||||||
elif isinstance(file, (bytes, bytearray)):
|
elif isinstance(file, (bytes, bytearray)):
|
||||||
temp_file_path = tempfile.mktemp()
|
temp_file_path = tempfile.mktemp()
|
||||||
|
@ -592,11 +658,11 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
|
||||||
elif isinstance(file, (str, Path)):
|
elif isinstance(file, (str, Path)):
|
||||||
file_path = str(file)
|
file_path = str(file)
|
||||||
else:
|
else:
|
||||||
raise ValueError("Unsupported file type")
|
raise ValueError(f"Unsupported file type: {type(file)}")
|
||||||
|
|
||||||
_, file_ext = os.path.splitext(file_path)
|
_, file_ext = os.path.splitext(file_path)
|
||||||
file_ext = file_ext.lower()
|
file_ext = file_ext.lower()
|
||||||
text_content = ""
|
L.INFO(f"File extension: {file_ext}")
|
||||||
|
|
||||||
if file_ext == '.pdf':
|
if file_ext == '.pdf':
|
||||||
text_content = await extract_text_from_pdf(file_path)
|
text_content = await extract_text_from_pdf(file_path)
|
||||||
|
@ -612,6 +678,8 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
|
||||||
text_content = await read_text_file(file_path)
|
text_content = await read_text_file(file_path)
|
||||||
elif file_ext == '.docx':
|
elif file_ext == '.docx':
|
||||||
text_content = await extract_text_from_docx(file_path)
|
text_content = await extract_text_from_docx(file_path)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unsupported file extension: {file_ext}")
|
||||||
|
|
||||||
if bg_tasks and 'temp_file_path' in locals():
|
if bg_tasks and 'temp_file_path' in locals():
|
||||||
bg_tasks.add_task(os.remove, temp_file_path)
|
bg_tasks.add_task(os.remove, temp_file_path)
|
||||||
|
@ -620,49 +688,62 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
|
||||||
|
|
||||||
return text_content
|
return text_content
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
L.ERR(f"Error extracting text: {str(e)}")
|
||||||
|
raise ValueError(f"Error extracting text: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_override: int = None, length_quotient: float = SUMMARY_LENGTH_RATIO, LLM: Ollama = None):
    """Summarize text, chunking it first when necessary.

    The text is split with ``split_text_into_chunks``; every chunk is
    summarized concurrently with ``process_chunk``. When more than one chunk
    was produced, the labelled partial summaries are concatenated and sent
    through ``process_chunk`` once more (with a length ratio of 1) to be
    reworded into a single cohesive summary.

    Args:
        text: The text to summarize.
        instruction: Instruction passed to ``process_chunk`` for each chunk.
        length_override: Optional target length, in tokens, for the whole
            summary. It is capped at ``SUMMARY_TOKEN_LIMIT`` and converted
            into a compression ratio for ``process_chunk``.
        length_quotient: Original-to-summary length ratio used when
            ``length_override`` is not given.
        LLM: Optional Ollama client; a new one is created when omitted.

    Returns:
        The summary string (empty when the text produced no chunks).
    """
    LLM = LLM if LLM else Ollama()

    chunked_text = split_text_into_chunks(text)
    total_parts = len(chunked_text)
    L.DEBUG(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}")

    total_words_count = sum(len(chunk.split()) for chunk in chunked_text)
    L.DEBUG(f"Total words count: {total_words_count}")
    total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW))
    L.DEBUG(f"Total tokens count: {total_tokens_count}")

    total_summary_length = length_override if length_override else total_tokens_count // length_quotient
    L.DEBUG(f"Total summary length: {total_summary_length}")
    corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT)
    L.DEBUG(f"Corrected total summary length: {corrected_total_summary_length}")

    # Forward the caller's length preference to process_chunk; previously both
    # length_override and length_quotient were computed and logged but ignored.
    if length_override:
        # Express the absolute target as a compression ratio so each chunk
        # receives a proportional share of the requested total length.
        # NOTE(review): per the signature and body visible in this file,
        # process_chunk still floors its output budget at SUMMARY_MIN_LENGTH.
        chunk_length_ratio = max(1, int(total_tokens_count // max(1, corrected_total_summary_length)))
    else:
        chunk_length_ratio = length_quotient

    summaries = await asyncio.gather(*[
        process_chunk(instruction, chunk, i+1, total_parts, length_ratio=chunk_length_ratio, LLM=LLM)
        for i, chunk in enumerate(chunked_text)
    ])

    if total_parts > 1:
        # Label each part so the smoothing pass can tell where the seams are.
        summaries = [f"\n\n\nPART {i+1} of {total_parts}:\n\n{summary}" for i, summary in enumerate(summaries)]

    concatenated_summary = ' '.join(summaries)
    L.DEBUG(f"Concatenated summary: {concatenated_summary}")
    L.DEBUG(f"Concatenated summary length: {len(concatenated_summary.split())}")

    if total_parts <= 1:
        return concatenated_summary

    L.DEBUG(f"Processing the concatenated_summary to smooth the edges...")
    concatenated_instruct = f"The following text consists of the concatenated {total_parts} summaries of {total_parts} parts of a single document that had to be split for processing. Reword it for clarity and flow as a single cohesive summary, understanding that it all relates to a single document, but that document likely consists of multiple parts potentially from multiple authors. Do not shorten it and do not omit content, simply smooth out the edges between the parts."
    # length_ratio=1: the smoothing pass rewords without compressing further.
    final_summary = await process_chunk(concatenated_instruct, concatenated_summary, 1, 1, length_ratio=1, LLM=LLM)
    L.DEBUG(f"Final summary length: {len(final_summary.split())}")
    return final_summary
|
||||||
|
|
||||||
async def process_chunk(instruction: str, text: str, part: int, total_parts: int, max_tokens: Optional[int] = None, LLM: Ollama = None) -> str:
|
|
||||||
"""
|
|
||||||
Process a portion of text using the ollama library asynchronously.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
async def process_chunk(instruction: str, text: str, part: int, total_parts: int, length_ratio: float = None, LLM: Ollama = None) -> str:
|
||||||
|
# L.DEBUG(f"Processing chunk: {text}")
|
||||||
LLM = LLM if LLM else Ollama()
|
LLM = LLM if LLM else Ollama()
|
||||||
|
|
||||||
words_count = max(1, len(text.split())) # Ensure at least 1
|
words_count = len(text.split())
|
||||||
tokens_count = max(1, int(words_count * SUMMARY_TPW)) # Ensure at least 1
|
tokens_count = max(1, int(words_count * SUMMARY_TPW))
|
||||||
fraction_tokens = max(1, tokens_count // SUMMARY_LENGTH_RATIO) # Ensure at least 1
|
|
||||||
if max_tokens is None:
|
|
||||||
max_tokens = min(fraction_tokens, SUMMARY_CHUNK_SIZE // max(1, total_parts)) # Ensure at least 1
|
|
||||||
max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH) # Ensure a minimum token count to avoid tiny processing chunks
|
|
||||||
|
|
||||||
L.DEBUG(f"Summarizing part {part} of {total_parts}: Max_tokens: {max_tokens}")
|
summary_length_ratio = length_ratio if length_ratio else SUMMARY_LENGTH_RATIO
|
||||||
|
max_tokens = min(tokens_count // summary_length_ratio, SUMMARY_CHUNK_SIZE)
|
||||||
|
max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH)
|
||||||
|
|
||||||
|
L.DEBUG(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}")
|
||||||
|
|
||||||
if part and total_parts > 1:
|
if part and total_parts > 1:
|
||||||
prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}"
|
prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}"
|
||||||
|
@ -674,12 +755,12 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int
|
||||||
model=SUMMARY_MODEL,
|
model=SUMMARY_MODEL,
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
stream=False,
|
stream=False,
|
||||||
options={'num_predict': max_tokens, 'temperature': 0.6}
|
options={'num_predict': max_tokens, 'temperature': 0.5}
|
||||||
)
|
)
|
||||||
|
|
||||||
text_response = response['response']
|
text_response = response['response']
|
||||||
L.DEBUG(f"Completed LLM.generate for part {part} of {total_parts}")
|
L.DEBUG(f"Completed LLM.generate for part {part} of {total_parts}")
|
||||||
|
L.DEBUG(f"Result: {text_response}")
|
||||||
return text_response
|
return text_response
|
||||||
|
|
||||||
async def title_and_summary(extracted_text: str):
|
async def title_and_summary(extracted_text: str):
|
||||||
|
|
|
@ -273,6 +273,9 @@ async def generate_map(start_date: datetime, end_date: datetime):
|
||||||
return html_content
|
return html_content
|
||||||
|
|
||||||
async def post_location(location: Location):
|
async def post_location(location: Location):
|
||||||
|
if not location.datetime:
|
||||||
|
L.DEBUG(f"location appears to be missing datetime: {location}")
|
||||||
|
else:
|
||||||
L.DEBUG(f"post_location called with {location.datetime}")
|
L.DEBUG(f"post_location called with {location.datetime}")
|
||||||
|
|
||||||
async with DB.get_connection() as conn:
|
async with DB.get_connection() as conn:
|
||||||
|
@ -343,37 +346,43 @@ async def post_locate_endpoint(locations: Union[Location, List[Location]]):
|
||||||
locations = [locations]
|
locations = [locations]
|
||||||
|
|
||||||
# Prepare locations
|
# Prepare locations
|
||||||
for location in locations:
|
for lcn in locations:
|
||||||
if not location.datetime:
|
if not lcn.datetime:
|
||||||
tz = GEO.tz_current(location.latitude, location.longitude)
|
tz = await GEO.tz_at(lcn.latitude, lcn.longitude)
|
||||||
location.datetime = datetime.now(tz).isoformat()
|
lcn.datetime = datetime.now(ZoneInfo(tz)).isoformat()
|
||||||
|
|
||||||
if not location.context:
|
if not lcn.context:
|
||||||
location.context = {
|
lcn.context = {
|
||||||
"action": "manual",
|
"action": "missing",
|
||||||
"device_type": "Pythonista",
|
"device_type": "API",
|
||||||
"device_model": "Unknown",
|
"device_model": "Unknown",
|
||||||
"device_name": "Unknown",
|
"device_name": "Unknown",
|
||||||
"device_os": "Unknown"
|
"device_os": "Unknown"
|
||||||
}
|
}
|
||||||
L.DEBUG(f"Location received for processing: {location}")
|
L.DEBUG(f"Location received for processing: {lcn}")
|
||||||
|
|
||||||
geocoded_locations = await GEO.code(locations)
|
geocoded_locations = await GEO.code(locations)
|
||||||
|
|
||||||
responses = []
|
responses = []
|
||||||
|
if isinstance(geocoded_locations, List):
|
||||||
for location in geocoded_locations:
|
for location in geocoded_locations:
|
||||||
L.DEBUG(f"Final location submitted to database: {location}")
|
L.DEBUG(f"Final location to be submitted to database: {location}")
|
||||||
|
|
||||||
location_entry = await post_location(location)
|
location_entry = await post_location(location)
|
||||||
if location_entry:
|
if location_entry:
|
||||||
responses.append({"location_data": location_entry})
|
responses.append({"location_data": location_entry})
|
||||||
else:
|
else:
|
||||||
L.WARN(f"Posting location to database appears to have failed.")
|
L.WARN(f"Posting location to database appears to have failed.")
|
||||||
|
else:
|
||||||
|
L.DEBUG(f"Final location to be submitted to database: {geocoded_locations}")
|
||||||
|
location_entry = await post_location(geocoded_locations)
|
||||||
|
if location_entry:
|
||||||
|
responses.append({"location_data": location_entry})
|
||||||
|
else:
|
||||||
|
L.WARN(f"Posting location to database appears to have failed.")
|
||||||
|
|
||||||
return {"message": "Locations and weather updated", "results": responses}
|
return {"message": "Locations and weather updated", "results": responses}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@loc.get("/locate", response_model=Location)
|
@loc.get("/locate", response_model=Location)
|
||||||
async def get_last_location_endpoint() -> JSONResponse:
|
async def get_last_location_endpoint() -> JSONResponse:
|
||||||
this_location = await get_last_location()
|
this_location = await get_last_location()
|
||||||
|
|
|
@ -135,7 +135,7 @@ Obsidian helper. Takes a datetime and creates a new daily note. Note: it uses th
|
||||||
places = await loc.fetch_locations(date_time)
|
places = await loc.fetch_locations(date_time)
|
||||||
lat, lon = places[0].latitude, places[0].longitude
|
lat, lon = places[0].latitude, places[0].longitude
|
||||||
|
|
||||||
location = await GEO.code(lat, lon)
|
location = await GEO.code((lat, lon))
|
||||||
|
|
||||||
timeslips = await build_daily_timeslips(date_time)
|
timeslips = await build_daily_timeslips(date_time)
|
||||||
|
|
||||||
|
@ -189,7 +189,7 @@ created: "{dt_datetime.now().strftime("%Y-%m-%d %H:%M:%S")}"
|
||||||
return absolute_path
|
return absolute_path
|
||||||
|
|
||||||
|
|
||||||
### Daily Note Component Builders ###
|
|
||||||
|
|
||||||
async def build_daily_timeslips(date):
|
async def build_daily_timeslips(date):
|
||||||
'''
|
'''
|
||||||
|
@ -325,7 +325,7 @@ async def generate_banner(dt, location: Location = None, forecast: str = None, m
|
||||||
display_name += f"{location.country} " if location.country else ""
|
display_name += f"{location.country} " if location.country else ""
|
||||||
|
|
||||||
if display_name == "Location: ":
|
if display_name == "Location: ":
|
||||||
geocoded_location = await GEO.code(lat, lon)
|
geocoded_location = await GEO.code((lat, lon))
|
||||||
if geocoded_location.display_name or geocoded_location.city or geocoded_location.country:
|
if geocoded_location.display_name or geocoded_location.city or geocoded_location.country:
|
||||||
return await generate_banner(dt, geocoded_location, forecast, mood, other_context)
|
return await generate_banner(dt, geocoded_location, forecast, mood, other_context)
|
||||||
else:
|
else:
|
||||||
|
@ -405,7 +405,7 @@ async def update_dn_weather(date_time: dt_datetime, lat: float = None, lon: floa
|
||||||
L.WARN(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.")
|
L.WARN(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.")
|
||||||
try:
|
try:
|
||||||
if lat and lon:
|
if lat and lon:
|
||||||
place = GEO.code(lat, lon)
|
place = GEO.code((lat, lon))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
L.DEBUG(f"Updating weather for {date_time}")
|
L.DEBUG(f"Updating weather for {date_time}")
|
||||||
|
@ -425,7 +425,7 @@ async def update_dn_weather(date_time: dt_datetime, lat: float = None, lon: floa
|
||||||
L.INFO(f"City in data: {city}")
|
L.INFO(f"City in data: {city}")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
location = await GEO.code(lat, lon)
|
location = await GEO.code((lat, lon))
|
||||||
L.DEBUG(f"location: {location}")
|
L.DEBUG(f"location: {location}")
|
||||||
city = location.name
|
city = location.name
|
||||||
city = city if city else location.city
|
city = city if city else location.city
|
||||||
|
|
|
@ -78,11 +78,10 @@ def select_voice(voice_name: str) -> str:
|
||||||
raise HTTPException(status_code=404, detail="Voice file not found")
|
raise HTTPException(status_code=404, detail="Voice file not found")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
L.ERR(f"Voice file not found: {str(e)}")
|
L.ERR(f"Voice file not found: {str(e)}")
|
||||||
L.ERR(traceback.format_exc())
|
return None
|
||||||
raise HTTPException(status_code=404, detail="Voice file not found")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@tts.post("/tts")
|
||||||
@tts.post("/tts/speak")
|
@tts.post("/tts/speak")
|
||||||
@tts.post("/v1/audio/speech")
|
@tts.post("/v1/audio/speech")
|
||||||
async def generate_speech_endpoint(
|
async def generate_speech_endpoint(
|
||||||
|
@ -116,7 +115,6 @@ async def generate_speech_endpoint(
|
||||||
L.ERR(traceback.format_exc())
|
L.ERR(traceback.format_exc())
|
||||||
raise HTTPException(status_code=666, detail="error in TTS")
|
raise HTTPException(status_code=666, detail="error in TTS")
|
||||||
|
|
||||||
|
|
||||||
async def generate_speech(
|
async def generate_speech(
|
||||||
bg_tasks: BackgroundTasks,
|
bg_tasks: BackgroundTasks,
|
||||||
text: str,
|
text: str,
|
||||||
|
@ -136,33 +134,36 @@ async def generate_speech(
|
||||||
model = model if model else await get_model(voice, voice_file)
|
model = model if model else await get_model(voice, voice_file)
|
||||||
|
|
||||||
if model == "eleven_turbo_v2":
|
if model == "eleven_turbo_v2":
|
||||||
L.INFO(f"Using ElevenLabs.")
|
L.INFO("Using ElevenLabs.")
|
||||||
audio_file_path = await elevenlabs_tts(model, text, voice, title, output_dir)
|
audio_file_path = await elevenlabs_tts(model, text, voice, title, output_dir)
|
||||||
|
else: # if model == "xtts":
|
||||||
|
L.INFO("Using XTTS2")
|
||||||
|
audio_file_path = await local_tts(text, speed, voice, voice_file, podcast, bg_tasks, title, output_dir)
|
||||||
|
#else:
|
||||||
|
# raise HTTPException(status_code=400, detail="Invalid model specified")
|
||||||
|
|
||||||
|
if podcast == True:
|
||||||
|
podcast_path = PODCAST_DIR / audio_file_path.name
|
||||||
|
L.DEBUG(f"Podcast path: {podcast_path}")
|
||||||
|
shutil.copy(str(audio_file_path), str(podcast_path))
|
||||||
|
bg_tasks.add_task(os.remove, str(audio_file_path))
|
||||||
|
return str(podcast_path)
|
||||||
|
|
||||||
return str(audio_file_path)
|
return str(audio_file_path)
|
||||||
|
|
||||||
elif model == "xtts":
|
|
||||||
L.INFO(f"Using XTTS2")
|
|
||||||
final_output_dir = await local_tts(text, speed, voice, voice_file, podcast, bg_tasks, title, output_dir)
|
|
||||||
bg_tasks.add_task(os.remove, str(final_output_dir))
|
|
||||||
return str(final_output_dir)
|
|
||||||
else:
|
|
||||||
raise HTTPException(status_code=400, detail="Invalid model specified")
|
|
||||||
except HTTPException as e:
|
|
||||||
L.ERR(f"HTTP error: {e}")
|
|
||||||
L.ERR(traceback.format_exc())
|
|
||||||
raise e
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
L.ERR(f"Error: {e}")
|
L.ERROR(f"Failed to generate speech: {str(e)}")
|
||||||
L.ERR(traceback.format_exc())
|
raise HTTPException(status_code=500, detail=f"Failed to generate speech: {str(e)}")
|
||||||
raise e
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def get_model(voice: str = None, voice_file: UploadFile = None):
|
async def get_model(voice: str = None, voice_file: UploadFile = None):
|
||||||
if voice_file or (voice and select_voice(voice)):
|
if voice_file or (voice and select_voice(voice)):
|
||||||
return "xtts"
|
return "xtts"
|
||||||
|
|
||||||
elif voice and await determine_voice_id(voice):
|
elif voice and await determine_voice_id(voice):
|
||||||
return "eleven_turbo_v2"
|
return "eleven_turbo_v2"
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise HTTPException(status_code=400, detail="No model or voice specified")
|
raise HTTPException(status_code=400, detail="No model or voice specified")
|
||||||
|
|
||||||
|
@ -216,7 +217,7 @@ async def elevenlabs_tts(model: str, input_text: str, voice: str, title: str = N
|
||||||
"model_id": model
|
"model_id": model
|
||||||
}
|
}
|
||||||
headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}
|
headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client: # 5 minutes timeout
|
||||||
response = await client.post(url, json=payload, headers=headers)
|
response = await client.post(url, json=payload, headers=headers)
|
||||||
output_dir = output_dir if output_dir else TTS_OUTPUT_DIR
|
output_dir = output_dir if output_dir else TTS_OUTPUT_DIR
|
||||||
title = title if title else datetime.now().strftime("%Y%m%d%H%M%S")
|
title = title if title else datetime.now().strftime("%Y%m%d%H%M%S")
|
||||||
|
|
Loading…
Reference in a new issue