From 6600381bb1af974afed5773379a8d48b9ad0bcc1 Mon Sep 17 00:00:00 2001
From: sanj <67624670+iodrift@users.noreply.github.com>
Date: Thu, 27 Jun 2024 19:27:06 -0700
Subject: [PATCH] Rename background_tasks to bg_tasks; make transcribe_audio await completion
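
Rename the background_tasks parameter to bg_tasks across the asr, llm,
note, serve, and tts routers. In asr.py, transcribe_audio() no longer
returns a job id immediately: it schedules process_transcription() as a
background task, polls transcription_results for up to five minutes, and
returns the finished result (raising on failure or timeout). note.py now
derives the daily-note subdirectory from the uploaded file's MIME type
and threads bg_tasks through note_add_endpoint(), process_for_daily_note(),
and asr.transcribe_audio(). Debug logging is added along these paths.

Illustrative caller-side sketch of the transcribe_audio() change. The
variable names path, params, and tasks are hypothetical; the keyword
arguments and return values match this diff:

    # before: returned at once; callers polled transcription_results by job id
    job = await transcribe_audio(file_path=path, params=params,
                                 background_tasks=tasks)
    job_id = job["job_id"]

    # after: blocks until the background task finishes, then returns the result
    result = await transcribe_audio(file_path=path, params=params,
                                    bg_tasks=tasks)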

---
 sijapi/routers/asr.py   | 28 +++++++++++++++++-------
 sijapi/routers/llm.py   | 12 +++++------
 sijapi/routers/note.py  | 48 ++++++++++++++++++++---------------------
 sijapi/routers/serve.py |  8 +++----
 sijapi/routers/tts.py   | 12 +++++------
 5 files changed, 59 insertions(+), 49 deletions(-)

diff --git a/sijapi/routers/asr.py b/sijapi/routers/asr.py
index 8b160b8..f518164 100644
--- a/sijapi/routers/asr.py
+++ b/sijapi/routers/asr.py
@@ -41,7 +41,7 @@ transcription_results = {}
 @asr.post("/transcribe")
 @asr.post("/v1/audio/transcription")
 async def transcribe_endpoint(
-    background_tasks: BackgroundTasks,
+    bg_tasks: BackgroundTasks,
     file: UploadFile = File(...),
     params: str = Form(...)
 ):
@@ -58,7 +58,7 @@ async def transcribe_endpoint(
         temp_file.write(await file.read())
         temp_file_path = temp_file.name
     
-    transcription_job = await transcribe_audio(file_path=temp_file_path, params=parameters, background_tasks=background_tasks)
+    transcription_job = await transcribe_audio(file_path=temp_file_path, params=parameters, bg_tasks=bg_tasks)
     job_id = transcription_job["job_id"]
 
     # Poll for completion
@@ -80,12 +80,13 @@ async def transcribe_endpoint(
     # If we've reached this point, the transcription has taken too long
     return JSONResponse(content={"status": "timeout", "message": "Transcription is taking longer than expected. Please check back later."}, status_code=202)
 
-async def transcribe_audio(file_path, params: TranscribeParams, background_tasks: BackgroundTasks):
+async def transcribe_audio(file_path, params: TranscribeParams, bg_tasks: BackgroundTasks):
+    L.DEBUG(f"Transcribing audio file from {file_path}...")
     file_path = await convert_to_wav(file_path)
-    model = params.model if params.model in WHISPER_CPP_MODELS else 'small' 
+    model = params.model if params.model in WHISPER_CPP_MODELS else 'small'
     model_path = WHISPER_CPP_DIR / 'models' / f'ggml-{model}.bin'
     command = [str(WHISPER_CPP_DIR / 'build' / 'bin' / 'main')]
-    command.extend(['-m', str(model_path)]) 
+    command.extend(['-m', str(model_path)])
     command.extend(['-t', str(max(1, min(params.threads or MAX_CPU_CORES, MAX_CPU_CORES)))])
     command.extend(['-np'])  # Always enable no-prints
 
@@ -117,7 +118,6 @@ async def transcribe_audio(file_path, params: TranscribeParams, background_tasks
         command.extend(['--dtw', params.dtw])
 
     command.extend(['-f', file_path])
-  
     L.DEBUG(f"Command: {command}")
 
     # Create a unique ID for this transcription job
@@ -127,9 +127,21 @@ async def transcribe_audio(file_path, params: TranscribeParams, background_tasks
     transcription_results[job_id] = {"status": "processing", "result": None}
 
     # Run the transcription in a background task
-    background_tasks.add_task(process_transcription, command, file_path, job_id)
+    bg_tasks.add_task(process_transcription, command, file_path, job_id)
 
-    return {"job_id": job_id}
+    max_wait_time = 300  # 5 minutes
+    poll_interval = 1  # 1 second
+    start_time = asyncio.get_event_loop().time()
+
+    while asyncio.get_event_loop().time() - start_time < max_wait_time:
+        job_status = transcription_results.get(job_id, {})
+        if job_status.get("status") == "completed":
+            return job_status["result"]
+        elif job_status.get("status") == "failed":
+            raise Exception(f"Transcription failed: {job_status.get('error', 'Unknown error')}")
+        await asyncio.sleep(poll_interval)
+
+    raise TimeoutError("Transcription timed out")
 
 async def process_transcription(command, file_path, job_id):
     try:
diff --git a/sijapi/routers/llm.py b/sijapi/routers/llm.py
index 3433140..ce80ecf 100644
--- a/sijapi/routers/llm.py
+++ b/sijapi/routers/llm.py
@@ -522,7 +522,7 @@ async def summarize_post(file: Optional[UploadFile] = File(None), text: Optional
     return summarized_text
 
 @llm.post("/speaksummary")
-async def summarize_tts_endpoint(background_tasks: BackgroundTasks, instruction: str = Form(SUMMARY_INSTRUCT), file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), voice: Optional[str] = Form(DEFAULT_VOICE), speed: Optional[float] = Form(1.2), podcast: Union[bool, str] = Form(False)):
+async def summarize_tts_endpoint(bg_tasks: BackgroundTasks, instruction: str = Form(SUMMARY_INSTRUCT), file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), voice: Optional[str] = Form(DEFAULT_VOICE), speed: Optional[float] = Form(1.2), podcast: Union[bool, str] = Form(False)):
     
     podcast = str_to_bool(str(podcast))  # Proper boolean conversion
     text_content = text if text else extract_text(file)
@@ -546,8 +546,8 @@ async def summarize_tts(
     timestamp = dt_datetime.now().strftime("%Y%m%d_%H%M%S")
     filename = f"{timestamp}{filename}.wav" 
     
-    background_tasks = BackgroundTasks()
-    final_output_path = await generate_speech(background_tasks, summarized_text, voice, "xtts", speed=speed, podcast=podcast, title=filename)
+    bg_tasks = BackgroundTasks()
+    final_output_path = await generate_speech(bg_tasks, summarized_text, voice, "xtts", speed=speed, podcast=podcast, title=filename)
     L.DEBUG(f"summary_tts completed with final_output_path: {final_output_path}")
     return final_output_path
     
@@ -578,7 +578,7 @@ def calculate_max_tokens(text: str) -> int:
     return min(tokens_count // 4, SUMMARY_CHUNK_SIZE)
 
 
-async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], background_tasks: BackgroundTasks = None) -> str:
+async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_tasks: BackgroundTasks = None) -> str:
     if isinstance(file, UploadFile):
         file_extension = get_extension(file)
         temp_file_path = tempfile.mktemp(suffix=file_extension)
@@ -614,8 +614,8 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bac
     elif file_ext == '.docx':
         text_content = await extract_text_from_docx(file_path)
 
-    if background_tasks and 'temp_file_path' in locals():
-        background_tasks.add_task(os.remove, temp_file_path)
+    if bg_tasks and 'temp_file_path' in locals():
+        bg_tasks.add_task(os.remove, temp_file_path)
     elif 'temp_file_path' in locals():
         os.remove(temp_file_path)
 
diff --git a/sijapi/routers/note.py b/sijapi/routers/note.py
index b079db5..005ad64 100644
--- a/sijapi/routers/note.py
+++ b/sijapi/routers/note.py
@@ -133,7 +133,7 @@ async def build_daily_timeslips(date):
 ### CLIPPER ###
 @note.post("/clip")
 async def clip_post(
-    background_tasks: BackgroundTasks,
+    bg_tasks: BackgroundTasks,
     file: UploadFile = None,
     url: Optional[str] = Form(None),
     source: Optional[str] = Form(None),
@@ -142,65 +142,64 @@ async def clip_post(
     voice: str = Form(DEFAULT_VOICE),
     encoding: str = Form('utf-8')
 ):
-    markdown_filename = await process_article(background_tasks, url, title, encoding, source, tts, voice)
+    markdown_filename = await process_article(bg_tasks, url, title, encoding, source, tts, voice)
     return {"message": "Clip saved successfully", "markdown_filename": markdown_filename}
 
 @note.post("/archive")
 async def archive_post(
-    background_tasks: BackgroundTasks,
+    bg_tasks: BackgroundTasks,
     file: UploadFile = None,
     url: Optional[str] = Form(None),
     source: Optional[str] = Form(None),
     title: Optional[str] = Form(None),
     encoding: str = Form('utf-8')
 ):
-    markdown_filename = await process_archive(background_tasks, url, title, encoding, source)
+    markdown_filename = await process_archive(bg_tasks, url, title, encoding, source)
     return {"message": "Clip saved successfully", "markdown_filename": markdown_filename}
 
 @note.get("/clip")
 async def clip_get(
-    background_tasks: BackgroundTasks,
+    bg_tasks: BackgroundTasks,
     url: str,
     title: Optional[str] = Query(None),
     encoding: str = Query('utf-8'),
     tts: str = Query('summary'),
     voice: str = Query(DEFAULT_VOICE)
 ):
-    markdown_filename = await process_article(background_tasks, url, title, encoding, tts=tts, voice=voice)
+    markdown_filename = await process_article(bg_tasks, url, title, encoding, tts=tts, voice=voice)
     return {"message": "Clip saved successfully", "markdown_filename": markdown_filename}
 
 @note.post("/note/add")
-async def note_add_endpoint(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), source: Optional[str] = Form(None)):
+async def note_add_endpoint(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), source: Optional[str] = Form(None), bg_tasks: BackgroundTasks = None):
+    L.DEBUG(f"Received request on /note/add...")
     if not file and not text:
+        L.WARN("... without any file or text!")
         raise HTTPException(status_code=400, detail="Either text or a file must be provided")
     else:
-        result = await process_for_daily_note(file, text, source)
+        result = await process_for_daily_note(file, text, source, bg_tasks)
         L.INFO(f"Result on /note/add: {result}")
         return JSONResponse(result, status_code=204)
 
-async def process_for_daily_note(file: Optional[UploadFile] = File(None), text: Optional[str] = None, source: Optional[str] = None):
+async def process_for_daily_note(file: Optional[UploadFile] = File(None), text: Optional[str] = None, source: Optional[str] = None, bg_tasks: BackgroundTasks = None):
     now = datetime.now()
-
     transcription_entry = ""
     file_entry = ""
     if file:
+        L.DEBUG("File received...")
         file_content = await file.read()
         audio_io = BytesIO(file_content)
         file_type, _ = mimetypes.guess_type(file.filename) 
-
-        if 'audio' in file_type:
-            subdir = "Audio"
-        elif 'image' in file_type:
-            subdir = "Images"
-        else:
-            subdir = "Documents"
-
+        L.DEBUG(f"Processing as {file_type}...")
+        subdir = file_type.split('/')[0].title() if file_type else "Documents"  # e.g. 'audio/mpeg' -> 'Audio'
         absolute_path, relative_path = assemble_journal_path(now, subdir=subdir, filename=file.filename)
+        L.DEBUG(f"Destination path: {absolute_path}")
+
         with open(absolute_path, 'wb') as f:
             f.write(file_content)
+            L.DEBUG(f"Processing {f.name}...")
 
         if 'audio' in file_type:
-            transcription = await asr.transcribe_audio(file_path=absolute_path, params=asr.TranscribeParams(model="small-en", language="en", threads=6))
+            transcription = await asr.transcribe_audio(file_path=absolute_path, params=asr.TranscribeParams(model="small-en", language="en", threads=6), bg_tasks=bg_tasks)
             file_entry = f"![[{relative_path}]]"
 
         elif 'image' in file_type:
@@ -209,7 +208,6 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
         else:
             file_entry = f"[Source]({relative_path})"
     
-
     text_entry = text if text else ""
     L.DEBUG(f"transcription: {transcription}\nfile_entry: {file_entry}\ntext_entry: {text_entry}")
     return await add_to_daily_note(transcription, file_entry, text_entry, now)
@@ -262,7 +260,7 @@ async def handle_text(title:str, summary:str, extracted_text:str, date_time: dat
 
 
 async def process_document(
-    background_tasks: BackgroundTasks,
+    bg_tasks: BackgroundTasks,
     document: File,
     title: Optional[str] = None,
     tts_mode: str = "summary",
@@ -301,7 +299,7 @@ added: {timestamp}
                 datetime_str = datetime.now().strftime("%Y%m%d%H%M%S")
                 audio_filename = f"{datetime_str} {readable_title}"
                 audio_path = await tts.generate_speech(
-                    background_tasks=background_tasks,
+                    bg_tasks=bg_tasks,
                     text=tts_text,
                     voice=voice,
                     model="eleven_turbo_v2",
@@ -336,7 +334,7 @@ added: {timestamp}
 
 
 async def process_article(
-    background_tasks: BackgroundTasks,
+    bg_tasks: BackgroundTasks,
     url: str,
     title: Optional[str] = None,
     encoding: str = 'utf-8',
@@ -397,7 +395,7 @@ tags:
             datetime_str = datetime.now().strftime("%Y%m%d%H%M%S")
             audio_filename = f"{datetime_str} {readable_title}"
             try:
-                audio_path = await tts.generate_speech(background_tasks=background_tasks, text=tts_text, voice=voice, model="eleven_turbo_v2", podcast=True, title=audio_filename,
+                audio_path = await tts.generate_speech(bg_tasks=bg_tasks, text=tts_text, voice=voice, model="eleven_turbo_v2", podcast=True, title=audio_filename,
                 output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR)
                 audio_ext = Path(audio_path).suffix
                 obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
@@ -502,7 +500,7 @@ async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]
 
 
 async def process_archive(
-    background_tasks: BackgroundTasks,
+    bg_tasks: BackgroundTasks,
     url: str,
     title: Optional[str] = None,
     encoding: str = 'utf-8',
diff --git a/sijapi/routers/serve.py b/sijapi/routers/serve.py
index 5452c4a..eea8725 100644
--- a/sijapi/routers/serve.py
+++ b/sijapi/routers/serve.py
@@ -136,7 +136,7 @@ async def hook_changedetection(webhook_data: dict):
 
 
 @serve.post("/cl/search")
-async def hook_cl_search(request: Request, background_tasks: BackgroundTasks):
+async def hook_cl_search(request: Request, bg_tasks: BackgroundTasks):
     client_ip = request.client.host
     L.DEBUG(f"Received request from IP: {client_ip}")
     data = await request.json()
@@ -150,7 +150,7 @@ async def hook_cl_search(request: Request, background_tasks: BackgroundTasks):
         json.dump(payload, file, indent=2)
 
     for result in results:
-        background_tasks.add_task(cl_search_process_result, result)
+        bg_tasks.add_task(cl_search_process_result, result)
     return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
 
 @serve.post("/cl/docket")
@@ -283,7 +283,7 @@ def shellfish_run_widget_command(args: List[str]):
 
 
 ### COURTLISTENER FUNCTIONS ###
-async def cl_docket(data, client_ip, background_tasks: BackgroundTasks):
+async def cl_docket(data, client_ip, bg_tasks: BackgroundTasks):
     payload = data['payload']
     results = data['payload']['results']
     timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
@@ -292,7 +292,7 @@ async def cl_docket(data, client_ip, background_tasks: BackgroundTasks):
         json.dump(payload, file, indent=2)
 
     for result in results:
-        background_tasks.add_task(cl_docket_process, result)
+        bg_tasks.add_task(cl_docket_process, result)
     return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
 
 async def cl_docket_process(result):
diff --git a/sijapi/routers/tts.py b/sijapi/routers/tts.py
index 34cfbd0..bb76645 100644
--- a/sijapi/routers/tts.py
+++ b/sijapi/routers/tts.py
@@ -87,7 +87,7 @@ def select_voice(voice_name: str) -> str:
 @tts.post("/v1/audio/speech")
 async def generate_speech_endpoint(
     request: Request,
-    background_tasks: BackgroundTasks,
+    bg_tasks: BackgroundTasks,
     model: str = Form("eleven_turbo_v2"),
     text: Optional[str] = Form(None),
     file: Optional[UploadFile] = File(None),
@@ -110,7 +110,7 @@ async def generate_speech_endpoint(
             else:
                 return await stream_tts(text_content, speed, voice, voice_file)
         else:
-            return await generate_speech(background_tasks, text_content, voice, voice_file, model, speed, podcast)
+            return await generate_speech(bg_tasks, text_content, voice, voice_file, model, speed, podcast)
     except Exception as e:
         L.ERR(f"Error in TTS: {str(e)}")
         L.ERR(traceback.format_exc())
@@ -118,7 +118,7 @@ async def generate_speech_endpoint(
 
 
 async def generate_speech(
-    background_tasks: BackgroundTasks,
+    bg_tasks: BackgroundTasks,
     text: str,
     voice: str = None,
     voice_file: UploadFile = None,
@@ -142,8 +142,8 @@ async def generate_speech(
         
         elif model == "xtts":
             L.INFO(f"Using XTTS2")
-            final_output_dir = await local_tts(text, speed, voice, voice_file, podcast, background_tasks, title, output_dir)
-            background_tasks.add_task(os.remove, str(final_output_dir))
+            final_output_dir = await local_tts(text, speed, voice, voice_file, podcast, bg_tasks, title, output_dir)
+            bg_tasks.add_task(os.remove, str(final_output_dir))
             return str(final_output_dir)
         else:
             raise HTTPException(status_code=400, detail="Invalid model specified")
@@ -282,7 +282,7 @@ async def local_tts(
     voice: str,
     voice_file = None,
     podcast: bool = False,
-    background_tasks: BackgroundTasks = None,
+    bg_tasks: BackgroundTasks = None,
     title: str = None,
     output_path: Optional[Path] = None
 ) -> str: