From e7c843b6c199677095fd11c60336c674b20c609e Mon Sep 17 00:00:00 2001 From: sanj <67624670+iodrift@users.noreply.github.com> Date: Wed, 26 Jun 2024 11:38:34 -0700 Subject: [PATCH] Auto-update: Wed Jun 26 11:38:34 PDT 2024 --- sijapi/__init__.py | 15 +- sijapi/__main__.py | 50 ++-- sijapi/config/.env-example | 2 +- sijapi/helpers/log_prior_emails.py | 15 +- sijapi/logs.py | 57 ++++- sijapi/routers/asr.py | 7 +- sijapi/routers/calendar.py | 91 ++++---- sijapi/routers/cf.py | 5 +- sijapi/routers/email.py | 50 ++-- sijapi/routers/health.py | 5 +- sijapi/routers/hooks.py | 358 ---------------------------- sijapi/routers/ig.py | 126 +++++----- sijapi/routers/llm.py | 58 ++--- sijapi/routers/locate.py | 70 +++--- sijapi/routers/note.py | 142 +++++------ sijapi/routers/sd.py | 25 +- sijapi/routers/serve.py | 364 ++++++++++++++++++++++++++++- sijapi/routers/time.py | 12 +- sijapi/routers/tts.py | 54 ++--- sijapi/routers/weather.py | 68 +++--- sijapi/utilities.py | 51 ++-- 21 files changed, 826 insertions(+), 799 deletions(-) diff --git a/sijapi/__init__.py b/sijapi/__init__.py index 973e44c..98bd70c 100644 --- a/sijapi/__init__.py +++ b/sijapi/__init__.py @@ -24,20 +24,7 @@ ENV_PATH = CONFIG_DIR / ".env" LOGS_DIR = BASE_DIR / "logs" # Create logger instance -package_logger = Logger(__name__, LOGS_DIR) -LOGGER = package_logger.get_logger() - -def DEBUG(log_message): LOGGER.debug(log_message) -def INFO(log_message): LOGGER.info(log_message) -def WARN(log_message): LOGGER.warning(log_message) - -def ERR(log_message): - LOGGER.error(log_message) - LOGGER.error(traceback.format_exc()) - -def CRITICAL(log_message): - LOGGER.critical(log_message) - LOGGER.critical(traceback.format_exc()) +L = Logger("your_logger_name", "your_logs_directory") os.makedirs(LOGS_DIR, exist_ok=True) load_dotenv(ENV_PATH) diff --git a/sijapi/__main__.py b/sijapi/__main__.py index 5f5b239..0740f28 100755 --- a/sijapi/__main__.py +++ b/sijapi/__main__.py @@ -18,30 +18,22 @@ from dotenv import load_dotenv from pathlib import Path from datetime import datetime import argparse -from . import LOGGER, LOGS_DIR, OBSIDIAN_VAULT_DIR +from . import L, LOGS_DIR, OBSIDIAN_VAULT_DIR from .logs import Logger from .utilities import list_and_correct_impermissible_files parser = argparse.ArgumentParser(description='Personal API.') -parser.add_argument('--debug', action='store_true', help='Set log level to INFO') +parser.add_argument('--debug', action='store_true', help='Set log level to L.INFO') parser.add_argument('--test', type=str, help='Load only the specified module.') args = parser.parse_args() -# Using the package logger -main_logger = Logger("main", LOGS_DIR) -main_logger.setup_from_args(args) -logger = LOGGER - -# Use the logger -logger.debug("Debug Log") -logger.info("Info Log") - - -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL +from sijapi import L +L.setup_from_args(args) from sijapi import HOST, ENV_PATH, GLOBAL_API_KEY, REQUESTS_DIR, ROUTER_DIR, REQUESTS_LOG_PATH, PUBLIC_SERVICES, TRUSTED_SUBNETS, ROUTERS + # Initialize a FastAPI application api = FastAPI() @@ -68,13 +60,13 @@ class SimpleAPIKeyMiddleware(BaseHTTPMiddleware): if api_key_header: api_key_header = api_key_header.lower().split("bearer ")[-1] if api_key_header != GLOBAL_API_KEY and api_key_query != GLOBAL_API_KEY: - ERR(f"Invalid API key provided by a requester.") + L.ERR(f"Invalid API key provided by a requester.") return JSONResponse( status_code=401, content={"detail": "Invalid or missing API key"} ) response = await call_next(request) - # DEBUG(f"Request from {client_ip} is complete") + # L.DEBUG(f"Request from {client_ip} is complete") return response api.add_middleware(SimpleAPIKeyMiddleware) @@ -82,22 +74,22 @@ api.add_middleware(SimpleAPIKeyMiddleware) canceled_middleware = """ @api.middleware("http") async def log_requests(request: Request, call_next): - DEBUG(f"Incoming request: {request.method} {request.url}") - DEBUG(f"Request headers: {request.headers}") - DEBUG(f"Request body: {await request.body()}") + L.DEBUG(f"Incoming request: {request.method} {request.url}") + L.DEBUG(f"Request headers: {request.headers}") + L.DEBUG(f"Request body: {await request.body()}") response = await call_next(request) return response async def log_outgoing_request(request): - INFO(f"Outgoing request: {request.method} {request.url}") - DEBUG(f"Request headers: {request.headers}") - DEBUG(f"Request body: {request.content}") + L.INFO(f"Outgoing request: {request.method} {request.url}") + L.DEBUG(f"Request headers: {request.headers}") + L.DEBUG(f"Request body: {request.content}") """ @api.exception_handler(HTTPException) async def http_exception_handler(request: Request, exc: HTTPException): - ERR(f"HTTP Exception: {exc.status_code} - {exc.detail}") - ERR(f"Request: {request.method} {request.url}") + L.ERR(f"HTTP Exception: {exc.status_code} - {exc.detail}") + L.ERR(f"Request: {request.method} {request.url}") return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail}) @api.middleware("http") @@ -116,25 +108,25 @@ async def handle_exception_middleware(request: Request, call_next): def load_router(router_name): router_file = ROUTER_DIR / f'{router_name}.py' - DEBUG(f"Attempting to load {router_name.capitalize()}...") + L.DEBUG(f"Attempting to load {router_name.capitalize()}...") if router_file.exists(): module_path = f'sijapi.routers.{router_name}' try: module = importlib.import_module(module_path) router = getattr(module, router_name) api.include_router(router) - INFO(f"{router_name.capitalize()} router loaded.") + L.INFO(f"{router_name.capitalize()} router loaded.") except (ImportError, AttributeError) as e: - CRITICAL(f"Failed to load router {router_name}: {e}") + L.CRIT(f"Failed to load router {router_name}: {e}") else: - ERR(f"Router file for {router_name} does not exist.") + L.ERR(f"Router file for {router_name} does not exist.") def main(argv): if args.test: load_router(args.test) else: - CRITICAL(f"sijapi launched") - CRITICAL(f"{args._get_args}") + L.CRIT(f"sijapi launched") + L.CRIT(f"{args._get_args}") for router_name in ROUTERS: load_router(router_name) diff --git a/sijapi/config/.env-example b/sijapi/config/.env-example index e7f7986..5a23da9 100644 --- a/sijapi/config/.env-example +++ b/sijapi/config/.env-example @@ -96,7 +96,7 @@ TRUSTED_SUBNETS=127.0.0.1/32,10.13.37.0/24,100.64.64.0/24 # ────────── # #─── router selection: ──────────────────────────────────────────────────────────── -ROUTERS=asr,calendar,cf,email,health,hooks,llm,locate,note,rag,sd,serve,time,tts,weather +ROUTERS=asr,calendar,cf,email,health,llm,locate,note,rag,sd,serve,time,tts,weather UNLOADED=ig #─── notes: ────────────────────────────────────────────────────────────────────── # diff --git a/sijapi/helpers/log_prior_emails.py b/sijapi/helpers/log_prior_emails.py index 1be6267..113e480 100644 --- a/sijapi/helpers/log_prior_emails.py +++ b/sijapi/helpers/log_prior_emails.py @@ -1,7 +1,6 @@ import asyncio from pathlib import Path -from sijapi import DEBUG, INFO, ERR -from sijapi import EMAIL_CONFIG, EMAIL_LOGS +from sijapi import L, EMAIL_CONFIG, EMAIL_LOGS from sijapi.classes import EmailAccount from sijapi.routers import email @@ -12,13 +11,13 @@ async def initialize_log_files(): for log_file in [summarized_log, autoresponded_log, diagnostic_log]: log_file.parent.mkdir(parents=True, exist_ok=True) log_file.write_text("") - DEBUG(f"Log files initialized: {summarized_log}, {autoresponded_log}, {diagnostic_log}") + L.DEBUG(f"Log files initialized: {summarized_log}, {autoresponded_log}, {diagnostic_log}") return summarized_log, autoresponded_log, diagnostic_log async def process_all_emails(account: EmailAccount, summarized_log: Path, autoresponded_log: Path, diagnostic_log: Path): try: with email.get_imap_connection(account) as inbox: - DEBUG(f"Connected to {account.name}, processing all emails...") + L.DEBUG(f"Connected to {account.name}, processing all emails...") all_messages = inbox.messages() unread_messages = set(uid for uid, _ in inbox.messages(unread=True)) @@ -42,15 +41,15 @@ async def process_all_emails(account: EmailAccount, summarized_log: Path, autore with open(log_file, 'a') as f: f.write(f"{id_str}\n") - INFO(f"Processed {processed_count} non-unread emails for account {account.name}") + L.INFO(f"Processed {processed_count} non-unread emails for account {account.name}") except Exception as e: - ERR(f"An error occurred while processing emails for account {account.name}: {e}") + L.ERR(f"An error occurred while processing emails for account {account.name}: {e}") async def main(): email_accounts = email.load_email_accounts(EMAIL_CONFIG) summarized_log, autoresponded_log, diagnostic_log = await initialize_log_files() - DEBUG(f"Processing {len(email_accounts)} email accounts") + L.DEBUG(f"Processing {len(email_accounts)} email accounts") tasks = [process_all_emails(account, summarized_log, autoresponded_log, diagnostic_log) for account in email_accounts] await asyncio.gather(*tasks) @@ -58,7 +57,7 @@ async def main(): # Final verification with open(summarized_log, 'r') as f: final_count = len(f.readlines()) - INFO(f"Final non-unread email count: {final_count}") + L.INFO(f"Final non-unread email count: {final_count}") if __name__ == "__main__": asyncio.run(main()) \ No newline at end of file diff --git a/sijapi/logs.py b/sijapi/logs.py index dab689b..2142a51 100644 --- a/sijapi/logs.py +++ b/sijapi/logs.py @@ -21,21 +21,60 @@ class Logger: logger.add(handler_path, rotation="2 MB", compression="zip", level="DEBUG", format="{time:YYYY-MM-DD HH:mm:ss} - {name} - {level} - {message}") # Console handler - log_format = "{time:YYYY-MM-DD HH:mm:ss} - {name} - {level: <8} - {message}" + log_format = ( + "{time:YYYY-MM-DD HH:mm:ss} - " + "{name} - " + "{level: <8} - " + "" + "{message}" + "" + ) console_level = "DEBUG" if args.debug else "INFO" - logger.add(sys.stdout, format=log_format, level=console_level, colorize=True) + logger.add( + sys.stdout, + format=log_format, + level=console_level, + colorize=True, + filter=lambda record: record["level"].name != "INFO", # Apply colors to all levels except INFO + ) + + # Add a separate handler for INFO level without colors + logger.add( + sys.stdout, + format="{time:YYYY-MM-DD HH:mm:ss} - {name} - {level: <8} - {message}", + level="INFO", + filter=lambda record: record["level"].name == "INFO", + ) + + + # Custom color and style mappings + logger.level("CRITICAL", color="") + logger.level("ERROR", color="") + logger.level("WARNING", color="") + logger.level("DEBUG", color="") # Test color output - self.logger.debug("Debug message (should be Cyan)") - self.logger.info("Info message (should be Green)") - self.logger.warning("Warning message (should be Yellow)") - self.logger.error("Error message (should be Red)") - self.logger.critical("Critical message (should be Magenta)") + self.logger.debug("Debug message (should be italic green)") + self.logger.info("Info message (should be uncolored)") + self.logger.warning("Warning message (should be bold orange/yellow)") + self.logger.error("Error message (should be bold red)") + self.logger.critical("Critical message (should be bold yellow on magenta)") + + + def DEBUG(self, log_message): self.logger.debug(log_message) + def INFO(self, log_message): self.logger.info(log_message) + def WARN(self, log_message): self.logger.warning(log_message) + def ERR(self, log_message): + self.logger.error(log_message) + self.logger.error(traceback.format_exc()) + def CRIT(self, log_message): + self.logger.critical(log_message) + self.logger.critical(traceback.format_exc()) def get_logger(self): - return self.logger + return self + -# Add this at the end of the file for testing if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() diff --git a/sijapi/routers/asr.py b/sijapi/routers/asr.py index 10a14eb..8b160b8 100644 --- a/sijapi/routers/asr.py +++ b/sijapi/routers/asr.py @@ -1,3 +1,6 @@ +''' +Uses whisper_cpp to create an OpenAI-compatible Whisper web service. +''' import os import sys import uuid @@ -11,7 +14,7 @@ from fastapi.responses import JSONResponse from pydantic import BaseModel, Field from typing import Optional -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL, WHISPER_CPP_DIR, MAX_CPU_CORES +from sijapi import L, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL, WHISPER_CPP_DIR, MAX_CPU_CORES asr = APIRouter() @@ -115,7 +118,7 @@ async def transcribe_audio(file_path, params: TranscribeParams, background_tasks command.extend(['-f', file_path]) - DEBUG(f"Command: {command}") + L.DEBUG(f"Command: {command}") # Create a unique ID for this transcription job job_id = str(uuid.uuid4()) diff --git a/sijapi/routers/calendar.py b/sijapi/routers/calendar.py index d3aac21..b1525be 100644 --- a/sijapi/routers/calendar.py +++ b/sijapi/routers/calendar.py @@ -16,40 +16,39 @@ from typing import Dict, List, Any from datetime import datetime, timedelta from Foundation import NSDate, NSRunLoop import EventKit as EK -from sijapi import ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH +from sijapi import L, ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH from sijapi.routers.locate import localize_datetime -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL calendar = APIRouter() oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token") timeout = httpx.Timeout(12) if MS365_TOGGLE is True: - CRITICAL(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.") + L.CRIT(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.") @calendar.get("/o365/login") async def login(): - DEBUG(f"Received request to /o365/login") - DEBUG(f"SCOPE: {MS365_SCOPE}") + L.DEBUG(f"Received request to /o365/login") + L.DEBUG(f"SCOPE: {MS365_SCOPE}") if not MS365_SCOPE: - ERR("No scopes defined for authorization.") + L.ERR("No scopes defined for authorization.") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="No scopes defined for authorization." ) authorization_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/authorize?client_id={MS365_CLIENT_ID}&response_type=code&redirect_uri={MS365_REDIRECT_PATH}&scope={'+'.join(MS365_SCOPE)}" - INFO(f"Redirecting to authorization URL: {authorization_url}") + L.INFO(f"Redirecting to authorization URL: {authorization_url}") return RedirectResponse(authorization_url) @calendar.get("/o365/oauth_redirect") async def oauth_redirect(code: str = None, error: str = None): - INFO(f"Received request to /o365/oauth_redirect") + L.INFO(f"Received request to /o365/oauth_redirect") if error: - ERR(f"OAuth2 Error: {error}") + L.ERR(f"OAuth2 Error: {error}") raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail="OAuth2 Error" ) - INFO(f"Requesting token with authorization code: {code}") + L.INFO(f"Requesting token with authorization code: {code}") token_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token" data = { "client_id": MS365_CLIENT_ID, @@ -60,15 +59,15 @@ if MS365_TOGGLE is True: } async with httpx.AsyncClient(timeout=timeout) as client: response = await client.post(token_url, data=data) - DEBUG(f"Token endpoint response status code: {response.status_code}") - INFO(f"Token endpoint response text: {response.text}") + L.DEBUG(f"Token endpoint response status code: {response.status_code}") + L.INFO(f"Token endpoint response text: {response.text}") result = response.json() if 'access_token' in result: await save_token(result) - INFO("Access token obtained successfully") + L.INFO("Access token obtained successfully") return {"message": "Access token stored successfully"} else: - CRITICAL(f"Failed to obtain access token. Response: {result}") + L.CRIT(f"Failed to obtain access token. Response: {result}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed to obtain access token" @@ -76,7 +75,7 @@ if MS365_TOGGLE is True: @calendar.get("/o365/me") async def read_items(): - INFO(f"Received request to /o365/me") + L.INFO(f"Received request to /o365/me") token = await load_token() if not token: raise HTTPException( @@ -89,10 +88,10 @@ if MS365_TOGGLE is True: response = await client.get(graph_url, headers=headers) if response.status_code == 200: user = response.json() - INFO(f"User retrieved: {user}") + L.INFO(f"User retrieved: {user}") return user else: - ERR("Invalid or expired token") + L.ERR("Invalid or expired token") raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid or expired token", @@ -100,14 +99,14 @@ if MS365_TOGGLE is True: ) async def save_token(token): - DEBUG(f"Saving token: {token}") + L.DEBUG(f"Saving token: {token}") try: token["expires_at"] = int(time.time()) + token["expires_in"] with open(MS365_TOKEN_PATH, "w") as file: json.dump(token, file) - DEBUG(f"Saved token to {MS365_TOKEN_PATH}") + L.DEBUG(f"Saved token to {MS365_TOKEN_PATH}") except Exception as e: - ERR(f"Failed to save token: {e}") + L.ERR(f"Failed to save token: {e}") async def load_token(): if os.path.exists(MS365_TOKEN_PATH): @@ -115,21 +114,21 @@ if MS365_TOGGLE is True: with open(MS365_TOKEN_PATH, "r") as file: token = json.load(file) except FileNotFoundError: - ERR("Token file not found.") + L.ERR("Token file not found.") return None except json.JSONDecodeError: - ERR("Failed to decode token JSON") + L.ERR("Failed to decode token JSON") return None if token: token["expires_at"] = int(time.time()) + token["expires_in"] - DEBUG(f"Loaded token: {token}") # Add this line to log the loaded token + L.DEBUG(f"Loaded token: {token}") # Add this line to log the loaded token return token else: - DEBUG("No token found.") + L.DEBUG("No token found.") return None else: - ERR(f"No file found at {MS365_TOKEN_PATH}") + L.ERR(f"No file found at {MS365_TOKEN_PATH}") return None @@ -159,39 +158,39 @@ if MS365_TOGGLE is True: response = await client.post(token_url, data=data) result = response.json() if "access_token" in result: - INFO("Access token refreshed successfully") + L.INFO("Access token refreshed successfully") return result else: - ERR("Failed to refresh access token") + L.ERR("Failed to refresh access token") return None async def refresh_token(): token = await load_token() if not token: - ERR("No token found in storage") + L.ERR("No token found in storage") raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail="No token found", ) if 'refresh_token' not in token: - ERR("Refresh token not found in the loaded token") + L.ERR("Refresh token not found in the loaded token") raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail="Refresh token not found", ) refresh_token = token['refresh_token'] - DEBUG("Found refresh token, attempting to refresh access token") + L.DEBUG("Found refresh token, attempting to refresh access token") new_token = await get_new_token_with_refresh_token(refresh_token) if new_token: await save_token(new_token) - INFO("Token refreshed and saved successfully") + L.INFO("Token refreshed and saved successfully") else: - ERR("Failed to refresh token") + L.ERR("Failed to refresh token") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed to refresh token", @@ -205,7 +204,7 @@ def get_calendar_ids() -> Dict[str, str]: calendar_identifiers = { calendar.title() : calendar.calendarIdentifier() for calendar in all_calendars } - INFO(f"{calendar_identifiers}") + L.INFO(f"{calendar_identifiers}") return calendar_identifiers # Helper to convert datetime to NSDate @@ -246,7 +245,7 @@ def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar def completion_handler(granted, error): if error is not None: - ERR(f"Error: {error}") + L.ERR(f"Error: {error}") access_granted.append(granted) # Notify the main thread that the completion handler has executed with access_granted_condition: @@ -261,11 +260,11 @@ def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar if access_granted: return access_granted[0] else: - ERR("Request access timed out or failed") + L.ERR("Request access timed out or failed") return False if not request_access(): - ERR("Access to calendar data was not granted") + L.ERR("Access to calendar data was not granted") return [] ns_start_date = datetime_to_nsdate(start_date) @@ -331,7 +330,7 @@ async def get_ms365_events(start_date: datetime, end_date: datetime): response = await client.get(graph_url, headers=headers) if response.status_code != 200: - ERR("Failed to retrieve events from Microsoft 365") + L.ERR("Failed to retrieve events from Microsoft 365") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed to retrieve events", @@ -347,33 +346,33 @@ async def parse_calendar_for_day(range_start: datetime, range_end: datetime, eve event_list = [] for event in events: - INFO(f"Event: {event}") + L.INFO(f"Event: {event}") start_str = event.get('start') end_str = event.get('end') if isinstance(start_str, dict): start_str = start_str.get('dateTime') else: - INFO(f"Start date string not a dict") + L.INFO(f"Start date string not a dict") if isinstance(end_str, dict): end_str = end_str.get('dateTime') else: - INFO(f"End date string not a dict") + L.INFO(f"End date string not a dict") try: start_date = await localize_datetime(start_str) if start_str else None except (ValueError, TypeError) as e: - ERR(f"Invalid start date format: {start_str}, error: {e}") + L.ERR(f"Invalid start date format: {start_str}, error: {e}") continue try: end_date = await localize_datetime(end_str) if end_str else None except (ValueError, TypeError) as e: - ERR(f"Invalid end date format: {end_str}, error: {e}") + L.ERR(f"Invalid end date format: {end_str}, error: {e}") continue - DEBUG(f"Comparing {start_date} with range {range_start} to {range_end}") + L.DEBUG(f"Comparing {start_date} with range {range_start} to {range_end}") if start_date: # Ensure start_date is timezone-aware @@ -405,11 +404,11 @@ async def parse_calendar_for_day(range_start: datetime, range_end: datetime, eve "busy": event.get('showAs', '') in ['busy', 'tentative'], "all_day": event.get('isAllDay', False) } - INFO(f"Event_data: {event_data}") + L.INFO(f"Event_data: {event_data}") event_list.append(event_data) else: - DEBUG(f"Event outside of specified range: {start_date} to {end_date}") + L.DEBUG(f"Event outside of specified range: {start_date} to {end_date}") else: - ERR(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}") + L.ERR(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}") return event_list \ No newline at end of file diff --git a/sijapi/routers/cf.py b/sijapi/routers/cf.py index 737f2e8..1067b44 100644 --- a/sijapi/routers/cf.py +++ b/sijapi/routers/cf.py @@ -5,8 +5,7 @@ from fastapi import APIRouter, HTTPException from pydantic import BaseModel from fastapi.responses import PlainTextResponse, JSONResponse from typing import Optional -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL -from sijapi import CF_TOKEN, CADDYFILE_PATH, CF_API_BASE_URL, CF_IP +from sijapi import L, CF_TOKEN, CADDYFILE_PATH, CF_API_BASE_URL, CF_IP import httpx import asyncio from asyncio import sleep @@ -70,7 +69,7 @@ async def retry_request(url, headers, max_retries=5, backoff_factor=1): response.raise_for_status() return response except (httpx.HTTPError, httpx.ConnectTimeout) as e: - ERR(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...") + L.ERR(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...") await sleep(backoff_factor * (2 ** retry)) raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request") diff --git a/sijapi/routers/email.py b/sijapi/routers/email.py index 4dafbb0..5069e45 100644 --- a/sijapi/routers/email.py +++ b/sijapi/routers/email.py @@ -18,12 +18,10 @@ import ssl import yaml from typing import List, Dict, Optional, Set from datetime import datetime as dt_datetime -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL -from sijapi import PODCAST_DIR, DEFAULT_VOICE, EMAIL_CONFIG, EMAIL_LOGS +from sijapi import L, PODCAST_DIR, DEFAULT_VOICE, EMAIL_CONFIG, EMAIL_LOGS from sijapi.routers import tts, llm, sd, locate from sijapi.utilities import clean_text, assemble_journal_path, extract_text, prefix_lines from sijapi.classes import EmailAccount, IMAPConfig, SMTPConfig, IncomingEmail, EmailContact, AutoResponder -from sijapi import DEBUG, INFO, ERR, LOGS_DIR from sijapi.classes import EmailAccount email = APIRouter(tags=["private"]) @@ -72,6 +70,7 @@ def get_smtp_connection(account: EmailAccount): def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount) -> List[AutoResponder]: + L.DEBUG(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"") def matches_list(item: str, this_email: IncomingEmail) -> bool: if '@' in item: return item in this_email.sender @@ -82,11 +81,15 @@ def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount whitelist_match = not profile.whitelist or any(matches_list(item, this_email) for item in profile.whitelist) blacklist_match = any(matches_list(item, this_email) for item in profile.blacklist) if whitelist_match and not blacklist_match: + L.DEBUG(f"We have a match for {whitelist_match} and no blacklist matches.") matching_profiles.append(profile) + elif whitelist_match and blacklist_match: + L.DEBUG(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}") + else: + L.DEBUG(f"No whitelist or blacklist matches.") return matching_profiles - async def generate_auto_response_body(this_email: IncomingEmail, profile: AutoResponder, account: EmailAccount) -> str: now = await locate.localize_datetime(dt_datetime.now()) then = await locate.localize_datetime(this_email.datetime_received) @@ -107,7 +110,7 @@ async def generate_auto_response_body(this_email: IncomingEmail, profile: AutoRe # async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, model: str = DEFAULT_LLM, max_tokens: int = 200): response = await llm.query_ollama(usr_prompt, sys_prompt, profile.ollama_model, 400) - DEBUG(f"query_ollama response: {response}") + L.DEBUG(f"query_ollama response: {response}") if isinstance(response, str): response += "\n\n" @@ -116,15 +119,15 @@ async def generate_auto_response_body(this_email: IncomingEmail, profile: AutoRe if "message" in response and "content" in response["message"]: return response["message"]["content"] else: - ERR(f"Unexpected response structure from query_ollama: {response}") + L.ERR(f"Unexpected response structure from query_ollama: {response}") else: - ERR(f"Unexpected response type from query_ollama: {type(response)}") + L.ERR(f"Unexpected response type from query_ollama: {type(response)}") # If we reach here, we couldn't extract a valid response raise ValueError("Could not extract valid response from query_ollama") except Exception as e: - ERR(f"Error generating auto-response: {str(e)}") + L.ERR(f"Error generating auto-response: {str(e)}") return f"Thank you for your email regarding '{this_email.subject}'. We are currently experiencing technical difficulties with our auto-response system. We will review your email and respond as soon as possible. We apologize for any inconvenience." @@ -201,26 +204,26 @@ tags: with open(md_path, 'w', encoding='utf-8') as md_file: md_file.write(markdown_content) - DEBUG(f"Saved markdown to {md_path}") + L.INFO(f"Saved markdown to {md_path}") return True except Exception as e: - ERR(f"Exception: {e}") + L.ERR(f"Exception: {e}") return False async def autorespond(this_email: IncomingEmail, account: EmailAccount): + L.DEBUG(f"Evaluating {this_email.subject} for autoresponse-worthiness...") matching_profiles = get_matching_autoresponders(this_email, account) - DEBUG(f"Matching profiles: {matching_profiles}") + L.DEBUG(f"Matching profiles: {matching_profiles}") for profile in matching_profiles: - DEBUG(f"Auto-responding to {this_email.subject} with profile: {profile.name}") + L.INFO(f"Generating auto-response to {this_email.subject} with profile: {profile.name}") auto_response_subject = f"Auto-Response Re: {this_email.subject}" auto_response_body = await generate_auto_response_body(this_email, profile, account) - DEBUG(f"Auto-response: {auto_response_body}") + L.DEBUG(f"Auto-response: {auto_response_body}") await send_auto_response(this_email.sender, auto_response_subject, auto_response_body, profile, account) async def send_auto_response(to_email, subject, body, profile, account): - DEBUG(f"Sending auto response to {to_email}...") try: message = MIMEMultipart() message['From'] = account.smtp.username @@ -235,15 +238,16 @@ async def send_auto_response(to_email, subject, body, profile, account): img = MIMEImage(img_file.read(), name=os.path.basename(jpg_path)) message.attach(img) + L.DEBUG(f"Sending auto-response {to_email} concerning {subject} from account {account.name}...") with get_smtp_connection(account) as server: server.login(account.smtp.username, account.smtp.password) server.send_message(message) - INFO(f"Auto-response sent to {to_email} concerning {subject} from account {account.name}") + L.INFO(f"Auto-response sent to {to_email} concerning {subject} from account {account.name}!") return True except Exception as e: - ERR(f"Error in preparing/sending auto-response from account {account.name}: {e}") + L.ERR(f"Error in preparing/sending auto-response from account {account.name}: {e}") return False @@ -261,6 +265,7 @@ async def save_processed_uid(filename: Path, account_name: str, uid: str): async def process_account_summarization(account: EmailAccount): summarized_log = EMAIL_LOGS / "summarized.txt" + while True: try: processed_uids = await load_processed_uids(summarized_log) @@ -283,17 +288,19 @@ async def process_account_summarization(account: EmailAccount): save_success = await save_email(this_email, account) if save_success: await save_processed_uid(summarized_log, account.name, uid_str) - DEBUG(f"Summarized email: {uid_str}") + L.INFO(f"Summarized email: {uid_str}") except Exception as e: - ERR(f"An error occurred during summarization for account {account.name}: {e}") + L.ERR(f"An error occurred during summarization for account {account.name}: {e}") await asyncio.sleep(account.refresh) async def process_account_autoresponding(account: EmailAccount): autoresponded_log = EMAIL_LOGS / "autoresponded.txt" + while True: try: processed_uids = await load_processed_uids(autoresponded_log) + L.DEBUG(f"{len(processed_uids)} already processed emails are being ignored.") with get_imap_connection(account) as inbox: unread_messages = inbox.messages(unread=True) for uid, message in unread_messages: @@ -309,16 +316,18 @@ async def process_account_autoresponding(account: EmailAccount): body=clean_email_content(message.body['html'][0]) if message.body['html'] else clean_email_content(message.body['plain'][0]) or "", attachments=message.attachments ) + L.DEBUG(f"Attempting autoresponse on {this_email.subject}") respond_success = await autorespond(this_email, account) if respond_success: await save_processed_uid(autoresponded_log, account.name, uid_str) - DEBUG(f"Auto-responded to email: {uid_str}") + L.WARN(f"Auto-responded to email: {uid_str}") except Exception as e: - ERR(f"An error occurred during auto-responding for account {account.name}: {e}") + L.ERR(f"An error occurred during auto-responding for account {account.name}: {e}") await asyncio.sleep(account.refresh) async def process_all_accounts(): + email_accounts = load_email_accounts(EMAIL_CONFIG) summarization_tasks = [asyncio.create_task(process_account_summarization(account)) for account in email_accounts] autoresponding_tasks = [asyncio.create_task(process_account_autoresponding(account)) for account in email_accounts] @@ -326,4 +335,5 @@ async def process_all_accounts(): @email.on_event("startup") async def startup_event(): + await asyncio.sleep(5) asyncio.create_task(process_all_accounts()) \ No newline at end of file diff --git a/sijapi/routers/health.py b/sijapi/routers/health.py index f3f1eb9..bb339d0 100644 --- a/sijapi/routers/health.py +++ b/sijapi/routers/health.py @@ -8,8 +8,7 @@ import httpx import socket from fastapi import APIRouter from tailscale import Tailscale -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL -from sijapi import TS_ID, ROUTERS, SUBNET_BROADCAST +from sijapi import L, TS_ID, ROUTERS, SUBNET_BROADCAST health = APIRouter(tags=["public", "trusted", "private"]) @@ -49,7 +48,7 @@ async def get_wan_ip(): wan_info = response.json() return wan_info.get('ip', 'Unavailable') except Exception as e: - ERR(f"Error fetching WAN IP: {e}") + L.ERR(f"Error fetching WAN IP: {e}") return "Unavailable" @health.get("/ts_ip") diff --git a/sijapi/routers/hooks.py b/sijapi/routers/hooks.py index c64aa42..e69de29 100644 --- a/sijapi/routers/hooks.py +++ b/sijapi/routers/hooks.py @@ -1,358 +0,0 @@ -''' -Webhook module for specific use cases. -Depends on: - LOGGER, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY, COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR, MAC_UN, MAC_PW, MAC_ID, TS_TAILNET -''' -from fastapi import APIRouter, Request, BackgroundTasks, HTTPException, status -from fastapi.responses import JSONResponse, RedirectResponse -import httpx -import json -from pathlib import Path -import asyncio -from datetime import datetime -import os, io -from PyPDF2 import PdfReader -import aiohttp -import paramiko -import time -import subprocess -from pydantic import BaseModel -from typing import List, Optional -import requests -import base64 -from hashlib import sha256 -# from O365 import Account, FileSystemTokenBackend -from typing import List -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL -from sijapi import LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY, COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR, MAC_UN, MAC_PW, MAC_ID, TS_TAILNET - -hooks = APIRouter() - -with open(CASETABLE_PATH, 'r') as file: - CASETABLE = json.load(file) - -class WidgetUpdate(BaseModel): - text: Optional[str] = None - progress: Optional[str] = None - icon: Optional[str] = None - color: Optional[str] = None - url: Optional[str] = None - shortcut: Optional[str] = None - graph: Optional[str] = None - - -@hooks.get("/health_check") -def hook_health(): - shellfish_health_check() - -@hooks.post("/update_widget") -def hook_widget_update(update: WidgetUpdate): - shellfish_update_widget(update) - -@hooks.get("/alert") -async def hook_alert(request: Request): - alert = request.query_params.get('alert') - if not alert: - raise HTTPException(status_code=400, detail='No alert provided.') - - return await notify(alert) - -@hooks.post("/alert/cd") -async def hook_changedetection(webhook_data: dict): - body = webhook_data.get("body", {}) - message = body.get("message", "") - - if message and any(word in message.split() for word in ["SPI", "sierra", "pacific"]): - filename = ALERTS_DIR / f"alert_{int(time.time())}.json" - filename.write_text(json.dumps(webhook_data, indent=4)) - - notify(message) - - return {"status": "received"} - - -@hooks.post("/cl/search") -async def hook_cl_search(request: Request, background_tasks: BackgroundTasks): - client_ip = request.client.host - DEBUG(f"Received request from IP: {client_ip}") - data = await request.json() - payload = data['payload'] - results = data['payload']['results'] - - # Save the payload data - timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") - payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_search.json" - with open(payload_file, 'w') as file: - json.dump(payload, file, indent=2) - - for result in results: - background_tasks.add_task(cl_search_process_result, result) - return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK) - -@hooks.post("/cl/docket") -async def hook_cl_docket(request: Request): - client_ip = request.client.host - DEBUG(f"Received request from IP: {client_ip}") - data = await request.json() - await cl_docket(data, client_ip) - -async def notify(alert: str): - try: - await notify_shellfish(alert) - - if TS_ID == MAC_ID: - await notify_local(alert) - else: - await notify_remote(f"{MAC_ID}.{TS_TAILNET}.net", alert, MAC_UN, MAC_PW) - - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to send alert: {str(e)}") - - return {"message": alert} - -async def notify_local(message: str): - await asyncio.to_thread(os.system, f'osascript -e \'display notification "{message}" with title "Notification Title"\'') - - -# Asynchronous remote notification using paramiko SSH -async def notify_remote(host: str, message: str, username: str = None, password: str = None, key_filename: str = None): - ssh = paramiko.SSHClient() - ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - - connect_kwargs = {'hostname': host, 'username': username} - if key_filename: - connect_kwargs['key_filename'] = key_filename - else: - connect_kwargs['password'] = password - - await asyncio.to_thread(ssh.connect, **connect_kwargs) - await asyncio.to_thread(ssh.exec_command, f'osascript -e \'display notification "{message}" with title "Notification Title"\'') - ssh.close() - - - -async def notify_shellfish(alert: str): - key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b" - user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm" - iv = "ab5bbeb426015da7eedcee8bee3dffb7" - - plain = "Secure ShellFish Notify 2.0\n" + base64.b64encode(alert.encode()).decode() + "\n" - - openssl_command = [ - "openssl", "enc", "-aes-256-cbc", "-base64", "-K", key, "-iv", iv - ] - - process = await asyncio.to_thread(subprocess.Popen, openssl_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = await asyncio.to_thread(process.communicate, plain.encode()) - - if process.returncode != 0: - raise Exception(f"OpenSSL encryption failed: {stderr.decode()}") - - base64_encoded = stdout.decode().strip() - - url = f"https://secureshellfish.app/push/?user={user}&mutable" - headers = {"Content-Type": "text/plain"} - async with aiohttp.ClientSession() as session: - async with session.post(url, headers=headers, data=base64_encoded) as response: - if response.status != 200: - raise Exception(f"Failed to send notification: {response.status_code}") - -## SHELLFISH ## -def shellfish_health_check(): - addresses = [ - "https://api.sij.ai/health", - "http://100.64.64.20:4444/health", - "http://100.64.64.30:4444/health", - "http://100.64.64.11:4444/health", - "http://100.64.64.15:4444/health" - ] - - results = [] - up_count = 0 - for address in addresses: - try: - response = requests.get(address) - if response.status_code == 200: - results.append(f"{address} is up") - up_count += 1 - else: - results.append(f"{address} returned status code {response.status_code}") - except requests.exceptions.RequestException: - results.append(f"{address} is down") - - # Generate a simple text-based graph - graph = '|' * up_count + '.' * (len(addresses) - up_count) - text_update = "\n".join(results) - - widget_command = ["widget", "--text", text_update, "--text", f"Graph: {graph}", "--icon", "network"] - output = shellfish_run_widget_command(widget_command) - return {"output": output, "graph": graph} - - -def shellfish_update_widget(update: WidgetUpdate): - widget_command = ["widget"] - - if update.text: - widget_command.extend(["--text", update.text]) - if update.progress: - widget_command.extend(["--progress", update.progress]) - if update.icon: - widget_command.extend(["--icon", update.icon]) - if update.color: - widget_command.extend(["--color", update.color]) - if update.url: - widget_command.extend(["--url", update.url]) - if update.shortcut: - widget_command.extend(["--shortcut", update.shortcut]) - if update.graph: - widget_command.extend(["--text", update.graph]) - - output = shellfish_run_widget_command(widget_command) - return {"output": output} - - -def shellfish_run_widget_command(args: List[str]): - result = subprocess.run(args, capture_output=True, text=True, shell=True) - if result.returncode != 0: - raise HTTPException(status_code=500, detail=result.stderr) - return result.stdout - - -### COURTLISTENER FUNCTIONS ### -async def cl_docket(data, client_ip, background_tasks: BackgroundTasks): - payload = data['payload'] - results = data['payload']['results'] - timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") - payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_docket.json" - with open(payload_file, 'w') as file: - json.dump(payload, file, indent=2) - - for result in results: - background_tasks.add_task(cl_docket_process, result) - return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK) - -async def cl_docket_process(result): - async with httpx.AsyncClient() as session: - await cl_docket_process_result(result, session) - -async def cl_docket_process_result(result, session): - docket = str(result.get('docket')) - case_code, case_shortname = cl_case_details(docket) - date_filed = result.get('date_filed', 'No Date Filed') - - try: - date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d') - except ValueError: - date_filed_formatted = 'NoDateFiled' - - # Fetching court docket information from the API - url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}" - headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'} - async with aiohttp.ClientSession() as session: - async with session.get(url, headers=headers) as response: - if response.status == 200: - DEBUG(f"Fetching CourtListener docket information for {docket}...") - data = await response.json() - court_docket = data['results'][0]['docket_number_core'] - court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number - case_name = data['results'][0]['case_name'] - DEBUG(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.") - else: - DEBUG("Failed to fetch data from CourtListener API.") - court_docket = 'NoCourtDocket' - case_name = 'NoCaseName' - - for document in result.get('recap_documents', []): - filepath_ia = document.get('filepath_ia') - filepath_local = document.get('filepath_local') - - if filepath_ia: - file_url = filepath_ia - DEBUG(f"Found IA file at {file_url}.") - elif filepath_local: - file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}" - DEBUG(f"Found local file at {file_url}.") - else: - DEBUG(f"No file URL found in filepath_ia or filepath_local for one of the documents.") - continue - - document_number = document.get('document_number', 'NoDocumentNumber') - description = document.get('description', 'NoDescription').replace(" ", "_").replace("/", "_") - description = description[:50] # Truncate description - # case_shortname = case_name # TEMPORARY OVERRIDE - file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf" - target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name - target_path.parent.mkdir(parents=True, exist_ok=True) - await cl_download_file(file_url, target_path, session) - DEBUG(f"Downloaded {file_name} to {target_path}") - -def cl_case_details(docket): - case_info = CASETABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"}) - case_code = case_info.get("code") - short_name = case_info.get("shortname") - return case_code, short_name - -async def cl_download_file(url: str, path: Path, session: aiohttp.ClientSession = None): - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36' - } - async with aiohttp.ClientSession() as session: - DEBUG(f"Attempting to download {url} to {path}.") - try: - async with session.get(url, headers=headers, allow_redirects=True) as response: - if response.status == 403: - ERR(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.") - return - response.raise_for_status() - - # Check if the response content type is a PDF - content_type = response.headers.get('Content-Type') - if content_type != 'application/pdf': - ERR(f"Invalid content type: {content_type}. Skipping download.") - return - - # Create an in-memory buffer to store the downloaded content - buffer = io.BytesIO() - async for chunk in response.content.iter_chunked(1024): - buffer.write(chunk) - - # Reset the buffer position to the beginning - buffer.seek(0) - - # Validate the downloaded PDF content - try: - PdfReader(buffer) - except Exception as e: - ERR(f"Invalid PDF content: {str(e)}. Skipping download.") - return - - # If the PDF is valid, write the content to the file on disk - path.parent.mkdir(parents=True, exist_ok=True) - with path.open('wb') as file: - file.write(buffer.getvalue()) - - except Exception as e: - ERR(f"Error downloading file: {str(e)}") - - -async def cl_search_process_result(result): - async with httpx.AsyncClient() as session: - download_url = result.get('download_url') - court_id = result.get('court_id') - case_name_short = result.get('caseNameShort') - case_name = result.get('caseName') - DEBUG(f"Received payload for case {case_name} ({court_id}) and download url {download_url}") - - court_folder = court_id - - if case_name_short: - case_folder = case_name_short - else: - case_folder = case_name - - file_name = download_url.split('/')[-1] - target_path = Path(COURTLISTENER_SEARCH_DIR) / court_folder / case_folder / file_name - target_path.parent.mkdir(parents=True, exist_ok=True) - - await cl_download_file(download_url, target_path, session) - DEBUG(f"Downloaded {file_name} to {target_path}") diff --git a/sijapi/routers/ig.py b/sijapi/routers/ig.py index 89e7b3a..ea37daf 100644 --- a/sijapi/routers/ig.py +++ b/sijapi/routers/ig.py @@ -34,7 +34,7 @@ import json from ollama import Client as oLlama from sd import sd from dotenv import load_dotenv -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, COMFYUI_DIR +from sijapi import L, COMFYUI_DIR import io from io import BytesIO @@ -186,7 +186,7 @@ def get_user_media(username, amount=30): Fetch recent media for a given username, return List of medias """ - DEBUG(f"Fetching recent media for {username}...") + L.DEBUG(f"Fetching recent media for {username}...") user_id = cl.user_id_from_username(username) medias = cl.user_medias(user_id, amount) final_medias = [] @@ -201,7 +201,7 @@ def get_user_image_urls(username, amount=30) -> List[str]: """ Fetch recent media URLs for a given username, return List of media URLs """ - DEBUG(f"Fetching recent media URLs for {username}...") + L.DEBUG(f"Fetching recent media URLs for {username}...") user_id = cl.user_id_from_username(username) medias = cl.user_medias(user_id, amount) @@ -229,7 +229,7 @@ def get_random_follower(): def get_medias_by_hashtag(hashtag: str, days_ago_max:int = 14, ht_type:str = None, amount:int = args.count): if not ht_type: ht_type = args.commentmode - DEBUG(f"Fetching {ht_type} media for hashtag: {hashtag}") + L.DEBUG(f"Fetching {ht_type} media for hashtag: {hashtag}") ht_medias = [] while True: sleep(SHORT) @@ -239,10 +239,10 @@ def get_medias_by_hashtag(hashtag: str, days_ago_max:int = 14, ht_type:str = Non ht_medias.extend(cl.hashtag_medias_recent(name=hashtag, amount=amount*10)) filtered_medias = filter_medias(ht_medias, days_ago_max=days_ago_max) - DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(filtered_medias)}") + L.DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(filtered_medias)}") if len(filtered_medias) >= amount: - DEBUG(f"Desired amount of {amount} filtered media reached.") + L.DEBUG(f"Desired amount of {amount} filtered media reached.") break return filtered_medias @@ -250,11 +250,11 @@ def get_medias_by_hashtag(hashtag: str, days_ago_max:int = 14, ht_type:str = Non def get_medias_from_all_hashtags(days_ago_max=14, ht_type:str = None, amount:int = args.count): if not ht_type: ht_type = args.commentmode - DEBUG(f"Fetching {ht_type} media.") + L.DEBUG(f"Fetching {ht_type} media.") filtered_medias = [] while len(filtered_medias) < amount: hashtag = random.choice(HASHTAGS) - DEBUG(f"Using hashtag: {hashtag}") + L.DEBUG(f"Using hashtag: {hashtag}") fetched_medias = [] sleep(SHORT) if ht_type == "top": @@ -264,15 +264,15 @@ def get_medias_from_all_hashtags(days_ago_max=14, ht_type:str = None, amount:int current_filtered_medias = filter_medias(fetched_medias, days_ago_max=days_ago_max) filtered_medias.extend(current_filtered_medias) - DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(current_filtered_medias)}") + L.DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(current_filtered_medias)}") # Trim the list if we've collected more than needed if len(filtered_medias) > amount: filtered_medias = filtered_medias[:amount] - DEBUG(f"Desired amount of {amount} filtered media reached.") + L.DEBUG(f"Desired amount of {amount} filtered media reached.") break else: - DEBUG(f"Total filtered media count so far: {len(filtered_medias)}") + L.DEBUG(f"Total filtered media count so far: {len(filtered_medias)}") return filtered_medias @@ -345,7 +345,7 @@ def download_and_resize_image(url: str, download_path: str = None, max_dimension return download_path except Exception as e: # Handle or log the error as needed - DEBUG(f"Error downloading or resizing image: {e}") + L.DEBUG(f"Error downloading or resizing image: {e}") return None @@ -365,17 +365,17 @@ def comment_on_user_media(user: str, comment_type: str = "default", amount=5): comment_text = llava(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr) if args.llava or not args.openai else gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr) if comment_text: cl.media_comment(media.pk, comment_text) - DEBUG(f"Commented on media: {media.pk}") + L.DEBUG(f"Commented on media: {media.pk}") else: - DEBUG(f"Failed to generate comment for media: {media.pk}") + L.DEBUG(f"Failed to generate comment for media: {media.pk}") add_media_to_completed_lists(media) sleep(SHORT) else: - DEBUG(f"We received a nonetype! {media_path}") + L.DEBUG(f"We received a nonetype! {media_path}") else: - DEBUG(f"URL for {media.pk} disappeared it seems...") + L.DEBUG(f"URL for {media.pk} disappeared it seems...") else: - DEBUG(f"Media already interacted with: {media.pk}") + L.DEBUG(f"Media already interacted with: {media.pk}") def comment_on_hashtagged_media(comment_type: str = args.commenttype, amount=3, hashtag: str = None): """ @@ -400,22 +400,22 @@ def comment_on_hashtagged_media(comment_type: str = args.commenttype, amount=3, try: like_result = cl.media_like(media) if like_result: - DEBUG(f"Liked media: https://instagram.com/p/{media.pk}/") + L.DEBUG(f"Liked media: https://instagram.com/p/{media.pk}/") except instagrapi.exceptions.FeedbackRequired as e: - DEBUG(f"Cannot like media {media.pk}: {str(e)}") + L.DEBUG(f"Cannot like media {media.pk}: {str(e)}") if comment_text: try: cl.media_comment(media.pk, comment_text) - DEBUG(f"Commented on media: https://instagram.com/p/{media.pk}/") + L.DEBUG(f"Commented on media: https://instagram.com/p/{media.pk}/") except instagrapi.exceptions.FeedbackRequired as e: - DEBUG(f"Cannot comment on media {media.pk}: {str(e)}") + L.DEBUG(f"Cannot comment on media {media.pk}: {str(e)}") else: - DEBUG(f"Failed to generate comment for media: https://instagram.com/p/{media.pk}") + L.DEBUG(f"Failed to generate comment for media: https://instagram.com/p/{media.pk}") add_media_to_completed_lists(media) sleep(SHORT) else: - DEBUG(f"Media already interacted with: {media.pk}") + L.DEBUG(f"Media already interacted with: {media.pk}") def comment_on_specific_media(media_url, comment_type: str = "default"): @@ -436,11 +436,11 @@ def comment_on_specific_media(media_url, comment_type: str = "default"): if comment_text: try: cl.media_comment(media.pk, comment_text) - DEBUG(f"Commented on specific media: https://instagram.com/p/{media.pk}/") + L.DEBUG(f"Commented on specific media: https://instagram.com/p/{media.pk}/") except instagrapi.exceptions.FeedbackRequired as e: - DEBUG(f"Failed to comment on specific media: https://instagram.com/p/{media.pk}/ due to error: {str(e)}") + L.DEBUG(f"Failed to comment on specific media: https://instagram.com/p/{media.pk}/ due to error: {str(e)}") else: - DEBUG(f"Failed to generate comment for specific media: https://instagram.com/p/{media.pk}/") + L.DEBUG(f"Failed to generate comment for specific media: https://instagram.com/p/{media.pk}/") @@ -485,16 +485,16 @@ def update_prompt(workflow: dict, post: dict, positive: str, found_key=[None], p if value == "API_PPrompt": workflow[key] = post.get(value, "") + positive - DEBUG(f"Updated API_PPrompt to: {workflow[key]}") + L.DEBUG(f"Updated API_PPrompt to: {workflow[key]}") elif value == "API_SPrompt": workflow[key] = post.get(value, "") - DEBUG(f"Updated API_SPrompt to: {workflow[key]}") + L.DEBUG(f"Updated API_SPrompt to: {workflow[key]}") elif value == "API_NPrompt": workflow[key] = post.get(value, "") - DEBUG(f"Updated API_NPrompt to: {workflow[key]}") + L.DEBUG(f"Updated API_NPrompt to: {workflow[key]}") elif key == "seed" or key == "noise_seed": workflow[key] = random.randint(1000000000000, 9999999999999) - DEBUG(f"Updated seed to: {workflow[key]}") + L.DEBUG(f"Updated seed to: {workflow[key]}") elif (key == "width" or key == "max_width" or key == "scaled_width" or key == "side_length") and (value == 1023 or value == 1025): # workflow[key] = post.get(value, "") workflow[key] = post.get("width", 1024) @@ -502,7 +502,7 @@ def update_prompt(workflow: dict, post: dict, positive: str, found_key=[None], p # workflow[key] = post.get(value, "") workflow[key] = post.get("height", 1024) except Exception as e: - DEBUG(f"Error in update_prompt at path {' -> '.join(path)}: {e}") + L.DEBUG(f"Error in update_prompt at path {' -> '.join(path)}: {e}") raise return found_key[0] @@ -527,22 +527,22 @@ def update_prompt_custom(workflow: dict, API_PPrompt: str, API_SPrompt: str, API if value == "API_PPrompt": workflow[key] = API_PPrompt - DEBUG(f"Updated API_PPrompt to: {workflow[key]}") + L.DEBUG(f"Updated API_PPrompt to: {workflow[key]}") elif value == "API_SPrompt": workflow[key] = API_SPrompt - DEBUG(f"Updated API_SPrompt to: {workflow[key]}") + L.DEBUG(f"Updated API_SPrompt to: {workflow[key]}") elif value == "API_NPrompt": workflow[key] = API_NPrompt - DEBUG(f"Updated API_NPrompt to: {workflow[key]}") + L.DEBUG(f"Updated API_NPrompt to: {workflow[key]}") elif key == "seed" or key == "noise_seed": workflow[key] = random.randint(1000000000000, 9999999999999) - DEBUG(f"Updated seed to: {workflow[key]}") + L.DEBUG(f"Updated seed to: {workflow[key]}") elif (key == "width" or key == "max_width" or key == "scaled_width") and (value == 1023 or value == 1025): workflow[key] = 1024 elif (key == "dimension" or key == "height" or key == "max_height" or key == "scaled_height") and (value == 1023 or value == 1025): workflow[key] = 1024 except Exception as e: - DEBUG(f"Error in update_prompt_custom at path {' -> '.join(path)}: {e}") + L.DEBUG(f"Error in update_prompt_custom at path {' -> '.join(path)}: {e}") raise return found_key[0] @@ -582,14 +582,14 @@ def poll_status(prompt_id): elapsed_time = int(time.time() - start_time) # Calculate elapsed time in seconds status_response = requests.get(f"{COMFYUI_URL}/history/{prompt_id}") # Use \r to return to the start of the line, and end='' to prevent newline - DEBUG(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds", end='') + L.DEBUG(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds", end='') if status_response.status_code != 200: raise Exception("Failed to get job status") status_data = status_response.json() job_data = status_data.get(prompt_id, {}) if job_data.get("status", {}).get("completed", False): - DEBUG() - DEBUG(f"{prompt_id} completed in {elapsed_time} seconds.") + L.DEBUG() + L.DEBUG(f"{prompt_id} completed in {elapsed_time} seconds.") return job_data time.sleep(1) @@ -600,14 +600,14 @@ def poll_status(prompt_id): elapsed_time = int(time.time() - start_time) # Calculate elapsed time in seconds status_response = requests.get(f"{COMFYUI_URL}/history/{prompt_id}") # Use \r to return to the start of the line, and end='' to prevent newline - DEBUG(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds", end='') + L.DEBUG(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds", end='') if status_response.status_code != 200: raise Exception("Failed to get job status") status_data = status_response.json() job_data = status_data.get(prompt_id, {}) if job_data.get("status", {}).get("completed", False): - DEBUG() - DEBUG(f"{prompt_id} completed in {elapsed_time} seconds.") + L.DEBUG() + L.DEBUG(f"{prompt_id} completed in {elapsed_time} seconds.") return job_data time.sleep(1) @@ -618,12 +618,12 @@ def poll_status(prompt_id): def load_post(chosen_post: str = "default"): if chosen_post in PROFILE_CONFIG['posts']: post = PROFILE_CONFIG['posts'][chosen_post] - DEBUG(f"Loaded post for {chosen_post}") + L.DEBUG(f"Loaded post for {chosen_post}") else: - DEBUG(f"Unable to load post for {chosen_post}. Choosing a default post.") + L.DEBUG(f"Unable to load post for {chosen_post}. Choosing a default post.") chosen_post = choose_post(PROFILE_CONFIG['posts']) post = PROFILE_CONFIG['posts'][chosen_post] - DEBUG(f"Defaulted to {chosen_post}") + L.DEBUG(f"Defaulted to {chosen_post}") return post @@ -639,12 +639,12 @@ def handle_image_workflow(chosen_post=None): workflow_name = args.workflow if args.workflow else random.choice(post['workflows']) - DEBUG(f"Workflow name: {workflow_name}") + L.DEBUG(f"Workflow name: {workflow_name}") - DEBUG(f"Generating image concept for {chosen_post} and {workflow_name} now.") + L.DEBUG(f"Generating image concept for {chosen_post} and {workflow_name} now.") image_concept = query_ollama(llmPrompt = post['llmPrompt'], max_tokens = 180) if args.local or not args.openai else query_gpt4(llmPrompt = post['llmPrompt'], max_tokens = 180) - DEBUG(f"Image concept for {chosen_post}: {image_concept}") + L.DEBUG(f"Image concept for {chosen_post}: {image_concept}") workflow_data = None @@ -658,9 +658,9 @@ def handle_image_workflow(chosen_post=None): jpg_file_path = image_gen(image_concept, "dall-e-3") else: saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept) - DEBUG(f"Saved file key: {saved_file_key}") + L.DEBUG(f"Saved file key: {saved_file_key}") prompt_id = queue_prompt(workflow_data) - DEBUG(f"Prompt ID: {prompt_id}") + L.DEBUG(f"Prompt ID: {prompt_id}") status_data = poll_status(prompt_id) image_data = get_image(status_data, saved_file_key) if chosen_post == "landscape": @@ -699,17 +699,17 @@ def handle_custom_image(custom_post: str): system_msg = "You are a helpful AI who assists in generating prompts that will be used to generate highly realistic images. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words." image_concept = query_ollama(system_msg=system_msg, user_msg=custom_post, max_tokens = 180) if args.local or not args.openai else query_gpt4(system_msg=system_msg, user_msg=custom_post, max_tokens = 180) - DEBUG(f"Image concept: {image_concept}") + L.DEBUG(f"Image concept: {image_concept}") if args.dalle and not args.local: jpg_file_path = image_gen(image_concept, "dall-e-3") else: saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept) - DEBUG(f"Saved file key: {saved_file_key}") + L.DEBUG(f"Saved file key: {saved_file_key}") prompt_id = queue_prompt(workflow_data) - DEBUG(f"Prompt ID: {prompt_id}") + L.DEBUG(f"Prompt ID: {prompt_id}") status_data = poll_status(prompt_id) image_data = get_image(status_data, saved_file_key) @@ -728,7 +728,7 @@ def image_aftergen(jpg_file_path: str, chosen_post: str = None, post: Dict = Non if chosen_post and not prompt: prompt = PROFILE_CONFIG['posts'][chosen_post]['Vision_Prompt'] encoded_string = encode_image_to_base64(jpg_file_path) - DEBUG(f"Image successfully encoded from {jpg_file_path}") + L.DEBUG(f"Image successfully encoded from {jpg_file_path}") instagram_description = llava(encoded_string, prompt) if args.local or args.llava or not args.openai else gpt4v(encoded_string, prompt, 150) instagram_description = re.sub(r'^["\'](.*)["\']$', r'\1', instagram_description) @@ -759,24 +759,24 @@ Tags: {', '.join(ghost_tags)} with open(markdown_filename, "w") as md_file: md_file.write(markdown_content) - DEBUG(f"Markdown file created at {markdown_filename}") + L.DEBUG(f"Markdown file created at {markdown_filename}") if args.wallpaper: change_wallpaper(jpg_file_path) - DEBUG(f"Wallpaper changed.") + L.DEBUG(f"Wallpaper changed.") if not args.local: ig_footer = "" if not args.noig: post_url = upload_photo(jpg_file_path, instagram_description) - DEBUG(f"Image posted at {post_url}") + L.DEBUG(f"Image posted at {post_url}") ig_footer = f"\nInstagram link" if not args.noghost: ghost_text = f"{instagram_description}" ghost_url = post_to_ghost(img_title, jpg_file_path, ghost_text, ghost_tags) - DEBUG(f"Ghost post: {ghost_url}\n{ig_footer}") + L.DEBUG(f"Ghost post: {ghost_url}\n{ig_footer}") def choose_post(posts): @@ -836,12 +836,12 @@ def save_as_jpg(image_data, prompt_id, chosen_post:str = None, max_size=2160, qu return new_file_path except Exception as e: - DEBUG(f"Error processing image: {e}") + L.DEBUG(f"Error processing image: {e}") return None def upload_photo(path, caption, title: str=None): - DEBUG(f"Uploading photo from {path}...") + L.DEBUG(f"Uploading photo from {path}...") media = cl.photo_upload(path, caption) post_url = f"https://www.instagram.com/p/{media.code}/" return post_url @@ -958,16 +958,16 @@ async def ig_flow_endpoint(new_session: bool = False): time_remaining = 30 - (time_since_rollover % 30) if time_remaining < 4: - DEBUG("Too close to end of TOTP counter. Waiting.") + L.DEBUG("Too close to end of TOTP counter. Waiting.") sleepupto(5, 5) if not new_session and os.path.exists(IG_SESSION_PATH): cl.load_settings(IG_SESSION_PATH) - DEBUG("Loaded past session.") + L.DEBUG("Loaded past session.") elif new_session and cl.login(IG_USERNAME, IG_PASSWORD, verification_code=TOTP.now()): cl.dump_settings(IG_SESSION_PATH) - DEBUG("Logged in and saved new session.") + L.DEBUG("Logged in and saved new session.") else: raise Exception(f"Failed to login as {IG_USERNAME}.") diff --git a/sijapi/routers/llm.py b/sijapi/routers/llm.py index 7338c49..87c8417 100644 --- a/sijapi/routers/llm.py +++ b/sijapi/routers/llm.py @@ -1,4 +1,6 @@ -#routers/llm.py +''' +Interfaces with Ollama and creates an OpenAI-compatible relay API. +''' from fastapi import APIRouter, HTTPException, Request, Response, BackgroundTasks, File, Form, UploadFile from fastapi.responses import StreamingResponse, JSONResponse, FileResponse from datetime import datetime as dt_datetime @@ -22,7 +24,7 @@ import tempfile import shutil import html2text import markdown -from sijapi import LLM_SYS_MSG, DEFAULT_LLM, DEFAULT_VISION, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, OPENAI_API_KEY, DEBUG, INFO, WARN, ERR, CRITICAL, DEFAULT_VOICE, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MODEL +from sijapi import L, LLM_SYS_MSG, DEFAULT_LLM, DEFAULT_VISION, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, OPENAI_API_KEY, DEFAULT_VOICE, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MODEL from sijapi.utilities import convert_to_unix_time, sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension from sijapi.routers.tts import generate_speech from sijapi.routers.asr import transcribe_audio @@ -84,13 +86,13 @@ async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, model: str = DEFAULT_LL LLM = Ollama() response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens}) - DEBUG(response) + L.DEBUG(response) if "message" in response: if "content" in response["message"]: content = response["message"]["content"] return content else: - DEBUG("No choices found in response") + L.DEBUG("No choices found in response") return None def is_vision_request(content): @@ -111,21 +113,21 @@ async def chat_completions(request: Request): raise HTTPException(status_code=400, detail="Message data is required in the request body.") requested_model = body.get('model', 'default-model') - DEBUG(f"Requested model: {requested_model}") + L.DEBUG(f"Requested model: {requested_model}") stream = body.get('stream') token_limit = body.get('max_tokens') or body.get('num_predict') # Check if the most recent message contains an image_url recent_message = messages[-1] if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')): - DEBUG("Processing as a vision request") + L.DEBUG("Processing as a vision request") model = "llava" - DEBUG(f"Using model: {model}") + L.DEBUG(f"Using model: {model}") return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json") else: - DEBUG("Processing as a standard request") + L.DEBUG("Processing as a standard request") model = requested_model - DEBUG(f"Using model: {model}") + L.DEBUG(f"Using model: {model}") if stream: return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json") else: @@ -250,17 +252,17 @@ async def generate_messages(messages: list, model: str = "llama3"): def is_model_available(model_name): model_data = OllamaList() available_models = [model['name'] for model in model_data['models']] - DEBUG(f"Available models: {available_models}") # Log using the configured LOGGER + L.DEBUG(f"Available models: {available_models}") # Log using the configured LOGGER matching_models = [model for model in available_models if model.startswith(model_name + ':') or model == model_name] if len(matching_models) == 1: - DEBUG(f"Unique match found: {matching_models[0]}") + L.DEBUG(f"Unique match found: {matching_models[0]}") return True elif len(matching_models) > 1: - ERR(f"Ambiguous match found, models: {matching_models}") + L.ERR(f"Ambiguous match found, models: {matching_models}") return True else: - ERR(f"No match found for model: {model_name}") + L.ERR(f"No match found for model: {model_name}") return False @@ -383,12 +385,12 @@ def query_gpt4(llmPrompt: List = [], system_msg: str = "", user_msg: str = "", m if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"): return first_choice.message.content else: - DEBUG("No content attribute in the first choice's message") - DEBUG(f"No content found in message string: {response.choices}") - DEBUG("Trying again!") + L.DEBUG("No content attribute in the first choice's message") + L.DEBUG(f"No content found in message string: {response.choices}") + L.DEBUG("Trying again!") query_gpt4(messages, max_tokens) else: - DEBUG(f"No content found in message string: {response}") + L.DEBUG(f"No content found in message string: {response}") return "" def llava(image_base64, prompt): @@ -398,7 +400,7 @@ def llava(image_base64, prompt): prompt = f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}", images = [image_base64] ) - DEBUG(response) + L.DEBUG(response) return "" if "pass" in response["response"].lower() else response["response"] def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150): @@ -429,7 +431,7 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150) comment_content = first_choice.message.content if "PASS" in comment_content: return "" - DEBUG(f"Generated comment: {comment_content}") + L.DEBUG(f"Generated comment: {comment_content}") response_2 = VISION_LLM.chat.completions.create( model="gpt-4-vision-preview", @@ -467,15 +469,15 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150) first_choice = response_2.choices[0] if first_choice.message and first_choice.message.content: final_content = first_choice.message.content - DEBUG(f"Generated comment: {final_content}") + L.DEBUG(f"Generated comment: {final_content}") if "PASS" in final_content: return "" else: return final_content - DEBUG("Vision response did not contain expected data.") - DEBUG(f"Vision response: {response_1}") + L.DEBUG("Vision response did not contain expected data.") + L.DEBUG(f"Vision response: {response_1}") asyncio.sleep(15) try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens) @@ -520,7 +522,7 @@ async def summarize_tts( background_tasks = BackgroundTasks() final_output_path = await generate_speech(background_tasks, summarized_text, voice, "xtts", speed=speed, podcast=podcast, title=filename) - DEBUG(f"summary_tts completed with final_output_path: {final_output_path}") + L.DEBUG(f"summary_tts completed with final_output_path: {final_output_path}") return final_output_path @@ -539,7 +541,7 @@ def split_text_into_chunks(text: str) -> List[str]: adjusted_overlap = max(0, int(SUMMARY_CHUNK_OVERLAP / SUMMARY_TPW)) # Ensure non-negative chunks = [] for i in range(0, len(words), adjusted_chunk_size - adjusted_overlap): - DEBUG(f"We are on iteration # {i} if split_text_into_chunks.") + L.DEBUG(f"We are on iteration # {i} if split_text_into_chunks.") chunk = ' '.join(words[i:i + adjusted_chunk_size]) chunks.append(chunk) return chunks @@ -609,7 +611,7 @@ async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_ corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT) individual_summary_length = max(1, corrected_total_summary_length // total_parts) # Ensure at least 1 - DEBUG(f"Text split into {total_parts} chunks.") + L.DEBUG(f"Text split into {total_parts} chunks.") summaries = await asyncio.gather(*[ process_chunk(instruction, chunk, i+1, total_parts, individual_summary_length, LLM) for i, chunk in enumerate(chunked_text) ]) @@ -635,14 +637,14 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int max_tokens = min(fraction_tokens, SUMMARY_CHUNK_SIZE // max(1, total_parts)) # Ensure at least 1 max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH) # Ensure a minimum token count to avoid tiny processing chunks - DEBUG(f"Summarizing part {part} of {total_parts}: Max_tokens: {max_tokens}") + L.DEBUG(f"Summarizing part {part} of {total_parts}: Max_tokens: {max_tokens}") if part and total_parts > 1: prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}" else: prompt = f"{instruction}:\n\n{text}" - DEBUG(f"Starting LLM.generate for part {part} of {total_parts}") + L.DEBUG(f"Starting LLM.generate for part {part} of {total_parts}") response = await LLM.generate( model=SUMMARY_MODEL, prompt=prompt, @@ -651,7 +653,7 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int ) text_response = response['response'] - DEBUG(f"Completed LLM.generate for part {part} of {total_parts}") + L.DEBUG(f"Completed LLM.generate for part {part} of {total_parts}") return text_response diff --git a/sijapi/routers/locate.py b/sijapi/routers/locate.py index bb3ad8c..8bcb2ad 100644 --- a/sijapi/routers/locate.py +++ b/sijapi/routers/locate.py @@ -1,3 +1,6 @@ +''' +Uses Postgres/PostGIS for for location tracking (data obtained via the companion mobile Pythonista scripts), and for geocoding purposes. +''' from fastapi import APIRouter, HTTPException, Query from fastapi.responses import HTMLResponse, JSONResponse import requests @@ -17,8 +20,7 @@ from pathlib import Path from pydantic import BaseModel from typing import Optional, Any, Dict, List, Union from datetime import datetime, timedelta, time -from sijapi import NAMED_LOCATIONS, TZ, DynamicTZ -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, DB +from sijapi import L, DB, TZ, NAMED_LOCATIONS, DynamicTZ from sijapi.classes import Location from sijapi.utilities import haversine # from osgeo import gdal @@ -29,7 +31,7 @@ locate = APIRouter() async def reverse_geocode(latitude: float, longitude: float) -> Optional[Location]: url = f"https://nominatim.openstreetmap.org/reverse?format=json&lat={latitude}&lon={longitude}" - INFO(f"Calling Nominatim API at {url}") + L.INFO(f"Calling Nominatim API at {url}") headers = { 'User-Agent': 'sij.law/1.0 (sij@sij.law)', # replace with your app name and email } @@ -64,10 +66,10 @@ async def reverse_geocode(latitude: float, longitude: float) -> Optional[Locatio county=address.get("county"), country_code=address.get("country_code") ) - INFO(f"Created Location object: {location}") + L.INFO(f"Created Location object: {location}") return location except aiohttp.ClientError as e: - ERR(f"Error: {e}") + L.ERR(f"Error: {e}") return None @@ -75,7 +77,7 @@ async def reverse_geocode(latitude: float, longitude: float) -> Optional[Locatio ## NOT YET IMPLEMENTED async def geocode(zip_code: Optional[str] = None, latitude: Optional[float] = None, longitude: Optional[float] = None, city: Optional[str] = None, state: Optional[str] = None, country_code: str = 'US') -> Location: if (latitude is None or longitude is None) and (zip_code is None) and (city is None or state is None): - ERR(f"Must provide sufficient information for geocoding!") + L.ERR(f"Must provide sufficient information for geocoding!") return None try: @@ -105,7 +107,7 @@ async def geocode(zip_code: Optional[str] = None, latitude: Optional[float] = No query += " ORDER BY distance LIMIT 1;" - DEBUG(f"Executing query: {query} with params: {params}") + L.DEBUG(f"Executing query: {query} with params: {params}") # Execute the query with the provided parameters result = await conn.fetchrow(query, *params) @@ -126,14 +128,14 @@ async def geocode(zip_code: Optional[str] = None, latitude: Optional[float] = No elevation=result.get('elevation', 0), distance=result.get('distance') ) - DEBUG(f"Found location: {location_info}") + L.DEBUG(f"Found location: {location_info}") return location_info else: - DEBUG("No location found with provided parameters.") + L.DEBUG("No location found with provided parameters.") return Location() except Exception as e: - ERR(f"Error occurred: {e}") + L.ERR(f"Error occurred: {e}") raise Exception("An error occurred while processing your request") @@ -149,22 +151,22 @@ async def localize_datetime(dt, fetch_loc: bool = False): try: if isinstance(dt, str): dt = dateutil_parse(dt) - DEBUG(f"{initial_dt} was a string so we attempted converting to datetime. Result: {dt}") + L.DEBUG(f"{initial_dt} was a string so we attempted converting to datetime. Result: {dt}") if isinstance(dt, datetime): - DEBUG(f"{dt} is a datetime object, so we will ensure it is tz-aware.") + L.DEBUG(f"{dt} is a datetime object, so we will ensure it is tz-aware.") if dt.tzinfo is None: dt = dt.replace(tzinfo=TZ) - # DEBUG(f"{dt} should now be tz-aware. Returning it now.") + # L.DEBUG(f"{dt} should now be tz-aware. Returning it now.") return dt else: - # DEBUG(f"{dt} already was tz-aware. Returning it now.") + # L.DEBUG(f"{dt} already was tz-aware. Returning it now.") return dt else: - ERR(f"Conversion failed") + L.ERR(f"Conversion failed") raise TypeError("Conversion failed") except Exception as e: - ERR(f"Error parsing datetime: {e}") + L.ERR(f"Error parsing datetime: {e}") raise TypeError("Input must be a string or datetime object") @@ -220,7 +222,7 @@ def get_elevation(latitude, longitude): return None except requests.exceptions.RequestException as e: - ERR(f"Error: {e}") + L.ERR(f"Error: {e}") return None @@ -235,7 +237,7 @@ async def fetch_locations(start: datetime, end: datetime = None) -> List[Locatio if start_datetime.time() == datetime.min.time() and end_datetime.time() == datetime.min.time(): end_datetime = end_datetime.replace(hour=23, minute=59, second=59) - DEBUG(f"Fetching locations between {start_datetime} and {end_datetime}") + L.DEBUG(f"Fetching locations between {start_datetime} and {end_datetime}") async with DB.get_connection() as conn: locations = [] @@ -252,7 +254,7 @@ async def fetch_locations(start: datetime, end: datetime = None) -> List[Locatio ORDER BY datetime DESC ''', start_datetime.replace(tzinfo=None), end_datetime.replace(tzinfo=None)) - DEBUG(f"Range locations query returned: {range_locations}") + L.DEBUG(f"Range locations query returned: {range_locations}") locations.extend(range_locations) if not locations and (end is None or start_datetime.date() == end_datetime.date()): @@ -269,11 +271,11 @@ async def fetch_locations(start: datetime, end: datetime = None) -> List[Locatio LIMIT 1 ''', start_datetime.replace(tzinfo=None)) - DEBUG(f"Fallback query returned: {location_data}") + L.DEBUG(f"Fallback query returned: {location_data}") if location_data: locations.append(location_data) - DEBUG(f"Locations found: {locations}") + L.DEBUG(f"Locations found: {locations}") # Sort location_data based on the datetime field in descending order sorted_locations = sorted(locations, key=lambda x: x['datetime'], reverse=True) @@ -305,7 +307,7 @@ async def fetch_locations(start: datetime, end: datetime = None) -> List[Locatio async def fetch_last_location_before(datetime: datetime) -> Optional[Location]: datetime = await localize_datetime(datetime) - DEBUG(f"Fetching last location before {datetime}") + L.DEBUG(f"Fetching last location before {datetime}") async with DB.get_connection() as conn: @@ -325,10 +327,10 @@ async def fetch_last_location_before(datetime: datetime) -> Optional[Location]: await conn.close() if location_data: - DEBUG(f"Last location found: {location_data}") + L.DEBUG(f"Last location found: {location_data}") return Location(**location_data) else: - DEBUG("No location found before the specified datetime") + L.DEBUG("No location found before the specified datetime") return None @@ -385,7 +387,7 @@ async def generate_map(start_date: datetime, end_date: datetime): async def post_location(location: Location): - DEBUG(f"post_location called with {location.datetime}") + L.DEBUG(f"post_location called with {location.datetime}") async with DB.get_connection() as conn: try: @@ -404,7 +406,7 @@ async def post_location(location: Location): VALUES ($1, ST_SetSRID(ST_MakePoint($2, $3, $4), 4326), $5, $6, $7, $8, $9, $10, $11, $12, $13) ''', localized_datetime, location.longitude, location.latitude, location.elevation, location.city, location.state, location.zip, location.street, action, device_type, device_model, device_name, device_os) await conn.close() - INFO(f"Successfully posted location: {location.latitude}, {location.longitude} on {localized_datetime}") + L.INFO(f"Successfully posted location: {location.latitude}, {location.longitude} on {localized_datetime}") return { 'datetime': localized_datetime, 'latitude': location.latitude, @@ -421,8 +423,8 @@ async def post_location(location: Location): 'device_os': device_os } except Exception as e: - ERR(f"Error posting location {e}") - ERR(traceback.format_exc()) + L.ERR(f"Error posting location {e}") + L.ERR(traceback.format_exc()) return None @@ -449,10 +451,10 @@ async def post_locate_endpoint(locations: Union[Location, List[Location]]): "device_os": "Unknown" } - DEBUG(f"datetime before localization: {location.datetime}") + L.DEBUG(f"datetime before localization: {location.datetime}") # Convert datetime string to timezone-aware datetime object location.datetime = await localize_datetime(location.datetime) - DEBUG(f"datetime after localization: {location.datetime}") + L.DEBUG(f"datetime after localization: {location.datetime}") # Perform reverse geocoding geocoded_location = await reverse_geocode(location.latitude, location.longitude) @@ -474,12 +476,12 @@ async def post_locate_endpoint(locations: Union[Location, List[Location]]): async def get_last_location() -> Optional[Location]: query_datetime = datetime.now(TZ) - DEBUG(f"Query_datetime: {query_datetime}") + L.DEBUG(f"Query_datetime: {query_datetime}") location = await fetch_last_location_before(query_datetime) if location: - DEBUG(f"location: {location}") + L.DEBUG(f"location: {location}") return location return None @@ -500,7 +502,7 @@ async def get_locate(datetime_str: str, all: bool = False): try: date_time = await localize_datetime(datetime_str) except ValueError as e: - ERR(f"Invalid datetime string provided: {datetime_str}") + L.ERR(f"Invalid datetime string provided: {datetime_str}") return ["ERROR: INVALID DATETIME PROVIDED. USE YYYYMMDDHHmmss or YYYYMMDD format."] locations = await fetch_locations(date_time) @@ -537,7 +539,7 @@ def get_elevation_srtm(latitude, longitude, srtm_file): return elevation except Exception as e: - ERR(f"Error: {e}") + L.ERR(f"Error: {e}") return None """ diff --git a/sijapi/routers/note.py b/sijapi/routers/note.py index f83157a..afd2d76 100644 --- a/sijapi/routers/note.py +++ b/sijapi/routers/note.py @@ -1,3 +1,6 @@ +''' +Manages an Obsidian vault, in particular daily notes, using information and functionality drawn from the other routers, primarily calendar, email, ig, llm, rag, sd, serve, time, tts, and weather. +''' from fastapi import APIRouter, BackgroundTasks, File, UploadFile, Form, HTTPException, Response, Query, Path as FastAPIPath from fastapi.responses import JSONResponse, PlainTextResponse from io import BytesIO @@ -20,8 +23,7 @@ from datetime import timedelta, datetime, time as dt_time, date as dt_date from fastapi import HTTPException, status from pathlib import Path from fastapi import APIRouter, Query, HTTPException -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL, INFO -from sijapi import YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, BASE_URL, OBSIDIAN_BANNER_SCENE, DEFAULT_11L_VOICE, DEFAULT_VOICE, TZ +from sijapi import L, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, BASE_URL, OBSIDIAN_BANNER_SCENE, DEFAULT_11L_VOICE, DEFAULT_VOICE, TZ from sijapi.routers import tts, llm, time, sd, locate, weather, asr, calendar from sijapi.routers.locate import Location from sijapi.utilities import assemble_journal_path, convert_to_12_hour_format, sanitize_filename, convert_degrees_to_cardinal, HOURLY_COLUMNS_MAPPING @@ -169,7 +171,7 @@ async def note_add_endpoint(file: Optional[UploadFile] = File(None), text: Optio raise HTTPException(status_code=400, detail="Either text or a file must be provided") else: result = await process_for_daily_note(file, text, source) - INFO(f"Result on /note/add: {result}") + L.INFO(f"Result on /note/add: {result}") return JSONResponse(result, status_code=204) async def process_for_daily_note(file: Optional[UploadFile] = File(None), text: Optional[str] = None, source: Optional[str] = None): @@ -205,7 +207,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text: text_entry = text if text else "" - INFO(f"transcription: {transcription}\nfile_entry: {file_entry}\ntext_entry: {text_entry}") + L.INFO(f"transcription: {transcription}\nfile_entry: {file_entry}\ntext_entry: {text_entry}") return await add_to_daily_note(transcription, file_entry, text_entry, now) @@ -307,7 +309,7 @@ added: {timestamp} obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]" body += f"{obsidian_link}\n\n" except Exception as e: - ERR(f"Failed in the TTS portion of clipping: {e}") + L.ERR(f"Failed in the TTS portion of clipping: {e}") body += f"> [!summary]+\n" body += f"> {summary}\n\n" @@ -320,12 +322,12 @@ added: {timestamp} with open(markdown_filename, 'w', encoding=encoding) as md_file: md_file.write(markdown_content) - INFO(f"Successfully saved to {markdown_filename}") + L.INFO(f"Successfully saved to {markdown_filename}") return markdown_filename except Exception as e: - ERR(f"Failed to clip: {str(e)}") + L.ERR(f"Failed to clip: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) @@ -368,7 +370,7 @@ async def process_article( banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" except Exception as e: - ERR(f"No image found in article") + L.ERR(f"No image found in article") authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.get('authors', ['Unknown'])) @@ -397,7 +399,7 @@ tags: obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]" body += f"{obsidian_link}\n\n" except Exception as e: - ERR(f"Failed to generate TTS for np3k. {e}") + L.ERR(f"Failed to generate TTS for np3k. {e}") try: body += f"by {authors} in [{parsed_content.get('domain', urlparse(url).netloc.replace('www.', ''))}]({url}).\n\n" @@ -407,22 +409,22 @@ tags: markdown_content = frontmatter + body except Exception as e: - ERR(f"Failed to combine elements of article markdown.") + L.ERR(f"Failed to combine elements of article markdown.") try: with open(markdown_filename, 'w', encoding=encoding) as md_file: md_file.write(markdown_content) - INFO(f"Successfully saved to {markdown_filename}") + L.INFO(f"Successfully saved to {markdown_filename}") add_to_daily_note return markdown_filename except Exception as e: - ERR(f"Failed to write markdown file") + L.ERR(f"Failed to write markdown file") raise HTTPException(status_code=500, detail=str(e)) except Exception as e: - ERR(f"Failed to clip {url}: {str(e)}") + L.ERR(f"Failed to clip {url}: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) @@ -435,7 +437,7 @@ async def parse_article(url: str, source: Optional[str] = None): np3k.set_html(source) np3k.parse() - INFO(f"Parsed {np3k.title}") + L.INFO(f"Parsed {np3k.title}") title = np3k.title or traf.title @@ -445,7 +447,7 @@ async def parse_article(url: str, source: Optional[str] = None): try: date = await locate.localize_datetime(date) except: - DEBUG(f"Failed to localize {date}") + L.DEBUG(f"Failed to localize {date}") date = await locate.localize_datetime(datetime.now()) excerpt = np3k.meta_description or traf.description content = trafilatura.extract(source, output_format="markdown", include_comments=False) or np3k.text @@ -509,22 +511,22 @@ excerpt: {parsed_content.get('excerpt', '')} body += content markdown_content = frontmatter + body except Exception as e: - ERR(f"Failed to combine elements of article markdown.") + L.ERR(f"Failed to combine elements of article markdown.") try: with open(markdown_path, 'w', encoding=encoding) as md_file: md_file.write(markdown_content) - INFO(f"Successfully saved to {markdown_path}") + L.INFO(f"Successfully saved to {markdown_path}") add_to_daily_note return markdown_path except Exception as e: - ERR(f"Failed to write markdown file") + L.ERR(f"Failed to write markdown file") raise HTTPException(status_code=500, detail=str(e)) except Exception as e: - ERR(f"Failed to clip {url}: {str(e)}") + L.ERR(f"Failed to clip {url}: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) @@ -549,13 +551,13 @@ def download_file(url, folder): with open(filepath, 'wb') as f: f.write(response.content) else: - ERR(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}") + L.ERR(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}") return None else: - ERR(f"Failed to download image: {url}, status code: {response.status_code}") + L.ERR(f"Failed to download image: {url}, status code: {response.status_code}") return None except Exception as e: - ERR(f"Failed to download image: {url}, error: {str(e)}") + L.ERR(f"Failed to download image: {url}, error: {str(e)}") return None return filename @@ -591,7 +593,7 @@ async def update_frontmatter(date_time: datetime, key: str, value: str): # Check if the file exists if not file_path.exists(): - CRITICAL(f"Markdown file not found at {file_path}") + L.CRIT(f"Markdown file not found at {file_path}") raise HTTPException(status_code=404, detail="Markdown file not found.") # Read the file @@ -638,19 +640,19 @@ async def banner_endpoint(dt: str, location: str = None, mood: str = None, other ''' Endpoint (POST) that generates a new banner image for the Obsidian daily note for a specified date, taking into account optional additional information, then updates the frontmatter if necessary. ''' - DEBUG(f"banner_endpoint requested with date: {dt} ({type(dt)})") + L.DEBUG(f"banner_endpoint requested with date: {dt} ({type(dt)})") date_time = await locate.localize_datetime(dt) - DEBUG(f"date_time after localization: {date_time} ({type(date_time)})") + L.DEBUG(f"date_time after localization: {date_time} ({type(date_time)})") jpg_path = await generate_banner(date_time, location, mood=mood, other_context=other_context) return jpg_path async def generate_banner(dt, location: Location = None, forecast: str = None, mood: str = None, other_context: str = None): - DEBUG(f"Location: {location}, forecast: {forecast}, mood: {mood}, other_context: {other_context}") + L.DEBUG(f"Location: {location}, forecast: {forecast}, mood: {mood}, other_context: {other_context}") date_time = await locate.localize_datetime(dt) - DEBUG(f"generate_banner called with date_time: {date_time}") + L.DEBUG(f"generate_banner called with date_time: {date_time}") destination_path, local_path = assemble_journal_path(date_time, filename="Banner", extension=".jpg", no_timestamp = True) - DEBUG(f"destination path generated: {destination_path}") + L.DEBUG(f"destination path generated: {destination_path}") if not location: locations = await locate.fetch_locations(date_time) @@ -683,11 +685,11 @@ async def generate_banner(dt, location: Location = None, forecast: str = None, m prompt = "Generate an aesthetically appealing banner image for a daily note that helps to visualize the following scene information: " prompt += "\n".join([display_name, forecast, mood, other_context]) - DEBUG(f"Prompt: {prompt}") + L.DEBUG(f"Prompt: {prompt}") # sd.workflow(prompt: str, scene: str = None, size: str = None, style: str = "photorealistic", earlyurl: bool = False, destination_path: str = None): final_path = await sd.workflow(prompt, scene=OBSIDIAN_BANNER_SCENE, size="1080x512", style="romantic", earlyout="local", destination_path=destination_path) if not str(local_path) in str(final_path): - INFO(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}") + L.INFO(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}") jpg_embed = f"\"![[{local_path}]]\"" await update_frontmatter(date_time, "banner", jpg_embed) @@ -704,7 +706,7 @@ async def note_weather_get( try: date_time = datetime.now() if date == "0" else locate.localize_datetime(date) - DEBUG(f"date: {date} .. date_time: {date_time}") + L.DEBUG(f"date: {date} .. date_time: {date_time}") content = await update_dn_weather(date_time) #, lat, lon) return JSONResponse(content={"forecast": content}, status_code=200) @@ -712,7 +714,7 @@ async def note_weather_get( return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code) except Exception as e: - ERR(f"Error in note_weather_get: {str(e)}") + L.ERR(f"Error in note_weather_get: {str(e)}") raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") @@ -726,7 +728,7 @@ async def post_update_daily_weather_and_calendar_and_timeslips(date: str) -> Pla async def update_dn_weather(date_time: datetime): try: - DEBUG(f"Updating weather for {date_time}") + L.DEBUG(f"Updating weather for {date_time}") places = await locate.fetch_locations(date_time) place = places[0] @@ -735,37 +737,37 @@ async def update_dn_weather(date_time: datetime): city = locate.find_override_locations(lat, lon) if city: - INFO(f"Using override location: {city}") + L.INFO(f"Using override location: {city}") else: if place.city and place.city != "": city = place.city - INFO(f"City in data: {city}") + L.INFO(f"City in data: {city}") else: loc = locate.reverse_geocode(lat, lon) - DEBUG(f"loc: {loc}") + L.DEBUG(f"loc: {loc}") city = loc.name city = city if city else loc.city city = city if city else loc.house_number + ' ' + loc.road - INFO(f"City geocoded: {city}") + L.INFO(f"City geocoded: {city}") # Assemble journal path absolute_path, relative_path = assemble_journal_path(date_time, filename="Weather", extension=".md", no_timestamp = True) - DEBUG(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}") + L.DEBUG(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}") try: - DEBUG(f"passing date_time {date_time}, {lat}/{lon} into fetch_and_store") + L.DEBUG(f"passing date_time {date_time}, {lat}/{lon} into fetch_and_store") day = await weather.get_weather(date_time, lat, lon) - DEBUG(f"day information obtained from get_weather: {day}") + L.DEBUG(f"day information obtained from get_weather: {day}") if day: DailyWeather = day.get('DailyWeather') HourlyWeather = day.get('HourlyWeather') if DailyWeather: - DEBUG(f"Day: {DailyWeather}") + L.DEBUG(f"Day: {DailyWeather}") icon = DailyWeather.get('icon') - DEBUG(f"Icon: {icon}") + L.DEBUG(f"Icon: {icon}") weather_icon, admonition = get_icon_and_admonition(icon) if icon else (":LiSunMoon:", "ad-weather") @@ -833,39 +835,39 @@ async def update_dn_weather(date_time: datetime): detailed_forecast += assemble_hourly_data_table(times, condition_symbols, temps, winds) detailed_forecast += f"```\n\n" - DEBUG(f"Detailed forecast: {detailed_forecast}.") + L.DEBUG(f"Detailed forecast: {detailed_forecast}.") with open(absolute_path, 'w', encoding='utf-8') as note_file: note_file.write(detailed_forecast) - DEBUG(f"Operation complete.") + L.DEBUG(f"Operation complete.") return narrative else: - ERR(f"Failed to get DailyWeather from day: {day}") + L.ERR(f"Failed to get DailyWeather from day: {day}") else: - ERR(f"Failed to get day") + L.ERR(f"Failed to get day") raise HTTPException(status_code=500, detail="Failed to retrieve weather data") except HTTPException as e: - ERR(f"HTTP error: {e}") - ERR(traceback.format_exc()) + L.ERR(f"HTTP error: {e}") + L.ERR(traceback.format_exc()) raise e except Exception as e: - ERR(f"Error: {e}") - ERR(traceback.format_exc()) + L.ERR(f"Error: {e}") + L.ERR(traceback.format_exc()) raise HTTPException(status_code=999, detail=f"Error: {e}") except ValueError as ve: - ERR(f"Value error in update_dn_weather: {str(ve)}") - ERR(traceback.format_exc()) + L.ERR(f"Value error in update_dn_weather: {str(ve)}") + L.ERR(traceback.format_exc()) raise HTTPException(status_code=400, detail=f"Value error: {str(ve)}") except Exception as e: - ERR(f"Error in update_dn_weather: {str(e)}") - ERR(traceback.format_exc()) + L.ERR(f"Error in update_dn_weather: {str(e)}") + L.ERR(traceback.format_exc()) raise HTTPException(status_code=500, detail=f"Error in update_dn_weather: {str(e)}") def format_hourly_time(hour): @@ -873,8 +875,8 @@ def format_hourly_time(hour): hour_12 = convert_to_12_hour_format(hour.get("datetime")) return hour_12 except Exception as e: - ERR(f"Error in format_hourly_time: {str(e)}") - ERR(traceback.format_exc()) + L.ERR(f"Error in format_hourly_time: {str(e)}") + L.ERR(traceback.format_exc()) return "" @@ -885,7 +887,7 @@ def format_hourly_icon(hour, sunrise, sunset): precip = hour.get('precip', float(0.0)) precip_prob = hour.get('precipprob', float(0.0)) - DEBUG(f"precip: {precip}, prob: {precip_prob}") + L.DEBUG(f"precip: {precip}, prob: {precip_prob}") sp_str = None @@ -909,8 +911,8 @@ def format_hourly_icon(hour, sunrise, sunset): return formatted except Exception as e: - ERR(f"Error in format_hourly_special: {str(e)}") - ERR(traceback.format_exc()) + L.ERR(f"Error in format_hourly_special: {str(e)}") + L.ERR(traceback.format_exc()) return "" def format_hourly_temperature(hour): @@ -918,8 +920,8 @@ def format_hourly_temperature(hour): temp_str = f"{hour.get('temp', '')}˚ F" return temp_str except Exception as e: - ERR(f"Error in format_hourly_temperature: {str(e)}") - ERR(traceback.format_exc()) + L.ERR(f"Error in format_hourly_temperature: {str(e)}") + L.ERR(traceback.format_exc()) return "" def format_hourly_wind(hour): @@ -929,8 +931,8 @@ def format_hourly_wind(hour): wind_str = f"{str(windspeed)}:LiWind: {winddir}" return wind_str except Exception as e: - ERR(f"Error in format_hourly_wind: {str(e)}") - ERR(traceback.format_exc()) + L.ERR(f"Error in format_hourly_wind: {str(e)}") + L.ERR(traceback.format_exc()) return "" def assemble_hourly_data_table(times, condition_symbols, temps, winds): @@ -943,7 +945,7 @@ def assemble_hourly_data_table(times, condition_symbols, temps, winds): def get_icon_and_admonition(icon_str) -> Tuple: - DEBUG(f"Received request for emoji {icon_str}") + L.DEBUG(f"Received request for emoji {icon_str}") if icon_str.startswith(":") and icon_str.endswith(":"): return icon_str @@ -1048,7 +1050,7 @@ async def format_events_as_markdown(event_data: Dict[str, Union[str, List[Dict[s total_events = len(event_data["events"]) event_markdown = f"```ad-events" for event in event_data["events"]: - DEBUG(f"event busy status: {event['busy']}; all day status: {event['all_day']}") + L.DEBUG(f"event busy status: {event['busy']}; all day status: {event['all_day']}") if not event['name'].startswith('TC '): url = f"hook://ical/eventID={event['uid']}calendarID=17" if event['url']: @@ -1126,18 +1128,18 @@ async def note_events_endpoint(date: str = Query(None)): return PlainTextResponse(content=response, status_code=200) async def update_daily_note_events(date_time: datetime): - DEBUG(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}") + L.DEBUG(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}") try: events = await calendar.get_events(date_time, date_time) - DEBUG(f"Raw events: {events}") + L.DEBUG(f"Raw events: {events}") event_data = { "date": date_time.strftime('%Y-%m-%d'), "events": events } events_markdown = await format_events_as_markdown(event_data) - DEBUG(f"Markdown events: {events_markdown}") + L.DEBUG(f"Markdown events: {events_markdown}") absolute_path, _ = assemble_journal_path(date_time, filename="Events", extension=".md", no_timestamp = True) - DEBUG(f"Writing events to file: {absolute_path}") + L.DEBUG(f"Writing events to file: {absolute_path}") with open(absolute_path, 'w', encoding='utf-8') as note_file: note_file.write(events_markdown) @@ -1145,6 +1147,6 @@ async def update_daily_note_events(date_time: datetime): return events_markdown except Exception as e: - ERR(f"Error processing events: {e}") + L.ERR(f"Error processing events: {e}") raise HTTPException(status_code=500, detail=str(e)) diff --git a/sijapi/routers/sd.py b/sijapi/routers/sd.py index 641f7b5..8ae25b4 100644 --- a/sijapi/routers/sd.py +++ b/sijapi/routers/sd.py @@ -30,8 +30,7 @@ import shutil # from photoprism.Photo import Photo # from webdav3.client import Client from sijapi.routers.llm import query_ollama -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL -from sijapi import COMFYUI_URL, COMFYUI_LAUNCH_CMD, COMFYUI_DIR, COMFYUI_OUTPUT_DIR, HOST_PORT, TS_SUBNET, SD_CONFIG_PATH, SD_IMAGE_DIR, SD_WORKFLOWS_DIR, LOCAL_HOSTS, BASE_URL +from sijapi import L, COMFYUI_URL, COMFYUI_LAUNCH_CMD, COMFYUI_DIR, COMFYUI_OUTPUT_DIR, HOST_PORT, TS_SUBNET, SD_CONFIG_PATH, SD_IMAGE_DIR, SD_WORKFLOWS_DIR, LOCAL_HOSTS, BASE_URL sd = APIRouter() @@ -79,12 +78,12 @@ async def workflow(prompt: str, scene: str = None, size: str = None, style: str scene_workflow = random.choice(scene_data['workflows']) if size: - DEBUG(f"Specified size: {size}") + L.DEBUG(f"Specified size: {size}") size = size if size else scene_workflow.get('size', '1024x1024') width, height = map(int, size.split('x')) - DEBUG(f"Parsed width: {width}; parsed height: {height}") + L.DEBUG(f"Parsed width: {width}; parsed height: {height}") workflow_path = Path(SD_WORKFLOWS_DIR) / scene_workflow['workflow'] workflow_data = json.loads(workflow_path.read_text()) @@ -123,7 +122,7 @@ async def generate_and_save_image(prompt_id, saved_file_key, max_size, destinati jpg_file_path = await save_as_jpg(image_data, prompt_id, quality=90, max_size=max_size, destination_path=destination_path) if Path(jpg_file_path) != Path(destination_path): - ERR(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}") + L.ERR(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}") except Exception as e: print(f"Error in generate_and_save_image: {e}") @@ -215,11 +214,11 @@ def set_presets(workflow_data, preset_values): if 'inputs' in workflow_data.get(preset_node, {}): workflow_data[preset_node]['inputs'][preset_key] = preset_value else: - DEBUG("Node not found in workflow_data") + L.DEBUG("Node not found in workflow_data") else: - DEBUG("Required data missing in preset_values") + L.DEBUG("Required data missing in preset_values") else: - DEBUG("No preset_values found") + L.DEBUG("No preset_values found") def get_return_path(destination_path): @@ -318,10 +317,10 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0): # shareable_link = f"https://{PHOTOPRISM_URL}/p/{photo_uuid}" # return shareable_link # else: - # ERR("Could not find the uploaded photo details.") + # L.ERR("Could not find the uploaded photo details.") # return None # except Exception as e: - # ERR(f"Error in upload_and_get_shareable_link: {e}") + # L.ERR(f"Error in upload_and_get_shareable_link: {e}") # return None @@ -434,13 +433,13 @@ Even more important, it finds and returns the key to the filepath where the file workflow[key] = random.randint(1000000000000, 9999999999999) elif key in ["width", "max_width", "scaled_width", "height", "max_height", "scaled_height", "side_length", "size", "value", "dimension", "dimensions", "long", "long_side", "short", "short_side", "length"]: - DEBUG(f"Got a hit for a dimension: {key} {value}") + L.DEBUG(f"Got a hit for a dimension: {key} {value}") if value == 1023: workflow[key] = post.get("width", 1024) - DEBUG(f"Set {key} to {workflow[key]}.") + L.DEBUG(f"Set {key} to {workflow[key]}.") elif value == 1025: workflow[key] = post.get("height", 1024) - DEBUG(f"Set {key} to {workflow[key]}.") + L.DEBUG(f"Set {key} to {workflow[key]}.") update_recursive(workflow) return found_key[0] diff --git a/sijapi/routers/serve.py b/sijapi/routers/serve.py index 8ea6cc3..b5240e7 100644 --- a/sijapi/routers/serve.py +++ b/sijapi/routers/serve.py @@ -2,21 +2,39 @@ Web server module. Used by other modules when serving static content is required, e.g. the sd image generation module. Also used to serve PUBLIC_KEY. ''' import os -from fastapi import APIRouter, Form, HTTPException, Request, Response -from fastapi.responses import FileResponse, PlainTextResponse -from pathlib import Path +import io +import json +import time +import base64 +import asyncio +import subprocess +import requests +import paramiko +import aiohttp +import httpx from datetime import datetime +from hashlib import sha256 +from pathlib import Path +from typing import List, Optional +from pydantic import BaseModel +from PyPDF2 import PdfReader +from fastapi import APIRouter, Form, HTTPException, Request, Response, BackgroundTasks, status +from fastapi.responses import FileResponse, PlainTextResponse, JSONResponse, RedirectResponse from selenium.webdriver.chrome.service import Service from selenium.webdriver.chrome.options import Options from webdriver_manager.chrome import ChromeDriverManager from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC -from pathlib import Path -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL + +from sijapi import ( + L, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY, + COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR, + MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, DATA_DIR, SD_IMAGE_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR +) from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path from sijapi.routers.locate import localize_datetime -from sijapi import DATA_DIR, SD_IMAGE_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR + serve = APIRouter(tags=["public"]) @@ -54,13 +72,13 @@ async def get_file(file_path: str): date_time = await localize_datetime(file_path); absolute_path, local_path = assemble_journal_path(date_time, no_timestamp = True) except ValueError as e: - DEBUG(f"Unable to parse {file_path} as a date, now trying to use it as a local path") + L.DEBUG(f"Unable to parse {file_path} as a date, now trying to use it as a local path") absolute_path = OBSIDIAN_VAULT_DIR / file_path if not absolute_path.suffix: absolute_path = Path(absolute_path.with_suffix(".md")) if not absolute_path.is_file(): - WARN(f"{absolute_path} is not a valid file it seems.") + L.WARN(f"{absolute_path} is not a valid file it seems.") elif absolute_path.suffix == '.md': try: with open(absolute_path, 'r', encoding='utf-8') as file: @@ -72,3 +90,333 @@ async def get_file(file_path: str): return FileResponse(absolute_path) else: raise HTTPException(status_code=400, detail="Unsupported file type") + + +with open(CASETABLE_PATH, 'r') as file: + CASETABLE = json.load(file) + +class WidgetUpdate(BaseModel): + text: Optional[str] = None + progress: Optional[str] = None + icon: Optional[str] = None + color: Optional[str] = None + url: Optional[str] = None + shortcut: Optional[str] = None + graph: Optional[str] = None + + +@serve.get("/health_check") +def hook_health(): + shellfish_health_check() + +@serve.post("/update_widget") +def hook_widget_update(update: WidgetUpdate): + shellfish_update_widget(update) + +@serve.get("/alert") +async def hook_alert(request: Request): + alert = request.query_params.get('alert') + if not alert: + raise HTTPException(status_code=400, detail='No alert provided.') + + return await notify(alert) + +@serve.post("/alert/cd") +async def hook_changedetection(webhook_data: dict): + body = webhook_data.get("body", {}) + message = body.get("message", "") + + if message and any(word in message.split() for word in ["SPI", "sierra", "pacific"]): + filename = ALERTS_DIR / f"alert_{int(time.time())}.json" + filename.write_text(json.dumps(webhook_data, indent=4)) + + notify(message) + + return {"status": "received"} + + +@serve.post("/cl/search") +async def hook_cl_search(request: Request, background_tasks: BackgroundTasks): + client_ip = request.client.host + L.DEBUG(f"Received request from IP: {client_ip}") + data = await request.json() + payload = data['payload'] + results = data['payload']['results'] + + # Save the payload data + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_search.json" + with open(payload_file, 'w') as file: + json.dump(payload, file, indent=2) + + for result in results: + background_tasks.add_task(cl_search_process_result, result) + return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK) + +@serve.post("/cl/docket") +async def hook_cl_docket(request: Request): + client_ip = request.client.host + L.DEBUG(f"Received request from IP: {client_ip}") + data = await request.json() + await cl_docket(data, client_ip) + +async def notify(alert: str): + try: + await notify_shellfish(alert) + + if TS_ID == MAC_ID: + await notify_local(alert) + else: + await notify_remote(f"{MAC_ID}.{TS_TAILNET}.net", alert, MAC_UN, MAC_PW) + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to send alert: {str(e)}") + + return {"message": alert} + +async def notify_local(message: str): + await asyncio.to_thread(os.system, f'osascript -e \'display notification "{message}" with title "Notification Title"\'') + + +# Asynchronous remote notification using paramiko SSH +async def notify_remote(host: str, message: str, username: str = None, password: str = None, key_filename: str = None): + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + connect_kwargs = {'hostname': host, 'username': username} + if key_filename: + connect_kwargs['key_filename'] = key_filename + else: + connect_kwargs['password'] = password + + await asyncio.to_thread(ssh.connect, **connect_kwargs) + await asyncio.to_thread(ssh.exec_command, f'osascript -e \'display notification "{message}" with title "Notification Title"\'') + ssh.close() + + + +async def notify_shellfish(alert: str): + key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b" + user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm" + iv = "ab5bbeb426015da7eedcee8bee3dffb7" + + plain = "Secure ShellFish Notify 2.0\n" + base64.b64encode(alert.encode()).decode() + "\n" + + openssl_command = [ + "openssl", "enc", "-aes-256-cbc", "-base64", "-K", key, "-iv", iv + ] + + process = await asyncio.to_thread(subprocess.Popen, openssl_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = await asyncio.to_thread(process.communicate, plain.encode()) + + if process.returncode != 0: + raise Exception(f"OpenSSL encryption failed: {stderr.decode()}") + + base64_encoded = stdout.decode().strip() + + url = f"https://secureshellfish.app/push/?user={user}&mutable" + headers = {"Content-Type": "text/plain"} + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers, data=base64_encoded) as response: + if response.status != 200: + raise Exception(f"Failed to send notification: {response.status_code}") + +## SHELLFISH ## +def shellfish_health_check(): + addresses = [ + "https://api.sij.ai/health", + "http://100.64.64.20:4444/health", + "http://100.64.64.30:4444/health", + "http://100.64.64.11:4444/health", + "http://100.64.64.15:4444/health" + ] + + results = [] + up_count = 0 + for address in addresses: + try: + response = requests.get(address) + if response.status_code == 200: + results.append(f"{address} is up") + up_count += 1 + else: + results.append(f"{address} returned status code {response.status_code}") + except requests.exceptions.RequestException: + results.append(f"{address} is down") + + # Generate a simple text-based graph + graph = '|' * up_count + '.' * (len(addresses) - up_count) + text_update = "\n".join(results) + + widget_command = ["widget", "--text", text_update, "--text", f"Graph: {graph}", "--icon", "network"] + output = shellfish_run_widget_command(widget_command) + return {"output": output, "graph": graph} + + +def shellfish_update_widget(update: WidgetUpdate): + widget_command = ["widget"] + + if update.text: + widget_command.extend(["--text", update.text]) + if update.progress: + widget_command.extend(["--progress", update.progress]) + if update.icon: + widget_command.extend(["--icon", update.icon]) + if update.color: + widget_command.extend(["--color", update.color]) + if update.url: + widget_command.extend(["--url", update.url]) + if update.shortcut: + widget_command.extend(["--shortcut", update.shortcut]) + if update.graph: + widget_command.extend(["--text", update.graph]) + + output = shellfish_run_widget_command(widget_command) + return {"output": output} + + +def shellfish_run_widget_command(args: List[str]): + result = subprocess.run(args, capture_output=True, text=True, shell=True) + if result.returncode != 0: + raise HTTPException(status_code=500, detail=result.stderr) + return result.stdout + + +### COURTLISTENER FUNCTIONS ### +async def cl_docket(data, client_ip, background_tasks: BackgroundTasks): + payload = data['payload'] + results = data['payload']['results'] + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_docket.json" + with open(payload_file, 'w') as file: + json.dump(payload, file, indent=2) + + for result in results: + background_tasks.add_task(cl_docket_process, result) + return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK) + +async def cl_docket_process(result): + async with httpx.AsyncClient() as session: + await cl_docket_process_result(result, session) + +async def cl_docket_process_result(result, session): + docket = str(result.get('docket')) + case_code, case_shortname = cl_case_details(docket) + date_filed = result.get('date_filed', 'No Date Filed') + + try: + date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d') + except ValueError: + date_filed_formatted = 'NoDateFiled' + + # Fetching court docket information from the API + url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}" + headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'} + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=headers) as response: + if response.status == 200: + L.DEBUG(f"Fetching CourtListener docket information for {docket}...") + data = await response.json() + court_docket = data['results'][0]['docket_number_core'] + court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number + case_name = data['results'][0]['case_name'] + L.DEBUG(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.") + else: + L.DEBUG("Failed to fetch data from CourtListener API.") + court_docket = 'NoCourtDocket' + case_name = 'NoCaseName' + + for document in result.get('recap_documents', []): + filepath_ia = document.get('filepath_ia') + filepath_local = document.get('filepath_local') + + if filepath_ia: + file_url = filepath_ia + L.DEBUG(f"Found IA file at {file_url}.") + elif filepath_local: + file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}" + L.DEBUG(f"Found local file at {file_url}.") + else: + L.DEBUG(f"No file URL found in filepath_ia or filepath_local for one of the documents.") + continue + + document_number = document.get('document_number', 'NoDocumentNumber') + description = document.get('description', 'NoDescription').replace(" ", "_").replace("/", "_") + description = description[:50] # Truncate description + # case_shortname = case_name # TEMPORARY OVERRIDE + file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf" + target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name + target_path.parent.mkdir(parents=True, exist_ok=True) + await cl_download_file(file_url, target_path, session) + L.DEBUG(f"Downloaded {file_name} to {target_path}") + +def cl_case_details(docket): + case_info = CASETABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"}) + case_code = case_info.get("code") + short_name = case_info.get("shortname") + return case_code, short_name + +async def cl_download_file(url: str, path: Path, session: aiohttp.ClientSession = None): + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36' + } + async with aiohttp.ClientSession() as session: + L.DEBUG(f"Attempting to download {url} to {path}.") + try: + async with session.get(url, headers=headers, allow_redirects=True) as response: + if response.status == 403: + L.ERR(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.") + return + response.raise_for_status() + + # Check if the response content type is a PDF + content_type = response.headers.get('Content-Type') + if content_type != 'application/pdf': + L.ERR(f"Invalid content type: {content_type}. Skipping download.") + return + + # Create an in-memory buffer to store the downloaded content + buffer = io.BytesIO() + async for chunk in response.content.iter_chunked(1024): + buffer.write(chunk) + + # Reset the buffer position to the beginning + buffer.seek(0) + + # Validate the downloaded PDF content + try: + PdfReader(buffer) + except Exception as e: + L.ERR(f"Invalid PDF content: {str(e)}. Skipping download.") + return + + # If the PDF is valid, write the content to the file on disk + path.parent.mkdir(parents=True, exist_ok=True) + with path.open('wb') as file: + file.write(buffer.getvalue()) + + except Exception as e: + L.ERR(f"Error downloading file: {str(e)}") + + +async def cl_search_process_result(result): + async with httpx.AsyncClient() as session: + download_url = result.get('download_url') + court_id = result.get('court_id') + case_name_short = result.get('caseNameShort') + case_name = result.get('caseName') + L.DEBUG(f"Received payload for case {case_name} ({court_id}) and download url {download_url}") + + court_folder = court_id + + if case_name_short: + case_folder = case_name_short + else: + case_folder = case_name + + file_name = download_url.split('/')[-1] + target_path = Path(COURTLISTENER_SEARCH_DIR) / court_folder / case_folder / file_name + target_path.parent.mkdir(parents=True, exist_ok=True) + + await cl_download_file(download_url, target_path, session) + L.DEBUG(f"Downloaded {file_name} to {target_path}") diff --git a/sijapi/routers/time.py b/sijapi/routers/time.py index ccb936c..e04e893 100644 --- a/sijapi/routers/time.py +++ b/sijapi/routers/time.py @@ -1,3 +1,6 @@ +''' +Uses the Timing.app API to get nicely formatted timeslip charts and spreadsheets. +''' import tempfile import os import json @@ -22,8 +25,7 @@ from typing import Optional, List, Dict, Union, Tuple from collections import defaultdict from dotenv import load_dotenv from traceback import format_exc -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL -from sijapi import HOME_DIR, TIMING_API_KEY, TIMING_API_URL +from sijapi import L, HOME_DIR, TIMING_API_KEY, TIMING_API_URL from sijapi.routers.locate import localize_datetime ### INITIALIZATIONS ### @@ -61,17 +63,17 @@ async def post_time_entry_to_timing(entry: Dict): 'Accept': 'application/json', 'X-Time-Zone': 'America/Los_Angeles' } - DEBUG(f"Received entry: {entry}") + L.DEBUG(f"Received entry: {entry}") response = None # Initialize response try: async with httpx.AsyncClient() as client: response = await client.post(url, headers=headers, json=entry) response.raise_for_status() # This will only raise for 4xx and 5xx responses except httpx.HTTPStatusError as exc: - DEBUG(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}") + L.DEBUG(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}") raise HTTPException(status_code=exc.response.status_code, detail=str(exc.response.text)) except Exception as exc: - DEBUG(f"General exception caught: {exc}") + L.DEBUG(f"General exception caught: {exc}") raise HTTPException(status_code=500, detail="An unexpected error occurred") if response: diff --git a/sijapi/routers/tts.py b/sijapi/routers/tts.py index 2eca2d2..34cfbd0 100644 --- a/sijapi/routers/tts.py +++ b/sijapi/routers/tts.py @@ -1,3 +1,6 @@ +''' +Uses xtts-v2 and/or the Elevenlabs API for text to speech. +''' from fastapi import APIRouter, UploadFile, HTTPException, Response, Form, File, BackgroundTasks, Depends, Request from fastapi.responses import Response, StreamingResponse, FileResponse from fastapi.responses import StreamingResponse, PlainTextResponse @@ -22,8 +25,7 @@ import tempfile import random import re import os -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL -from sijapi import HOME_DIR, DATA_DIR, DEFAULT_VOICE, TTS_DIR, TTS_SEGMENTS_DIR, VOICE_DIR, PODCAST_DIR, TTS_OUTPUT_DIR, ELEVENLABS_API_KEY +from sijapi import L, HOME_DIR, DATA_DIR, DEFAULT_VOICE, TTS_DIR, TTS_SEGMENTS_DIR, VOICE_DIR, PODCAST_DIR, TTS_OUTPUT_DIR, ELEVENLABS_API_KEY from sijapi.utilities import sanitize_filename @@ -48,7 +50,7 @@ async def list_11l_voices(): async with httpx.AsyncClient() as client: try: response = await client.get(url, headers=headers) - DEBUG(f"Response: {response}") + L.DEBUG(f"Response: {response}") if response.status_code == 200: voices_data = response.json().get("voices", []) formatted_list = "" @@ -58,7 +60,7 @@ async def list_11l_voices(): formatted_list += f"{name}: `{id}`\n" except Exception as e: - ERR(f"Error determining voice ID: {str(e)}") + L.ERR(f"Error determining voice ID: {str(e)}") return PlainTextResponse(formatted_list, status_code=200) @@ -68,15 +70,15 @@ async def list_11l_voices(): def select_voice(voice_name: str) -> str: try: voice_file = VOICE_DIR / f"{voice_name}.wav" - DEBUG(f"select_voice received query to use voice: {voice_name}. Looking for {voice_file} inside {VOICE_DIR}.") + L.DEBUG(f"select_voice received query to use voice: {voice_name}. Looking for {voice_file} inside {VOICE_DIR}.") if voice_file.is_file(): return str(voice_file) else: raise HTTPException(status_code=404, detail="Voice file not found") except Exception as e: - ERR(f"Voice file not found: {str(e)}") - ERR(traceback.format_exc()) + L.ERR(f"Voice file not found: {str(e)}") + L.ERR(traceback.format_exc()) raise HTTPException(status_code=404, detail="Voice file not found") @@ -110,8 +112,8 @@ async def generate_speech_endpoint( else: return await generate_speech(background_tasks, text_content, voice, voice_file, model, speed, podcast) except Exception as e: - ERR(f"Error in TTS: {str(e)}") - ERR(traceback.format_exc()) + L.ERR(f"Error in TTS: {str(e)}") + L.ERR(traceback.format_exc()) raise HTTPException(status_code=666, detail="error in TTS") @@ -134,24 +136,24 @@ async def generate_speech( model = model if model else await get_model(voice, voice_file) if model == "eleven_turbo_v2": - INFO(f"Using ElevenLabs.") + L.INFO(f"Using ElevenLabs.") audio_file_path = await elevenlabs_tts(model, text, voice, title, output_dir) return str(audio_file_path) elif model == "xtts": - INFO(f"Using XTTS2") + L.INFO(f"Using XTTS2") final_output_dir = await local_tts(text, speed, voice, voice_file, podcast, background_tasks, title, output_dir) background_tasks.add_task(os.remove, str(final_output_dir)) return str(final_output_dir) else: raise HTTPException(status_code=400, detail="Invalid model specified") except HTTPException as e: - ERR(f"HTTP error: {e}") - ERR(traceback.format_exc()) + L.ERR(f"HTTP error: {e}") + L.ERR(traceback.format_exc()) raise e except Exception as e: - ERR(f"Error: {e}") - ERR(traceback.format_exc()) + L.ERR(f"Error: {e}") + L.ERR(traceback.format_exc()) raise e @@ -165,7 +167,7 @@ async def get_model(voice: str = None, voice_file: UploadFile = None): raise HTTPException(status_code=400, detail="No model or voice specified") async def determine_voice_id(voice_name: str) -> str: - DEBUG(f"Searching for voice id for {voice_name}") + L.DEBUG(f"Searching for voice id for {voice_name}") hardcoded_voices = { "alloy": "E3A1KVbKoWSIKSZwSUsW", @@ -182,23 +184,23 @@ async def determine_voice_id(voice_name: str) -> str: if voice_name in hardcoded_voices: voice_id = hardcoded_voices[voice_name] - DEBUG(f"Found voice ID - {voice_id}") + L.DEBUG(f"Found voice ID - {voice_id}") return voice_id - DEBUG(f"Requested voice not among the hardcoded options.. checking with 11L next.") + L.DEBUG(f"Requested voice not among the hardcoded options.. checking with 11L next.") url = "https://api.elevenlabs.io/v1/voices" headers = {"xi-api-key": ELEVENLABS_API_KEY} async with httpx.AsyncClient() as client: try: response = await client.get(url, headers=headers) - DEBUG(f"Response: {response}") + L.DEBUG(f"Response: {response}") if response.status_code == 200: voices_data = response.json().get("voices", []) for voice in voices_data: if voice_name == voice["voice_id"] or voice_name == voice["name"]: return voice["voice_id"] except Exception as e: - ERR(f"Error determining voice ID: {str(e)}") + L.ERR(f"Error determining voice ID: {str(e)}") # as a last fallback, rely on David Attenborough return "b42GBisbu9r5m5n6pHF7" @@ -269,7 +271,7 @@ async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) return str(new_file) else: - DEBUG(f"{datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}") + L.DEBUG(f"{datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}") return select_voice(DEFAULT_VOICE) @@ -306,7 +308,7 @@ async def local_tts( for i, segment in enumerate(segments): segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav" - DEBUG(f"Segment file path: {segment_file_path}") + L.DEBUG(f"Segment file path: {segment_file_path}") # Run TTS in a separate thread await asyncio.to_thread( @@ -317,7 +319,7 @@ async def local_tts( speaker_wav=[voice_file_path], language="en" ) - DEBUG(f"Segment file generated: {segment_file_path}") + L.DEBUG(f"Segment file generated: {segment_file_path}") # Load and combine audio in a separate thread segment_audio = await asyncio.to_thread(AudioSegment.from_wav, str(segment_file_path)) @@ -392,7 +394,7 @@ def split_text(text, target_length=35, max_length=50): if segment_length + len(sentence_words) > max_length: segments.append(' '.join(current_segment)) - DEBUG(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}") + L.DEBUG(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}") current_segment = [sentence] else: @@ -400,7 +402,7 @@ def split_text(text, target_length=35, max_length=50): if current_segment: segments.append(' '.join(current_segment)) - DEBUG(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}") + L.DEBUG(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}") return segments @@ -412,7 +414,7 @@ def clean_text_for_tts(text: str) -> str: text = re.sub(r'\s+', ' ', text).strip() return text else: - DEBUG(f"No text received.") + L.DEBUG(f"No text received.") diff --git a/sijapi/routers/weather.py b/sijapi/routers/weather.py index a2a825f..7175684 100644 --- a/sijapi/routers/weather.py +++ b/sijapi/routers/weather.py @@ -1,3 +1,6 @@ +''' +Uses the VisualCrossing API and Postgres/PostGIS to source local weather forecasts and history. +''' import asyncio from fastapi import APIRouter, HTTPException from fastapi import HTTPException @@ -7,8 +10,7 @@ from typing import Dict from datetime import datetime from shapely.wkb import loads from binascii import unhexlify -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL -from sijapi import VISUALCROSSING_API_KEY, TZ, DB +from sijapi import L, VISUALCROSSING_API_KEY, TZ, DB from sijapi.utilities import haversine from sijapi.routers import locate @@ -17,12 +19,12 @@ weather = APIRouter() async def get_weather(date_time: datetime, latitude: float, longitude: float): # request_date_str = date_time.strftime("%Y-%m-%d") - DEBUG(f"Called get_weather with lat: {latitude}, lon: {longitude}, date_time: {date_time}") + L.DEBUG(f"Called get_weather with lat: {latitude}, lon: {longitude}, date_time: {date_time}") daily_weather_data = await get_weather_from_db(date_time, latitude, longitude) fetch_new_data = True if daily_weather_data: try: - DEBUG(f"Daily weather data from db: {daily_weather_data}") + L.DEBUG(f"Daily weather data from db: {daily_weather_data}") last_updated = str(daily_weather_data['DailyWeather'].get('last_updated')) last_updated = await locate.localize_datetime(last_updated) stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location')) @@ -34,50 +36,50 @@ async def get_weather(date_time: datetime, latitude: float, longitude: float): hourly_weather = daily_weather_data.get('HourlyWeather') - DEBUG(f"Hourly: {hourly_weather}") + L.DEBUG(f"Hourly: {hourly_weather}") - DEBUG(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\n") + L.DEBUG(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\n") request_haversine = haversine(latitude, longitude, stored_lat, stored_lon) - DEBUG(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}") + L.DEBUG(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}") if last_updated and (date_time <= datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0: - DEBUG(f"We can use existing data... :')") + L.DEBUG(f"We can use existing data... :')") fetch_new_data = False except Exception as e: - ERR(f"Error in get_weather: {e}") + L.ERR(f"Error in get_weather: {e}") if fetch_new_data: - DEBUG(f"We require new data!") + L.DEBUG(f"We require new data!") request_date_str = date_time.strftime("%Y-%m-%d") url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}" try: async with AsyncClient() as client: response = await client.get(url) if response.status_code == 200: - DEBUG(f"Successfully obtained data from VC...") + L.DEBUG(f"Successfully obtained data from VC...") try: weather_data = response.json() store_result = await store_weather_to_db(date_time, weather_data) if store_result == "SUCCESS": - DEBUG(f"New weather data for {request_date_str} stored in database...") + L.DEBUG(f"New weather data for {request_date_str} stored in database...") else: - ERR(f"Failed to store weather data for {request_date_str} in database! {store_result}") + L.ERR(f"Failed to store weather data for {request_date_str} in database! {store_result}") - DEBUG(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}") + L.DEBUG(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}") daily_weather_data = await get_weather_from_db(date_time, latitude, longitude) if daily_weather_data is not None: return daily_weather_data else: raise HTTPException(status_code=500, detail="Weather data was not properly stored.") except Exception as e: - ERR(f"Problem parsing VC response or storing data: {e}") + L.ERR(f"Problem parsing VC response or storing data: {e}") raise HTTPException(status_code=500, detail="Weather data was not properly stored.") else: - ERR(f"Failed to fetch weather data: {response.status_code}, {response.text}") + L.ERR(f"Failed to fetch weather data: {response.status_code}, {response.text}") except Exception as e: - ERR(f"Exception during API call: {e}") + L.ERR(f"Exception during API call: {e}") return daily_weather_data @@ -86,7 +88,7 @@ async def store_weather_to_db(date_time: datetime, weather_data: dict): async with DB.get_connection() as conn: try: day_data = weather_data.get('days')[0] - DEBUG(f"day_data.get('sunrise'): {day_data.get('sunrise')}") + L.DEBUG(f"day_data.get('sunrise'): {day_data.get('sunrise')}") # Handle preciptype and stations as PostgreSQL arrays preciptype_array = day_data.get('preciptype', []) or [] @@ -127,7 +129,7 @@ async def store_weather_to_db(date_time: datetime, weather_data: dict): location_point ) except Exception as e: - ERR(f"Failed to prepare database query in store_weather_to_db! {e}") + L.ERR(f"Failed to prepare database query in store_weather_to_db! {e}") try: daily_weather_query = ''' @@ -144,8 +146,8 @@ async def store_weather_to_db(date_time: datetime, weather_data: dict): ''' # Debug logs for better insights - # DEBUG("Executing query: %s", daily_weather_query) - # DEBUG("With parameters: %s", daily_weather_params) + # L.DEBUG("Executing query: %s", daily_weather_query) + # L.DEBUG("With parameters: %s", daily_weather_params) # Execute the query to insert daily weather data async with conn.transaction(): @@ -159,8 +161,8 @@ async def store_weather_to_db(date_time: datetime, weather_data: dict): # hour_data['datetime'] = parse_date(hour_data.get('datetime')) hour_timestamp = date_str + ' ' + hour_data['datetime'] hour_data['datetime'] = await locate.localize_datetime(hour_timestamp) - DEBUG(f"Processing hours now...") - # DEBUG(f"Processing {hour_data['datetime']}") + L.DEBUG(f"Processing hours now...") + # L.DEBUG(f"Processing {hour_data['datetime']}") hour_preciptype_array = hour_data.get('preciptype', []) or [] hour_stations_array = hour_data.get('stations', []) or [] @@ -202,24 +204,24 @@ async def store_weather_to_db(date_time: datetime, weather_data: dict): RETURNING id ''' # Debug logs for better insights - # DEBUG("Executing query: %s", hourly_weather_query) - # DEBUG("With parameters: %s", hourly_weather_params) + # L.DEBUG("Executing query: %s", hourly_weather_query) + # L.DEBUG("With parameters: %s", hourly_weather_params) # Execute the query to insert hourly weather data async with conn.transaction(): hourly_weather_id = await conn.fetchval(hourly_weather_query, *hourly_weather_params) - # ERR(f"\n{hourly_weather_id}") + # L.ERR(f"\n{hourly_weather_id}") except Exception as e: - ERR(f"EXCEPTION: {e}") + L.ERR(f"EXCEPTION: {e}") except Exception as e: - ERR(f"EXCEPTION: {e}") + L.ERR(f"EXCEPTION: {e}") return "SUCCESS" except Exception as e: - ERR(f"Error in dailyweather storage: {e}") + L.ERR(f"Error in dailyweather storage: {e}") @@ -239,10 +241,10 @@ async def get_weather_from_db(date_time: datetime, latitude: float, longitude: f daily_weather_data = await conn.fetchrow(query, query_date, longitude, latitude, longitude, latitude) if daily_weather_data is None: - DEBUG(f"No daily weather data retrieved from database.") + L.DEBUG(f"No daily weather data retrieved from database.") return None # else: - # DEBUG(f"Daily_weather_data: {daily_weather_data}") + # L.DEBUG(f"Daily_weather_data: {daily_weather_data}") # Query to get hourly weather data query = ''' SELECT HW.* FROM HourlyWeather HW @@ -254,9 +256,9 @@ async def get_weather_from_db(date_time: datetime, latitude: float, longitude: f 'DailyWeather': dict(daily_weather_data), 'HourlyWeather': [dict(row) for row in hourly_weather_data], } - # DEBUG(f"day: {day}") + # L.DEBUG(f"day: {day}") return day except Exception as e: - ERR(f"Unexpected error occurred: {e}") + L.ERR(f"Unexpected error occurred: {e}") diff --git a/sijapi/utilities.py b/sijapi/utilities.py index a89fae0..f5151e8 100644 --- a/sijapi/utilities.py +++ b/sijapi/utilities.py @@ -25,8 +25,7 @@ import asyncpg from sshtunnel import SSHTunnelForwarder from fastapi import Depends, HTTPException, Request, UploadFile from fastapi.security.api_key import APIKeyHeader -from sijapi import DEBUG, INFO, WARN, ERR, CRITICAL -from sijapi import GLOBAL_API_KEY, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_FILENAME_LENGTH +from sijapi import L, GLOBAL_API_KEY, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_FILENAME_LENGTH api_key_header = APIKeyHeader(name="Authorization") @@ -73,15 +72,15 @@ def assemble_journal_path(date_time: datetime, subdir: str = None, filename: str else: if has_valid_extension(filename, [".md", ".m4a", ".wav", ".aiff", ".flac", ".mp3", ".mp4", ".pdf", ".js", ".json", ".yaml", ".py"]): - DEBUG(f"Provided filename has a valid extension, so we use that.") + L.DEBUG(f"Provided filename has a valid extension, so we use that.") else: filename = f"{filename}.md" - DEBUG(f"We are forcing the file to be a .md") + L.DEBUG(f"We are forcing the file to be a .md") relative_path = relative_path / filename else: - DEBUG(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.") + L.DEBUG(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.") return None, None absolute_path = OBSIDIAN_VAULT_DIR / relative_path @@ -134,14 +133,14 @@ def get_extension(file): return file_extension except Exception as e: - ERR(f"Unable to get extension of {file}") + L.ERR(f"Unable to get extension of {file}") raise e def sanitize_filename(text, max_length=MAX_FILENAME_LENGTH): """Sanitize a string to be used as a safe filename while protecting the file extension.""" - DEBUG(f"Filename before sanitization: {text}") + L.DEBUG(f"Filename before sanitization: {text}") text = re.sub(r'\s+', ' ', text) sanitized = re.sub(ALLOWED_FILENAME_CHARS, '', text) @@ -153,7 +152,7 @@ def sanitize_filename(text, max_length=MAX_FILENAME_LENGTH): base_name = base_name[:max_base_length].rstrip() final_filename = base_name + extension - DEBUG(f"Filename after sanitization: {final_filename}") + L.DEBUG(f"Filename after sanitization: {final_filename}") return final_filename @@ -163,16 +162,16 @@ def check_file_name(file_name, max_length=255): needs_sanitization = False if len(file_name) > max_length: - DEBUG(f"Filename exceeds maximum length of {max_length}: {file_name}") + L.DEBUG(f"Filename exceeds maximum length of {max_length}: {file_name}") needs_sanitization = True if re.search(ALLOWED_FILENAME_CHARS, file_name): - DEBUG(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}") + L.DEBUG(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}") needs_sanitization = True if re.search(r'\s{2,}', file_name): - DEBUG(f"Filename contains multiple consecutive spaces: {file_name}") + L.DEBUG(f"Filename contains multiple consecutive spaces: {file_name}") needs_sanitization = True if file_name != file_name.strip(): - DEBUG(f"Filename has leading or trailing spaces: {file_name}") + L.DEBUG(f"Filename has leading or trailing spaces: {file_name}") needs_sanitization = True return needs_sanitization @@ -186,7 +185,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False): if check_file_name(filename): file_path = Path(dirpath) / filename impermissible_files.append(file_path) - DEBUG(f"Impermissible file found: {file_path}") + L.DEBUG(f"Impermissible file found: {file_path}") # Sanitize the file name new_filename = sanitize_filename(filename) @@ -204,7 +203,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False): # Rename the file if rename: os.rename(file_path, new_file_path) - DEBUG(f"Renamed: {file_path} -> {new_file_path}") + L.DEBUG(f"Renamed: {file_path} -> {new_file_path}") return impermissible_files @@ -246,13 +245,13 @@ async def ocr_pdf(file_path: str) -> str: texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, image) for image in images)) return ' '.join(texts) except Exception as e: - ERR(f"Error during OCR: {str(e)}") + L.ERR(f"Error during OCR: {str(e)}") return "" async def extract_text_from_pdf(file_path: str) -> str: if not await is_valid_pdf(file_path): - ERR(f"Invalid PDF file: {file_path}") + L.ERR(f"Invalid PDF file: {file_path}") return "" text = '' @@ -270,7 +269,7 @@ async def extract_text_from_pdf(file_path: str) -> str: if text and not should_use_ocr(text, num_pages): return clean_text(text) except Exception as e: - ERR(f"Error extracting text with PyPDF2: {str(e)}") + L.ERR(f"Error extracting text with PyPDF2: {str(e)}") # If PyPDF2 extraction fails or is insufficient, fall back to pdfminer.six try: @@ -278,10 +277,10 @@ async def extract_text_from_pdf(file_path: str) -> str: if text_pdfminer and not should_use_ocr(text_pdfminer, num_pages): return clean_text(text_pdfminer) except Exception as e: - ERR(f"Error extracting text with pdfminer.six: {e}") + L.ERR(f"Error extracting text with pdfminer.six: {e}") # If both methods fail or are deemed insufficient, use OCR as the last resort - INFO("Falling back to OCR for text extraction...") + L.INFO("Falling back to OCR for text extraction...") return await ocr_pdf(file_path) async def is_valid_pdf(file_path: str) -> bool: @@ -290,12 +289,12 @@ async def is_valid_pdf(file_path: str) -> bool: kind = filetype.guess(file_path) return kind.mime == 'application/pdf' except Exception as e: - ERR(f"Error checking file type: {e}") + L.ERR(f"Error checking file type: {e}") return False async def extract_text_from_pdf(file_path: str) -> str: if not await is_valid_pdf(file_path): - ERR(f"Invalid PDF file: {file_path}") + L.ERR(f"Invalid PDF file: {file_path}") return "" text = '' @@ -307,23 +306,23 @@ async def extract_text_from_pdf(file_path: str) -> str: if text.strip(): # Successfully extracted text return clean_text(text) except Exception as e: - ERR(f"Error extracting text with PyPDF2: {str(e)}") + L.ERR(f"Error extracting text with PyPDF2: {str(e)}") try: text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path) if text_pdfminer.strip(): # Successfully extracted text return clean_text(text_pdfminer) except Exception as e: - ERR(f"Error extracting text with pdfminer.six: {str(e)}") + L.ERR(f"Error extracting text with pdfminer.six: {str(e)}") # Fall back to OCR - INFO("Falling back to OCR for text extraction...") + L.INFO("Falling back to OCR for text extraction...") try: images = convert_from_path(file_path) ocr_texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, img) for img in images)) return ' '.join(ocr_texts).strip() except Exception as e: - ERR(f"OCR failed: {str(e)}") + L.ERR(f"OCR failed: {str(e)}") return "" async def extract_text_from_docx(file_path: str) -> str: @@ -426,7 +425,7 @@ def encode_image_to_base64(image_path): base64_str = base64.b64encode(byte_data).decode('utf-8') return base64_str else: - DEBUG(f"Error: File does not exist at {image_path}") + L.DEBUG(f"Error: File does not exist at {image_path}") def resize_and_convert_image(image_path, max_size=2160, quality=80): with Image.open(image_path) as img: