Compare commits

...

3 commits

Author SHA1 Message Date
sanj
39a9375649 repaired 2024-07-11 14:29:16 -07:00
sanj
b981efb3c4 repaired 2024-07-11 14:23:37 -07:00
sanj
93e20ca83f Auto-update: Wed Jul 10 10:27:18 PDT 2024 2024-07-10 10:27:18 -07:00
24 changed files with 1031 additions and 1185 deletions

View file

@ -1,53 +1,63 @@
# __init__.py # __init__.py
import os import os
import json
import yaml
from pathlib import Path from pathlib import Path
import ipaddress import ipaddress
import multiprocessing
from dotenv import load_dotenv from dotenv import load_dotenv
from dateutil import tz from dateutil import tz
from pathlib import Path from pathlib import Path
from pydantic import BaseModel
from typing import List, Optional
from .classes import AutoResponder, IMAPConfig, SMTPConfig, EmailAccount, EmailContact, IncomingEmail, Database, Geocoder, APIConfig, Configuration
from .logs import Logger from .logs import Logger
from .classes import Database, Geocoder, APIConfig, Configuration, EmailConfiguration, Dir
### Initial initialization # INITIALization
API = APIConfig.load('api', 'secrets') BASE_DIR = Path(__file__).resolve().parent
Dir = Dir() CONFIG_DIR = BASE_DIR / "config"
ENV_PATH = Dir.CONFIG / ".env" ENV_PATH = CONFIG_DIR / ".env"
LOGS_DIR = Dir.LOGS
L = Logger("Central", LOGS_DIR)
os.makedirs(LOGS_DIR, exist_ok=True)
load_dotenv(ENV_PATH) load_dotenv(ENV_PATH)
LOGS_DIR = BASE_DIR / "logs"
os.makedirs(LOGS_DIR, exist_ok=True)
L = Logger("Central", LOGS_DIR)
### API essentials # API essentials
DB = Database.from_yaml('db.yaml') API = APIConfig.load('api', 'secrets')
Dir = Configuration.load('dirs')
HOST = f"{API.BIND}:{API.PORT}"
LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost']
SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255')
MAX_CPU_CORES = min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count()/2))), multiprocessing.cpu_count())
DB = Database.from_env()
ASR = Configuration.load('asr')
IMG = Configuration.load('img')
Cal = Configuration.load('cal', 'secrets')
print(f"Cal configuration: {Cal.__dict__}")
Email = EmailConfiguration.load('email', 'secrets')
LLM = Configuration.load('llm', 'secrets')
News = Configuration.load('news', 'secrets') News = Configuration.load('news', 'secrets')
Obsidian = Configuration.load('obsidian') IMG = Configuration.load('img', 'secrets')
TTS = Configuration.load('tts', 'secrets')
CourtListener = Configuration.load('courtlistener', 'secrets')
Tailscale = Configuration.load('tailscale', 'secrets')
Cloudflare = Configuration.load('cloudflare', 'secrets')
# Directories & general paths
### Directories & general paths ROUTER_DIR = BASE_DIR / "routers"
DATA_DIR = BASE_DIR / "data"
os.makedirs(DATA_DIR, exist_ok=True)
ALERTS_DIR = DATA_DIR / "alerts"
os.makedirs(ALERTS_DIR, exist_ok=True)
REQUESTS_DIR = LOGS_DIR / "requests" REQUESTS_DIR = LOGS_DIR / "requests"
os.makedirs(REQUESTS_DIR, exist_ok=True) os.makedirs(REQUESTS_DIR, exist_ok=True)
REQUESTS_LOG_PATH = LOGS_DIR / "requests.log" REQUESTS_LOG_PATH = LOGS_DIR / "requests.log"
### LOCATE AND WEATHER LOCALIZATIONS # LOCATE AND WEATHER LOCALIZATIONS
# DB = DATA_DIR / "weatherlocate.db" # deprecated USER_FULLNAME = os.getenv('USER_FULLNAME')
USER_BIO = os.getenv('USER_BIO')
NAMED_LOCATIONS = CONFIG_DIR / "named-locations.yaml"
VISUALCROSSING_BASE_URL = os.getenv("VISUALCROSSING_BASE_URL", "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline") VISUALCROSSING_BASE_URL = os.getenv("VISUALCROSSING_BASE_URL", "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline")
VISUALCROSSING_API_KEY = os.getenv("VISUALCROSSING_API_KEY") VISUALCROSSING_API_KEY = os.getenv("VISUALCROSSING_API_KEY")
GEONAMES_TXT = DATA_DIR / "geonames.txt"
LOCATIONS_CSV = DATA_DIR / "US.csv"
TZ = tz.gettz(os.getenv("TZ", "America/Los_Angeles")) TZ = tz.gettz(os.getenv("TZ", "America/Los_Angeles"))
TZ_CACHE = Dir.DATA / "tzcache.json" TZ_CACHE = DATA_DIR / "tzcache.json"
GEO = Geocoder(Dir.config.locations, TZ_CACHE) GEO = Geocoder(NAMED_LOCATIONS, TZ_CACHE)
### Obsidian & notes # Obsidian & notes
ALLOWED_FILENAME_CHARS = r'[^\w \.-]' ALLOWED_FILENAME_CHARS = r'[^\w \.-]'
MAX_PATH_LENGTH = 254 MAX_PATH_LENGTH = 254
OBSIDIAN_VAULT_DIR = Path(os.getenv("OBSIDIAN_BASE_DIR") or Path(Dir.HOME) / "Nextcloud" / "notes") OBSIDIAN_VAULT_DIR = Path(os.getenv("OBSIDIAN_BASE_DIR") or Path(Dir.HOME) / "Nextcloud" / "notes")
@ -59,22 +69,135 @@ OBSIDIAN_BANNER_SCENE = os.getenv("OBSIDIAN_BANNER_SCENE", "wallpaper")
OBSIDIAN_CHROMADB_COLLECTION = os.getenv("OBSIDIAN_CHROMADB_COLLECTION", "obsidian") OBSIDIAN_CHROMADB_COLLECTION = os.getenv("OBSIDIAN_CHROMADB_COLLECTION", "obsidian")
ARCHIVE_DIR = Path(os.getenv("ARCHIVE_DIR", OBSIDIAN_VAULT_DIR / "archive")) ARCHIVE_DIR = Path(os.getenv("ARCHIVE_DIR", OBSIDIAN_VAULT_DIR / "archive"))
os.makedirs(ARCHIVE_DIR, exist_ok=True) os.makedirs(ARCHIVE_DIR, exist_ok=True)
DOC_DIR = DATA_DIR / "docs"
os.makedirs(DOC_DIR, exist_ok=True)
### DATETIME SCHEMA FOR DAILY NOTE FOLDER HIERARCHY FORMATTING ### # DATETIME SCHEMA FOR DAILY NOTE FOLDER HIERARCHY FORMATTING ###
YEAR_FMT = os.getenv("YEAR_FMT") YEAR_FMT = os.getenv("YEAR_FMT")
MONTH_FMT = os.getenv("MONTH_FMT") MONTH_FMT = os.getenv("MONTH_FMT")
DAY_FMT = os.getenv("DAY_FMT") DAY_FMT = os.getenv("DAY_FMT")
DAY_SHORT_FMT = os.getenv("DAY_SHORT_FMT") DAY_SHORT_FMT = os.getenv("DAY_SHORT_FMT")
### Keys & passwords # Large language model
LLM_URL = os.getenv("LLM_URL", "http://localhost:11434")
LLM_SYS_MSG = os.getenv("SYSTEM_MSG", "You are a helpful AI assistant.")
DEFAULT_LLM = os.getenv("DEFAULT_LLM", "llama3")
DEFAULT_VISION = os.getenv("DEFAULT_VISION", "llava")
DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "Luna")
DEFAULT_11L_VOICE = os.getenv("DEFAULT_11L_VOICE", "Victoria")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Summarization
SUMMARY_CHUNK_SIZE = int(os.getenv("SUMMARY_CHUNK_SIZE", 16384)) # measured in tokens
SUMMARY_CHUNK_OVERLAP = int(os.getenv("SUMMARY_CHUNK_OVERLAP", 256)) # measured in tokens
SUMMARY_TPW = float(os.getenv("SUMMARY_TPW", 1.3)) # measured in tokens
SUMMARY_LENGTH_RATIO = int(os.getenv("SUMMARY_LENGTH_RATIO", 4)) # measured as original to length ratio
SUMMARY_MIN_LENGTH = int(os.getenv("SUMMARY_MIN_LENGTH", 150)) # measured in tokens
SUMMARY_MODEL = os.getenv("SUMMARY_MODEL", "dolphin-llama3:8b-256k")
SUMMARY_TOKEN_LIMIT = int(os.getenv("SUMMARY_TOKEN_LIMIT", 16384))
SUMMARY_INSTRUCT = os.getenv('SUMMARY_INSTRUCT', "You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.")
SUMMARY_INSTRUCT_TTS = os.getenv('SUMMARY_INSTRUCT_TTS', "You are an AI assistant that provides email summaries for Sanjay. Your response will undergo Text-To-Speech conversion and added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following email.")
# Stable diffusion
# Image-generation data lives under DATA_DIR/img; both folders are created at import time.
IMG_DIR = DATA_DIR / "img" / "images"
os.makedirs(IMG_DIR, exist_ok=True)
IMG_WORKFLOWS_DIR = DATA_DIR / "img" / "workflows"
os.makedirs(IMG_WORKFLOWS_DIR, exist_ok=True)
COMFYUI_URL = os.getenv('COMFYUI_URL', "http://localhost:8188")
# COMFYUI_DIR has no default. Path(None) raises TypeError, which used to abort the
# whole package import when the variable was unset; fall back to None instead
# (same convention as TS_SUBNET) so only ComfyUI-dependent code is affected.
COMFYUI_DIR = Path(os.environ['COMFYUI_DIR']) if os.getenv('COMFYUI_DIR') else None
COMFYUI_OUTPUT_DIR = COMFYUI_DIR / 'output' if COMFYUI_DIR is not None else None
COMFYUI_LAUNCH_CMD = os.getenv('COMFYUI_LAUNCH_CMD', 'mamba activate comfyui && python main.py')
IMG_CONFIG_PATH = CONFIG_DIR / 'img.yaml'
# ASR
ASR_DIR = DATA_DIR / "asr"
os.makedirs(ASR_DIR, exist_ok=True)
WHISPER_CPP_DIR = Path(Dir.HOME) / str(os.getenv("WHISPER_CPP_DIR"))
WHISPER_CPP_MODELS = os.getenv('WHISPER_CPP_MODELS', 'NULL,VOID').split(',')
# TTS
PREFERRED_TTS = os.getenv("PREFERRED_TTS", "None")
TTS_DIR = DATA_DIR / "tts"
os.makedirs(TTS_DIR, exist_ok=True)
VOICE_DIR = TTS_DIR / 'voices'
os.makedirs(VOICE_DIR, exist_ok=True)
PODCAST_DIR = os.getenv("PODCAST_DIR", TTS_DIR / "sideloads")
os.makedirs(PODCAST_DIR, exist_ok=True)
TTS_OUTPUT_DIR = TTS_DIR / 'outputs'
os.makedirs(TTS_OUTPUT_DIR, exist_ok=True)
TTS_SEGMENTS_DIR = TTS_DIR / 'segments'
os.makedirs(TTS_SEGMENTS_DIR, exist_ok=True)
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
# Calendar & email account
MS365_TOGGLE = True if os.getenv("MS365_TOGGLE") == "True" else False
ICAL_TOGGLE = True if os.getenv("ICAL_TOGGLE") == "True" else False
ICS_PATH = DATA_DIR / 'calendar.ics' # deprecated now, but maybe revive?
ICALENDARS = os.getenv('ICALENDARS', 'NULL,VOID').split(',')
EMAIL_CONFIG = CONFIG_DIR / "email.yaml"
EMAIL_LOGS = LOGS_DIR / "email"
os.makedirs(EMAIL_LOGS, exist_ok = True)
# Courtlistener & other webhooks
COURTLISTENER_DOCKETS_DIR = DATA_DIR / "courtlistener" / "dockets"
os.makedirs(COURTLISTENER_DOCKETS_DIR, exist_ok=True)
COURTLISTENER_SEARCH_DIR = DATA_DIR / "courtlistener" / "cases"
os.makedirs(COURTLISTENER_SEARCH_DIR, exist_ok=True)
CASETABLE_PATH = DATA_DIR / "courtlistener" / "cases.json"
COURTLISTENER_API_KEY = os.getenv("COURTLISTENER_API_KEY")
COURTLISTENER_BASE_URL = os.getenv("COURTLISTENER_BASE_URL", "https://www.courtlistener.com")
COURTLISTENER_DOCKETS_URL = "https://www.courtlistener.com/api/rest/v3/dockets/"
# Keys & passwords
PUBLIC_KEY_FILE = os.getenv("PUBLIC_KEY_FILE", 'you_public_key.asc')
PUBLIC_KEY = (BASE_DIR.parent / PUBLIC_KEY_FILE).read_text()
MAC_ID = os.getenv("MAC_ID") MAC_ID = os.getenv("MAC_ID")
MAC_UN = os.getenv("MAC_UN") MAC_UN = os.getenv("MAC_UN")
MAC_PW = os.getenv("MAC_PW") MAC_PW = os.getenv("MAC_PW")
TIMING_API_KEY = os.getenv("TIMING_API_KEY") TIMING_API_KEY = os.getenv("TIMING_API_KEY")
TIMING_API_URL = os.getenv("TIMING_API_URL", "https://web.timingapp.com/api/v1") TIMING_API_URL = os.getenv("TIMING_API_URL", "https://web.timingapp.com/api/v1")
PHOTOPRISM_URL = os.getenv("PHOTOPRISM_URL")
PHOTOPRISM_USER = os.getenv("PHOTOPRISM_USER")
PHOTOPRISM_PASS = os.getenv("PHOTOPRISM_PASS")
### Caddy - not fully implemented # Tailscale
# "NULL" is the placeholder this section uses for "not configured". It is not a
# parseable address, so passing it to ip_address() raised ValueError at import
# whenever TS_IP was unset. Treat unset/empty/"NULL" as None (mirroring
# TS_SUBNET below); a genuinely malformed value still raises so that
# misconfiguration stays visible.
TS_IP = (
    ipaddress.ip_address(os.environ["TS_IP"])
    if os.getenv("TS_IP", "NULL") not in ("", "NULL")
    else None
)
TS_SUBNET = ipaddress.ip_network(os.getenv("TS_SUBNET")) if os.getenv("TS_SUBNET") else None
TS_ID = os.getenv("TS_ID", "NULL")
TS_TAILNET = os.getenv("TS_TAILNET", "NULL")
# Built from TS_ID and TS_TAILNET; contains the literal "NULL" placeholders when those are unset.
TS_ADDRESS = f"http://{TS_ID}.{TS_TAILNET}.ts.net"
# Cloudflare
CF_API_BASE_URL = os.getenv("CF_API_BASE_URL")
CF_TOKEN = os.getenv("CF_TOKEN")
CF_IP = DATA_DIR / "cf_ip.txt" # to be deprecated soon
CF_DOMAINS_PATH = DATA_DIR / "cf_domains.json" # to be deprecated soon
# Caddy - not fully implemented
API.URL = os.getenv("API.URL") API.URL = os.getenv("API.URL")
CADDY_SERVER = os.getenv('CADDY_SERVER', None) CADDY_SERVER = os.getenv('CADDY_SERVER', None)
CADDYFILE_PATH = os.getenv("CADDYFILE_PATH", "") if CADDY_SERVER is not None else None CADDYFILE_PATH = os.getenv("CADDYFILE_PATH", "") if CADDY_SERVER is not None else None
CADDY_API_KEY = os.getenv("CADDY_API_KEY") CADDY_API_KEY = os.getenv("CADDY_API_KEY")
# Microsoft Graph
MS365_CLIENT_ID = os.getenv('MS365_CLIENT_ID')
MS365_SECRET = os.getenv('MS365_SECRET')
MS365_TENANT_ID = os.getenv('MS365_TENANT_ID')
MS365_CERT_PATH = CONFIG_DIR / 'MS365' / '.cert.pem' # deprecated
MS365_KEY_PATH = CONFIG_DIR / 'MS365' / '.cert.key' # deprecated
# The key file is deprecated and may not exist; reading it unconditionally
# raised FileNotFoundError and aborted the package import. None means "no key".
MS365_KEY = MS365_KEY_PATH.read_text() if MS365_KEY_PATH.is_file() else None
MS365_TOKEN_PATH = CONFIG_DIR / 'MS365' / '.token.txt'
MS365_THUMBPRINT = os.getenv('MS365_THUMBPRINT')
MS365_LOGIN_URL = os.getenv("MS365_LOGIN_URL", "https://login.microsoftonline.com")
# NOTE(review): when MS365_TENANT_ID is unset this becomes ".../None" — confirm callers check the tenant first.
MS365_AUTHORITY_URL = f"{MS365_LOGIN_URL}/{MS365_TENANT_ID}"
MS365_REDIRECT_PATH = os.getenv("MS365_REDIRECT_PATH", "https://api.sij.ai/o365/oauth_redirect")
# Comma-separated scope list, split into a Python list.
MS365_SCOPE = os.getenv("MS365_SCOPE", 'Calendars.Read,Calendars.ReadWrite,offline_access').split(',')
# Maintenance
GARBAGE_COLLECTION_INTERVAL = 60 * 60 # Run cleanup every hour
GARBAGE_TTL = 60 * 60 * 24 # Delete files older than 24 hours

View file

@ -4,25 +4,34 @@ from fastapi import FastAPI, Request, HTTPException, Response
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.base import BaseHTTPMiddleware from starlette.middleware.base import BaseHTTPMiddleware
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import ClientDisconnect
from hypercorn.asyncio import serve from hypercorn.asyncio import serve
from hypercorn.config import Config as HypercornConfig from hypercorn.config import Config as HypercornConfig
import sys import sys
import asyncio import asyncio
import httpx
import argparse import argparse
import json
import ipaddress import ipaddress
import importlib import importlib
from dotenv import load_dotenv
from pathlib import Path
from datetime import datetime
import argparse import argparse
from . import L, API, ROUTER_DIR
parser = argparse.ArgumentParser(description='Personal API.') parser = argparse.ArgumentParser(description='Personal API.')
parser.add_argument('--debug', action='store_true', help='Set log level to L.INFO') parser.add_argument('--log', type=str, default='INFO', help='Set overall log level (e.g., DEBUG, INFO, WARNING)')
parser.add_argument('--debug', nargs='+', default=[], help='Set DEBUG log level for specific modules')
parser.add_argument('--test', type=str, help='Load only the specified module.') parser.add_argument('--test', type=str, help='Load only the specified module.')
args = parser.parse_args() args = parser.parse_args()
from . import L, API, Dir
L.setup_from_args(args) L.setup_from_args(args)
app = FastAPI() print(f"Debug modules after setup: {L.debug_modules}") # Debug print
# CORSMiddleware
app = FastAPI()
app.add_middleware( app.add_middleware(
CORSMiddleware, CORSMiddleware,
allow_origins=['*'], allow_origins=['*'],
@ -51,8 +60,10 @@ class SimpleAPIKeyMiddleware(BaseHTTPMiddleware):
content={"detail": "Invalid or missing API key"} content={"detail": "Invalid or missing API key"}
) )
response = await call_next(request) response = await call_next(request)
# L.DEBUG(f"Request from {client_ip} is complete")
return response return response
# Add the middleware to your FastAPI app
app.add_middleware(SimpleAPIKeyMiddleware) app.add_middleware(SimpleAPIKeyMiddleware)
@app.exception_handler(HTTPException) @app.exception_handler(HTTPException)
@ -61,12 +72,14 @@ async def http_exception_handler(request: Request, exc: HTTPException):
L.ERR(f"Request: {request.method} {request.url}") L.ERR(f"Request: {request.method} {request.url}")
return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail}) return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
@app.middleware("http") @app.middleware("http")
async def handle_exception_middleware(request: Request, call_next): async def handle_exception_middleware(request: Request, call_next):
try: try:
response = await call_next(request) response = await call_next(request)
except RuntimeError as exc: except RuntimeError as exc:
if str(exc) == "Response content longer than Content-Length": if str(exc) == "Response content longer than Content-Length":
# Update the Content-Length header to match the actual response content length
response.headers["Content-Length"] = str(len(response.body)) response.headers["Content-Length"] = str(len(response.body))
else: else:
raise raise
@ -74,32 +87,33 @@ async def handle_exception_middleware(request: Request, call_next):
def load_router(router_name): def load_router(router_name):
router_file = Dir.ROUTERS / f'{router_name}.py' router_file = ROUTER_DIR / f'{router_name}.py'
L.DEBUG(f"Attempting to load {router_name.capitalize()}...") module_logger = L.get_module_logger(router_name)
module_logger.debug(f"Attempting to load {router_name.capitalize()}...")
if router_file.exists(): if router_file.exists():
module_path = f'sijapi.routers.{router_name}' module_path = f'sijapi.routers.{router_name}'
try: try:
module = importlib.import_module(module_path) module = importlib.import_module(module_path)
router = getattr(module, router_name) router = getattr(module, router_name)
app.include_router(router) app.include_router(router)
L.INFO(f"{router_name.capitalize()} router loaded.") # module_logger.info(f"{router_name.capitalize()} router loaded.")
except (ImportError, AttributeError) as e: except (ImportError, AttributeError) as e:
L.CRIT(f"Failed to load router {router_name}: {e}") module_logger.critical(f"Failed to load router {router_name}: {e}")
else: else:
L.ERR(f"Router file for {router_name} does not exist.") module_logger.error(f"Router file for {router_name} does not exist.")
def main(argv): def main(argv):
if args.test: if args.test:
load_router(args.test) load_router(args.test)
else: else:
L.CRIT(f"sijapi launched") L.logger.critical(f"sijapi launched")
L.CRIT(f"{args._get_args}") L.logger.critical(f"Arguments: {args}")
for module_name in API.MODULES.__fields__: for module_name in API.MODULES.__fields__:
if getattr(API.MODULES, module_name): if getattr(API.MODULES, module_name):
load_router(module_name) load_router(module_name)
config = HypercornConfig() config = HypercornConfig()
config.bind = [API.BIND] # Use the resolved BIND value config.bind = [API.BIND]
asyncio.run(serve(app, config)) asyncio.run(serve(app, config))
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -2,7 +2,6 @@
import asyncio import asyncio
import json import json
import math import math
import multiprocessing
import os import os
import re import re
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
@ -14,189 +13,27 @@ from zoneinfo import ZoneInfo
import aiofiles import aiofiles
import aiohttp import aiohttp
import asyncpg import asyncpg
from typing import Union, Any
from pydantic import BaseModel, Field, ConfigDict
import reverse_geocoder as rg import reverse_geocoder as rg
import yaml
from dotenv import load_dotenv from dotenv import load_dotenv
from pydantic import BaseModel, Field, create_model, ConfigDict, validator from pydantic import BaseModel, Field, create_model
from srtm import get_data from srtm import get_data
from timezonefinder import TimezoneFinder from timezonefinder import TimezoneFinder
from pathlib import Path
from typing import Any, Dict, List, Optional, Union, TypeVar, Type
import yaml
from typing import List, Optional
from dotenv import load_dotenv
T = TypeVar('T', bound='Configuration') T = TypeVar('T', bound='Configuration')
class HierarchicalPath(os.PathLike):
    """Path wrapper supporting attribute-style navigation and HOME/BASE shorthands.

    An instance holds three ``pathlib.Path`` values:

    * ``home`` - the home directory (``Path.home()`` unless given),
    * ``base`` - the project base directory (discovered by ``_find_base``
      unless given),
    * ``path`` - the resolved target path (defaults to ``base``).

    ``hp / "x"`` and ``hp.x`` both return a new ``HierarchicalPath`` for the
    child ``x``. Attributes that exist on the wrapped ``Path`` (``name``,
    ``parent``, ``exists`` ...) take precedence over child navigation.
    """

    def __init__(self, path=None, base=None, home=None):
        """Resolve ``path`` against ``base``/``home``; a falsy ``path`` means ``base``."""
        self.home = Path(home).expanduser() if home else Path.home()
        self.base = Path(base).resolve() if base else self._find_base()
        self.path = self._resolve_path(path) if path else self.base

    def _find_base(self):
        """Walk up from this file until a directory named ``sijapi`` (or the root) is reached."""
        current = Path(__file__).resolve().parent
        while current.name != 'sijapi' and current != current.parent:
            current = current.parent
        return current

    def _resolve_path(self, path):
        """Turn ``path`` (HierarchicalPath, Path or str) into a ``Path``.

        Strings get ``{{ HOME }}``/``{{ BASE }}`` placeholders substituted
        first. A leading ``~`` or ``HOME`` makes the rest relative to
        ``self.home``; a leading ``/`` is taken as absolute; anything else is
        relative to ``self.base``.
        """
        if isinstance(path, HierarchicalPath):
            return path.path
        if isinstance(path, Path):
            return path
        path = self._resolve_placeholders(path)
        if path.startswith(('~', 'HOME')):
            # removeprefix() drops the literal word "HOME" only. The previous
            # lstrip('HOME') stripped any run of the characters H/O/M/E, so
            # "~Music" resolved to "usic" and "~/x" style inputs only worked
            # because the "/" happened to stop the stripping.
            remainder = path.lstrip('~').removeprefix('HOME').lstrip('/')
            return self.home / remainder
        if path.startswith('/'):
            return Path(path)
        return self._resolve_relative_path(self.base / path)

    def _resolve_placeholders(self, path):
        """Replace ``{{ HOME }}`` and ``{{ BASE }}``; unknown placeholders are left untouched."""
        placeholders = {
            'HOME': str(self.home),
            'BASE': str(self.base),
        }
        pattern = r'\{\{\s*([^}]+)\s*\}\}'
        return re.sub(pattern, lambda m: placeholders.get(m.group(1).strip(), m.group(0)), path)

    def _resolve_relative_path(self, path):
        """Return ``path`` if it exists, else ``path`` with a ``.yaml`` suffix if that file exists, else ``path``."""
        if path.is_file() or path.is_dir():
            return path
        yaml_path = path.with_suffix('.yaml')
        if yaml_path.is_file():
            return yaml_path
        return path

    def __truediv__(self, other):
        return HierarchicalPath(self.path / other, base=self.base, home=self.home)

    def __getattr__(self, name):
        """Fallback lookup: wrapped ``Path`` attribute first, then child-path navigation.

        Only called when normal attribute lookup fails. This replaces the
        former ``__getattribute__`` override, which evaluated ``self.path``
        inside its own ``except`` branch and therefore recursed without bound
        on an instance whose ``path`` was not set yet (e.g. one created by
        ``copy``/``pickle`` via ``__new__``).
        """
        state = self.__dict__
        if 'path' not in state:
            # Uninitialised instance: there is nothing to delegate to.
            raise AttributeError(name)
        try:
            return getattr(state['path'], name)
        except AttributeError:
            if name.startswith('__') and name.endswith('__'):
                # Never invent a child path for protocol probes such as
                # __deepcopy__ or __setstate__: callers would receive a
                # non-callable object where they expect a method.
                raise
        return HierarchicalPath(state['path'] / name, base=self.base, home=self.home)

    def __str__(self):
        return str(self.path)

    def __repr__(self):
        return f"HierarchicalPath('{self.path}')"

    def __fspath__(self):
        return os.fspath(self.path)

    def _comparable_text(self, other):
        """Return ``str(other)`` for supported operand types, else ``None``."""
        if isinstance(other, (HierarchicalPath, Path, str)):
            return str(other)
        return None

    # Comparisons are string-based; unsupported operand types compare as False.
    def __eq__(self, other):
        text = self._comparable_text(other)
        return text is not None and str(self.path) == text

    def __lt__(self, other):
        text = self._comparable_text(other)
        return text is not None and str(self.path) < text

    def __le__(self, other):
        text = self._comparable_text(other)
        return text is not None and str(self.path) <= text

    def __gt__(self, other):
        text = self._comparable_text(other)
        return text is not None and str(self.path) > text

    def __ge__(self, other):
        text = self._comparable_text(other)
        return text is not None and str(self.path) >= text

    def __hash__(self):
        # Hashes like the wrapped Path. NOTE(review): __eq__ also accepts plain
        # strings, whose hash differs, so mixing str and HierarchicalPath keys
        # in one dict/set is unreliable.
        return hash(self.path)
class Dir(BaseModel):
    """Directory registry whose unknown attributes resolve to sub-directories of BASE.

    ``HOME`` defaults to the user's home directory and ``BASE`` to the
    directory found by ``HierarchicalPath._find_base``. Any other public
    attribute, e.g. ``Dir().LOGS``, yields ``BASE / "logs"`` (the attribute
    name lower-cased) as a ``HierarchicalPath``.
    """

    HOME: HierarchicalPath = Field(default_factory=lambda: HierarchicalPath(Path.home()))
    BASE: HierarchicalPath | None = None
    model_config = ConfigDict(arbitrary_types_allowed=True)

    @classmethod
    def determine_base(cls) -> HierarchicalPath:
        """Return the project base directory as a ``HierarchicalPath``."""
        return HierarchicalPath(HierarchicalPath()._find_base())

    def __init__(self, **data):
        super().__init__(**data)
        # BASE cannot be a plain field default because it is discovered at runtime.
        if self.BASE is None:
            self.BASE = self.determine_base()

    @classmethod
    def load(cls, yaml_path: Union[str, Path] = None) -> 'Dir':
        """Build a ``Dir`` from a YAML file, or with defaults when no path is given.

        Every string value in the file is passed through
        ``resolve_placeholders`` before the model is constructed.
        """
        yaml_path = cls._resolve_path(yaml_path) if yaml_path else None
        if yaml_path:
            with open(yaml_path, 'r', encoding='utf-8') as file:
                # safe_load returns None for an empty document; cls(**None)
                # would raise TypeError, so treat that as "no settings".
                config_data = yaml.safe_load(file) or {}
            print(f"Loaded directory configuration from {yaml_path}")
            resolved_data = cls.resolve_placeholders(config_data)
        else:
            resolved_data = {}
        return cls(**resolved_data)

    @classmethod
    def _resolve_path(cls, path: Union[str, Path]) -> Path:
        """Map a config name or relative path onto the filesystem.

        A suffix-less name becomes ``<base parent>/sijapi/config/<name>.yaml``;
        a relative path with a suffix is taken relative to the base's parent;
        an absolute path with a suffix is returned unchanged.
        """
        base_path = cls.determine_base().path.parent
        path = Path(path)
        if not path.suffix:
            path = base_path / 'sijapi' / 'config' / f"{path.name}.yaml"
        elif not path.is_absolute():
            path = base_path / path
        return path

    @classmethod
    def resolve_placeholders(cls, data: Any) -> Any:
        """Recursively resolve strings inside dicts and lists; other values pass through unchanged."""
        if isinstance(data, dict):
            return {k: cls.resolve_placeholders(v) for k, v in data.items()}
        elif isinstance(data, list):
            return [cls.resolve_placeholders(v) for v in data]
        elif isinstance(data, str):
            return cls.resolve_string_placeholders(data)
        return data

    @classmethod
    def resolve_string_placeholders(cls, value: str) -> Any:
        """Convert one config string into a ``HierarchicalPath``.

        ``{{ a.b }}`` walks attribute-by-attribute from the home directory,
        ``*~*`` is the home directory itself, and anything else is handed to
        ``HierarchicalPath`` as-is.
        """
        # Pydantic v2 does not expose model fields as class attributes, so the
        # previous ``cls.HOME`` raised AttributeError for every placeholder.
        # Build the same value the HOME field's default_factory produces.
        home = HierarchicalPath(Path.home())
        if value.startswith('{{') and value.endswith('}}'):
            parts = value.strip('{}').strip().split('.')
            result = home
            for part in parts:
                # NOTE(review): a part that names a pathlib.Path attribute
                # (e.g. "name", "parent") returns that attribute rather than a
                # child directory — confirm config keys avoid such names.
                result = getattr(result, part)
            return result
        elif value == '*~*':
            return home
        return HierarchicalPath(value)

    def __getattr__(self, name):
        # Only public names are treated as sub-directories. Private and dunder
        # lookups are handed back to BaseModel so that pydantic internals and
        # protocol probes still see a normal AttributeError instead of a path.
        if name.startswith('_'):
            return super().__getattr__(name)
        return HierarchicalPath(self.BASE / name.lower(), base=self.BASE.path, home=self.HOME.path)

    def model_dump(self, *args, **kwargs):
        """Dump the model with every top-level value converted to ``str``."""
        d = super().model_dump(*args, **kwargs)
        return {k: str(v) for k, v in d.items()}
import os
from pathlib import Path
from typing import Union, Optional, Any, Dict, List
import yaml
import re
from pydantic import BaseModel, create_model
from dotenv import load_dotenv
class Configuration(BaseModel): class Configuration(BaseModel):
HOME: Path = Field(default_factory=Path.home) HOME: Path = Path.home()
_dir_config: Optional['Configuration'] = None _dir_config: Optional['Configuration'] = None
dir: Dir = Field(default_factory=Dir)
class Config:
arbitrary_types_allowed = True
extra = "allow" # This allows extra fields
@classmethod @classmethod
def load(cls, yaml_path: Union[str, Path], secrets_path: Optional[Union[str, Path]] = None, dir_config: Optional['Configuration'] = None) -> 'Configuration': def load(cls, yaml_path: Union[str, Path], secrets_path: Optional[Union[str, Path]] = None, dir_config: Optional['Configuration'] = None) -> 'Configuration':
@ -207,6 +44,7 @@ class Configuration(BaseModel):
try: try:
with yaml_path.open('r') as file: with yaml_path.open('r') as file:
config_data = yaml.safe_load(file) config_data = yaml.safe_load(file)
print(f"Loaded configuration data from {yaml_path}") print(f"Loaded configuration data from {yaml_path}")
if secrets_path: if secrets_path:
@ -215,32 +53,25 @@ class Configuration(BaseModel):
print(f"Loaded secrets data from {secrets_path}") print(f"Loaded secrets data from {secrets_path}")
config_data.update(secrets_data) config_data.update(secrets_data)
instance = cls(**config_data) # Ensure HOME is set
if config_data.get('HOME') is None:
config_data['HOME'] = str(Path.home())
print(f"HOME was None in config, set to default: {config_data['HOME']}")
load_dotenv()
instance = cls.create_dynamic_model(**config_data)
instance._dir_config = dir_config or instance instance._dir_config = dir_config or instance
resolved_data = instance.resolve_placeholders(config_data) resolved_data = instance.resolve_placeholders(config_data)
return cls._create_nested_config(resolved_data) instance = cls.create_dynamic_model(**resolved_data)
instance._dir_config = dir_config or instance
return instance
except Exception as e: except Exception as e:
print(f"Error loading configuration: {str(e)}") print(f"Error loading configuration: {str(e)}")
raise raise
@classmethod
def _create_nested_config(cls, data):
if isinstance(data, dict):
print(f"Creating nested config for: {cls.__name__}")
print(f"Data: {data}")
return cls(**{k: cls._create_nested_config(v) for k, v in data.items()})
elif isinstance(data, list):
return [cls._create_nested_config(item) for item in data]
else:
return data
def __getattr__(self, name):
value = self.__dict__.get(name)
if isinstance(value, dict):
return Configuration(**value)
return value
@classmethod @classmethod
def _resolve_path(cls, path: Union[str, Path], default_dir: str) -> Path: def _resolve_path(cls, path: Union[str, Path], default_dir: str) -> Path:
base_path = Path(__file__).parent.parent # This will be two levels up from this file base_path = Path(__file__).parent.parent # This will be two levels up from this file
@ -267,7 +98,15 @@ class Configuration(BaseModel):
for match in matches: for match in matches:
parts = match.split('.') parts = match.split('.')
replacement = self._resolve_nested_placeholder(parts) if len(parts) == 1: # Internal reference
replacement = getattr(self._dir_config, parts[0], str(Path.home() / parts[0].lower()))
elif len(parts) == 2 and parts[0] == 'Dir':
replacement = getattr(self._dir_config, parts[1], str(Path.home() / parts[1].lower()))
elif len(parts) == 2 and parts[0] == 'ENV':
replacement = os.getenv(parts[1], '')
else:
replacement = value # Keep original if not recognized
value = value.replace('{{' + match + '}}', str(replacement)) value = value.replace('{{' + match + '}}', str(replacement))
# Convert to Path if it looks like a file path # Convert to Path if it looks like a file path
@ -275,16 +114,25 @@ class Configuration(BaseModel):
return Path(value).expanduser() return Path(value).expanduser()
return value return value
def _resolve_nested_placeholder(self, parts: List[str]) -> Any: @classmethod
current = self._dir_config def create_dynamic_model(cls, **data):
for part in parts: for key, value in data.items():
if part == 'ENV': if isinstance(value, dict):
return os.getenv(parts[-1], '') data[key] = cls.create_dynamic_model(**value)
elif hasattr(current, part): elif isinstance(value, list) and all(isinstance(item, dict) for item in value):
current = getattr(current, part) data[key] = [cls.create_dynamic_model(**item) for item in value]
else:
return str(Path.home() / part.lower()) DynamicModel = create_model(
return current f'Dynamic{cls.__name__}',
__base__=cls,
**{k: (Any, v) for k, v in data.items()}
)
return DynamicModel(**data)
class Config:
extra = "allow"
arbitrary_types_allowed = True
class APIConfig(BaseModel): class APIConfig(BaseModel):
@ -297,25 +145,23 @@ class APIConfig(BaseModel):
MODULES: Any # This will be replaced with a dynamic model MODULES: Any # This will be replaced with a dynamic model
TZ: str TZ: str
KEYS: List[str] KEYS: List[str]
MAX_CPU_CORES: int = Field(default_factory=lambda: min(
int(os.getenv("MAX_CPU_CORES", multiprocessing.cpu_count() // 2)), multiprocessing.cpu_count()
))
@classmethod @classmethod
def load(cls, config_path: Union[str, Path], secrets_path: Union[str, Path]): def load(cls, config_path: Union[str, Path], secrets_path: Union[str, Path]):
config_path = cls._resolve_path(config_path, 'config') config_path = cls._resolve_path(config_path, 'config')
secrets_path = cls._resolve_path(secrets_path, 'config') secrets_path = cls._resolve_path(secrets_path, 'config')
# Load main configuration
with open(config_path, 'r') as file: with open(config_path, 'r') as file:
config_data = yaml.safe_load(file) config_data = yaml.safe_load(file)
print(f"Loaded main config: {config_data}") print(f"Loaded main config: {config_data}") # Debug print
# Load secrets # Load secrets
try: try:
with open(secrets_path, 'r') as file: with open(secrets_path, 'r') as file:
secrets_data = yaml.safe_load(file) secrets_data = yaml.safe_load(file)
print(f"Loaded secrets: {secrets_data}") print(f"Loaded secrets: {secrets_data}") # Debug print
except FileNotFoundError: except FileNotFoundError:
print(f"Secrets file not found: {secrets_path}") print(f"Secrets file not found: {secrets_path}")
secrets_data = {} secrets_data = {}
@ -326,7 +172,7 @@ class APIConfig(BaseModel):
# Resolve internal placeholders # Resolve internal placeholders
config_data = cls.resolve_placeholders(config_data) config_data = cls.resolve_placeholders(config_data)
print(f"Resolved config: {config_data}") print(f"Resolved config: {config_data}") # Debug print
# Handle KEYS placeholder # Handle KEYS placeholder
if isinstance(config_data.get('KEYS'), list) and len(config_data['KEYS']) == 1: if isinstance(config_data.get('KEYS'), list) and len(config_data['KEYS']) == 1:
@ -338,7 +184,7 @@ class APIConfig(BaseModel):
secret_key = parts[1] secret_key = parts[1]
if secret_key in secrets_data: if secret_key in secrets_data:
config_data['KEYS'] = secrets_data[secret_key] config_data['KEYS'] = secrets_data[secret_key]
print(f"Replaced KEYS with secret: {config_data['KEYS']}") print(f"Replaced KEYS with secret: {config_data['KEYS']}") # Debug print
else: else:
print(f"Secret key '{secret_key}' not found in secrets file") print(f"Secret key '{secret_key}' not found in secrets file")
else: else:
@ -406,6 +252,8 @@ class APIConfig(BaseModel):
def active_modules(self) -> List[str]: def active_modules(self) -> List[str]:
return [module for module, is_active in self.MODULES.__dict__.items() if is_active] return [module for module, is_active in self.MODULES.__dict__.items() if is_active]
class Location(BaseModel): class Location(BaseModel):
latitude: float latitude: float
longitude: float longitude: float
@ -633,6 +481,7 @@ class Geocoder:
timezone=await self.timezone(latitude, longitude) timezone=await self.timezone(latitude, longitude)
) )
def round_coords(self, lat: float, lon: float, decimal_places: int = 2) -> Tuple[float, float]: def round_coords(self, lat: float, lon: float, decimal_places: int = 2) -> Tuple[float, float]:
return (round(lat, decimal_places), round(lon, decimal_places)) return (round(lat, decimal_places), round(lon, decimal_places))
@ -733,103 +582,55 @@ class Database(BaseModel):
await conn.close() await conn.close()
@classmethod @classmethod
def from_yaml(cls, yaml_path: Union[str, Path]): def from_env(cls):
yaml_path = Path(yaml_path) import os
if not yaml_path.is_absolute(): return cls(
yaml_path = Path(__file__).parent / 'config' / yaml_path host=os.getenv("DB_HOST", "localhost"),
port=int(os.getenv("DB_PORT", 5432)),
with open(yaml_path, 'r') as file: user=os.getenv("DB_USER"),
config = yaml.safe_load(file) password=os.getenv("DB_PASSWORD"),
return cls(**config) database=os.getenv("DB_NAME"),
db_schema=os.getenv("DB_SCHEMA")
)
def to_dict(self):
    """Return the model as a plain dict, leaving out fields whose value is ``None``."""
    as_mapping = self.dict(exclude_none=True)
    return as_mapping
class IMAPConfig(BaseModel):
    """IMAP settings: credentials, host, port and an optional ``encryption`` value."""

    username: str
    password: str
    host: str
    port: int
    # A None default needs an Optional annotation; `str = None` misdeclares the
    # field type to type checkers and schema consumers.
    encryption: Optional[str] = None
class SMTPConfig(BaseModel):
    """SMTP settings: credentials, host, port and an optional ``encryption`` value."""

    username: str
    password: str
    host: str
    port: int
    # A None default needs an Optional annotation; `str = None` misdeclares the
    # field type to type checkers and schema consumers.
    encryption: Optional[str] = None
class AutoResponder(BaseModel):
    """Settings for one auto-responder profile, including the SMTP config it uses."""

    # Identity and generation settings.
    name: str
    style: str
    context: str
    ollama_model: str = "llama3"

    # Address lists; presumably matched against incoming senders -- verify against the caller.
    whitelist: List[str]
    blacklist: List[str]

    # Optional image-generation inputs.
    image_prompt: Optional[str] = None
    image_scene: Optional[str] = None

    # Embedded SMTP settings for this profile.
    smtp: SMTPConfig
class EmailAccount(BaseModel):
    """One mail account: identity, processing flags, IMAP settings and its auto-responders."""

    name: str
    # NOTE(review): Optional[...] without an explicit default is implicitly optional
    # under pydantic v1 but required under v2 -- confirm which is intended.
    fullname: Optional[str]
    bio: Optional[str]
    # Refresh setting; units are not visible here -- TODO confirm.
    refresh: int
    summarize: bool = False
    podcast: bool = False
    imap: IMAPConfig
    autoresponders: Optional[List[AutoResponder]]
class EmailConfiguration(Configuration):
imaps: List[IMAPConfig]
smtps: List[SMTPConfig]
autoresponders: List[AutoResponder]
accounts: List[EmailAccount]
@classmethod
def _create_nested_config(cls, data):
if isinstance(data, dict):
if 'imaps' in data:
return cls(
imaps=[IMAPConfig(**imap) for imap in data['imaps']],
smtps=[SMTPConfig(**smtp) for smtp in data['smtps']],
autoresponders=[AutoResponder(**ar) for ar in data['autoresponders']],
accounts=[EmailAccount(**account) for account in data['accounts']],
**{k: v for k, v in data.items() if k not in ['imaps', 'smtps', 'autoresponders', 'accounts']}
)
else:
return data # Return the dict as-is for nested structures
elif isinstance(data, list):
return [cls._create_nested_config(item) for item in data]
else:
return data
@classmethod
def load(cls, yaml_path: Union[str, Path], secrets_path: Optional[Union[str, Path]] = None, dir_config: Optional['Configuration'] = None) -> 'EmailConfiguration':
config_data = super().load(yaml_path, secrets_path, dir_config)
return cls._create_nested_config(config_data)
# ... (rest of the methods remain the same)
def get_imap(self, username: str) -> Optional[IMAPConfig]:
return next((imap for imap in self.imaps if imap.username == username), None)
def get_smtp(self, username: str) -> Optional[SMTPConfig]:
return next((smtp for smtp in self.smtps if smtp.username == username), None)
def get_autoresponder(self, name: str) -> Optional[AutoResponder]:
return next((ar for ar in self.autoresponders if ar.name == name), None)
def get_account(self, name: str) -> Optional[EmailAccount]:
return next((account for account in self.accounts if account.name == name), None)
def get_email_accounts(self) -> List[EmailAccount]:
return self.accounts
class EmailContact(BaseModel): class EmailContact(BaseModel):
email: str email: str

View file

@ -1,99 +1,42 @@
# logs.py
import os import os
import sys import sys
from loguru import logger from loguru import logger
import traceback
class Logger:
    """Wrapper that configures the shared loguru ``logger`` and hands out bound loggers."""

    def __init__(self, name, logs_dir):
        """Remember the log directory and name; no sinks are installed until setup."""
        self.logs_dir = logs_dir
        self.name = name
        self.logger = logger
        self.debug_modules = set()

    def setup_from_args(self, args):
        """Install the file and console sinks according to parsed CLI arguments.

        Args:
            args: object exposing ``debug`` (iterable of module names, or None)
                and ``log`` (name of the console level).

        Raises:
            ValueError: if ``args.log`` does not name a known loguru level.
        """
        os.makedirs(self.logs_dir, exist_ok=True)

        # Drop previously installed sinks so repeated setup does not duplicate output.
        self.logger.remove()

        log_format = "{time:YYYY-MM-DD HH:mm:ss} - {name} - <level>{level: <8}</level> - <level>{message}</level>"

        # File handler
        self.logger.add(os.path.join(self.logs_dir, 'app.log'), rotation="2 MB", level="DEBUG", format=log_format)

        # Set debug modules; tolerate the option being absent (None).
        self.debug_modules = set(args.debug or [])

        # Resolve the console threshold once instead of on every record.
        console_threshold = self.logger.level(args.log.upper()).no

        # Console handler with custom filter
        def module_filter(record):
            if record["level"].no >= console_threshold:
                return True
            # bind(name=...) stores its value under record["extra"]; record["name"]
            # is the emitting module's __name__. Check both so the short names
            # given to get_module_logger() actually match.
            return (record["extra"].get("name") in self.debug_modules
                    or record["name"] in self.debug_modules)

        self.logger.add(sys.stdout, level="DEBUG", format=log_format, filter=module_filter, colorize=True)

        # Custom color and style mappings
        self.logger.level("CRITICAL", color="<yellow><bold><MAGENTA>")
        self.logger.level("ERROR", color="<red><bold>")
        self.logger.level("WARNING", color="<yellow><bold>")
        self.logger.level("DEBUG", color="<green><bold>")

        self.logger.info(f"Debug modules: {self.debug_modules}")

    def get_module_logger(self, module_name):
        """Return the shared logger bound with ``name=module_name`` in its extra context."""
        return self.logger.bind(name=module_name)
def INFO(self, log_message): self.logger.info(log_message)
def WARN(self, log_message): self.logger.warning(log_message)
def ERR(self, log_message):
self.logger.error(log_message)
self.logger.error(traceback.format_exc())
def CRIT(self, log_message):
self.logger.critical(log_message)
self.logger.critical(traceback.format_exc())
def get_logger(self):
return self
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--debug', action='store_true')
args = parser.parse_args()
logger_instance = Logger("test", "logs")
logger_instance.setup_from_args(args)
test_logger = logger_instance.get_logger()
print("FORCE_COLOR:", os.environ.get('FORCE_COLOR'))
print("NO_COLOR:", os.environ.get('NO_COLOR'))
print("TERM:", os.environ.get('TERM'))
print("PYCHARM_HOSTED:", os.environ.get('PYCHARM_HOSTED'))
print("PYTHONIOENCODING:", os.environ.get('PYTHONIOENCODING'))
test_logger.debug("This is a debug message")
test_logger.info("This is an info message")
test_logger.warning("This is a warning message")
test_logger.error("This is an error message")
test_logger.critical("This is a critical message")

View file

@ -14,10 +14,10 @@ from fastapi import APIRouter, HTTPException, Form, UploadFile, File, Background
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from typing import Optional from typing import Optional
from sijapi import L, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL, GARBAGE_TTL, WHISPER_CPP_DIR, MAX_CPU_CORES
from sijapi import L, API, Dir, ASR
asr = APIRouter() asr = APIRouter()
logger = L.get_module_logger("asr")
class TranscribeParams(BaseModel): class TranscribeParams(BaseModel):
model: str = Field(default="small") model: str = Field(default="small")
@ -81,13 +81,13 @@ async def transcribe_endpoint(
return JSONResponse(content={"status": "timeout", "message": "Transcription is taking longer than expected. Please check back later."}, status_code=202) return JSONResponse(content={"status": "timeout", "message": "Transcription is taking longer than expected. Please check back later."}, status_code=202)
async def transcribe_audio(file_path, params: TranscribeParams): async def transcribe_audio(file_path, params: TranscribeParams):
L.DEBUG(f"Transcribing audio file from {file_path}...") logger.debug(f"Transcribing audio file from {file_path}...")
file_path = await convert_to_wav(file_path) file_path = await convert_to_wav(file_path)
model = params.model if params.model in ASR.MODELS else 'small' model = params.model if params.model in WHISPER_CPP_MODELS else 'small'
model_path = ASR.WHISPER_DIR.models / f'ggml-{model}.bin' model_path = WHISPER_CPP_DIR / 'models' / f'ggml-{model}.bin'
command = [str(ASR.WHISPER_DIR.build.bin.main)] command = [str(WHISPER_CPP_DIR / 'build' / 'bin' / 'main')]
command.extend(['-m', str(model_path)]) command.extend(['-m', str(model_path)])
command.extend(['-t', str(max(1, min(params.threads or API.MAX_CPU_CORES, API.MAX_CPU_CORES)))]) command.extend(['-t', str(max(1, min(params.threads or MAX_CPU_CORES, MAX_CPU_CORES)))])
command.extend(['-np']) # Always enable no-prints command.extend(['-np']) # Always enable no-prints
@ -119,11 +119,11 @@ async def transcribe_audio(file_path, params: TranscribeParams):
command.extend(['--dtw', params.dtw]) command.extend(['--dtw', params.dtw])
command.extend(['-f', file_path]) command.extend(['-f', file_path])
L.DEBUG(f"Command: {command}") logger.debug(f"Command: {command}")
# Create a unique ID for this transcription job # Create a unique ID for this transcription job
job_id = str(uuid.uuid4()) job_id = str(uuid.uuid4())
L.DEBUG(f"Created job ID: {job_id}") logger.debug(f"Created job ID: {job_id}")
# Store the job status # Store the job status
transcription_results[job_id] = {"status": "processing", "result": None} transcription_results[job_id] = {"status": "processing", "result": None}
@ -135,20 +135,20 @@ async def transcribe_audio(file_path, params: TranscribeParams):
poll_interval = 1 # 1 second poll_interval = 1 # 1 second
start_time = asyncio.get_event_loop().time() start_time = asyncio.get_event_loop().time()
L.DEBUG(f"Starting to poll for job {job_id}") logger.debug(f"Starting to poll for job {job_id}")
try: try:
while asyncio.get_event_loop().time() - start_time < max_wait_time: while asyncio.get_event_loop().time() - start_time < max_wait_time:
job_status = transcription_results.get(job_id, {}) job_status = transcription_results.get(job_id, {})
L.DEBUG(f"Current status for job {job_id}: {job_status['status']}") logger.debug(f"Current status for job {job_id}: {job_status['status']}")
if job_status["status"] == "completed": if job_status["status"] == "completed":
L.INFO(f"Transcription completed for job {job_id}") logger.info(f"Transcription completed for job {job_id}")
return job_status["result"] return job_status["result"]
elif job_status["status"] == "failed": elif job_status["status"] == "failed":
L.ERR(f"Transcription failed for job {job_id}: {job_status.get('error', 'Unknown error')}") logger.error(f"Transcription failed for job {job_id}: {job_status.get('error', 'Unknown error')}")
raise Exception(f"Transcription failed: {job_status.get('error', 'Unknown error')}") raise Exception(f"Transcription failed: {job_status.get('error', 'Unknown error')}")
await asyncio.sleep(poll_interval) await asyncio.sleep(poll_interval)
L.ERR(f"Transcription timed out for job {job_id}") logger.error(f"Transcription timed out for job {job_id}")
raise TimeoutError("Transcription timed out") raise TimeoutError("Transcription timed out")
finally: finally:
# Ensure the task is cancelled if we exit the loop # Ensure the task is cancelled if we exit the loop
@ -159,20 +159,20 @@ async def transcribe_audio(file_path, params: TranscribeParams):
async def process_transcription(command, file_path, job_id):
    """Run one transcription job and record its outcome in ``transcription_results``.

    The entry for ``job_id`` is set to "completed" with the result, or to
    "failed" with the error text. The input file is removed afterwards in
    either case.
    """
    try:
        logger.debug(f"Starting transcription process for job {job_id}")
        result = await run_transcription(command, file_path)
        transcription_results[job_id] = {"status": "completed", "result": result}
        logger.debug(f"Transcription completed for job {job_id}")
    except Exception as e:
        logger.error(f"Transcription failed for job {job_id}: {str(e)}")
        transcription_results[job_id] = {"status": "failed", "error": str(e)}
    finally:
        # Clean up the temporary file. A file that is already gone must not
        # raise out of this block and mask the status recorded above.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            logger.debug(f"Temporary file for job {job_id} was already removed")
        else:
            logger.debug(f"Cleaned up temporary file for job {job_id}")
async def run_transcription(command, file_path): async def run_transcription(command, file_path):
L.DEBUG(f"Running transcription command: {' '.join(command)}") logger.debug(f"Running transcription command: {' '.join(command)}")
proc = await asyncio.create_subprocess_exec( proc = await asyncio.create_subprocess_exec(
*command, *command,
stdout=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE,
@ -181,13 +181,13 @@ async def run_transcription(command, file_path):
stdout, stderr = await proc.communicate() stdout, stderr = await proc.communicate()
if proc.returncode != 0: if proc.returncode != 0:
error_message = f"Error running command: {stderr.decode()}" error_message = f"Error running command: {stderr.decode()}"
L.ERR(error_message) logger.error(error_message)
raise Exception(error_message) raise Exception(error_message)
L.DEBUG("Transcription command completed successfully") logger.debug("Transcription command completed successfully")
return stdout.decode().strip() return stdout.decode().strip()
async def convert_to_wav(file_path: str): async def convert_to_wav(file_path: str):
wav_file_path = os.path.join(Dir.data.asr, f"{uuid.uuid4()}.wav") wav_file_path = os.path.join(ASR_DIR, f"{uuid.uuid4()}.wav")
proc = await asyncio.create_subprocess_exec( proc = await asyncio.create_subprocess_exec(
"ffmpeg", "-y", "-i", file_path, "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", wav_file_path, "ffmpeg", "-y", "-i", file_path, "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", wav_file_path,
stdout=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE,

View file

@ -1,7 +1,7 @@
''' '''
Calendar module using macOS Calendars and/or Microsoft 365 via its Graph API. Calendar module using macOS Calendars and/or Microsoft 365 via its Graph API.
Depends on: Depends on:
LOGGER, ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, Cal.MS365.auth.token LOGGER, ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
''' '''
from fastapi import APIRouter, Depends, HTTPException, status, Request from fastapi import APIRouter, Depends, HTTPException, status, Request
from fastapi.responses import RedirectResponse, JSONResponse from fastapi.responses import RedirectResponse, JSONResponse
@ -16,70 +16,67 @@ from typing import Dict, List, Any
from datetime import datetime, timedelta from datetime import datetime, timedelta
from Foundation import NSDate, NSRunLoop from Foundation import NSDate, NSRunLoop
import EventKit as EK import EventKit as EK
from sijapi import L, Cal from sijapi import L, ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
from sijapi.routers import loc from sijapi.routers import loc
cal = APIRouter() cal = APIRouter()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token") oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token")
timeout = httpx.Timeout(12) timeout = httpx.Timeout(12)
logger = L.get_module_logger("cal")
print(f"Cal object: {Cal}") if MS365_TOGGLE is True:
print(f"Cal.__dict__: {Cal.__dict__}") logger.critical(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.")
print(f"Cal.MS365: {Cal.MS365}")
@cal.get("/o365/login")
async def login():
    """Redirect the caller to the Microsoft 365 OAuth2 authorization URL.

    Raises:
        HTTPException: 500 when no scopes are configured.
    """
    from urllib.parse import urlencode

    logger.debug(f"Received request to /o365/login")
    logger.debug(f"SCOPE: {MS365_SCOPE}")
    if not MS365_SCOPE:
        logger.error("No scopes defined for authorization.")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="No scopes defined for authorization."
        )
    # urlencode percent-escapes redirect_uri and renders the space-joined scopes
    # with '+', matching the original separator while keeping the query well-formed.
    query = urlencode({
        "client_id": MS365_CLIENT_ID,
        "response_type": "code",
        "redirect_uri": MS365_REDIRECT_PATH,
        "scope": " ".join(MS365_SCOPE),
    })
    authorization_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/authorize?{query}"
    logger.info(f"Redirecting to authorization URL: {authorization_url}")
    return RedirectResponse(authorization_url)
@cal.get("/o365/oauth_redirect")
async def oauth_redirect(code: str = None, error: str = None):
    """Exchange the authorization code from the OAuth2 redirect for a token and store it.

    Args:
        code: authorization code supplied on the redirect.
        error: error string supplied on the redirect, if any.

    Raises:
        HTTPException: 400 when ``error`` is set or ``code`` is missing;
            500 when the token endpoint does not return an access token.
    """
    logger.debug(f"Received request to /o365/oauth_redirect")
    if error:
        logger.error(f"OAuth2 Error: {error}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail="OAuth2 Error"
        )
    if not code:
        # Without a code the token request below cannot succeed.
        logger.error("OAuth2 redirect received without an authorization code")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail="Missing authorization code"
        )
    # The code itself is a credential, so it is deliberately not logged.
    logger.info("Requesting token with authorization code")
    token_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token"
    data = {
        "client_id": MS365_CLIENT_ID,
        "client_secret": MS365_SECRET,
        "code": code,
        "redirect_uri": MS365_REDIRECT_PATH,
        "grant_type": "authorization_code"
    }
    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.post(token_url, data=data)
    logger.debug(f"Token endpoint response status code: {response.status_code}")
    # The response body holds the access/refresh tokens on success; never log it.
    try:
        result = response.json()
    except ValueError:
        logger.critical("Token endpoint returned a non-JSON response")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to obtain access token"
        )
    if 'access_token' in result:
        await save_token(result)
        logger.info("Access token obtained successfully")
        return {"message": "Access token stored successfully"}
    else:
        # This branch is only reached when no access token is present in the payload.
        logger.critical(f"Failed to obtain access token. Response: {result}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to obtain access token"
        )
@cal.get("/MS365/me") @cal.get("/o365/me")
async def read_items(): async def read_items():
L.DEBUG(f"Received request to /MS365/me") logger.debug(f"Received request to /o365/me")
token = await load_token() token = await load_token()
if not token: if not token:
raise HTTPException( raise HTTPException(
@ -92,10 +89,10 @@ if Cal.MS365.toggle == 'on':
response = await client.get(graph_url, headers=headers) response = await client.get(graph_url, headers=headers)
if response.status_code == 200: if response.status_code == 200:
user = response.json() user = response.json()
L.INFO(f"User retrieved: {user}") logger.info(f"User retrieved: {user}")
return user return user
else: else:
L.ERR("Invalid or expired token") logger.error("Invalid or expired token")
raise HTTPException( raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid or expired token", detail="Invalid or expired token",
@ -103,36 +100,36 @@ if Cal.MS365.toggle == 'on':
) )
async def save_token(token):
    """Stamp ``token`` with an absolute ``expires_at`` and write it as JSON to MS365_TOKEN_PATH.

    Failures are logged rather than raised (best effort, as before), so a
    caller cannot tell from the return value whether the write succeeded.
    """
    # The token carries credentials, so only the fact of saving is logged.
    logger.debug("Saving token")
    try:
        token["expires_at"] = int(time.time()) + token["expires_in"]
        with open(MS365_TOKEN_PATH, "w") as file:
            json.dump(token, file)
        logger.debug(f"Saved token to {MS365_TOKEN_PATH}")
    except Exception as e:
        logger.error(f"Failed to save token: {e}")
async def load_token():
    """Load the stored token from MS365_TOKEN_PATH.

    Returns:
        The token dict, or ``None`` when the file is missing, is not valid
        JSON, or holds an empty value.
    """
    # Open directly instead of checking existence first: one code path, no race.
    try:
        with open(MS365_TOKEN_PATH, "r") as file:
            token = json.load(file)
    except FileNotFoundError:
        logger.error(f"No file found at {MS365_TOKEN_PATH}")
        return None
    except json.JSONDecodeError:
        logger.error("Failed to decode token JSON")
        return None

    if not token:
        logger.debug("No token found.")
        return None

    # Keep the expiry recorded at save time. Recomputing it on every load would
    # push the expiry forward each time the token is read.
    if "expires_at" not in token and "expires_in" in token:
        token["expires_at"] = int(time.time()) + token["expires_in"]
    # The token carries credentials, so its contents are not logged.
    logger.debug(f"Loaded token from {MS365_TOKEN_PATH}")
    return token
@ -150,81 +147,51 @@ if Cal.MS365.toggle == 'on':
return response.status_code == 401 return response.status_code == 401
async def get_new_token_with_refresh_token(refresh_token):
    """Exchange ``refresh_token`` at the token endpoint for a fresh token.

    Returns:
        The decoded JSON response when it contains ``access_token``, else ``None``.
    """
    payload = {
        "client_id": MS365_CLIENT_ID,
        "client_secret": MS365_SECRET,
        "refresh_token": refresh_token,
        "grant_type": "refresh_token",
        "scope": " ".join(MS365_SCOPE),
    }
    endpoint = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token"

    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.post(endpoint, data=payload)
        result = response.json()

    # Guard clause: anything without an access token counts as a failed refresh.
    if "access_token" not in result:
        logger.error("Failed to refresh access token")
        return None

    logger.info("Access token refreshed successfully")
    return result
async def get_ms365_events(start_date: datetime, end_date: datetime):
token = await load_token()
if token:
if await is_token_expired(token):
await refresh_token()
else:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Access token not found",
)
# this looks like it might need updating to use tz-aware datetimes converted to UTC...
graph_url = f"https://graph.microsoft.com/v1.0/me/events?$filter=start/dateTime ge '{start_date}T00:00:00' and end/dateTime le '{end_date}T23:59:59'"
headers = {
"Authorization": f"Bearer {token['access_token']}",
"Prefer": 'outlook.timezone="Pacific Standard Time"',
}
async with httpx.AsyncClient() as client:
response = await client.get(graph_url, headers=headers)
if response.status_code != 200:
L.ERR("Failed to retrieve events from Microsoft 365")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to retrieve events",
)
ms_events = response.json().get("value", [])
return ms_events
async def refresh_token(): async def refresh_token():
token = await load_token() token = await load_token()
if not token: if not token:
L.ERR("No token found in storage") logger.error("No token found in storage")
raise HTTPException( raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, status_code=status.HTTP_401_UNAUTHORIZED,
detail="No token found", detail="No token found",
) )
if 'refresh_token' not in token: if 'refresh_token' not in token:
L.ERR("Refresh token not found in the loaded token") logger.error("Refresh token not found in the loaded token")
raise HTTPException( raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, status_code=status.HTTP_401_UNAUTHORIZED,
detail="Refresh token not found", detail="Refresh token not found",
) )
refresh_token = token['refresh_token'] refresh_token = token['refresh_token']
L.DEBUG("Found refresh token, attempting to refresh access token") logger.debug("Found refresh token, attempting to refresh access token")
new_token = await get_new_token_with_refresh_token(refresh_token) new_token = await get_new_token_with_refresh_token(refresh_token)
if new_token: if new_token:
await save_token(new_token) await save_token(new_token)
L.INFO("Token refreshed and saved successfully") logger.info("Token refreshed and saved successfully")
else: else:
L.ERR("Failed to refresh token") logger.error("Failed to refresh token")
raise HTTPException( raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to refresh token", detail="Failed to refresh token",
@ -238,7 +205,7 @@ def get_calendar_ids() -> Dict[str, str]:
calendar_identifiers = { calendar_identifiers = {
calendar.title() : calendar.calendarIdentifier() for calendar in all_calendars calendar.title() : calendar.calendarIdentifier() for calendar in all_calendars
} }
L.DEBUG(f"{calendar_identifiers}") logger.debug(f"{calendar_identifiers}")
return calendar_identifiers return calendar_identifiers
# Helper to convert datetime to NSDate # Helper to convert datetime to NSDate
@ -257,12 +224,12 @@ async def get_events_endpoint(start_date: str, end_date: str):
async def get_events(start_dt: datetime, end_dt: datetime) -> List: async def get_events(start_dt: datetime, end_dt: datetime) -> List:
combined_events = [] combined_events = []
if Cal.MS365.toggle == "on": if MS365_TOGGLE:
ms_events = await get_ms365_events(start_dt, end_dt) ms_events = await get_ms365_events(start_dt, end_dt)
combined_events.extend(ms_events) # Use extend instead of append combined_events.extend(ms_events) # Use extend instead of append
if Cal.ICAL.toggle == "on": if ICAL_TOGGLE:
calendar_ids = Cal.ICAL.calendars calendar_ids = ICALENDARS
macos_events = get_macos_calendar_events(start_dt, end_dt, calendar_ids) macos_events = get_macos_calendar_events(start_dt, end_dt, calendar_ids)
combined_events.extend(macos_events) # Use extend instead of append combined_events.extend(macos_events) # Use extend instead of append
@ -270,7 +237,6 @@ async def get_events(start_dt: datetime, end_dt: datetime) -> List:
return parsed_events return parsed_events
if Cal.ICAL.toggle == "on":
def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]: def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]:
event_store = EK.EKEventStore.alloc().init() event_store = EK.EKEventStore.alloc().init()
@ -280,7 +246,7 @@ if Cal.ICAL.toggle == "on":
def completion_handler(granted, error): def completion_handler(granted, error):
if error is not None: if error is not None:
L.ERR(f"Error: {error}") logger.error(f"Error: {error}")
access_granted.append(granted) access_granted.append(granted)
# Notify the main thread that the completion handler has executed # Notify the main thread that the completion handler has executed
with access_granted_condition: with access_granted_condition:
@ -295,11 +261,11 @@ if Cal.ICAL.toggle == "on":
if access_granted: if access_granted:
return access_granted[0] return access_granted[0]
else: else:
L.ERR("Request access timed out or failed") logger.error("Request access timed out or failed")
return False return False
if not request_access(): if not request_access():
L.ERR("Access to calendar data was not granted") logger.error("Access to calendar data was not granted")
return [] return []
ns_start_date = datetime_to_nsdate(start_date) ns_start_date = datetime_to_nsdate(start_date)
@ -345,6 +311,35 @@ if Cal.ICAL.toggle == "on":
return event_list return event_list
async def get_ms365_events(start_date: datetime, end_date: datetime):
    """Fetch Microsoft 365 calendar events between two dates via the Graph API.

    Args:
        start_date: first day of the range (queried from 00:00:00).
        end_date: last day of the range (queried until 23:59:59).

    Returns:
        The list under ``value`` in the Graph response (empty if absent).

    Raises:
        HTTPException: 401 when no token is stored; 500 when Graph does not
            answer with status 200.
    """
    token = await load_token()
    if not token:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Access token not found",
        )
    if await is_token_expired(token):
        await refresh_token()
        # refresh_token() only writes the new token to storage; reload it so the
        # request below does not go out with the expired access token.
        token = await load_token()
        if not token:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Access token not found",
            )

    # Interpolating a datetime directly renders "YYYY-MM-DD HH:MM:SS", which made the
    # filter read "...HH:MM:SST00:00:00". Reduce datetimes to their date part; other
    # values (e.g. pre-formatted date strings) pass through unchanged.
    start_day = start_date.strftime("%Y-%m-%d") if isinstance(start_date, datetime) else start_date
    end_day = end_date.strftime("%Y-%m-%d") if isinstance(end_date, datetime) else end_date

    # this looks like it might need updating to use tz-aware datetimes converted to UTC...
    graph_url = f"https://graph.microsoft.com/v1.0/me/events?$filter=start/dateTime ge '{start_day}T00:00:00' and end/dateTime le '{end_day}T23:59:59'"
    headers = {
        "Authorization": f"Bearer {token['access_token']}",
        "Prefer": 'outlook.timezone="Pacific Standard Time"',
    }

    async with httpx.AsyncClient() as client:
        response = await client.get(graph_url, headers=headers)

    if response.status_code != 200:
        logger.error("Failed to retrieve events from Microsoft 365")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to retrieve events",
        )

    ms_events = response.json().get("value", [])
    return ms_events
async def parse_calendar_for_day(range_start: datetime, range_end: datetime, events: List[Dict[str, Any]]): async def parse_calendar_for_day(range_start: datetime, range_end: datetime, events: List[Dict[str, Any]]):
range_start = await loc.dt(range_start) range_start = await loc.dt(range_start)
@ -352,33 +347,33 @@ async def parse_calendar_for_day(range_start: datetime, range_end: datetime, eve
event_list = [] event_list = []
for event in events: for event in events:
L.INFO(f"Event: {event}") logger.info(f"Event: {event}")
start_str = event.get('start') start_str = event.get('start')
end_str = event.get('end') end_str = event.get('end')
if isinstance(start_str, dict): if isinstance(start_str, dict):
start_str = start_str.get('dateTime') start_str = start_str.get('dateTime')
else: else:
L.INFO(f"Start date string not a dict") logger.info(f"Start date string not a dict")
if isinstance(end_str, dict): if isinstance(end_str, dict):
end_str = end_str.get('dateTime') end_str = end_str.get('dateTime')
else: else:
L.INFO(f"End date string not a dict") logger.info(f"End date string not a dict")
try: try:
start_date = await loc.dt(start_str) if start_str else None start_date = await loc.dt(start_str) if start_str else None
except (ValueError, TypeError) as e: except (ValueError, TypeError) as e:
L.ERR(f"Invalid start date format: {start_str}, error: {e}") logger.error(f"Invalid start date format: {start_str}, error: {e}")
continue continue
try: try:
end_date = await loc.dt(end_str) if end_str else None end_date = await loc.dt(end_str) if end_str else None
except (ValueError, TypeError) as e: except (ValueError, TypeError) as e:
L.ERR(f"Invalid end date format: {end_str}, error: {e}") logger.error(f"Invalid end date format: {end_str}, error: {e}")
continue continue
L.DEBUG(f"Comparing {start_date} with range {range_start} to {range_end}") logger.debug(f"Comparing {start_date} with range {range_start} to {range_end}")
if start_date: if start_date:
# Ensure start_date is timezone-aware # Ensure start_date is timezone-aware
@ -410,11 +405,11 @@ async def parse_calendar_for_day(range_start: datetime, range_end: datetime, eve
"busy": event.get('showAs', '') in ['busy', 'tentative'], "busy": event.get('showAs', '') in ['busy', 'tentative'],
"all_day": event.get('isAllDay', False) "all_day": event.get('isAllDay', False)
} }
L.INFO(f"Event_data: {event_data}") logger.info(f"Event_data: {event_data}")
event_list.append(event_data) event_list.append(event_data)
else: else:
L.DEBUG(f"Event outside of specified range: {start_date} to {end_date}") logger.debug(f"Event outside of specified range: {start_date} to {end_date}")
else: else:
L.ERR(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}") logger.error(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}")
return event_list return event_list

View file

@ -12,6 +12,7 @@ from asyncio import sleep
import os import os
cf = APIRouter() cf = APIRouter()
# This module defines the Cloudflare router `cf`, so its log records are
# tagged "cf" rather than reusing the calendar router's "cal" name.
logger = L.get_module_logger("cf")
class DNSRecordRequest(BaseModel): class DNSRecordRequest(BaseModel):
full_domain: str full_domain: str
@ -69,7 +70,7 @@ async def retry_request(url, headers, max_retries=5, backoff_factor=1):
response.raise_for_status() response.raise_for_status()
return response return response
except (httpx.HTTPError, httpx.ConnectTimeout) as e: except (httpx.HTTPError, httpx.ConnectTimeout) as e:
L.ERR(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...") logger.error(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
await sleep(backoff_factor * (2 ** retry)) await sleep(backoff_factor * (2 ** retry))
raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request") raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request")

View file

@ -5,10 +5,10 @@ from fastapi import APIRouter, HTTPException
import asyncio import asyncio
import logging import logging
from sijapi.utilities import run_ssh_command from sijapi.utilities import run_ssh_command
from sijapi import REBOOT_SCRIPT_PATH, HOST_CONFIG, API_CONFIG from sijapi import L, REBOOT_SCRIPT_PATH, HOST_CONFIG, API_CONFIG
dist = APIRouter() dist = APIRouter()
logger = logging.getLogger(__name__) logger = L.get_module_logger("dist")
@dist.get("/update-restart-others") @dist.get("/update-restart-others")
async def update_and_restart_others(): async def update_and_restart_others():

View file

@ -20,19 +20,26 @@ import ssl
import yaml import yaml
from typing import List, Dict, Optional, Set from typing import List, Dict, Optional, Set
from datetime import datetime as dt_datetime from datetime import datetime as dt_datetime
from sijapi import L, TTS, Email, Dir from sijapi import L, PODCAST_DIR, DEFAULT_VOICE, EMAIL_CONFIG, EMAIL_LOGS
from sijapi.routers import img, loc, tts, llm from sijapi.routers import img, loc, tts, llm
from sijapi.utilities import clean_text, assemble_journal_path, extract_text, prefix_lines from sijapi.utilities import clean_text, assemble_journal_path, extract_text, prefix_lines
from sijapi.classes import EmailAccount, IMAPConfig, SMTPConfig, IncomingEmail, EmailContact, AutoResponder from sijapi.classes import EmailAccount, IMAPConfig, SMTPConfig, IncomingEmail, EmailContact, AutoResponder
from sijapi.classes import EmailAccount
email = APIRouter(tags=["private"]) email = APIRouter()
logger = L.get_module_logger("email")
print(f"Email logger level: {logger.level}") # Debug print
logger.debug("This is a debug message from email module")
logger.info("This is an info message from email module")
def load_email_accounts(yaml_path: str) -> List[EmailAccount]:
    """Build EmailAccount objects from the ``accounts`` list of a YAML file.

    Args:
        yaml_path: Path of the YAML file to read.

    Returns:
        One EmailAccount per entry under the top-level ``accounts`` key,
        in file order.
    """
    with open(yaml_path, 'r') as handle:
        parsed = yaml.safe_load(handle)

    accounts: List[EmailAccount] = []
    for entry in parsed['accounts']:
        # Each entry is expanded as keyword arguments for the model.
        accounts.append(EmailAccount(**entry))
    return accounts
def get_imap_connection(account: EmailAccount): def get_imap_connection(account: EmailAccount):
return Imbox(account.imap.host, return Imbox(account.imap.host,
username=account.imap.username, username=account.imap.username,
@ -41,42 +48,45 @@ def get_imap_connection(account: EmailAccount):
ssl=account.imap.encryption == 'SSL', ssl=account.imap.encryption == 'SSL',
starttls=account.imap.encryption == 'STARTTLS') starttls=account.imap.encryption == 'STARTTLS')
def _starttls_connection(smtp_config, context):
    """Open a plain SMTP connection and upgrade it with STARTTLS."""
    smtp = None
    try:
        logger.debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
        smtp = SMTP(smtp_config.host, smtp_config.port)
        smtp.starttls(context=context)
        return smtp
    except Exception as e:
        logger.error(f"STARTTLS connection failed: {str(e)}")
        # Do not leak the socket when the connection was opened but the
        # TLS upgrade failed.
        if smtp is not None:
            smtp.close()
        raise


def get_smtp_connection(autoresponder: AutoResponder):
    """Return a connected (not yet logged-in) SMTP client for an autoresponder.

    The transport is chosen from ``autoresponder.smtp.encryption``:
    ``'SSL'`` uses implicit TLS and falls back to STARTTLS when the SSL
    handshake fails, ``'STARTTLS'`` upgrades a plain connection, and any
    other value opens an unencrypted connection.

    Args:
        autoresponder: Profile whose ``smtp`` settings (host, port,
            encryption) describe the server to connect to.

    Returns:
        An ``SMTP_SSL`` or ``SMTP`` instance.

    Raises:
        Exception: Whatever the final connection attempt raised; it is
            logged and re-raised unchanged.
    """
    # Create an SSL context that doesn't verify certificates
    # NOTE(review): hostname and certificate checks are switched off on
    # purpose here; confirm this is still wanted, since it permits
    # man-in-the-middle interception of the credentials sent afterwards.
    context = ssl.create_default_context()
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    smtp_config = autoresponder.smtp

    if smtp_config.encryption == 'SSL':
        try:
            logger.debug(f"Attempting SSL connection to {smtp_config.host}:{smtp_config.port}")
            return SMTP_SSL(smtp_config.host, smtp_config.port, context=context)
        except ssl.SSLError as e:
            logger.error(f"SSL connection failed: {str(e)}")
            # If SSL fails, try TLS
            return _starttls_connection(smtp_config, context)

    elif smtp_config.encryption == 'STARTTLS':
        return _starttls_connection(smtp_config, context)

    else:
        try:
            logger.debug(f"Attempting unencrypted connection to {smtp_config.host}:{smtp_config.port}")
            return SMTP(smtp_config.host, smtp_config.port)
        except Exception as e:
            logger.error(f"Unencrypted connection failed: {str(e)}")
            raise
async def send_response(to_email: str, subject: str, body: str, profile: AutoResponder, image_attachment: Path = None) -> bool: async def send_response(to_email: str, subject: str, body: str, profile: AutoResponder, image_attachment: Path = None) -> bool:
@ -93,21 +103,20 @@ async def send_response(to_email: str, subject: str, body: str, profile: AutoRes
img = MIMEImage(img_file.read(), name=os.path.basename(image_attachment)) img = MIMEImage(img_file.read(), name=os.path.basename(image_attachment))
message.attach(img) message.attach(img)
L.DEBUG(f"Sending auto-response to {to_email} concerning {subject} from account {profile.name}...") logger.debug(f"Sending auto-response to {to_email} concerning {subject} from account {profile.name}...")
server = get_smtp_connection(profile) server = get_smtp_connection(profile)
L.DEBUG(f"SMTP connection established: {type(server)}") logger.debug(f"SMTP connection established: {type(server)}")
server.login(profile.smtp.username, profile.smtp.password) server.login(profile.smtp.username, profile.smtp.password)
server.send_message(message) server.send_message(message)
L.INFO(f"Auto-response sent to {to_email} concerning {subject} from account {profile.name}!") logger.info(f"Auto-response sent to {to_email} concerning {subject} from account {profile.name}!")
return True return True
except Exception as e: except Exception as e:
L.ERR(f"Error in preparing/sending auto-response from account {profile.name}: {str(e)}") logger.error(f"Error in preparing/sending auto-response from account {profile.name}: {str(e)}")
L.ERR(f"SMTP details - Host: {profile.smtp.host}, Port: {profile.smtp.port}, Encryption: {profile.smtp.encryption}") logger.error(f"SMTP details - Host: {profile.smtp.host}, Port: {profile.smtp.port}, Encryption: {profile.smtp.encryption}")
L.ERR(traceback.format_exc()) logger.error(traceback.format_exc())
return False return False
finally: finally:
@ -115,7 +124,9 @@ async def send_response(to_email: str, subject: str, body: str, profile: AutoRes
try: try:
server.quit() server.quit()
except Exception as e: except Exception as e:
L.ERR(f"Error closing SMTP connection: {str(e)}") logger.error(f"Error closing SMTP connection: {str(e)}")
def clean_email_content(html_content): def clean_email_content(html_content):
@ -144,16 +155,16 @@ async def extract_attachments(attachments) -> List[str]:
return attachment_texts return attachment_texts
async def process_account_archival(account: EmailAccount): async def process_account_archival(account: EmailAccount):
summarized_log = Dir.logs.email / account.name / "summarized.txt" summarized_log = EMAIL_LOGS / account.name / "summarized.txt"
os.makedirs(summarized_log.parent, exist_ok = True) os.makedirs(summarized_log.parent, exist_ok = True)
while True: while True:
try: try:
processed_uids = await load_processed_uids(summarized_log) processed_uids = await load_processed_uids(summarized_log)
L.DEBUG(f"{len(processed_uids)} emails marked as already summarized are being ignored.") logger.debug(f"{len(processed_uids)} emails marked as already summarized are being ignored.")
with get_imap_connection(account) as inbox: with get_imap_connection(account) as inbox:
unread_messages = inbox.messages(unread=True) unread_messages = inbox.messages(unread=True)
L.DEBUG(f"There are {len(unread_messages)} unread messages.") logger.debug(f"There are {len(unread_messages)} unread messages.")
for uid, message in unread_messages: for uid, message in unread_messages:
uid_str = uid.decode() if isinstance(uid, bytes) else str(uid) uid_str = uid.decode() if isinstance(uid, bytes) else str(uid)
if uid_str not in processed_uids: if uid_str not in processed_uids:
@ -173,13 +184,13 @@ async def process_account_archival(account: EmailAccount):
save_success = await save_email(md_path, md_content) save_success = await save_email(md_path, md_content)
if save_success: if save_success:
await save_processed_uid(summarized_log, account.name, uid_str) await save_processed_uid(summarized_log, account.name, uid_str)
L.INFO(f"Summarized email: {uid_str}") logger.info(f"Summarized email: {uid_str}")
else: else:
L.WARN(f"Failed to summarize {this_email.subject}") logger.warn(f"Failed to summarize {this_email.subject}")
else: else:
L.DEBUG(f"Skipping {uid_str} because it was already processed.") logger.debug(f"Skipping {uid_str} because it was already processed.")
except Exception as e: except Exception as e:
L.ERR(f"An error occurred during summarization for account {account.name}: {e}") logger.error(f"An error occurred during summarization for account {account.name}: {e}")
await asyncio.sleep(account.refresh) await asyncio.sleep(account.refresh)
@ -191,7 +202,7 @@ async def summarize_single_email(this_email: IncomingEmail, podcast: bool = Fals
attachment_texts = await extract_attachments(this_email.attachments) attachment_texts = await extract_attachments(this_email.attachments)
email_content += "\n—--\n" + "\n—--\n".join([f"Attachment: {text}" for text in attachment_texts]) email_content += "\n—--\n" + "\n—--\n".join([f"Attachment: {text}" for text in attachment_texts])
summary = await llm.summarize_text(email_content) summary = await llm.summarize_text(email_content)
await tts.local_tts(text_content = summary, speed = 1.1, voice = TTS.xtts.voice, podcast = podcast, output_path = tts_path) await tts.local_tts(text_content = summary, speed = 1.1, voice = DEFAULT_VOICE, podcast = podcast, output_path = tts_path)
md_summary = f'```ad.summary\n' md_summary = f'```ad.summary\n'
md_summary += f'title: {this_email.subject}\n' md_summary += f'title: {this_email.subject}\n'
md_summary += f'{summary}\n' md_summary += f'{summary}\n'
@ -225,7 +236,7 @@ tags:
return markdown_content return markdown_content
except Exception as e: except Exception as e:
L.ERR(f"Exception: {e}") logger.error(f"Exception: {e}")
return False return False
async def save_email(md_path, md_content): async def save_email(md_path, md_content):
@ -233,14 +244,14 @@ async def save_email(md_path, md_content):
with open(md_path, 'w', encoding='utf-8') as md_file: with open(md_path, 'w', encoding='utf-8') as md_file:
md_file.write(md_content) md_file.write(md_content)
L.DEBUG(f"Saved markdown to {md_path}") logger.debug(f"Saved markdown to {md_path}")
return True return True
except Exception as e: except Exception as e:
L.ERR(f"Failed to save email: {e}") logger.error(f"Failed to save email: {e}")
return False return False
def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount) -> List[AutoResponder]: def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount) -> List[AutoResponder]:
L.DEBUG(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"") logger.debug(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"")
def matches_list(item: str, this_email: IncomingEmail) -> bool: def matches_list(item: str, this_email: IncomingEmail) -> bool:
if '@' in item: if '@' in item:
return item in this_email.sender return item in this_email.sender
@ -251,46 +262,46 @@ def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount
whitelist_match = not profile.whitelist or any(matches_list(item, this_email) for item in profile.whitelist) whitelist_match = not profile.whitelist or any(matches_list(item, this_email) for item in profile.whitelist)
blacklist_match = any(matches_list(item, this_email) for item in profile.blacklist) blacklist_match = any(matches_list(item, this_email) for item in profile.blacklist)
if whitelist_match and not blacklist_match: if whitelist_match and not blacklist_match:
L.DEBUG(f"We have a match for {whitelist_match} and no blacklist matches.") logger.debug(f"We have a match for {whitelist_match} and no blacklist matches.")
matching_profiles.append(profile) matching_profiles.append(profile)
elif whitelist_match and blacklist_match: elif whitelist_match and blacklist_match:
L.DEBUG(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}") logger.debug(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}")
else: else:
L.DEBUG(f"No whitelist or blacklist matches.") logger.debug(f"No whitelist or blacklist matches.")
return matching_profiles return matching_profiles
async def process_account_autoresponding(account: EmailAccount):
    """Loop forever, auto-responding to unread mail of a single account.

    On every pass the set of already-handled UIDs is re-read from the
    account's ``autoresponded.txt`` log, unread messages are fetched over
    IMAP, and each message whose UID is not in that set is passed to
    ``autorespond_single_email``. Errors during a pass are logged and the
    loop continues after sleeping ``account.refresh``.

    Args:
        account: The account to poll; its ``name`` selects the log
            directory and its ``refresh`` is the sleep between passes.
    """
    autoresponse_log = EMAIL_LOGS / account.name / "autoresponded.txt"
    os.makedirs(autoresponse_log.parent, exist_ok=True)

    while True:
        try:
            handled_uids = await load_processed_uids(autoresponse_log)
            logger.debug(f"{len(handled_uids)} emails marked as already responded to are being ignored.")

            with get_imap_connection(account) as inbox:
                unread = inbox.messages(unread=True)
                logger.debug(f"There are {len(unread)} unread messages.")

                for raw_uid, message in unread:
                    # UIDs may arrive as bytes; normalise to text for the lookup.
                    uid_str = raw_uid.decode() if isinstance(raw_uid, bytes) else str(raw_uid)
                    if uid_str in handled_uids:
                        logger.debug(f"Skipping {uid_str} because it was already processed.")
                        continue
                    await autorespond_single_email(message, uid_str, account, autoresponse_log)

        except Exception as e:
            logger.error(f"An error occurred during auto-responding for account {account.name}: {e}")

        await asyncio.sleep(account.refresh)
async def autorespond_single_email(message, uid_str: str, account: EmailAccount, log_file: Path): async def autorespond_single_email(message, uid_str: str, account: EmailAccount, log_file: Path):
this_email = await create_incoming_email(message) this_email = await create_incoming_email(message)
L.DEBUG(f"Evaluating {this_email.subject} for autoresponse-worthiness...") logger.debug(f"Evaluating {this_email.subject} for autoresponse-worthiness...")
matching_profiles = get_matching_autoresponders(this_email, account) matching_profiles = get_matching_autoresponders(this_email, account)
L.DEBUG(f"Matching profiles: {matching_profiles}") logger.debug(f"Matching profiles: {matching_profiles}")
for profile in matching_profiles: for profile in matching_profiles:
response_body = await generate_response(this_email, profile, account) response_body = await generate_response(this_email, profile, account)
@ -300,15 +311,15 @@ async def autorespond_single_email(message, uid_str: str, account: EmailAccount,
jpg_path = await img.workflow(profile.image_prompt, earlyout=False, downscale_to_fit=True) if profile.image_prompt else None jpg_path = await img.workflow(profile.image_prompt, earlyout=False, downscale_to_fit=True) if profile.image_prompt else None
success = await send_response(this_email.sender, subject, response_body, profile, jpg_path) success = await send_response(this_email.sender, subject, response_body, profile, jpg_path)
if success: if success:
L.WARN(f"Auto-responded to email: {this_email.subject}") logger.warn(f"Auto-responded to email: {this_email.subject}")
await save_processed_uid(log_file, account.name, uid_str) await save_processed_uid(log_file, account.name, uid_str)
else: else:
L.WARN(f"Failed to send auto-response to {this_email.subject}") logger.warn(f"Failed to send auto-response to {this_email.subject}")
else: else:
L.WARN(f"Unable to generate auto-response for {this_email.subject}") logger.warn(f"Unable to generate auto-response for {this_email.subject}")
async def generate_response(this_email: IncomingEmail, profile: AutoResponder, account: EmailAccount) -> Optional[str]: async def generate_response(this_email: IncomingEmail, profile: AutoResponder, account: EmailAccount) -> Optional[str]:
L.INFO(f"Generating auto-response to {this_email.subject} with profile: {profile.name}") logger.info(f"Generating auto-response to {this_email.subject} with profile: {profile.name}")
now = await loc.dt(dt_datetime.now()) now = await loc.dt(dt_datetime.now())
then = await loc.dt(this_email.datetime_received) then = await loc.dt(this_email.datetime_received)
@ -326,7 +337,7 @@ Respond on behalf of {account.fullname}, who is unable to respond personally bec
try: try:
response = await llm.query_ollama(usr_prompt, sys_prompt, profile.ollama_model, 400) response = await llm.query_ollama(usr_prompt, sys_prompt, profile.ollama_model, 400)
L.DEBUG(f"query_ollama response: {response}") logger.debug(f"query_ollama response: {response}")
if isinstance(response, dict) and "message" in response and "content" in response["message"]: if isinstance(response, dict) and "message" in response and "content" in response["message"]:
response = response["message"]["content"] response = response["message"]["content"]
@ -334,7 +345,7 @@ Respond on behalf of {account.fullname}, who is unable to respond personally bec
return response + "\n\n" return response + "\n\n"
except Exception as e: except Exception as e:
L.ERR(f"Error generating auto-response: {str(e)}") logger.error(f"Error generating auto-response: {str(e)}")
return None return None
@ -363,7 +374,7 @@ async def save_processed_uid(filename: Path, account_name: str, uid: str):
async def process_all_accounts():
    """Start the archival and auto-response loops for every configured account.

    Accounts are loaded from ``EMAIL_CONFIG``; one archival task per account
    is scheduled first, then one auto-responding task per account, and all
    of them are awaited together.
    """
    accounts = load_email_accounts(EMAIL_CONFIG)
    workers = (process_account_archival, process_account_autoresponding)
    # Outer loop over workers keeps the original scheduling order:
    # every archival task is created before any auto-responding task.
    tasks = [asyncio.create_task(worker(acct)) for worker in workers for acct in accounts]
    await asyncio.gather(*tasks)

50
sijapi/routers/ghost.py Normal file
View file

@ -0,0 +1,50 @@
def generate_jwt_token():
    """Create a short-lived HS256 JWT for the Ghost Admin API.

    ``GHOST_API_KEY`` is split at ``':'`` into the key id (sent as the
    ``kid`` header) and the hex-encoded secret used to sign the token.

    Returns:
        The encoded token as a ``str``.
    """
    # Local import keeps this block self-contained; `date.now()` does not
    # exist on datetime.date, so the epoch time comes from time.time().
    import time

    key_id, key_secret = GHOST_API_KEY.split(':')
    iat = int(time.time())
    exp = iat + 5 * 60  # Token expiration time set to 5 minutes from now for consistency with the working script
    payload = {
        'iat': iat,
        'exp': exp,
        'aud': '/admin/'  # Adjusted to match the working script
    }
    token = jwt.encode(payload, bytes.fromhex(key_secret), algorithm='HS256', headers={'kid': key_id})
    # Depending on the installed jwt library version, encode() may hand back
    # bytes or str; always return text.
    return token.decode('utf-8') if isinstance(token, bytes) else token
def post_to_ghost(title, image_path, html_content, ghost_tags):
    """Upload an image to Ghost and publish a post that embeds it.

    Args:
        title: Title of the new post.
        image_path: Path of the image file to upload; the image is placed
            at the top of the post.
        html_content: HTML placed after the image, inside a single HTML card.
        ghost_tags: Value sent as the post's ``tags`` field.

    Returns:
        The ``url`` of the created post as reported by Ghost.

    Raises:
        requests.HTTPError: When the image upload or the post creation
            answers with an error status.
    """
    # Local import keeps this block self-contained.
    import mimetypes

    jwt_token = generate_jwt_token()
    ghost_headers = {'Authorization': f'Ghost {jwt_token}'}

    # Upload the image to Ghost
    image_name = os.path.basename(image_path)
    # Derive the MIME type from the file name instead of labelling every
    # upload with the non-standard 'image/jpg'; default to JPEG when the
    # extension is unknown.
    content_type = mimetypes.guess_type(image_name)[0] or 'image/jpeg'
    with open(image_path, 'rb') as f:
        files = {'file': (image_name, f, content_type)}
        image_response = requests.post(f"{GHOST_API_URL}/images/upload/", headers=ghost_headers, files=files)
    image_response.raise_for_status()  # Ensure the request was successful
    image_url = image_response.json()['images'][0]['url']

    # Prepare the post content
    updated_html_content = f'<img src="{image_url}" alt="Image"/><hr/> {html_content}'
    mobiledoc = {
        "version": "0.3.1",
        "atoms": [],
        "cards": [["html", {"cardName": "html", "html": updated_html_content}]],
        "markups": [],
        "sections": [[10, 0]]
    }
    # The document is serialised to a JSON string before being sent in the
    # 'mobiledoc' field.
    mobiledoc = json.dumps(mobiledoc)

    post_data = {
        'posts': [{
            'title': title,
            'mobiledoc': mobiledoc,
            'status': 'published',
            'tags': ghost_tags
        }]
    }

    # Create a new post
    post_response = requests.post(f"{GHOST_API_URL}/posts/", json=post_data, headers=ghost_headers)
    post_response.raise_for_status()
    post_url = post_response.json()['posts'][0]['url']

    return post_url

View file

@ -11,6 +11,7 @@ from tailscale import Tailscale
from sijapi import L, API, TS_ID, SUBNET_BROADCAST from sijapi import L, API, TS_ID, SUBNET_BROADCAST
health = APIRouter(tags=["public", "trusted", "private"]) health = APIRouter(tags=["public", "trusted", "private"])
logger = L.get_module_logger("health")
@health.get("/health") @health.get("/health")
def get_health(): def get_health():
@ -48,7 +49,7 @@ async def get_wan_ip():
wan_info = response.json() wan_info = response.json()
return wan_info.get('ip', 'Unavailable') return wan_info.get('ip', 'Unavailable')
except Exception as e: except Exception as e:
L.ERR(f"Error fetching WAN IP: {e}") logger.error(f"Error fetching WAN IP: {e}")
return "Unavailable" return "Unavailable"
@health.get("/ts_ip") @health.get("/ts_ip")

View file

@ -41,6 +41,7 @@ from io import BytesIO
import base64 import base64
ig = APIRouter() ig = APIRouter()
logger = L.get_module_logger("ig")
class IG_Request(BaseModel): class IG_Request(BaseModel):
file: Optional[UploadFile] = None # upload a particular file to Instagram file: Optional[UploadFile] = None # upload a particular file to Instagram
@ -186,7 +187,7 @@ def get_user_media(username, amount=30):
Fetch recent media for a given username, return List of medias Fetch recent media for a given username, return List of medias
""" """
L.DEBUG(f"Fetching recent media for {username}...") logger.debug(f"Fetching recent media for {username}...")
user_id = cl.user_id_from_username(username) user_id = cl.user_id_from_username(username)
medias = cl.user_medias(user_id, amount) medias = cl.user_medias(user_id, amount)
final_medias = [] final_medias = []
@ -201,7 +202,7 @@ def get_user_image_urls(username, amount=30) -> List[str]:
""" """
Fetch recent media URLs for a given username, return List of media URLs Fetch recent media URLs for a given username, return List of media URLs
""" """
L.DEBUG(f"Fetching recent media URLs for {username}...") logger.debug(f"Fetching recent media URLs for {username}...")
user_id = cl.user_id_from_username(username) user_id = cl.user_id_from_username(username)
medias = cl.user_medias(user_id, amount) medias = cl.user_medias(user_id, amount)
@ -229,7 +230,7 @@ def get_random_follower():
def get_medias_by_hashtag(hashtag: str, days_ago_max:int = 14, ht_type:str = None, amount:int = args.count): def get_medias_by_hashtag(hashtag: str, days_ago_max:int = 14, ht_type:str = None, amount:int = args.count):
if not ht_type: if not ht_type:
ht_type = args.commentmode ht_type = args.commentmode
L.DEBUG(f"Fetching {ht_type} media for hashtag: {hashtag}") logger.debug(f"Fetching {ht_type} media for hashtag: {hashtag}")
ht_medias = [] ht_medias = []
while True: while True:
sleep(SHORT) sleep(SHORT)
@ -239,10 +240,10 @@ def get_medias_by_hashtag(hashtag: str, days_ago_max:int = 14, ht_type:str = Non
ht_medias.extend(cl.hashtag_medias_recent(name=hashtag, amount=amount*10)) ht_medias.extend(cl.hashtag_medias_recent(name=hashtag, amount=amount*10))
filtered_medias = filter_medias(ht_medias, days_ago_max=days_ago_max) filtered_medias = filter_medias(ht_medias, days_ago_max=days_ago_max)
L.DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(filtered_medias)}") logger.debug(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(filtered_medias)}")
if len(filtered_medias) >= amount: if len(filtered_medias) >= amount:
L.DEBUG(f"Desired amount of {amount} filtered media reached.") logger.debug(f"Desired amount of {amount} filtered media reached.")
break break
return filtered_medias return filtered_medias
@ -250,11 +251,11 @@ def get_medias_by_hashtag(hashtag: str, days_ago_max:int = 14, ht_type:str = Non
def get_medias_from_all_hashtags(days_ago_max=14, ht_type:str = None, amount:int = args.count): def get_medias_from_all_hashtags(days_ago_max=14, ht_type:str = None, amount:int = args.count):
if not ht_type: if not ht_type:
ht_type = args.commentmode ht_type = args.commentmode
L.DEBUG(f"Fetching {ht_type} media.") logger.debug(f"Fetching {ht_type} media.")
filtered_medias = [] filtered_medias = []
while len(filtered_medias) < amount: while len(filtered_medias) < amount:
hashtag = random.choice(HASHTAGS) hashtag = random.choice(HASHTAGS)
L.DEBUG(f"Using hashtag: {hashtag}") logger.debug(f"Using hashtag: {hashtag}")
fetched_medias = [] fetched_medias = []
sleep(SHORT) sleep(SHORT)
if ht_type == "top": if ht_type == "top":
@ -264,15 +265,15 @@ def get_medias_from_all_hashtags(days_ago_max=14, ht_type:str = None, amount:int
current_filtered_medias = filter_medias(fetched_medias, days_ago_max=days_ago_max) current_filtered_medias = filter_medias(fetched_medias, days_ago_max=days_ago_max)
filtered_medias.extend(current_filtered_medias) filtered_medias.extend(current_filtered_medias)
L.DEBUG(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(current_filtered_medias)}") logger.debug(f"Filtered {ht_type} media count obtained for '#{hashtag}': {len(current_filtered_medias)}")
# Trim the list if we've collected more than needed # Trim the list if we've collected more than needed
if len(filtered_medias) > amount: if len(filtered_medias) > amount:
filtered_medias = filtered_medias[:amount] filtered_medias = filtered_medias[:amount]
L.DEBUG(f"Desired amount of {amount} filtered media reached.") logger.debug(f"Desired amount of {amount} filtered media reached.")
break break
else: else:
L.DEBUG(f"Total filtered media count so far: {len(filtered_medias)}") logger.debug(f"Total filtered media count so far: {len(filtered_medias)}")
return filtered_medias return filtered_medias
@ -345,7 +346,7 @@ def download_and_resize_image(url: str, download_path: str = None, max_dimension
return download_path return download_path
except Exception as e: except Exception as e:
# Handle or log the error as needed # Handle or log the error as needed
L.DEBUG(f"Error downloading or resizing image: {e}") logger.debug(f"Error downloading or resizing image: {e}")
return None return None
@ -365,17 +366,17 @@ def comment_on_user_media(user: str, comment_type: str = "default", amount=5):
comment_text = llava(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr) if args.llava or not args.openai else gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr) comment_text = llava(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr) if args.llava or not args.openai else gpt4v(encoded_media, COMMENT_PROMPT_SYS, comment_prompt_usr)
if comment_text: if comment_text:
cl.media_comment(media.pk, comment_text) cl.media_comment(media.pk, comment_text)
L.DEBUG(f"Commented on media: {media.pk}") logger.debug(f"Commented on media: {media.pk}")
else: else:
L.DEBUG(f"Failed to generate comment for media: {media.pk}") logger.debug(f"Failed to generate comment for media: {media.pk}")
add_media_to_completed_lists(media) add_media_to_completed_lists(media)
sleep(SHORT) sleep(SHORT)
else: else:
L.DEBUG(f"We received a nonetype! {media_path}") logger.debug(f"We received a nonetype! {media_path}")
else: else:
L.DEBUG(f"URL for {media.pk} disappeared it seems...") logger.debug(f"URL for {media.pk} disappeared it seems...")
else: else:
L.DEBUG(f"Media already interacted with: {media.pk}") logger.debug(f"Media already interacted with: {media.pk}")
def comment_on_hashtagged_media(comment_type: str = args.commenttype, amount=3, hashtag: str = None): def comment_on_hashtagged_media(comment_type: str = args.commenttype, amount=3, hashtag: str = None):
""" """
@ -400,22 +401,22 @@ def comment_on_hashtagged_media(comment_type: str = args.commenttype, amount=3,
try: try:
like_result = cl.media_like(media) like_result = cl.media_like(media)
if like_result: if like_result:
L.DEBUG(f"Liked media: https://instagram.com/p/{media.pk}/") logger.debug(f"Liked media: https://instagram.com/p/{media.pk}/")
except instagrapi.exceptions.FeedbackRequired as e: except instagrapi.exceptions.FeedbackRequired as e:
L.DEBUG(f"Cannot like media {media.pk}: {str(e)}") logger.debug(f"Cannot like media {media.pk}: {str(e)}")
if comment_text: if comment_text:
try: try:
cl.media_comment(media.pk, comment_text) cl.media_comment(media.pk, comment_text)
L.DEBUG(f"Commented on media: https://instagram.com/p/{media.pk}/") logger.debug(f"Commented on media: https://instagram.com/p/{media.pk}/")
except instagrapi.exceptions.FeedbackRequired as e: except instagrapi.exceptions.FeedbackRequired as e:
L.DEBUG(f"Cannot comment on media {media.pk}: {str(e)}") logger.debug(f"Cannot comment on media {media.pk}: {str(e)}")
else: else:
L.DEBUG(f"Failed to generate comment for media: https://instagram.com/p/{media.pk}") logger.debug(f"Failed to generate comment for media: https://instagram.com/p/{media.pk}")
add_media_to_completed_lists(media) add_media_to_completed_lists(media)
sleep(SHORT) sleep(SHORT)
else: else:
L.DEBUG(f"Media already interacted with: {media.pk}") logger.debug(f"Media already interacted with: {media.pk}")
def comment_on_specific_media(media_url, comment_type: str = "default"): def comment_on_specific_media(media_url, comment_type: str = "default"):
@ -436,11 +437,11 @@ def comment_on_specific_media(media_url, comment_type: str = "default"):
if comment_text: if comment_text:
try: try:
cl.media_comment(media.pk, comment_text) cl.media_comment(media.pk, comment_text)
L.DEBUG(f"Commented on specific media: https://instagram.com/p/{media.pk}/") logger.debug(f"Commented on specific media: https://instagram.com/p/{media.pk}/")
except instagrapi.exceptions.FeedbackRequired as e: except instagrapi.exceptions.FeedbackRequired as e:
L.DEBUG(f"Failed to comment on specific media: https://instagram.com/p/{media.pk}/ due to error: {str(e)}") logger.debug(f"Failed to comment on specific media: https://instagram.com/p/{media.pk}/ due to error: {str(e)}")
else: else:
L.DEBUG(f"Failed to generate comment for specific media: https://instagram.com/p/{media.pk}/") logger.debug(f"Failed to generate comment for specific media: https://instagram.com/p/{media.pk}/")
@ -485,16 +486,16 @@ def update_prompt(workflow: dict, post: dict, positive: str, found_key=[None], p
if value == "API_PrePrompt": if value == "API_PrePrompt":
workflow[key] = post.get(value, "") + positive workflow[key] = post.get(value, "") + positive
L.DEBUG(f"Updated API_PrePrompt to: {workflow[key]}") logger.debug(f"Updated API_PrePrompt to: {workflow[key]}")
elif value == "API_StylePrompt": elif value == "API_StylePrompt":
workflow[key] = post.get(value, "") workflow[key] = post.get(value, "")
L.DEBUG(f"Updated API_StylePrompt to: {workflow[key]}") logger.debug(f"Updated API_StylePrompt to: {workflow[key]}")
elif value == "API_NegativePrompt": elif value == "API_NegativePrompt":
workflow[key] = post.get(value, "") workflow[key] = post.get(value, "")
L.DEBUG(f"Updated API_NegativePrompt to: {workflow[key]}") logger.debug(f"Updated API_NegativePrompt to: {workflow[key]}")
elif key == "seed" or key == "noise_seed": elif key == "seed" or key == "noise_seed":
workflow[key] = random.randint(1000000000000, 9999999999999) workflow[key] = random.randint(1000000000000, 9999999999999)
L.DEBUG(f"Updated seed to: {workflow[key]}") logger.debug(f"Updated seed to: {workflow[key]}")
elif (key == "width" or key == "max_width" or key == "scaled_width" or key == "side_length") and (value == 1023 or value == 1025): elif (key == "width" or key == "max_width" or key == "scaled_width" or key == "side_length") and (value == 1023 or value == 1025):
# workflow[key] = post.get(value, "") # workflow[key] = post.get(value, "")
workflow[key] = post.get("width", 1024) workflow[key] = post.get("width", 1024)
@ -502,7 +503,7 @@ def update_prompt(workflow: dict, post: dict, positive: str, found_key=[None], p
# workflow[key] = post.get(value, "") # workflow[key] = post.get(value, "")
workflow[key] = post.get("height", 1024) workflow[key] = post.get("height", 1024)
except Exception as e: except Exception as e:
L.DEBUG(f"Error in update_prompt at path {' -> '.join(path)}: {e}") logger.debug(f"Error in update_prompt at path {' -> '.join(path)}: {e}")
raise raise
return found_key[0] return found_key[0]
@ -527,22 +528,22 @@ def update_prompt_custom(workflow: dict, API_PrePrompt: str, API_StylePrompt: st
if value == "API_PrePrompt": if value == "API_PrePrompt":
workflow[key] = API_PrePrompt workflow[key] = API_PrePrompt
L.DEBUG(f"Updated API_PrePrompt to: {workflow[key]}") logger.debug(f"Updated API_PrePrompt to: {workflow[key]}")
elif value == "API_StylePrompt": elif value == "API_StylePrompt":
workflow[key] = API_StylePrompt workflow[key] = API_StylePrompt
L.DEBUG(f"Updated API_StylePrompt to: {workflow[key]}") logger.debug(f"Updated API_StylePrompt to: {workflow[key]}")
elif value == "API_NegativePrompt": elif value == "API_NegativePrompt":
workflow[key] = API_NegativePrompt workflow[key] = API_NegativePrompt
L.DEBUG(f"Updated API_NegativePrompt to: {workflow[key]}") logger.debug(f"Updated API_NegativePrompt to: {workflow[key]}")
elif key == "seed" or key == "noise_seed": elif key == "seed" or key == "noise_seed":
workflow[key] = random.randint(1000000000000, 9999999999999) workflow[key] = random.randint(1000000000000, 9999999999999)
L.DEBUG(f"Updated seed to: {workflow[key]}") logger.debug(f"Updated seed to: {workflow[key]}")
elif (key == "width" or key == "max_width" or key == "scaled_width") and (value == 1023 or value == 1025): elif (key == "width" or key == "max_width" or key == "scaled_width") and (value == 1023 or value == 1025):
workflow[key] = 1024 workflow[key] = 1024
elif (key == "dimension" or key == "height" or key == "max_height" or key == "scaled_height") and (value == 1023 or value == 1025): elif (key == "dimension" or key == "height" or key == "max_height" or key == "scaled_height") and (value == 1023 or value == 1025):
workflow[key] = 1024 workflow[key] = 1024
except Exception as e: except Exception as e:
L.DEBUG(f"Error in update_prompt_custom at path {' -> '.join(path)}: {e}") logger.debug(f"Error in update_prompt_custom at path {' -> '.join(path)}: {e}")
raise raise
return found_key[0] return found_key[0]
@ -582,14 +583,14 @@ def poll_status(prompt_id):
elapsed_time = int(time.time() - start_time) # Calculate elapsed time in seconds elapsed_time = int(time.time() - start_time) # Calculate elapsed time in seconds
status_response = requests.get(f"{COMFYUI_URL}/history/{prompt_id}") status_response = requests.get(f"{COMFYUI_URL}/history/{prompt_id}")
# Use \r to return to the start of the line, and end='' to prevent newline # Use \r to return to the start of the line, and end='' to prevent newline
L.DEBUG(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds", end='') logger.debug(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds", end='')
if status_response.status_code != 200: if status_response.status_code != 200:
raise Exception("Failed to get job status") raise Exception("Failed to get job status")
status_data = status_response.json() status_data = status_response.json()
job_data = status_data.get(prompt_id, {}) job_data = status_data.get(prompt_id, {})
if job_data.get("status", {}).get("completed", False): if job_data.get("status", {}).get("completed", False):
L.DEBUG() logger.debug()
L.DEBUG(f"{prompt_id} completed in {elapsed_time} seconds.") logger.debug(f"{prompt_id} completed in {elapsed_time} seconds.")
return job_data return job_data
time.sleep(1) time.sleep(1)
@ -600,14 +601,14 @@ def poll_status(prompt_id):
elapsed_time = int(time.time() - start_time) # Calculate elapsed time in seconds elapsed_time = int(time.time() - start_time) # Calculate elapsed time in seconds
status_response = requests.get(f"{COMFYUI_URL}/history/{prompt_id}") status_response = requests.get(f"{COMFYUI_URL}/history/{prompt_id}")
# Use \r to return to the start of the line, and end='' to prevent newline # Use \r to return to the start of the line, and end='' to prevent newline
L.DEBUG(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds", end='') logger.debug(f"\rGenerating {prompt_id}. Elapsed time: {elapsed_time} seconds", end='')
if status_response.status_code != 200: if status_response.status_code != 200:
raise Exception("Failed to get job status") raise Exception("Failed to get job status")
status_data = status_response.json() status_data = status_response.json()
job_data = status_data.get(prompt_id, {}) job_data = status_data.get(prompt_id, {})
if job_data.get("status", {}).get("completed", False): if job_data.get("status", {}).get("completed", False):
L.DEBUG() logger.debug()
L.DEBUG(f"{prompt_id} completed in {elapsed_time} seconds.") logger.debug(f"{prompt_id} completed in {elapsed_time} seconds.")
return job_data return job_data
time.sleep(1) time.sleep(1)
@ -618,12 +619,12 @@ def poll_status(prompt_id):
def load_post(chosen_post: str = "default"): def load_post(chosen_post: str = "default"):
if chosen_post in PROFILE_CONFIG['posts']: if chosen_post in PROFILE_CONFIG['posts']:
post = PROFILE_CONFIG['posts'][chosen_post] post = PROFILE_CONFIG['posts'][chosen_post]
L.DEBUG(f"Loaded post for {chosen_post}") logger.debug(f"Loaded post for {chosen_post}")
else: else:
L.DEBUG(f"Unable to load post for {chosen_post}. Choosing a default post.") logger.debug(f"Unable to load post for {chosen_post}. Choosing a default post.")
chosen_post = choose_post(PROFILE_CONFIG['posts']) chosen_post = choose_random_post(PROFILE_CONFIG['posts'])
post = PROFILE_CONFIG['posts'][chosen_post] post = PROFILE_CONFIG['posts'][chosen_post]
L.DEBUG(f"Defaulted to {chosen_post}") logger.debug(f"Defaulted to {chosen_post}")
return post return post
@ -633,18 +634,18 @@ def handle_image_workflow(chosen_post=None):
or posting to Instagram based on the local flag. or posting to Instagram based on the local flag.
""" """
if chosen_post is None: if chosen_post is None:
chosen_post = choose_post(PROFILE_CONFIG['posts']) chosen_post = choose_random_post(PROFILE_CONFIG['posts'])
post = load_post(chosen_post) post = load_post(chosen_post)
workflow_name = args.workflow if args.workflow else random.choice(post['workflows']) workflow_name = args.workflow if args.workflow else random.choice(post['workflows'])
L.DEBUG(f"Workflow name: {workflow_name}") logger.debug(f"Workflow name: {workflow_name}")
L.DEBUG(f"Generating image concept for {chosen_post} and {workflow_name} now.") logger.debug(f"Generating image concept for {chosen_post} and {workflow_name} now.")
image_concept = query_ollama(llmPrompt = post['llmPrompt'], max_tokens = 180) if args.local or not args.openai else query_gpt4(llmPrompt = post['llmPrompt'], max_tokens = 180) image_concept = query_ollama(llmPrompt = post['llmPrompt'], max_tokens = 180) if args.local or not args.openai else query_gpt4(llmPrompt = post['llmPrompt'], max_tokens = 180)
L.DEBUG(f"Image concept for {chosen_post}: {image_concept}") logger.debug(f"Image concept for {chosen_post}: {image_concept}")
workflow_data = None workflow_data = None
@ -658,9 +659,9 @@ def handle_image_workflow(chosen_post=None):
jpg_file_path = image_gen(image_concept, "dall-e-3") jpg_file_path = image_gen(image_concept, "dall-e-3")
else: else:
saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept) saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept)
L.DEBUG(f"Saved file key: {saved_file_key}") logger.debug(f"Saved file key: {saved_file_key}")
prompt_id = queue_prompt(workflow_data) prompt_id = queue_prompt(workflow_data)
L.DEBUG(f"Prompt ID: {prompt_id}") logger.debug(f"Prompt ID: {prompt_id}")
status_data = poll_status(prompt_id) status_data = poll_status(prompt_id)
image_data = get_image(status_data, saved_file_key) image_data = get_image(status_data, saved_file_key)
if chosen_post == "landscape": if chosen_post == "landscape":
@ -699,17 +700,17 @@ def handle_custom_image(custom_post: str):
system_msg = "You are a helpful AI who assists in generating prompts that will be used to generate highly realistic images. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words." system_msg = "You are a helpful AI who assists in generating prompts that will be used to generate highly realistic images. Always use the most visually descriptive terms possible, and avoid any vague or abstract concepts. Do not include any words or descriptions based on other senses or emotions. Strive to show rather than tell. Space is limited, so be efficient with your words."
image_concept = query_ollama(system_msg=system_msg, user_msg=custom_post, max_tokens = 180) if args.local or not args.openai else query_gpt4(system_msg=system_msg, user_msg=custom_post, max_tokens = 180) image_concept = query_ollama(system_msg=system_msg, user_msg=custom_post, max_tokens = 180) if args.local or not args.openai else query_gpt4(system_msg=system_msg, user_msg=custom_post, max_tokens = 180)
L.DEBUG(f"Image concept: {image_concept}") logger.debug(f"Image concept: {image_concept}")
if args.dalle and not args.local: if args.dalle and not args.local:
jpg_file_path = image_gen(image_concept, "dall-e-3") jpg_file_path = image_gen(image_concept, "dall-e-3")
else: else:
saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept) saved_file_key = update_prompt(workflow=workflow_data, post=post, positive=image_concept)
L.DEBUG(f"Saved file key: {saved_file_key}") logger.debug(f"Saved file key: {saved_file_key}")
prompt_id = queue_prompt(workflow_data) prompt_id = queue_prompt(workflow_data)
L.DEBUG(f"Prompt ID: {prompt_id}") logger.debug(f"Prompt ID: {prompt_id}")
status_data = poll_status(prompt_id) status_data = poll_status(prompt_id)
image_data = get_image(status_data, saved_file_key) image_data = get_image(status_data, saved_file_key)
@ -728,7 +729,7 @@ def image_aftergen(jpg_file_path: str, chosen_post: str = None, post: Dict = Non
if chosen_post and not prompt: if chosen_post and not prompt:
prompt = PROFILE_CONFIG['posts'][chosen_post]['Vision_Prompt'] prompt = PROFILE_CONFIG['posts'][chosen_post]['Vision_Prompt']
encoded_string = encode_image_to_base64(jpg_file_path) encoded_string = encode_image_to_base64(jpg_file_path)
L.DEBUG(f"Image successfully encoded from {jpg_file_path}") logger.debug(f"Image successfully encoded from {jpg_file_path}")
instagram_description = llava(encoded_string, prompt) if args.local or args.llava or not args.openai else gpt4v(encoded_string, prompt, 150) instagram_description = llava(encoded_string, prompt) if args.local or args.llava or not args.openai else gpt4v(encoded_string, prompt, 150)
instagram_description = re.sub(r'^["\'](.*)["\']$', r'\1', instagram_description) instagram_description = re.sub(r'^["\'](.*)["\']$', r'\1', instagram_description)
@ -759,27 +760,27 @@ Tags: {', '.join(ghost_tags)}
with open(markdown_filename, "w") as md_file: with open(markdown_filename, "w") as md_file:
md_file.write(markdown_content) md_file.write(markdown_content)
L.DEBUG(f"Markdown file created at {markdown_filename}") logger.debug(f"Markdown file created at {markdown_filename}")
if args.wallpaper: if args.wallpaper:
change_wallpaper(jpg_file_path) change_wallpaper(jpg_file_path)
L.DEBUG(f"Wallpaper changed.") logger.debug(f"Wallpaper changed.")
if not args.local: if not args.local:
ig_footer = "" ig_footer = ""
if not args.noig: if not args.noig:
post_url = upload_photo(jpg_file_path, instagram_description) post_url = upload_photo(jpg_file_path, instagram_description)
L.DEBUG(f"Image posted at {post_url}") logger.debug(f"Image posted at {post_url}")
ig_footer = f"\n<a href=\"{post_url}\">Instagram link</a>" ig_footer = f"\n<a href=\"{post_url}\">Instagram link</a>"
if not args.noghost: if not args.noghost:
ghost_text = f"{instagram_description}" ghost_text = f"{instagram_description}"
ghost_url = post_to_ghost(img_title, jpg_file_path, ghost_text, ghost_tags) ghost_url = post_to_ghost(img_title, jpg_file_path, ghost_text, ghost_tags)
L.DEBUG(f"Ghost post: {ghost_url}\n{ig_footer}") logger.debug(f"Ghost post: {ghost_url}\n{ig_footer}")
def choose_post(posts): def choose_random_post(posts):
total_frequency = sum(posts[post_type]['frequency'] for post_type in posts) total_frequency = sum(posts[post_type]['frequency'] for post_type in posts)
random_choice = random.randint(1, total_frequency) random_choice = random.randint(1, total_frequency)
current_sum = 0 current_sum = 0
@ -800,8 +801,6 @@ def load_json(json_payload, workflow):
raise ValueError("No valid input provided.") raise ValueError("No valid input provided.")
def save_as_jpg(image_data, prompt_id, chosen_post:str = None, max_size=2160, quality=80): def save_as_jpg(image_data, prompt_id, chosen_post:str = None, max_size=2160, quality=80):
chosen_post = chosen_post if chosen_post else "custom" chosen_post = chosen_post if chosen_post else "custom"
filename_png = f"{prompt_id}.png" filename_png = f"{prompt_id}.png"
@ -836,121 +835,19 @@ def save_as_jpg(image_data, prompt_id, chosen_post:str = None, max_size=2160, qu
return new_file_path return new_file_path
except Exception as e: except Exception as e:
L.DEBUG(f"Error processing image: {e}") logger.debug(f"Error processing image: {e}")
return None return None
def upload_photo(path, caption, title: str=None): def upload_photo(path, caption, title: str=None):
L.DEBUG(f"Uploading photo from {path}...") logger.debug(f"Uploading photo from {path}...")
media = cl.photo_upload(path, caption) media = cl.photo_upload(path, caption)
post_url = f"https://www.instagram.com/p/{media.code}/" post_url = f"https://www.instagram.com/p/{media.code}/"
return post_url return post_url
def format_duration(seconds):
    """Render a duration given in seconds as a short human-readable label.

    Durations under 120 seconds are shown as whole seconds, durations under
    6400 seconds as whole minutes, and anything else as hours with two
    decimal places.
    """
    # Guard clauses are checked in the same order as the thresholds grow.
    if seconds < 120:
        whole_seconds = int(seconds)
        return f"{whole_seconds} sec"
    if seconds < 6400:
        whole_minutes = int(seconds // 60)
        return f"{whole_minutes} min"
    hours = seconds / 3600
    return f"{hours:.2f} hr"
########################
### HELPER FUNCTIONS ###
########################
import subprocess
def change_wallpaper(image_path):
    """Ask Finder (via ``osascript``) to use ``image_path`` as the desktop picture.

    Args:
        image_path: Path of the image file; converted with ``str()`` before
            being embedded in the AppleScript snippet.

    The command is passed to ``subprocess.run`` as an argument list rather
    than a shell string, so characters such as quotes, ``$`` or backticks in
    the path are never interpreted by a shell. The exit status is not
    checked; a failed change is ignored.
    """
    # Backslash and double quote are the two characters that would terminate
    # or alter the AppleScript string literal the path is placed in.
    escaped_path = str(image_path).replace('\\', '\\\\').replace('"', '\\"')
    script = f'tell application "Finder" to set desktop picture to POSIX file "{escaped_path}"'
    try:
        subprocess.run(["osascript", "-e", script])
    except FileNotFoundError:
        # No osascript binary on this machine. The previous shell-based call
        # did not raise in that situation either, so stay best-effort.
        pass
def sleep(seconds):
    """Pause for a randomized time near ``seconds``.

    Delegates to ``sleepupto`` with a lower bound of 66% and an upper bound
    of 150% of the requested value.
    """
    shortest = seconds*0.66
    longest = seconds*1.5
    sleepupto(shortest, longest)
def sleepupto(min_seconds, max_seconds=None):
    """Sleep for a random interval while showing a tqdm progress bar.

    Args:
        min_seconds: Lower bound of the interval when ``max_seconds`` is
            given; otherwise the upper bound, with 0 as the lower bound.
        max_seconds: Optional upper bound of the interval.

    The interval is drawn with ``random.uniform`` and slept in steps of at
    most one second so the bar can advance between steps.
    """
    if max_seconds is None:
        low, high = 0, min_seconds
    else:
        low, high = min_seconds, max_seconds
    interval = random.uniform(low, high)

    started = time.time()
    deadline = started + interval
    with tqdm(total=interval, desc=f"Sleeping for {format_duration(interval)}", unit=" sec", ncols=75, bar_format='{desc}: {bar} {remaining}') as pbar:
        now = time.time()
        while not (now - started >= interval):
            # Never sleep past the end of the interval.
            step = min(1, interval - (now - started))
            time.sleep(step)
            pbar.update(step)
            # The remaining time shown is the value measured before this step's sleep.
            pbar.set_postfix_str(f"{format_duration(deadline - now)} remaining")
            now = time.time()
########################
### GHOST FUNCTIONS ###
########################
def generate_jwt_token():
    """Create a signed JWT from the ``GHOST_API_KEY`` module constant.

    ``GHOST_API_KEY`` is split on ``:`` into a key id and a hex-encoded
    secret. The token is signed with HS256 using the decoded secret, carries
    the key id in its ``kid`` header, and expires five minutes after issue.

    Returns:
        The token as a ``str``.
    """
    key_id, key_secret = GHOST_API_KEY.split(':')
    issued_at = int(date.now().timestamp())
    claims = {
        'iat': issued_at,
        'exp': issued_at + 5 * 60,  # valid for five minutes
        'aud': '/admin/',
    }
    signed = jwt.encode(claims, bytes.fromhex(key_secret), algorithm='HS256', headers={'kid': key_id})
    # jwt.encode presumably returns bytes on some library versions and str on
    # others; normalize to str either way.
    if isinstance(signed, bytes):
        return signed.decode('utf-8')
    return signed
def post_to_ghost(title, image_path, html_content, ghost_tags):
    """Upload an image to Ghost and publish a post that embeds it.

    Args:
        title: Title of the new post.
        image_path: Local path of the image file to upload.
        html_content: HTML placed after the image inside the post body.
        ghost_tags: Value sent as the post's ``tags`` field.

    Returns:
        The ``url`` of the first post in the create-post response.

    Raises:
        Whatever ``raise_for_status()`` raises when either HTTP request
        comes back unsuccessful.
    """
    auth_headers = {'Authorization': f'Ghost {generate_jwt_token()}'}

    # Step 1: send the image file and read back where Ghost stored it.
    with open(image_path, 'rb') as image_file:
        upload = {'file': (os.path.basename(image_path), image_file, 'image/jpg')}
        upload_response = requests.post(f"{GHOST_API_URL}/images/upload/", headers=auth_headers, files=upload)
    upload_response.raise_for_status()
    hosted_image_url = upload_response.json()['images'][0]['url']

    # Step 2: wrap the image and the caller's HTML in a single html card.
    card_html = f'<img src="{hosted_image_url}" alt="Image"/><hr/> {html_content}'
    document = json.dumps({
        "version": "0.3.1",
        "atoms": [],
        "cards": [["html", {"cardName": "html", "html": card_html}]],
        "markups": [],
        "sections": [[10, 0]]
    })
    payload = {
        'posts': [{
            'title': title,
            'mobiledoc': document,
            'status': 'published',
            'tags': ghost_tags
        }]
    }

    # Step 3: create the post and hand back its public URL.
    publish_response = requests.post(f"{GHOST_API_URL}/posts/", json=payload, headers=auth_headers)
    publish_response.raise_for_status()
    return publish_response.json()['posts'][0]['url']
######################################################## ########################################################
@ig.post("/ig/flow") @ig.post("/ig/flow")
async def ig_flow_endpoint(new_session: bool = False): async def ig_flow_endpoint(new_session: bool = False):
current_unix_time = int(date.now().timestamp()) current_unix_time = int(date.now().timestamp())
@ -958,16 +855,16 @@ async def ig_flow_endpoint(new_session: bool = False):
time_remaining = 30 - (time_since_rollover % 30) time_remaining = 30 - (time_since_rollover % 30)
if time_remaining < 4: if time_remaining < 4:
L.DEBUG("Too close to end of TOTP counter. Waiting.") logger.debug("Too close to end of TOTP counter. Waiting.")
sleepupto(5, 5) sleepupto(5, 5)
if not new_session and os.path.exists(IG_SESSION_PATH): if not new_session and os.path.exists(IG_SESSION_PATH):
cl.load_settings(IG_SESSION_PATH) cl.load_settings(IG_SESSION_PATH)
L.DEBUG("Loaded past session.") logger.debug("Loaded past session.")
elif new_session and cl.login(IG_USERNAME, IG_PASSWORD, verification_code=TOTP.now()): elif new_session and cl.login(IG_USERNAME, IG_PASSWORD, verification_code=TOTP.now()):
cl.dump_settings(IG_SESSION_PATH) cl.dump_settings(IG_SESSION_PATH)
L.DEBUG("Logged in and saved new session.") logger.debug("Logged in and saved new session.")
else: else:
raise Exception(f"Failed to login as {IG_USERNAME}.") raise Exception(f"Failed to login as {IG_USERNAME}.")

View file

@ -33,7 +33,7 @@ from sijapi.routers.llm import query_ollama
from sijapi import API, L, COMFYUI_URL, COMFYUI_OUTPUT_DIR, IMG_CONFIG_PATH, IMG_DIR, IMG_WORKFLOWS_DIR from sijapi import API, L, COMFYUI_URL, COMFYUI_OUTPUT_DIR, IMG_CONFIG_PATH, IMG_DIR, IMG_WORKFLOWS_DIR
img = APIRouter() img = APIRouter()
logger = L.get_module_logger("img")
CLIENT_ID = str(uuid.uuid4()) CLIENT_ID = str(uuid.uuid4())
@img.post("/img") @img.post("/img")
@ -79,12 +79,12 @@ async def workflow(prompt: str, scene: str = None, size: str = None, earlyout: s
scene_workflow = random.choice(scene_data['workflows']) scene_workflow = random.choice(scene_data['workflows'])
if size: if size:
L.DEBUG(f"Specified size: {size}") logger.debug(f"Specified size: {size}")
size = size if size else scene_workflow.get('size', '1024x1024') size = size if size else scene_workflow.get('size', '1024x1024')
width, height = map(int, size.split('x')) width, height = map(int, size.split('x'))
L.DEBUG(f"Parsed width: {width}; parsed height: {height}") logger.debug(f"Parsed width: {width}; parsed height: {height}")
workflow_path = Path(IMG_WORKFLOWS_DIR) / scene_workflow['workflow'] workflow_path = Path(IMG_WORKFLOWS_DIR) / scene_workflow['workflow']
workflow_data = json.loads(workflow_path.read_text()) workflow_data = json.loads(workflow_path.read_text())
@ -108,12 +108,12 @@ async def workflow(prompt: str, scene: str = None, size: str = None, earlyout: s
if earlyout: if earlyout:
asyncio.create_task(generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path)) asyncio.create_task(generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path))
L.DEBUG(f"Returning {destination_path}") logger.debug(f"Returning {destination_path}")
return destination_path return destination_path
else: else:
await generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path) await generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path)
L.DEBUG(f"Returning {destination_path}") logger.debug(f"Returning {destination_path}")
return destination_path return destination_path
@ -124,7 +124,7 @@ async def generate_and_save_image(prompt_id, saved_file_key, max_size, destinati
jpg_file_path = await save_as_jpg(image_data, prompt_id, quality=90, max_size=max_size, destination_path=destination_path) jpg_file_path = await save_as_jpg(image_data, prompt_id, quality=90, max_size=max_size, destination_path=destination_path)
if Path(jpg_file_path) != Path(destination_path): if Path(jpg_file_path) != Path(destination_path):
L.ERR(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}") logger.error(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}")
except Exception as e: except Exception as e:
print(f"Error in generate_and_save_image: {e}") print(f"Error in generate_and_save_image: {e}")
@ -216,11 +216,11 @@ def set_presets(workflow_data, preset_values):
if 'inputs' in workflow_data.get(preset_node, {}): if 'inputs' in workflow_data.get(preset_node, {}):
workflow_data[preset_node]['inputs'][preset_key] = preset_value workflow_data[preset_node]['inputs'][preset_key] = preset_value
else: else:
L.DEBUG("Node not found in workflow_data") logger.debug("Node not found in workflow_data")
else: else:
L.DEBUG("Required data missing in preset_values") logger.debug("Required data missing in preset_values")
else: else:
L.DEBUG("No preset_values found") logger.debug("No preset_values found")
def get_return_path(destination_path): def get_return_path(destination_path):
@ -235,7 +235,7 @@ def get_scene(scene):
IMG_CONFIG = yaml.safe_load(IMG_CONFIG_file) IMG_CONFIG = yaml.safe_load(IMG_CONFIG_file)
for scene_data in IMG_CONFIG['scenes']: for scene_data in IMG_CONFIG['scenes']:
if scene_data['scene'] == scene: if scene_data['scene'] == scene:
L.DEBUG(f"Found scene for \"{scene}\".") logger.debug(f"Found scene for \"{scene}\".")
return scene_data return scene_data
return None return None
@ -254,11 +254,11 @@ def get_matching_scene(prompt):
max_count = count max_count = count
scene_data = sc scene_data = sc
if scene_data: if scene_data:
L.DEBUG(f"Found better-matching scene: the prompt contains {max_count} words that match triggers for {scene_data.get('name')}!") logger.debug(f"Found better-matching scene: the prompt contains {max_count} words that match triggers for {scene_data.get('name')}!")
if scene_data: if scene_data:
return scene_data return scene_data
else: else:
L.DEBUG(f"No matching scenes found, falling back to default scene.") logger.debug(f"No matching scenes found, falling back to default scene.")
return IMG_CONFIG['scenes'][0] return IMG_CONFIG['scenes'][0]
@ -326,10 +326,10 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0):
# shareable_link = f"https://{PHOTOPRISM_URL}/p/{photo_uuid}" # shareable_link = f"https://{PHOTOPRISM_URL}/p/{photo_uuid}"
# return shareable_link # return shareable_link
# else: # else:
# L.ERR("Could not find the uploaded photo details.") # logger.error("Could not find the uploaded photo details.")
# return None # return None
# except Exception as e: # except Exception as e:
# L.ERR(f"Error in upload_and_get_shareable_link: {e}") # logger.error(f"Error in upload_and_get_shareable_link: {e}")
# return None # return None
@ -436,13 +436,13 @@ Even more important, it finds and returns the key to the filepath where the file
workflow[key] = random.randint(1000000000000, 9999999999999) workflow[key] = random.randint(1000000000000, 9999999999999)
elif key in ["width", "max_width", "scaled_width", "height", "max_height", "scaled_height", "side_length", "size", "value", "dimension", "dimensions", "long", "long_side", "short", "short_side", "length"]: elif key in ["width", "max_width", "scaled_width", "height", "max_height", "scaled_height", "side_length", "size", "value", "dimension", "dimensions", "long", "long_side", "short", "short_side", "length"]:
L.DEBUG(f"Got a hit for a dimension: {key} {value}") logger.debug(f"Got a hit for a dimension: {key} {value}")
if value == 1023: if value == 1023:
workflow[key] = post.get("width", 1024) workflow[key] = post.get("width", 1024)
L.DEBUG(f"Set {key} to {workflow[key]}.") logger.debug(f"Set {key} to {workflow[key]}.")
elif value == 1025: elif value == 1025:
workflow[key] = post.get("height", 1024) workflow[key] = post.get("height", 1024)
L.DEBUG(f"Set {key} to {workflow[key]}.") logger.debug(f"Set {key} to {workflow[key]}.")
update_recursive(workflow) update_recursive(workflow)
return found_key[0] return found_key[0]

View file

@ -26,13 +26,13 @@ import tempfile
import shutil import shutil
import html2text import html2text
import markdown import markdown
from sijapi import L, Dir, API, LLM, TTS, Obsidian from sijapi import L, LLM_SYS_MSG, DEFAULT_LLM, DEFAULT_VISION, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, OPENAI_API_KEY, DEFAULT_VOICE, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MODEL
from sijapi.utilities import convert_to_unix_time, sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension from sijapi.utilities import convert_to_unix_time, sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension
from sijapi.routers import tts from sijapi.routers import tts
from sijapi.routers.asr import transcribe_audio from sijapi.routers.asr import transcribe_audio
llm = APIRouter() llm = APIRouter()
logger = L.get_module_logger("llm")
# Initialize chromadb client # Initialize chromadb client
client = chromadb.Client() client = chromadb.Client()
@ -49,7 +49,7 @@ def read_markdown_files(folder: Path):
return documents, file_paths return documents, file_paths
# Read markdown files and generate embeddings # Read markdown files and generate embeddings
documents, file_paths = read_markdown_files(Obsidian.docs) documents, file_paths = read_markdown_files(DOC_DIR)
for i, doc in enumerate(documents): for i, doc in enumerate(documents):
response = ollama.embeddings(model="mxbai-embed-large", prompt=doc) response = ollama.embeddings(model="mxbai-embed-large", prompt=doc)
embedding = response["embedding"] embedding = response["embedding"]
@ -83,25 +83,25 @@ async def generate_response(prompt: str):
return {"response": output['response']} return {"response": output['response']}
async def query_ollama(usr: str, sys: str = LLM.chat.sys, model: str = LLM.chat.model, max_tokens: int = LLM.chat.max_tokens): async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, model: str = DEFAULT_LLM, max_tokens: int = 200):
messages = [{"role": "system", "content": sys}, messages = [{"role": "system", "content": sys},
{"role": "user", "content": usr}] {"role": "user", "content": usr}]
LLM = Ollama() LLM = Ollama()
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens}) response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
L.DEBUG(response) logger.debug(response)
if "message" in response: if "message" in response:
if "content" in response["message"]: if "content" in response["message"]:
content = response["message"]["content"] content = response["message"]["content"]
return content return content
else: else:
L.DEBUG("No choices found in response") logger.debug("No choices found in response")
return None return None
async def query_ollama_multishot( async def query_ollama_multishot(
message_list: List[str], message_list: List[str],
sys: str = LLM.chat.sys, sys: str = LLM_SYS_MSG,
model: str = LLM.chat.model, model: str = DEFAULT_LLM,
max_tokens: int = 200 max_tokens: int = 200
): ):
if len(message_list) % 2 == 0: if len(message_list) % 2 == 0:
@ -116,12 +116,12 @@ async def query_ollama_multishot(
LLM = Ollama() LLM = Ollama()
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens}) response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
L.DEBUG(response) logger.debug(response)
if "message" in response and "content" in response["message"]: if "message" in response and "content" in response["message"]:
return response["message"]["content"] return response["message"]["content"]
else: else:
L.DEBUG("No content found in response") logger.debug("No content found in response")
return None return None
@ -130,7 +130,7 @@ async def chat_completions(request: Request):
body = await request.json() body = await request.json()
timestamp = dt_datetime.now().strftime("%Y%m%d_%H%M%S%f") timestamp = dt_datetime.now().strftime("%Y%m%d_%H%M%S%f")
filename = Dir.logs.requests / f"request_{timestamp}.json" filename = REQUESTS_DIR / f"request_{timestamp}.json"
async with aiofiles.open(filename, mode='w') as file: async with aiofiles.open(filename, mode='w') as file:
await file.write(json.dumps(body, indent=4)) await file.write(json.dumps(body, indent=4))
@ -140,21 +140,21 @@ async def chat_completions(request: Request):
raise HTTPException(status_code=400, detail="Message data is required in the request body.") raise HTTPException(status_code=400, detail="Message data is required in the request body.")
requested_model = body.get('model', 'default-model') requested_model = body.get('model', 'default-model')
L.DEBUG(f"Requested model: {requested_model}") logger.debug(f"Requested model: {requested_model}")
stream = body.get('stream') stream = body.get('stream')
token_limit = body.get('max_tokens') or body.get('num_predict') token_limit = body.get('max_tokens') or body.get('num_predict')
# Check if the most recent message contains an image_url # Check if the most recent message contains an image_url
recent_message = messages[-1] recent_message = messages[-1]
if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')): if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')):
L.DEBUG("Processing as a vision request") logger.debug("Processing as a vision request")
model = "llava" model = "llava"
L.DEBUG(f"Using model: {model}") logger.debug(f"Using model: {model}")
return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json") return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json")
else: else:
L.DEBUG("Processing as a standard request") logger.debug("Processing as a standard request")
model = requested_model model = requested_model
L.DEBUG(f"Using model: {model}") logger.debug(f"Using model: {model}")
if stream: if stream:
return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json") return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json")
else: else:
@ -227,9 +227,9 @@ async def stream_messages_with_vision(message: dict, model: str, num_predict: in
def get_appropriate_model(requested_model): def get_appropriate_model(requested_model):
if requested_model == "gpt-4-vision-preview": if requested_model == "gpt-4-vision-preview":
return LLM.vision.model return DEFAULT_VISION
elif not is_model_available(requested_model): elif not is_model_available(requested_model):
return LLM.chat.model return DEFAULT_LLM
else: else:
return requested_model return requested_model
@ -279,17 +279,17 @@ async def generate_messages(messages: list, model: str = "llama3"):
def is_model_available(model_name): def is_model_available(model_name):
model_data = OllamaList() model_data = OllamaList()
available_models = [model['name'] for model in model_data['models']] available_models = [model['name'] for model in model_data['models']]
L.DEBUG(f"Available models: {available_models}") # Log using the configured LOGGER logger.debug(f"Available models: {available_models}") # Log using the configured LOGGER
matching_models = [model for model in available_models if model.startswith(model_name + ':') or model == model_name] matching_models = [model for model in available_models if model.startswith(model_name + ':') or model == model_name]
if len(matching_models) == 1: if len(matching_models) == 1:
L.DEBUG(f"Unique match found: {matching_models[0]}") logger.debug(f"Unique match found: {matching_models[0]}")
return True return True
elif len(matching_models) > 1: elif len(matching_models) > 1:
L.ERR(f"Ambiguous match found, models: {matching_models}") logger.error(f"Ambiguous match found, models: {matching_models}")
return True return True
else: else:
L.ERR(f"No match found for model: {model_name}") logger.error(f"No match found for model: {model_name}")
return False return False
@ -310,7 +310,7 @@ async def chat_completions_options(request: Request):
], ],
"created": int(time.time()), "created": int(time.time()),
"id": str(uuid.uuid4()), "id": str(uuid.uuid4()),
"model": LLM.chat.model, "model": DEFAULT_LLM,
"object": "chat.completion.chunk", "object": "chat.completion.chunk",
}, },
status_code=200, status_code=200,
@ -401,7 +401,7 @@ def query_gpt4(llmPrompt: List = [], system_msg: str = "", user_msg: str = "", m
{"role": "system", "content": system_msg}, {"role": "system", "content": system_msg},
{"role": "user", "content": user_msg} {"role": "user", "content": user_msg}
] ]
LLM = OpenAI(api_key=LLM.OPENAI_API_KEY) LLM = OpenAI(api_key=OPENAI_API_KEY)
response = LLM.chat.completions.create( response = LLM.chat.completions.create(
model="gpt-4", model="gpt-4",
messages=messages, messages=messages,
@ -412,12 +412,12 @@ def query_gpt4(llmPrompt: List = [], system_msg: str = "", user_msg: str = "", m
if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"): if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
return first_choice.message.content return first_choice.message.content
else: else:
L.DEBUG("No content attribute in the first choice's message") logger.debug("No content attribute in the first choice's message")
L.DEBUG(f"No content found in message string: {response.choices}") logger.debug(f"No content found in message string: {response.choices}")
L.DEBUG("Trying again!") logger.debug("Trying again!")
query_gpt4(messages, max_tokens) query_gpt4(messages, max_tokens)
else: else:
L.DEBUG(f"No content found in message string: {response}") logger.debug(f"No content found in message string: {response}")
return "" return ""
def llava(image_base64, prompt): def llava(image_base64, prompt):
@ -427,11 +427,11 @@ def llava(image_base64, prompt):
prompt = f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}", prompt = f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}",
images = [image_base64] images = [image_base64]
) )
L.DEBUG(response) logger.debug(response)
return "" if "pass" in response["response"].lower() else response["response"] return "" if "pass" in response["response"].lower() else response["response"]
def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150): def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150):
VISION_LLM = OpenAI(api_key=LLM.OPENAI_API_KEY) VISION_LLM = OpenAI(api_key=OPENAI_API_KEY)
response_1 = VISION_LLM.chat.completions.create( response_1 = VISION_LLM.chat.completions.create(
model="gpt-4-vision-preview", model="gpt-4-vision-preview",
messages=[ messages=[
@ -458,7 +458,7 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
comment_content = first_choice.message.content comment_content = first_choice.message.content
if "PASS" in comment_content: if "PASS" in comment_content:
return "" return ""
L.DEBUG(f"Generated comment: {comment_content}") logger.debug(f"Generated comment: {comment_content}")
response_2 = VISION_LLM.chat.completions.create( response_2 = VISION_LLM.chat.completions.create(
model="gpt-4-vision-preview", model="gpt-4-vision-preview",
@ -496,15 +496,15 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
first_choice = response_2.choices[0] first_choice = response_2.choices[0]
if first_choice.message and first_choice.message.content: if first_choice.message and first_choice.message.content:
final_content = first_choice.message.content final_content = first_choice.message.content
L.DEBUG(f"Generated comment: {final_content}") logger.debug(f"Generated comment: {final_content}")
if "PASS" in final_content: if "PASS" in final_content:
return "" return ""
else: else:
return final_content return final_content
L.DEBUG("Vision response did not contain expected data.") logger.debug("Vision response did not contain expected data.")
L.DEBUG(f"Vision response: {response_1}") logger.debug(f"Vision response: {response_1}")
asyncio.sleep(15) asyncio.sleep(15)
try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens) try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens)
@ -512,12 +512,12 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
@llm.get("/summarize") @llm.get("/summarize")
async def summarize_get(text: str = Form(None), instruction: str = Form(LLM.summary.instruct)): async def summarize_get(text: str = Form(None), instruction: str = Form(SUMMARY_INSTRUCT)):
summarized_text = await summarize_text(text, instruction) summarized_text = await summarize_text(text, instruction)
return summarized_text return summarized_text
@llm.post("/summarize") @llm.post("/summarize")
async def summarize_post(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), instruction: str = Form(LLM.summary.instruct)): async def summarize_post(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), instruction: str = Form(SUMMARY_INSTRUCT)):
text_content = text if text else await extract_text(file) text_content = text if text else await extract_text(file)
summarized_text = await summarize_text(text_content, instruction) summarized_text = await summarize_text(text_content, instruction)
return summarized_text return summarized_text
@ -526,10 +526,10 @@ async def summarize_post(file: Optional[UploadFile] = File(None), text: Optional
@llm.post("/speaksummary") @llm.post("/speaksummary")
async def summarize_tts_endpoint( async def summarize_tts_endpoint(
bg_tasks: BackgroundTasks, bg_tasks: BackgroundTasks,
instruction: str = Form(LLM.summary.instruct), instruction: str = Form(SUMMARY_INSTRUCT),
file: Optional[UploadFile] = File(None), file: Optional[UploadFile] = File(None),
text: Optional[str] = Form(None), text: Optional[str] = Form(None),
voice: Optional[str] = Form(TTS.xtts.voice), voice: Optional[str] = Form(DEFAULT_VOICE),
speed: Optional[float] = Form(1.2), speed: Optional[float] = Form(1.2),
podcast: Union[bool, str] = Form(False) podcast: Union[bool, str] = Form(False)
): ):
@ -562,7 +562,7 @@ async def summarize_tts_endpoint(
) )
except Exception as e: except Exception as e:
L.ERR(f"Error in summarize_tts_endpoint: {str(e)}") logger.error(f"Error in summarize_tts_endpoint: {str(e)}")
return JSONResponse( return JSONResponse(
status_code=400, status_code=400,
content={"error": str(e)} content={"error": str(e)}
@ -572,8 +572,8 @@ async def summarize_tts_endpoint(
async def summarize_tts( async def summarize_tts(
text: str, text: str,
instruction: str = LLM.summary.instruct, instruction: str = SUMMARY_INSTRUCT,
voice: Optional[str] = TTS.xtts.voice, voice: Optional[str] = DEFAULT_VOICE,
speed: float = 1.1, speed: float = 1.1,
podcast: bool = False, podcast: bool = False,
LLM: Ollama = None LLM: Ollama = None
@ -589,7 +589,7 @@ async def summarize_tts(
bg_tasks = BackgroundTasks() bg_tasks = BackgroundTasks()
model = await tts.get_model(voice) model = await tts.get_model(voice)
final_output_path = await tts.generate_speech(bg_tasks, summarized_text, voice, model=model, speed=speed, podcast=podcast, title=filename) final_output_path = await tts.generate_speech(bg_tasks, summarized_text, voice, model=model, speed=speed, podcast=podcast, title=filename)
L.DEBUG(f"summary_tts completed with final_output_path: {final_output_path}") logger.debug(f"summary_tts completed with final_output_path: {final_output_path}")
return final_output_path return final_output_path
@ -605,10 +605,10 @@ def split_text_into_chunks(text: str) -> List[str]:
sentences = re.split(r'(?<=[.!?])\s+', text) sentences = re.split(r'(?<=[.!?])\s+', text)
words = text.split() words = text.split()
total_words = len(words) total_words = len(words)
L.DEBUG(f"Total words: {total_words}. LLM.summary.chunk_size: {LLM.summary.chunk_size}. LLM.tpw: {LLM.tpw}.") logger.debug(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.")
max_words_per_chunk = int(LLM.summary.chunk_size / LLM.tpw) max_words_per_chunk = int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW)
L.DEBUG(f"Maximum words per chunk: {max_words_per_chunk}") logger.debug(f"Maximum words per chunk: {max_words_per_chunk}")
chunks = [] chunks = []
current_chunk = [] current_chunk = []
@ -628,23 +628,23 @@ def split_text_into_chunks(text: str) -> List[str]:
if current_chunk: if current_chunk:
chunks.append(' '.join(current_chunk)) chunks.append(' '.join(current_chunk))
L.DEBUG(f"Split text into {len(chunks)} chunks.") logger.debug(f"Split text into {len(chunks)} chunks.")
return chunks return chunks
def calculate_max_tokens(text: str) -> int: def calculate_max_tokens(text: str) -> int:
tokens_count = max(1, int(len(text.split()) * LLM.tpw)) # Ensure at least 1 tokens_count = max(1, int(len(text.split()) * SUMMARY_TPW)) # Ensure at least 1
return min(tokens_count // 4, LLM.summary.chunk_size) return min(tokens_count // 4, SUMMARY_CHUNK_SIZE)
async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_tasks: BackgroundTasks = None) -> str: async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_tasks: BackgroundTasks = None) -> str:
L.INFO(f"Attempting to extract text from file: {file}") logger.info(f"Attempting to extract text from file: {file}")
try: try:
if isinstance(file, UploadFile): if isinstance(file, UploadFile):
L.INFO("File is an UploadFile object") logger.info("File is an UploadFile object")
file_extension = os.path.splitext(file.filename)[1] file_extension = os.path.splitext(file.filename)[1]
temp_file_path = tempfile.mktemp(suffix=file_extension) temp_file_path = tempfile.mktemp(suffix=file_extension)
with open(temp_file_path, 'wb') as buffer: with open(temp_file_path, 'wb') as buffer:
@ -663,7 +663,7 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
_, file_ext = os.path.splitext(file_path) _, file_ext = os.path.splitext(file_path)
file_ext = file_ext.lower() file_ext = file_ext.lower()
L.INFO(f"File extension: {file_ext}") logger.info(f"File extension: {file_ext}")
if file_ext == '.pdf': if file_ext == '.pdf':
text_content = await extract_text_from_pdf(file_path) text_content = await extract_text_from_pdf(file_path)
@ -690,26 +690,26 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
return text_content return text_content
except Exception as e: except Exception as e:
L.ERR(f"Error extracting text: {str(e)}") logger.error(f"Error extracting text: {str(e)}")
raise ValueError(f"Error extracting text: {str(e)}") raise ValueError(f"Error extracting text: {str(e)}")
async def summarize_text(text: str, instruction: str = LLM.summary.instruct, length_override: int = None, length_quotient: float = LLM.summary.length_ratio, LLM: Ollama = None): async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_override: int = None, length_quotient: float = SUMMARY_LENGTH_RATIO, LLM: Ollama = None):
LLM = LLM if LLM else Ollama() LLM = LLM if LLM else Ollama()
chunked_text = split_text_into_chunks(text) chunked_text = split_text_into_chunks(text)
total_parts = len(chunked_text) total_parts = len(chunked_text)
L.DEBUG(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}") logger.debug(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}")
total_words_count = sum(len(chunk.split()) for chunk in chunked_text) total_words_count = sum(len(chunk.split()) for chunk in chunked_text)
L.DEBUG(f"Total words count: {total_words_count}") logger.debug(f"Total words count: {total_words_count}")
total_tokens_count = max(1, int(total_words_count * LLM.tpw)) total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW))
L.DEBUG(f"Total tokens count: {total_tokens_count}") logger.debug(f"Total tokens count: {total_tokens_count}")
total_summary_length = length_override if length_override else total_tokens_count // length_quotient total_summary_length = length_override if length_override else total_tokens_count // length_quotient
L.DEBUG(f"Total summary length: {total_summary_length}") logger.debug(f"Total summary length: {total_summary_length}")
corrected_total_summary_length = min(total_summary_length, LLM.summary.max_tokens) corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT)
L.DEBUG(f"Corrected total summary length: {corrected_total_summary_length}") logger.debug(f"Corrected total summary length: {corrected_total_summary_length}")
summaries = await asyncio.gather(*[ summaries = await asyncio.gather(*[
process_chunk(instruction, chunk, i+1, total_parts, LLM=LLM) process_chunk(instruction, chunk, i+1, total_parts, LLM=LLM)
@ -720,48 +720,48 @@ async def summarize_text(text: str, instruction: str = LLM.summary.instruct, len
summaries = [f"\n\n\nPART {i+1} of {total_parts}:\n\n{summary}" for i, summary in enumerate(summaries)] summaries = [f"\n\n\nPART {i+1} of {total_parts}:\n\n{summary}" for i, summary in enumerate(summaries)]
concatenated_summary = ' '.join(summaries) concatenated_summary = ' '.join(summaries)
L.DEBUG(f"Concatenated summary: {concatenated_summary}") logger.debug(f"Concatenated summary: {concatenated_summary}")
L.DEBUG(f"Concatenated summary length: {len(concatenated_summary.split())}") logger.debug(f"Concatenated summary length: {len(concatenated_summary.split())}")
if total_parts > 1: if total_parts > 1:
L.DEBUG(f"Processing the concatenated_summary to smooth the edges...") logger.debug(f"Processing the concatenated_summary to smooth the edges...")
concatenated_instruct = f"The following text consists of the concatenated {total_parts} summaries of {total_parts} parts of a single document that had to be split for processing. Reword it for clarity and flow as a single cohesive summary, understanding that it all relates to a single document, but that document likely consists of multiple parts potentially from multiple authors. Do not shorten it and do not omit content, simply smooth out the edges between the parts." concatenated_instruct = f"The following text consists of the concatenated {total_parts} summaries of {total_parts} parts of a single document that had to be split for processing. Reword it for clarity and flow as a single cohesive summary, understanding that it all relates to a single document, but that document likely consists of multiple parts potentially from multiple authors. Do not shorten it and do not omit content, simply smooth out the edges between the parts."
final_summary = await process_chunk(concatenated_instruct, concatenated_summary, 1, 1, length_ratio=1, LLM=LLM) final_summary = await process_chunk(concatenated_instruct, concatenated_summary, 1, 1, length_ratio=1, LLM=LLM)
L.DEBUG(f"Final summary length: {len(final_summary.split())}") logger.debug(f"Final summary length: {len(final_summary.split())}")
return final_summary return final_summary
else: else:
return concatenated_summary return concatenated_summary
async def process_chunk(instruction: str, text: str, part: int, total_parts: int, length_ratio: float = None, LLM: Ollama = None) -> str: async def process_chunk(instruction: str, text: str, part: int, total_parts: int, length_ratio: float = None, LLM: Ollama = None) -> str:
# L.DEBUG(f"Processing chunk: {text}") # logger.debug(f"Processing chunk: {text}")
LLM = LLM if LLM else Ollama() LLM = LLM if LLM else Ollama()
words_count = len(text.split()) words_count = len(text.split())
tokens_count = max(1, int(words_count * LLM.tpw)) tokens_count = max(1, int(words_count * SUMMARY_TPW))
summary_length_ratio = length_ratio if length_ratio else LLM.summary.length_ratio summary_length_ratio = length_ratio if length_ratio else SUMMARY_LENGTH_RATIO
max_tokens = min(tokens_count // summary_length_ratio, LLM.summary.chunk_size) max_tokens = min(tokens_count // summary_length_ratio, SUMMARY_CHUNK_SIZE)
max_tokens = max(max_tokens, LLM.summary.min_length) max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH)
L.DEBUG(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}") logger.debug(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}")
if part and total_parts > 1: if part and total_parts > 1:
prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}" prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}"
else: else:
prompt = f"{instruction}:\n\n{text}" prompt = f"{instruction}:\n\n{text}"
L.DEBUG(f"Starting LLM.generate for part {part} of {total_parts}") logger.debug(f"Starting LLM.generate for part {part} of {total_parts}")
response = await LLM.generate( response = await LLM.generate(
model=LLM.summary.model, model=SUMMARY_MODEL,
prompt=prompt, prompt=prompt,
stream=False, stream=False,
options={'num_predict': max_tokens, 'temperature': 0.5} options={'num_predict': max_tokens, 'temperature': 0.5}
) )
text_response = response['response'] text_response = response['response']
L.DEBUG(f"Completed LLM.generate for part {part} of {total_parts}") logger.debug(f"Completed LLM.generate for part {part} of {total_parts}")
L.DEBUG(f"Result: {text_response}") logger.debug(f"Result: {text_response}")
return text_response return text_response
async def title_and_summary(extracted_text: str): async def title_and_summary(extracted_text: str):

View file

@ -18,7 +18,7 @@ from sijapi.classes import Location
from sijapi.utilities import haversine from sijapi.utilities import haversine
loc = APIRouter() loc = APIRouter()
logger = L.get_module_logger("loc")
async def dt( async def dt(
date_time: Union[str, int, datetime], date_time: Union[str, int, datetime],
@ -28,11 +28,11 @@ async def dt(
# Convert integer (epoch time) to UTC datetime # Convert integer (epoch time) to UTC datetime
if isinstance(date_time, int): if isinstance(date_time, int):
date_time = datetime.utcfromtimestamp(date_time).replace(tzinfo=timezone.utc) date_time = datetime.utcfromtimestamp(date_time).replace(tzinfo=timezone.utc)
L.DEBUG(f"Converted epoch time {date_time} to UTC datetime object.") logger.debug(f"Converted epoch time {date_time} to UTC datetime object.")
# Convert string to datetime if necessary # Convert string to datetime if necessary
elif isinstance(date_time, str): elif isinstance(date_time, str):
date_time = dateutil_parse(date_time) date_time = dateutil_parse(date_time)
L.DEBUG(f"Converted string '{date_time}' to datetime object.") logger.debug(f"Converted string '{date_time}' to datetime object.")
if not isinstance(date_time, datetime): if not isinstance(date_time, datetime):
raise ValueError(f"Input must be a string, integer (epoch time), or datetime object. What we received: {date_time}, type {type(date_time)}") raise ValueError(f"Input must be a string, integer (epoch time), or datetime object. What we received: {date_time}, type {type(date_time)}")
@ -40,7 +40,7 @@ async def dt(
# Ensure the datetime is timezone-aware (UTC if not specified) # Ensure the datetime is timezone-aware (UTC if not specified)
if date_time.tzinfo is None: if date_time.tzinfo is None:
date_time = date_time.replace(tzinfo=timezone.utc) date_time = date_time.replace(tzinfo=timezone.utc)
L.DEBUG("Added UTC timezone to naive datetime.") logger.debug("Added UTC timezone to naive datetime.")
# Handle provided timezone # Handle provided timezone
if tz is not None: if tz is not None:
@ -48,12 +48,12 @@ async def dt(
if tz == "local": if tz == "local":
last_loc = await get_timezone_without_timezone(date_time) last_loc = await get_timezone_without_timezone(date_time)
tz = await GEO.tz_at(last_loc.latitude, last_loc.longitude) tz = await GEO.tz_at(last_loc.latitude, last_loc.longitude)
L.DEBUG(f"Using local timezone: {tz}") logger.debug(f"Using local timezone: {tz}")
else: else:
try: try:
tz = ZoneInfo(tz) tz = ZoneInfo(tz)
except Exception as e: except Exception as e:
L.ERR(f"Invalid timezone string '{tz}'. Error: {e}") logger.error(f"Invalid timezone string '{tz}'. Error: {e}")
raise ValueError(f"Invalid timezone string: {tz}") raise ValueError(f"Invalid timezone string: {tz}")
elif isinstance(tz, ZoneInfo): elif isinstance(tz, ZoneInfo):
pass # tz is already a ZoneInfo object pass # tz is already a ZoneInfo object
@ -62,14 +62,14 @@ async def dt(
# Convert to the provided or determined timezone # Convert to the provided or determined timezone
date_time = date_time.astimezone(tz) date_time = date_time.astimezone(tz)
L.DEBUG(f"Converted datetime to timezone: {tz}") logger.debug(f"Converted datetime to timezone: {tz}")
return date_time return date_time
except ValueError as e: except ValueError as e:
L.ERR(f"Error in dt: {e}") logger.error(f"Error in dt: {e}")
raise raise
except Exception as e: except Exception as e:
L.ERR(f"Unexpected error in dt: {e}") logger.error(f"Unexpected error in dt: {e}")
raise ValueError(f"Failed to process datetime: {e}") raise ValueError(f"Failed to process datetime: {e}")
@ -93,12 +93,12 @@ async def get_timezone_without_timezone(date_time):
async def get_last_location() -> Optional[Location]: async def get_last_location() -> Optional[Location]:
query_datetime = datetime.now(TZ) query_datetime = datetime.now(TZ)
L.DEBUG(f"Query_datetime: {query_datetime}") logger.debug(f"Query_datetime: {query_datetime}")
this_location = await fetch_last_location_before(query_datetime) this_location = await fetch_last_location_before(query_datetime)
if this_location: if this_location:
L.DEBUG(f"location: {this_location}") logger.debug(f"location: {this_location}")
return this_location return this_location
return None return None
@ -114,7 +114,7 @@ async def fetch_locations(start: datetime, end: datetime = None) -> List[Locatio
if start_datetime.time() == datetime.min.time() and end_datetime.time() == datetime.min.time(): if start_datetime.time() == datetime.min.time() and end_datetime.time() == datetime.min.time():
end_datetime = end_datetime.replace(hour=23, minute=59, second=59) end_datetime = end_datetime.replace(hour=23, minute=59, second=59)
L.DEBUG(f"Fetching locations between {start_datetime} and {end_datetime}") logger.debug(f"Fetching locations between {start_datetime} and {end_datetime}")
async with DB.get_connection() as conn: async with DB.get_connection() as conn:
locations = [] locations = []
@ -131,7 +131,7 @@ async def fetch_locations(start: datetime, end: datetime = None) -> List[Locatio
ORDER BY datetime DESC ORDER BY datetime DESC
''', start_datetime.replace(tzinfo=None), end_datetime.replace(tzinfo=None)) ''', start_datetime.replace(tzinfo=None), end_datetime.replace(tzinfo=None))
L.DEBUG(f"Range locations query returned: {range_locations}") logger.debug(f"Range locations query returned: {range_locations}")
locations.extend(range_locations) locations.extend(range_locations)
if not locations and (end is None or start_datetime.date() == end_datetime.date()): if not locations and (end is None or start_datetime.date() == end_datetime.date()):
@ -148,11 +148,11 @@ async def fetch_locations(start: datetime, end: datetime = None) -> List[Locatio
LIMIT 1 LIMIT 1
''', start_datetime.replace(tzinfo=None)) ''', start_datetime.replace(tzinfo=None))
L.DEBUG(f"Fallback query returned: {location_data}") logger.debug(f"Fallback query returned: {location_data}")
if location_data: if location_data:
locations.append(location_data) locations.append(location_data)
L.DEBUG(f"Locations found: {locations}") logger.debug(f"Locations found: {locations}")
# Sort location_data based on the datetime field in descending order # Sort location_data based on the datetime field in descending order
sorted_locations = sorted(locations, key=lambda x: x['datetime'], reverse=True) sorted_locations = sorted(locations, key=lambda x: x['datetime'], reverse=True)
@ -184,7 +184,7 @@ async def fetch_locations(start: datetime, end: datetime = None) -> List[Locatio
async def fetch_last_location_before(datetime: datetime) -> Optional[Location]: async def fetch_last_location_before(datetime: datetime) -> Optional[Location]:
datetime = await dt(datetime) datetime = await dt(datetime)
L.DEBUG(f"Fetching last location before {datetime}") logger.debug(f"Fetching last location before {datetime}")
async with DB.get_connection() as conn: async with DB.get_connection() as conn:
@ -204,10 +204,10 @@ async def fetch_last_location_before(datetime: datetime) -> Optional[Location]:
await conn.close() await conn.close()
if location_data: if location_data:
L.DEBUG(f"Last location found: {location_data}") logger.debug(f"Last location found: {location_data}")
return Location(**location_data) return Location(**location_data)
else: else:
L.DEBUG("No location found before the specified datetime") logger.debug("No location found before the specified datetime")
return None return None
@loc.get("/map/start_date={start_date_str}&end_date={end_date_str}", response_class=HTMLResponse) @loc.get("/map/start_date={start_date_str}&end_date={end_date_str}", response_class=HTMLResponse)
@ -262,9 +262,9 @@ async def generate_map(start_date: datetime, end_date: datetime):
async def post_location(location: Location): async def post_location(location: Location):
if not location.datetime: if not location.datetime:
L.DEBUG(f"location appears to be missing datetime: {location}") logger.debug(f"location appears to be missing datetime: {location}")
else: else:
L.DEBUG(f"post_location called with {location.datetime}") logger.debug(f"post_location called with {location.datetime}")
async with DB.get_connection() as conn: async with DB.get_connection() as conn:
try: try:
@ -293,7 +293,7 @@ async def post_location(location: Location):
location.suburb, location.county, location.country_code, location.country) location.suburb, location.county, location.country_code, location.country)
await conn.close() await conn.close()
L.INFO(f"Successfully posted location: {location.latitude}, {location.longitude}, {location.elevation} on {localized_datetime}") logger.info(f"Successfully posted location: {location.latitude}, {location.longitude}, {location.elevation} on {localized_datetime}")
return { return {
'datetime': localized_datetime, 'datetime': localized_datetime,
'latitude': location.latitude, 'latitude': location.latitude,
@ -323,8 +323,8 @@ async def post_location(location: Location):
'country': location.country 'country': location.country
} }
except Exception as e: except Exception as e:
L.ERR(f"Error posting location {e}") logger.error(f"Error posting location {e}")
L.ERR(traceback.format_exc()) logger.error(traceback.format_exc())
return None return None
@ -347,26 +347,26 @@ async def post_locate_endpoint(locations: Union[Location, List[Location]]):
"device_name": "Unknown", "device_name": "Unknown",
"device_os": "Unknown" "device_os": "Unknown"
} }
L.DEBUG(f"Location received for processing: {lcn}") logger.debug(f"Location received for processing: {lcn}")
geocoded_locations = await GEO.code(locations) geocoded_locations = await GEO.code(locations)
responses = [] responses = []
if isinstance(geocoded_locations, List): if isinstance(geocoded_locations, List):
for location in geocoded_locations: for location in geocoded_locations:
L.DEBUG(f"Final location to be submitted to database: {location}") logger.debug(f"Final location to be submitted to database: {location}")
location_entry = await post_location(location) location_entry = await post_location(location)
if location_entry: if location_entry:
responses.append({"location_data": location_entry}) responses.append({"location_data": location_entry})
else: else:
L.WARN(f"Posting location to database appears to have failed.") logger.warn(f"Posting location to database appears to have failed.")
else: else:
L.DEBUG(f"Final location to be submitted to database: {geocoded_locations}") logger.debug(f"Final location to be submitted to database: {geocoded_locations}")
location_entry = await post_location(geocoded_locations) location_entry = await post_location(geocoded_locations)
if location_entry: if location_entry:
responses.append({"location_data": location_entry}) responses.append({"location_data": location_entry})
else: else:
L.WARN(f"Posting location to database appears to have failed.") logger.warn(f"Posting location to database appears to have failed.")
return {"message": "Locations and weather updated", "results": responses} return {"message": "Locations and weather updated", "results": responses}
@ -387,7 +387,7 @@ async def get_locate(datetime_str: str, all: bool = False):
try: try:
date_time = await dt(datetime_str) date_time = await dt(datetime_str)
except ValueError as e: except ValueError as e:
L.ERR(f"Invalid datetime string provided: {datetime_str}") logger.error(f"Invalid datetime string provided: {datetime_str}")
return ["ERROR: INVALID DATETIME PROVIDED. USE YYYYMMDDHHmmss or YYYYMMDD format."] return ["ERROR: INVALID DATETIME PROVIDED. USE YYYYMMDDHHmmss or YYYYMMDD format."]
locations = await fetch_locations(date_time) locations = await fetch_locations(date_time)

View file

@ -32,9 +32,8 @@ from sijapi.routers import llm, tts, asr, loc
from newspaper import Article from newspaper import Article
news = APIRouter() news = APIRouter()
logger = L.get_module_logger("news")
async def download_and_save_article(article, site_name, earliest_date, bg_tasks: BackgroundTasks, tts_mode: str = "summary", voice: str = DEFAULT_11L_VOICE): async def download_and_save_article(article, site_name, earliest_date, bg_tasks: BackgroundTasks, tts_mode: str = "summary", voice: str = DEFAULT_11L_VOICE):
try: try:
@ -58,7 +57,7 @@ async def download_and_save_article(article, site_name, earliest_date, bg_tasks:
try: try:
article.publish_date = await loc.dt(article.publish_date, "UTC") article.publish_date = await loc.dt(article.publish_date, "UTC")
except: except:
L.DEBUG(f"Failed to localize {article.publish_date}") logger.debug(f"Failed to localize {article.publish_date}")
article.publish_date = await loc.dt(dt_datetime.now(), "UTC") article.publish_date = await loc.dt(dt_datetime.now(), "UTC")
article.meta_description = traf.description if traf and traf.description else article.meta_description article.meta_description = traf.description if traf and traf.description else article.meta_description
article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) if source else article.text article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) if source else article.text
@ -93,7 +92,7 @@ async def download_and_save_article(article, site_name, earliest_date, bg_tasks:
if banner_image: if banner_image:
banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]"
except Exception as e: except Exception as e:
L.ERR(f"No image found in article") logger.error(f"No image found in article")
authors = ', '.join(['[[{}]]'.format(author.strip()) for author in article.authors if author.strip()]) authors = ', '.join(['[[{}]]'.format(author.strip()) for author in article.authors if author.strip()])
@ -129,11 +128,11 @@ tags:
obsidian_link = f"![[{audio_path.name}]]" obsidian_link = f"![[{audio_path.name}]]"
body += f"{obsidian_link}\n\n" body += f"{obsidian_link}\n\n"
else: else:
L.WARN(f"Unexpected audio_path type: {type(audio_path)}. Value: {audio_path}") logger.warn(f"Unexpected audio_path type: {type(audio_path)}. Value: {audio_path}")
except Exception as e: except Exception as e:
L.ERR(f"Failed to generate TTS for {audio_filename}. Error: {str(e)}") logger.error(f"Failed to generate TTS for {audio_filename}. Error: {str(e)}")
L.ERR(f"TTS error details - voice: {voice}, model: eleven_turbo_v2, podcast: True") logger.error(f"TTS error details - voice: {voice}, model: eleven_turbo_v2, podcast: True")
L.ERR(f"Output directory: {Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR}") logger.error(f"Output directory: {Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR}")
body += f"by {authors} in {article.source_url}\n\n" body += f"by {authors} in {article.source_url}\n\n"
body += f"> [!summary]+\n" body += f"> [!summary]+\n"
@ -145,14 +144,14 @@ tags:
with open(markdown_filename, 'w') as md_file: with open(markdown_filename, 'w') as md_file:
md_file.write(markdown_content) md_file.write(markdown_content)
L.INFO(f"Successfully saved to {markdown_filename}") logger.info(f"Successfully saved to {markdown_filename}")
add_to_daily_note(relative_path) add_to_daily_note(relative_path)
print(f"Saved article: {relative_path}") print(f"Saved article: {relative_path}")
return True return True
except Exception as e: except Exception as e:
L.ERR(f"Error processing article from {article.url}: {str(e)}") logger.error(f"Error processing article from {article.url}: {str(e)}")
return False return False
# You'll need to update your is_article_within_date_range function: # You'll need to update your is_article_within_date_range function:
@ -240,18 +239,18 @@ async def clip_get(
voice: str = Query(DEFAULT_VOICE) voice: str = Query(DEFAULT_VOICE)
): ):
parsed_content = await parse_article(url) parsed_content = await parse_article(url)
markdown_filename = await process_article(bg_tasks, parsed_content, tts, voice) markdown_filename = await process_article2(bg_tasks, parsed_content, tts, voice)
return {"message": "Clip saved successfully", "markdown_filename": markdown_filename} return {"message": "Clip saved successfully", "markdown_filename": markdown_filename}
@news.post("/note/add") @news.post("/note/add")
async def note_add_endpoint(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), source: Optional[str] = Form(None), bg_tasks: BackgroundTasks = None): async def note_add_endpoint(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), source: Optional[str] = Form(None), bg_tasks: BackgroundTasks = None):
L.DEBUG(f"Received request on /note/add...") logger.debug(f"Received request on /note/add...")
if not file and not text: if not file and not text:
L.WARN(f"... without any file or text!") logger.warn(f"... without any file or text!")
raise HTTPException(status_code=400, detail="Either text or a file must be provided") raise HTTPException(status_code=400, detail="Either text or a file must be provided")
else: else:
result = await process_for_daily_note(file, text, source, bg_tasks) result = await process_for_daily_note(file, text, source, bg_tasks)
L.INFO(f"Result on /note/add: {result}") logger.info(f"Result on /note/add: {result}")
return JSONResponse(result, status_code=204) return JSONResponse(result, status_code=204)
async def process_for_daily_note(file: Optional[UploadFile] = File(None), text: Optional[str] = None, source: Optional[str] = None, bg_tasks: BackgroundTasks = None): async def process_for_daily_note(file: Optional[UploadFile] = File(None), text: Optional[str] = None, source: Optional[str] = None, bg_tasks: BackgroundTasks = None):
@ -259,7 +258,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
transcription_entry = "" transcription_entry = ""
file_entry = "" file_entry = ""
if file: if file:
L.DEBUG("File received...") logger.debug("File received...")
file_content = await file.read() file_content = await file.read()
audio_io = BytesIO(file_content) audio_io = BytesIO(file_content)
@ -267,18 +266,18 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
guessed_type = mimetypes.guess_type(file.filename) guessed_type = mimetypes.guess_type(file.filename)
file_type = guessed_type[0] if guessed_type[0] else "application/octet-stream" file_type = guessed_type[0] if guessed_type[0] else "application/octet-stream"
L.DEBUG(f"Processing as {file_type}...") logger.debug(f"Processing as {file_type}...")
# Extract the main type (e.g., 'audio', 'image', 'video') # Extract the main type (e.g., 'audio', 'image', 'video')
main_type = file_type.split('/')[0] main_type = file_type.split('/')[0]
subdir = main_type.title() if main_type else "Documents" subdir = main_type.title() if main_type else "Documents"
absolute_path, relative_path = assemble_journal_path(now, subdir=subdir, filename=file.filename) absolute_path, relative_path = assemble_journal_path(now, subdir=subdir, filename=file.filename)
L.DEBUG(f"Destination path: {absolute_path}") logger.debug(f"Destination path: {absolute_path}")
with open(absolute_path, 'wb') as f: with open(absolute_path, 'wb') as f:
f.write(file_content) f.write(file_content)
L.DEBUG(f"Processing {f.name}...") logger.debug(f"Processing {f.name}...")
if main_type == 'audio': if main_type == 'audio':
transcription = await asr.transcribe_audio(file_path=absolute_path, params=asr.TranscribeParams(model="small-en", language="en", threads=6)) transcription = await asr.transcribe_audio(file_path=absolute_path, params=asr.TranscribeParams(model="small-en", language="en", threads=6))
@ -289,7 +288,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
file_entry = f"[Source]({relative_path})" file_entry = f"[Source]({relative_path})"
text_entry = text if text else "" text_entry = text if text else ""
L.DEBUG(f"transcription: {transcription_entry}\nfile_entry: {file_entry}\ntext_entry: {text_entry}") logger.debug(f"transcription: {transcription_entry}\nfile_entry: {file_entry}\ntext_entry: {text_entry}")
return await add_to_daily_note(transcription_entry, file_entry, text_entry, now) return await add_to_daily_note(transcription_entry, file_entry, text_entry, now)
async def add_to_daily_note(transcription: str = None, file_link: str = None, additional_text: str = None, date_time: dt_datetime = None): async def add_to_daily_note(transcription: str = None, file_link: str = None, additional_text: str = None, date_time: dt_datetime = None):
@ -381,7 +380,7 @@ added: {timestamp}
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]" obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
body += f"{obsidian_link}\n\n" body += f"{obsidian_link}\n\n"
except Exception as e: except Exception as e:
L.ERR(f"Failed in the TTS portion of clipping: {e}") logger.error(f"Failed in the TTS portion of clipping: {e}")
body += f"> [!summary]+\n" body += f"> [!summary]+\n"
body += f"> {summary}\n\n" body += f"> {summary}\n\n"
@ -394,17 +393,17 @@ added: {timestamp}
with open(markdown_filename, 'w', encoding=encoding) as md_file: with open(markdown_filename, 'w', encoding=encoding) as md_file:
md_file.write(markdown_content) md_file.write(markdown_content)
L.INFO(f"Successfully saved to {markdown_filename}") logger.info(f"Successfully saved to {markdown_filename}")
return markdown_filename return markdown_filename
except Exception as e: except Exception as e:
L.ERR(f"Failed to clip: {str(e)}") logger.error(f"Failed to clip: {str(e)}")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
async def process_article( async def process_article2(
bg_tasks: BackgroundTasks, bg_tasks: BackgroundTasks,
parsed_content: Article, parsed_content: Article,
tts_mode: str = "summary", tts_mode: str = "summary",
@ -435,7 +434,7 @@ async def process_article(
banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]"
except Exception as e: except Exception as e:
L.ERR(f"No image found in article") logger.error(f"No image found in article")
authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.authors) authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.authors)
published_date = parsed_content.publish_date published_date = parsed_content.publish_date
@ -461,7 +460,7 @@ tags:
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]" obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
body += f"{obsidian_link}\n\n" body += f"{obsidian_link}\n\n"
except Exception as e: except Exception as e:
L.ERR(f"Failed to generate TTS for np3k. {e}") logger.error(f"Failed to generate TTS for np3k. {e}")
try: try:
body += f"by {authors} in {parsed_content.canonical_link}" # update with method for getting the newspaper name body += f"by {authors} in {parsed_content.canonical_link}" # update with method for getting the newspaper name
@ -471,26 +470,26 @@ tags:
markdown_content = frontmatter + body markdown_content = frontmatter + body
except Exception as e: except Exception as e:
L.ERR(f"Failed to combine elements of article markdown.") logger.error(f"Failed to combine elements of article markdown.")
try: try:
with open(markdown_filename, 'w') as md_file: with open(markdown_filename, 'w') as md_file:
md_file.write(markdown_content) md_file.write(markdown_content)
L.INFO(f"Successfully saved to {markdown_filename}") logger.info(f"Successfully saved to {markdown_filename}")
add_to_daily_note add_to_daily_note
return markdown_filename return markdown_filename
except Exception as e: except Exception as e:
L.ERR(f"Failed to write markdown file") logger.error(f"Failed to write markdown file")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
except Exception as e: except Exception as e:
L.ERR(f"Failed to clip: {str(e)}") logger.error(f"Failed to clip: {str(e)}")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
async def process_article2( async def process_article(
bg_tasks: BackgroundTasks, bg_tasks: BackgroundTasks,
url: str, url: str,
title: Optional[str] = None, title: Optional[str] = None,
@ -529,7 +528,7 @@ async def process_article2(
banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]"
except Exception as e: except Exception as e:
L.ERR(f"No image found in article") logger.error(f"No image found in article")
authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.get('authors', ['Unknown'])) authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.get('authors', ['Unknown']))
@ -558,7 +557,7 @@ tags:
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]" obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
body += f"{obsidian_link}\n\n" body += f"{obsidian_link}\n\n"
except Exception as e: except Exception as e:
L.ERR(f"Failed to generate TTS for np3k. {e}") logger.error(f"Failed to generate TTS for np3k. {e}")
try: try:
body += f"by {authors} in [{parsed_content.get('domain', urlparse(url).netloc.replace('www.', ''))}]({url}).\n\n" body += f"by {authors} in [{parsed_content.get('domain', urlparse(url).netloc.replace('www.', ''))}]({url}).\n\n"
@ -568,22 +567,22 @@ tags:
markdown_content = frontmatter + body markdown_content = frontmatter + body
except Exception as e: except Exception as e:
L.ERR(f"Failed to combine elements of article markdown.") logger.error(f"Failed to combine elements of article markdown.")
try: try:
with open(markdown_filename, 'w', encoding=encoding) as md_file: with open(markdown_filename, 'w', encoding=encoding) as md_file:
md_file.write(markdown_content) md_file.write(markdown_content)
L.INFO(f"Successfully saved to {markdown_filename}") logger.info(f"Successfully saved to {markdown_filename}")
add_to_daily_note add_to_daily_note
return markdown_filename return markdown_filename
except Exception as e: except Exception as e:
L.ERR(f"Failed to write markdown file") logger.error(f"Failed to write markdown file")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
except Exception as e: except Exception as e:
L.ERR(f"Failed to clip {url}: {str(e)}") logger.error(f"Failed to clip {url}: {str(e)}")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
@ -597,7 +596,7 @@ async def parse_article(url: str, source: Optional[str] = None) -> Article:
article.set_html(source) article.set_html(source)
article.parse() article.parse()
L.INFO(f"Parsed {article.title}") logger.info(f"Parsed {article.title}")
# Update or set properties based on trafilatura and additional processing # Update or set properties based on trafilatura and additional processing
article.title = article.title or traf.title or url article.title = article.title or traf.title or url
@ -607,7 +606,7 @@ async def parse_article(url: str, source: Optional[str] = None) -> Article:
try: try:
article.publish_date = await loc.dt(article.publish_date, "UTC") article.publish_date = await loc.dt(article.publish_date, "UTC")
except: except:
L.DEBUG(f"Failed to localize {article.publish_date}") logger.debug(f"Failed to localize {article.publish_date}")
article.publish_date = await loc.dt(dt_datetime.now(), "UTC") article.publish_date = await loc.dt(dt_datetime.now(), "UTC")
article.meta_description = article.meta_description or traf.description article.meta_description = article.meta_description or traf.description
@ -637,7 +636,7 @@ async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]
async with session.get(url) as response: async with session.get(url) as response:
html_content = await response.text() html_content = await response.text()
else: else:
L.ERR(f"Unable to convert nothing to markdown.") logger.error(f"Unable to convert nothing to markdown.")
return None return None
# Use readability to extract the main content # Use readability to extract the main content
@ -686,10 +685,10 @@ async def process_archive(
markdown_path.parent.mkdir(parents=True, exist_ok=True) markdown_path.parent.mkdir(parents=True, exist_ok=True)
with open(markdown_path, 'w', encoding=encoding) as md_file: with open(markdown_path, 'w', encoding=encoding) as md_file:
md_file.write(markdown_content) md_file.write(markdown_content)
L.DEBUG(f"Successfully saved to {markdown_path}") logger.debug(f"Successfully saved to {markdown_path}")
return markdown_path return markdown_path
except Exception as e: except Exception as e:
L.WARN(f"Failed to write markdown file: {str(e)}") logger.warn(f"Failed to write markdown file: {str(e)}")
return None return None
def download_file(url, folder): def download_file(url, folder):
@ -713,13 +712,13 @@ def download_file(url, folder):
with open(filepath, 'wb') as f: with open(filepath, 'wb') as f:
f.write(response.content) f.write(response.content)
else: else:
L.ERR(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}") logger.error(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}")
return None return None
else: else:
L.ERR(f"Failed to download image: {url}, status code: {response.status_code}") logger.error(f"Failed to download image: {url}, status code: {response.status_code}")
return None return None
except Exception as e: except Exception as e:
L.ERR(f"Failed to download image: {url}, error: {str(e)}") logger.error(f"Failed to download image: {url}, error: {str(e)}")
return None return None
return filename return filename

View file

@ -21,6 +21,7 @@ from sijapi.classes import Location
note = APIRouter() note = APIRouter()
logger = L.get_module_logger("note")
def list_and_correct_impermissible_files(root_dir, rename: bool = False): def list_and_correct_impermissible_files(root_dir, rename: bool = False):
"""List and correct all files with impermissible names.""" """List and correct all files with impermissible names."""
@ -30,7 +31,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False):
if check_file_name(filename): if check_file_name(filename):
file_path = Path(dirpath) / filename file_path = Path(dirpath) / filename
impermissible_files.append(file_path) impermissible_files.append(file_path)
L.DEBUG(f"Impermissible file found: {file_path}") logger.debug(f"Impermissible file found: {file_path}")
# Sanitize the file name # Sanitize the file name
new_filename = sanitize_filename(filename) new_filename = sanitize_filename(filename)
@ -48,7 +49,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False):
# Rename the file # Rename the file
if rename: if rename:
os.rename(file_path, new_file_path) os.rename(file_path, new_file_path)
L.DEBUG(f"Renamed: {file_path} -> {new_file_path}") logger.debug(f"Renamed: {file_path} -> {new_file_path}")
return impermissible_files return impermissible_files
@ -90,7 +91,7 @@ async def build_daily_note_endpoint(
else: else:
raise ValueError("Location is not provided or invalid.") raise ValueError("Location is not provided or invalid.")
except (ValueError, AttributeError, TypeError) as e: except (ValueError, AttributeError, TypeError) as e:
L.WARN(f"Falling back to localized datetime due to error: {e}") logger.warn(f"Falling back to localized datetime due to error: {e}")
try: try:
date_time = await loc.dt(date_str) date_time = await loc.dt(date_str)
places = await loc.fetch_locations(date_time) places = await loc.fetch_locations(date_time)
@ -110,7 +111,7 @@ async def build_daily_note(date_time: dt_datetime, lat: float = None, lon: float
Obsidian helper. Takes a datetime and creates a new daily note. Note: it uses the sijapi configuration file to place the daily note and does NOT presently interface with Obsidian's daily note or periodic notes extensions. It is your responsibility to ensure they match. Obsidian helper. Takes a datetime and creates a new daily note. Note: it uses the sijapi configuration file to place the daily note and does NOT presently interface with Obsidian's daily note or periodic notes extensions. It is your responsibility to ensure they match.
''' '''
absolute_path, _ = assemble_journal_path(date_time) absolute_path, _ = assemble_journal_path(date_time)
L.WARN(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in build_daily_note.") logger.warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in build_daily_note.")
formatted_day = date_time.strftime("%A %B %d, %Y") # Monday May 27, 2024 formatting formatted_day = date_time.strftime("%A %B %d, %Y") # Monday May 27, 2024 formatting
day_before = (date_time - timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-26 Sunday formatting day_before = (date_time - timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-26 Sunday formatting
day_after = (date_time + timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-28 Tuesday formatting day_after = (date_time + timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-28 Tuesday formatting
@ -201,7 +202,7 @@ async def update_frontmatter(date_time: dt_datetime, key: str, value: str):
# Check if the file exists # Check if the file exists
if not file_path.exists(): if not file_path.exists():
L.CRIT(f"Markdown file not found at {file_path}") logger.critical(f"Markdown file not found at {file_path}")
raise HTTPException(status_code=404, detail="Markdown file not found.") raise HTTPException(status_code=404, detail="Markdown file not found.")
# Read the file # Read the file
@ -248,20 +249,20 @@ async def banner_endpoint(dt: str, location: str = None, forecast: str = None, m
''' '''
Endpoint (POST) that generates a new banner image for the Obsidian daily note for a specified date, taking into account optional additional information, then updates the frontmatter if necessary. Endpoint (POST) that generates a new banner image for the Obsidian daily note for a specified date, taking into account optional additional information, then updates the frontmatter if necessary.
''' '''
L.DEBUG(f"banner_endpoint requested with date: {dt} ({type(dt)})") logger.debug(f"banner_endpoint requested with date: {dt} ({type(dt)})")
date_time = await loc.dt(dt) date_time = await loc.dt(dt)
L.DEBUG(f"date_time after localization: {date_time} ({type(date_time)})") logger.debug(f"date_time after localization: {date_time} ({type(date_time)})")
context = await generate_context(dt, location, forecast, mood, other_context) context = await generate_context(dt, location, forecast, mood, other_context)
jpg_path = await generate_banner(date_time, location, mood=mood, other_context=other_context) jpg_path = await generate_banner(date_time, location, mood=mood, other_context=other_context)
return jpg_path return jpg_path
async def generate_banner(dt, location: Location = None, forecast: str = None, mood: str = None, other_context: str = None): async def generate_banner(dt, location: Location = None, forecast: str = None, mood: str = None, other_context: str = None):
# L.DEBUG(f"Location: {location}, forecast: {forecast}, mood: {mood}, other_context: {other_context}") # logger.debug(f"Location: {location}, forecast: {forecast}, mood: {mood}, other_context: {other_context}")
date_time = await loc.dt(dt) date_time = await loc.dt(dt)
L.DEBUG(f"generate_banner called with date_time: {date_time}") logger.debug(f"generate_banner called with date_time: {date_time}")
destination_path, local_path = assemble_journal_path(date_time, filename="Banner", extension=".jpg", no_timestamp = True) destination_path, local_path = assemble_journal_path(date_time, filename="Banner", extension=".jpg", no_timestamp = True)
L.DEBUG(f"destination path generated: {destination_path}") logger.debug(f"destination path generated: {destination_path}")
if not location or not isinstance(location, Location): if not location or not isinstance(location, Location):
locations = await loc.fetch_locations(date_time) locations = await loc.fetch_locations(date_time)
if locations: if locations:
@ -270,10 +271,10 @@ async def generate_banner(dt, location: Location = None, forecast: str = None, m
forecast = await update_dn_weather(date_time, False, location.latitude, location.longitude) forecast = await update_dn_weather(date_time, False, location.latitude, location.longitude)
prompt = await generate_context(date_time, location, forecast, mood, other_context) prompt = await generate_context(date_time, location, forecast, mood, other_context)
L.DEBUG(f"Prompt: {prompt}") logger.debug(f"Prompt: {prompt}")
final_path = await img.workflow(prompt, scene=OBSIDIAN_BANNER_SCENE, destination_path=destination_path) final_path = await img.workflow(prompt, scene=OBSIDIAN_BANNER_SCENE, destination_path=destination_path)
if not str(local_path) in str(final_path): if not str(local_path) in str(final_path):
L.INFO(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}") logger.info(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}")
jpg_embed = f"\"![[{local_path}]]\"" jpg_embed = f"\"![[{local_path}]]\""
await update_frontmatter(date_time, "banner", jpg_embed) await update_frontmatter(date_time, "banner", jpg_embed)
return local_path return local_path
@ -301,7 +302,7 @@ async def generate_context(date_time, location: Location, forecast: str, mood: s
if geocoded_location.display_name or geocoded_location.city or geocoded_location.country: if geocoded_location.display_name or geocoded_location.city or geocoded_location.country:
return await generate_context(date_time, geocoded_location, forecast, mood, other_context) return await generate_context(date_time, geocoded_location, forecast, mood, other_context)
else: else:
L.WARN(f"Failed to get a useable location for purposes of generating a banner, but we'll generate one anyway.") logger.warn(f"Failed to get a useable location for purposes of generating a banner, but we'll generate one anyway.")
elif location and isinstance(location, str): elif location and isinstance(location, str):
display_name = f"Location: {location}\n" display_name = f"Location: {location}\n"
else: else:
@ -369,8 +370,8 @@ async def note_weather_get(
force_refresh_weather = refresh == "True" force_refresh_weather = refresh == "True"
try: try:
date_time = dt_datetime.now() if date == "0" else await loc.dt(date) date_time = dt_datetime.now() if date == "0" else await loc.dt(date)
L.WARN(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in note_weather_get.") logger.warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in note_weather_get.")
L.DEBUG(f"date: {date} .. date_time: {date_time}") logger.debug(f"date: {date} .. date_time: {date_time}")
content = await update_dn_weather(date_time, force_refresh_weather) #, lat, lon) content = await update_dn_weather(date_time, force_refresh_weather) #, lat, lon)
return JSONResponse(content={"forecast": content}, status_code=200) return JSONResponse(content={"forecast": content}, status_code=200)
@ -378,14 +379,14 @@ async def note_weather_get(
return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code) return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code)
except Exception as e: except Exception as e:
L.ERR(f"Error in note_weather_get: {str(e)}") logger.error(f"Error in note_weather_get: {str(e)}")
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
@note.post("/update/note/{date}") @note.post("/update/note/{date}")
async def post_update_daily_weather_and_calendar_and_timeslips(date: str, refresh: str="False") -> PlainTextResponse: async def post_update_daily_weather_and_calendar_and_timeslips(date: str, refresh: str="False") -> PlainTextResponse:
date_time = await loc.dt(date) date_time = await loc.dt(date)
L.WARN(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in post_update_daily_weather_and_calendar_and_timeslips.") logger.warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in post_update_daily_weather_and_calendar_and_timeslips.")
force_refresh_weather = refresh == "True" force_refresh_weather = refresh == "True"
await update_dn_weather(date_time, force_refresh_weather) await update_dn_weather(date_time, force_refresh_weather)
await update_daily_note_events(date_time) await update_daily_note_events(date_time)
@ -393,52 +394,52 @@ async def post_update_daily_weather_and_calendar_and_timeslips(date: str, refres
return f"[Refresh]({API.URL}/update/note/{date_time.strftime('%Y-%m-%d')}" return f"[Refresh]({API.URL}/update/note/{date_time.strftime('%Y-%m-%d')}"
async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False, lat: float = None, lon: float = None): async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False, lat: float = None, lon: float = None):
L.WARN(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.") logger.warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.")
try: try:
if lat and lon: if lat and lon:
place = await GEO.code((lat, lon)) place = await GEO.code((lat, lon))
else: else:
L.DEBUG(f"Updating weather for {date_time}") logger.debug(f"Updating weather for {date_time}")
places = await loc.fetch_locations(date_time) places = await loc.fetch_locations(date_time)
place = places[0] place = places[0]
lat = place.latitude lat = place.latitude
lon = place.longitude lon = place.longitude
tz = await GEO.tz_at(lat, lon)
L.DEBUG(f"lat: {lat}, lon: {lon}, place: {place}") logger.debug(f"lat: {lat}, lon: {lon}, place: {place}")
city = GEO.find_override_location(lat, lon) city = GEO.find_override_location(lat, lon)
if city: if city:
L.INFO(f"Using override location: {city}") logger.info(f"Using override location: {city}")
else: else:
if place.city and place.city != "": if place.city and place.city != "":
city = place.city city = place.city
L.INFO(f"City in data: {city}") logger.info(f"City in data: {city}")
else: else:
location = await GEO.code((lat, lon)) location = await GEO.code((lat, lon))
L.DEBUG(f"location: {location}") logger.debug(f"location: {location}")
city = location.name city = location.name
city = city if city else location.city city = city if city else location.city
city = city if city else location.house_number + ' ' + location.road city = city if city else location.house_number + ' ' + location.road
L.DEBUG(f"City geocoded: {city}") logger.debug(f"City geocoded: {city}")
# Assemble journal path # Assemble journal path
absolute_path, relative_path = assemble_journal_path(date_time, filename="Weather", extension=".md", no_timestamp = True) absolute_path, relative_path = assemble_journal_path(date_time, filename="Weather", extension=".md", no_timestamp = True)
L.DEBUG(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}") logger.debug(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}")
try: try:
L.DEBUG(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather") logger.debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
day = await weather.get_weather(date_time, lat, lon, force_refresh) day = await weather.get_weather(date_time, lat, lon, force_refresh)
L.DEBUG(f"day information obtained from get_weather: {day}") logger.debug(f"day information obtained from get_weather: {day}")
if day: if day:
DailyWeather = day.get('DailyWeather') DailyWeather = day.get('DailyWeather')
HourlyWeather = day.get('HourlyWeather') HourlyWeather = day.get('HourlyWeather')
if DailyWeather: if DailyWeather:
# L.DEBUG(f"Day: {DailyWeather}") # logger.debug(f"Day: {DailyWeather}")
icon = DailyWeather.get('icon') icon = DailyWeather.get('icon')
L.DEBUG(f"Icon: {icon}") logger.debug(f"Icon: {icon}")
weather_icon, admonition = get_icon_and_admonition(icon) if icon else (":LiSunMoon:", "ad-weather") weather_icon, admonition = get_icon_and_admonition(icon) if icon else (":LiSunMoon:", "ad-weather")
@ -460,8 +461,8 @@ async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False,
uvindex = DailyWeather.get('uvindex', 0) uvindex = DailyWeather.get('uvindex', 0)
uvwarn = f" - :LiRadiation: Caution! UVI today is {uvindex}! :LiRadiation:\n" if (uvindex and uvindex > 8) else "" uvwarn = f" - :LiRadiation: Caution! UVI today is {uvindex}! :LiRadiation:\n" if (uvindex and uvindex > 8) else ""
sunrise = await loc.dt(DailyWeather.get('sunrise'), tz) sunrise = DailyWeather.get('sunrise')
sunset = await loc.dt(DailyWeather.get('sunset'), tz) sunset = DailyWeather.get('sunset')
srise_str = sunrise.time().strftime("%H:%M") srise_str = sunrise.time().strftime("%H:%M")
sset_str = sunset.time().strftime("%H:%M") sset_str = sunset.time().strftime("%H:%M")
@ -494,47 +495,51 @@ async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False,
times, condition_symbols, temps, winds = [], [], [], [] times, condition_symbols, temps, winds = [], [], [], []
for hour in HourlyWeather: for hour in HourlyWeather:
hour['datetime'] = await loc.dt(hour.get('datetime'), tz)
if hour.get('datetime').strftime("%H:%M:%S") in HOURLY_COLUMNS_MAPPING.values(): if hour.get('datetime').strftime("%H:%M:%S") in HOURLY_COLUMNS_MAPPING.values():
times.append(format_hourly_time(hour)) times.append(format_hourly_time(hour))
condition_symbols.append(format_hourly_icon(hour, sunrise, sunset)) condition_symbols.append(format_hourly_icon(hour, sunrise, sunset))
temps.append(format_hourly_temperature(hour)) temps.append(format_hourly_temperature(hour))
winds.append(format_hourly_wind(hour)) winds.append(format_hourly_wind(hour))
detailed_forecast += assemble_hourly_data_table(times, condition_symbols, temps, winds) detailed_forecast += assemble_hourly_data_table(times, condition_symbols, temps, winds)
detailed_forecast += f"```\n\n" detailed_forecast += f"```\n\n"
L.DEBUG(f"Detailed forecast: {detailed_forecast}.") logger.debug(f"Detailed forecast: {detailed_forecast}.")
with open(absolute_path, 'w', encoding='utf-8') as note_file: with open(absolute_path, 'w', encoding='utf-8') as note_file:
note_file.write(detailed_forecast) note_file.write(detailed_forecast)
L.DEBUG(f"Operation complete.") logger.debug(f"Operation complete.")
return narrative return narrative
else: else:
L.ERR(f"Failed to get DailyWeather from day: {day}") logger.error(f"Failed to get DailyWeather from day: {day}")
else: else:
L.ERR(f"Failed to get day") logger.error(f"Failed to get day")
raise HTTPException(status_code=500, detail="Failed to retrieve weather data") raise HTTPException(status_code=500, detail="Failed to retrieve weather data")
except HTTPException as e: except HTTPException as e:
L.ERR(f"HTTP error: {e}") logger.error(f"HTTP error: {e}")
L.ERR(traceback.format_exc()) logger.error(traceback.format_exc())
raise e raise e
except Exception as e: except Exception as e:
L.ERR(f"Error: {e}") logger.error(f"Error: {e}")
L.ERR(traceback.format_exc()) logger.error(traceback.format_exc())
raise HTTPException(status_code=999, detail=f"Error: {e}") raise HTTPException(status_code=999, detail=f"Error: {e}")
except ValueError as ve: except ValueError as ve:
L.ERR(f"Value error in update_dn_weather: {str(ve)}") logger.error(f"Value error in update_dn_weather: {str(ve)}")
L.ERR(traceback.format_exc()) logger.error(traceback.format_exc())
raise HTTPException(status_code=400, detail=f"Value error: {str(ve)}") raise HTTPException(status_code=400, detail=f"Value error: {str(ve)}")
except Exception as e: except Exception as e:
L.ERR(f"Error in update_dn_weather: {str(e)}") logger.error(f"Error in update_dn_weather: {str(e)}")
L.ERR(traceback.format_exc()) logger.error(traceback.format_exc())
raise HTTPException(status_code=500, detail=f"Error in update_dn_weather: {str(e)}") raise HTTPException(status_code=500, detail=f"Error in update_dn_weather: {str(e)}")
def format_hourly_time(hour): def format_hourly_time(hour):
@ -542,8 +547,8 @@ def format_hourly_time(hour):
hour_12 = convert_to_12_hour_format(hour.get("datetime")) hour_12 = convert_to_12_hour_format(hour.get("datetime"))
return hour_12 return hour_12
except Exception as e: except Exception as e:
L.ERR(f"Error in format_hourly_time: {str(e)}") logger.error(f"Error in format_hourly_time: {str(e)}")
L.ERR(traceback.format_exc()) logger.error(traceback.format_exc())
return "" return ""
def format_hourly_icon(hour, sunrise, sunset): def format_hourly_icon(hour, sunrise, sunset):
@ -553,7 +558,7 @@ def format_hourly_icon(hour, sunrise, sunset):
precip = hour.get('precip', float(0.0)) precip = hour.get('precip', float(0.0))
precip_prob = hour.get('precipprob', float(0.0)) precip_prob = hour.get('precipprob', float(0.0))
L.DEBUG(f"precip: {precip}, prob: {precip_prob}") logger.debug(f"precip: {precip}, prob: {precip_prob}")
sp_str = None sp_str = None
@ -577,8 +582,8 @@ def format_hourly_icon(hour, sunrise, sunset):
return formatted return formatted
except Exception as e: except Exception as e:
L.ERR(f"Error in format_hourly_special: {str(e)}") logger.error(f"Error in format_hourly_special: {str(e)}")
L.ERR(traceback.format_exc()) logger.error(traceback.format_exc())
return "" return ""
def format_hourly_temperature(hour): def format_hourly_temperature(hour):
@ -586,8 +591,8 @@ def format_hourly_temperature(hour):
temp_str = f"{hour.get('temp', '')}˚ F" temp_str = f"{hour.get('temp', '')}˚ F"
return temp_str return temp_str
except Exception as e: except Exception as e:
L.ERR(f"Error in format_hourly_temperature: {str(e)}") logger.error(f"Error in format_hourly_temperature: {str(e)}")
L.ERR(traceback.format_exc()) logger.error(traceback.format_exc())
return "" return ""
def format_hourly_wind(hour): def format_hourly_wind(hour):
@ -597,8 +602,8 @@ def format_hourly_wind(hour):
wind_str = f"{str(windspeed)}:LiWind: {winddir}" wind_str = f"{str(windspeed)}:LiWind: {winddir}"
return wind_str return wind_str
except Exception as e: except Exception as e:
L.ERR(f"Error in format_hourly_wind: {str(e)}") logger.error(f"Error in format_hourly_wind: {str(e)}")
L.ERR(traceback.format_exc()) logger.error(traceback.format_exc())
return "" return ""
def assemble_hourly_data_table(times, condition_symbols, temps, winds): def assemble_hourly_data_table(times, condition_symbols, temps, winds):
@ -611,7 +616,7 @@ def assemble_hourly_data_table(times, condition_symbols, temps, winds):
def get_icon_and_admonition(icon_str) -> Tuple: def get_icon_and_admonition(icon_str) -> Tuple:
L.DEBUG(f"Received request for emoji {icon_str}") logger.debug(f"Received request for emoji {icon_str}")
if icon_str.startswith(":") and icon_str.endswith(":"): if icon_str.startswith(":") and icon_str.endswith(":"):
return icon_str return icon_str
@ -712,7 +717,7 @@ async def format_events_as_markdown(event_data: Dict[str, Union[str, List[Dict[s
total_events = len(event_data["events"]) total_events = len(event_data["events"])
event_markdown = f"```ad-events" event_markdown = f"```ad-events"
for event in event_data["events"]: for event in event_data["events"]:
L.DEBUG(f"event busy status: {event['busy']}; all day status: {event['all_day']}") logger.debug(f"event busy status: {event['busy']}; all day status: {event['all_day']}")
if not event['name'].startswith('TC '): if not event['name'].startswith('TC '):
url = f"hook://ical/eventID={event['uid']}calendarID=17" url = f"hook://ical/eventID={event['uid']}calendarID=17"
if event['url']: if event['url']:
@ -790,18 +795,18 @@ async def note_events_endpoint(date: str = Query(None)):
return PlainTextResponse(content=response, status_code=200) return PlainTextResponse(content=response, status_code=200)
async def update_daily_note_events(date_time: dt_datetime): async def update_daily_note_events(date_time: dt_datetime):
L.DEBUG(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}") logger.debug(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}")
try: try:
events = await cal.get_events(date_time, date_time) events = await cal.get_events(date_time, date_time)
L.DEBUG(f"Raw events: {events}") logger.debug(f"Raw events: {events}")
event_data = { event_data = {
"date": date_time.strftime('%Y-%m-%d'), "date": date_time.strftime('%Y-%m-%d'),
"events": events "events": events
} }
events_markdown = await format_events_as_markdown(event_data) events_markdown = await format_events_as_markdown(event_data)
L.DEBUG(f"Markdown events: {events_markdown}") logger.debug(f"Markdown events: {events_markdown}")
absolute_path, _ = assemble_journal_path(date_time, filename="Events", extension=".md", no_timestamp = True) absolute_path, _ = assemble_journal_path(date_time, filename="Events", extension=".md", no_timestamp = True)
L.DEBUG(f"Writing events to file: {absolute_path}") logger.debug(f"Writing events to file: {absolute_path}")
with open(absolute_path, 'w', encoding='utf-8') as note_file: with open(absolute_path, 'w', encoding='utf-8') as note_file:
note_file.write(events_markdown) note_file.write(events_markdown)
@ -809,7 +814,7 @@ async def update_daily_note_events(date_time: dt_datetime):
return events_markdown return events_markdown
except Exception as e: except Exception as e:
L.ERR(f"Error processing events: {e}") logger.error(f"Error processing events: {e}")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))

View file

@ -4,8 +4,10 @@ NOTES: Haven't yet decided if this should depend on the Obsidian and Chat module
''' '''
from fastapi import APIRouter from fastapi import APIRouter
from sijapi import L
rag = APIRouter() rag = APIRouter()
logger = L.get_module_logger("rag")
rag.get("/rag/search") rag.get("/rag/search")
async def rag_search_endpoint(query: str, scope: str): async def rag_search_endpoint(query: str, scope: str):

View file

@ -37,6 +37,7 @@ from sijapi.routers import loc, note
serve = APIRouter(tags=["public"]) serve = APIRouter(tags=["public"])
logger = L.get_module_logger("serve")
@serve.get("/pgp") @serve.get("/pgp")
async def get_pgp(): async def get_pgp():
@ -72,13 +73,13 @@ async def get_file_endpoint(file_path: str):
date_time = await loc.dt(file_path); date_time = await loc.dt(file_path);
absolute_path, local_path = assemble_journal_path(date_time, no_timestamp = True) absolute_path, local_path = assemble_journal_path(date_time, no_timestamp = True)
except ValueError as e: except ValueError as e:
L.DEBUG(f"Unable to parse {file_path} as a date, now trying to use it as a local path") logger.debug(f"Unable to parse {file_path} as a date, now trying to use it as a local path")
absolute_path = OBSIDIAN_VAULT_DIR / file_path absolute_path = OBSIDIAN_VAULT_DIR / file_path
if not absolute_path.suffix: if not absolute_path.suffix:
absolute_path = Path(absolute_path.with_suffix(".md")) absolute_path = Path(absolute_path.with_suffix(".md"))
if not absolute_path.is_file(): if not absolute_path.is_file():
L.WARN(f"{absolute_path} is not a valid file it seems.") logger.warn(f"{absolute_path} is not a valid file it seems.")
elif absolute_path.suffix == '.md': elif absolute_path.suffix == '.md':
try: try:
with open(absolute_path, 'r', encoding='utf-8') as file: with open(absolute_path, 'r', encoding='utf-8') as file:
@ -138,7 +139,7 @@ async def hook_changedetection(webhook_data: dict):
@serve.post("/cl/search") @serve.post("/cl/search")
async def hook_cl_search(request: Request, bg_tasks: BackgroundTasks): async def hook_cl_search(request: Request, bg_tasks: BackgroundTasks):
client_ip = request.client.host client_ip = request.client.host
L.DEBUG(f"Received request from IP: {client_ip}") logger.debug(f"Received request from IP: {client_ip}")
data = await request.json() data = await request.json()
payload = data['payload'] payload = data['payload']
results = data['payload']['results'] results = data['payload']['results']
@ -156,7 +157,7 @@ async def hook_cl_search(request: Request, bg_tasks: BackgroundTasks):
@serve.post("/cl/docket") @serve.post("/cl/docket")
async def hook_cl_docket(request: Request): async def hook_cl_docket(request: Request):
client_ip = request.client.host client_ip = request.client.host
L.DEBUG(f"Received request from IP: {client_ip}") logger.debug(f"Received request from IP: {client_ip}")
data = await request.json() data = await request.json()
await cl_docket(data, client_ip) await cl_docket(data, client_ip)
@ -313,14 +314,14 @@ async def cl_docket_process_result(result, session):
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response: async with session.get(url, headers=headers) as response:
if response.status == 200: if response.status == 200:
L.DEBUG(f"Fetching CourtListener docket information for {docket}...") logger.debug(f"Fetching CourtListener docket information for {docket}...")
data = await response.json() data = await response.json()
court_docket = data['results'][0]['docket_number_core'] court_docket = data['results'][0]['docket_number_core']
court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number
case_name = data['results'][0]['case_name'] case_name = data['results'][0]['case_name']
L.DEBUG(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.") logger.debug(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
else: else:
L.DEBUG("Failed to fetch data from CourtListener API.") logger.debug("Failed to fetch data from CourtListener API.")
court_docket = 'NoCourtDocket' court_docket = 'NoCourtDocket'
case_name = 'NoCaseName' case_name = 'NoCaseName'
@ -330,12 +331,12 @@ async def cl_docket_process_result(result, session):
if filepath_ia: if filepath_ia:
file_url = filepath_ia file_url = filepath_ia
L.DEBUG(f"Found IA file at {file_url}.") logger.debug(f"Found IA file at {file_url}.")
elif filepath_local: elif filepath_local:
file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}" file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
L.DEBUG(f"Found local file at {file_url}.") logger.debug(f"Found local file at {file_url}.")
else: else:
L.DEBUG(f"No file URL found in filepath_ia or filepath_local for one of the documents.") logger.debug(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
continue continue
document_number = document.get('document_number', 'NoDocumentNumber') document_number = document.get('document_number', 'NoDocumentNumber')
@ -346,7 +347,7 @@ async def cl_docket_process_result(result, session):
target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name
target_path.parent.mkdir(parents=True, exist_ok=True) target_path.parent.mkdir(parents=True, exist_ok=True)
await cl_download_file(file_url, target_path, session) await cl_download_file(file_url, target_path, session)
L.DEBUG(f"Downloaded {file_name} to {target_path}") logger.debug(f"Downloaded {file_name} to {target_path}")
def cl_case_details(docket): def cl_case_details(docket):
case_info = CASETABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"}) case_info = CASETABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"})
@ -359,18 +360,18 @@ async def cl_download_file(url: str, path: Path, session: aiohttp.ClientSession
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36' 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
} }
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
L.DEBUG(f"Attempting to download {url} to {path}.") logger.debug(f"Attempting to download {url} to {path}.")
try: try:
async with session.get(url, headers=headers, allow_redirects=True) as response: async with session.get(url, headers=headers, allow_redirects=True) as response:
if response.status == 403: if response.status == 403:
L.ERR(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.") logger.error(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
return return
response.raise_for_status() response.raise_for_status()
# Check if the response content type is a PDF # Check if the response content type is a PDF
content_type = response.headers.get('Content-Type') content_type = response.headers.get('Content-Type')
if content_type != 'application/pdf': if content_type != 'application/pdf':
L.ERR(f"Invalid content type: {content_type}. Skipping download.") logger.error(f"Invalid content type: {content_type}. Skipping download.")
return return
# Create an in-memory buffer to store the downloaded content # Create an in-memory buffer to store the downloaded content
@ -385,7 +386,7 @@ async def cl_download_file(url: str, path: Path, session: aiohttp.ClientSession
try: try:
PdfReader(buffer) PdfReader(buffer)
except Exception as e: except Exception as e:
L.ERR(f"Invalid PDF content: {str(e)}. Skipping download.") logger.error(f"Invalid PDF content: {str(e)}. Skipping download.")
return return
# If the PDF is valid, write the content to the file on disk # If the PDF is valid, write the content to the file on disk
@ -394,7 +395,7 @@ async def cl_download_file(url: str, path: Path, session: aiohttp.ClientSession
file.write(buffer.getvalue()) file.write(buffer.getvalue())
except Exception as e: except Exception as e:
L.ERR(f"Error downloading file: {str(e)}") logger.error(f"Error downloading file: {str(e)}")
async def cl_search_process_result(result): async def cl_search_process_result(result):
@ -403,7 +404,7 @@ async def cl_search_process_result(result):
court_id = result.get('court_id') court_id = result.get('court_id')
case_name_short = result.get('caseNameShort') case_name_short = result.get('caseNameShort')
case_name = result.get('caseName') case_name = result.get('caseName')
L.DEBUG(f"Received payload for case {case_name} ({court_id}) and download url {download_url}") logger.debug(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")
court_folder = court_id court_folder = court_id
@ -417,4 +418,4 @@ async def cl_search_process_result(result):
target_path.parent.mkdir(parents=True, exist_ok=True) target_path.parent.mkdir(parents=True, exist_ok=True)
await cl_download_file(download_url, target_path, session) await cl_download_file(download_url, target_path, session)
L.DEBUG(f"Downloaded {file_name} to {target_path}") logger.debug(f"Downloaded {file_name} to {target_path}")

View file

@ -2,9 +2,12 @@
Signal Bot example, repeats received messages. Signal Bot example, repeats received messages.
""" """
import os import os
from fastapi import APIRouter
from semaphore import Bot, ChatContext from semaphore import Bot, ChatContext
from sijapi import L
signal = APIRouter()
logger = L.get_module_logger("signal")
async def echo(ctx: ChatContext) -> None: async def echo(ctx: ChatContext) -> None:
if not ctx.message.empty(): if not ctx.message.empty():

View file

@ -28,14 +28,9 @@ from traceback import format_exc
from sijapi import L, TIMING_API_KEY, TIMING_API_URL from sijapi import L, TIMING_API_KEY, TIMING_API_URL
from sijapi.routers import loc from sijapi.routers import loc
### INITIALIZATIONS ###
time = APIRouter(tags=["private"]) time = APIRouter(tags=["private"])
logger = L.get_module_logger("time")
########################
#### INITIALIZATION ####
########################
script_directory = os.path.dirname(os.path.abspath(__file__)) script_directory = os.path.dirname(os.path.abspath(__file__))
# Configuration constants # Configuration constants
@ -63,17 +58,17 @@ async def post_time_entry_to_timing(entry: Dict):
'Accept': 'application/json', 'Accept': 'application/json',
'X-Time-Zone': 'America/Los_Angeles' 'X-Time-Zone': 'America/Los_Angeles'
} }
L.DEBUG(f"Received entry: {entry}") logger.debug(f"Received entry: {entry}")
response = None # Initialize response response = None # Initialize response
try: try:
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
response = await client.post(url, headers=headers, json=entry) response = await client.post(url, headers=headers, json=entry)
response.raise_for_status() # This will only raise for 4xx and 5xx responses response.raise_for_status() # This will only raise for 4xx and 5xx responses
except httpx.HTTPStatusError as exc: except httpx.HTTPStatusError as exc:
L.DEBUG(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}") logger.debug(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}")
raise HTTPException(status_code=exc.response.status_code, detail=str(exc.response.text)) raise HTTPException(status_code=exc.response.status_code, detail=str(exc.response.text))
except Exception as exc: except Exception as exc:
L.DEBUG(f"General exception caught: {exc}") logger.debug(f"General exception caught: {exc}")
raise HTTPException(status_code=500, detail="An unexpected error occurred") raise HTTPException(status_code=500, detail="An unexpected error occurred")
if response: if response:

View file

@ -12,7 +12,7 @@ import asyncio
from pydantic import BaseModel from pydantic import BaseModel
from typing import Optional, Union, List from typing import Optional, Union, List
from pydub import AudioSegment from pydub import AudioSegment
from TTS.api import TTS as XTTSv2 from TTS.api import TTS
from pathlib import Path from pathlib import Path
from datetime import datetime as dt_datetime from datetime import datetime as dt_datetime
from time import time from time import time
@ -25,32 +25,29 @@ import tempfile
import random import random
import re import re
import os import os
from sijapi import L, Dir, API, TTS from sijapi import L, DEFAULT_VOICE, TTS_SEGMENTS_DIR, VOICE_DIR, PODCAST_DIR, TTS_OUTPUT_DIR, ELEVENLABS_API_KEY
from sijapi.utilities import sanitize_filename from sijapi.utilities import sanitize_filename
### INITIALIZATIONS ### ### INITIALIZATIONS ###
tts = APIRouter(tags=["trusted", "private"]) tts = APIRouter(tags=["trusted", "private"])
logger = L.get_module_logger("tts")
DEVICE = torch.device('cpu') DEVICE = torch.device('cpu')
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2" MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
@tts.get("/tts/local_voices", response_model=List[str]) @tts.get("/tts/local_voices", response_model=List[str])
async def list_wav_files(): async def list_wav_files():
wav_files = [file.split('.')[0] for file in os.listdir(Dir.data.tts.voices) if file.endswith(".wav")] wav_files = [file.split('.')[0] for file in os.listdir(VOICE_DIR) if file.endswith(".wav")]
return wav_files return wav_files
@tts.get("/tts/elevenlabs_voices") @tts.get("/tts/elevenlabs_voices")
async def list_11l_voices(): async def list_11l_voices():
formatted_list = "" formatted_list = ""
url = "https://api.elevenlabs.io/v1/voices" url = "https://api.elevenlabs.io/v1/voices"
headers = {"xi-api-key": TTS.elevenlabs.api_key} headers = {"xi-api-key": ELEVENLABS_API_KEY}
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
try: try:
response = await client.get(url, headers=headers) response = await client.get(url, headers=headers)
L.DEBUG(f"Response: {response}") logger.debug(f"Response: {response}")
if response.status_code == 200: if response.status_code == 200:
voices_data = response.json().get("voices", []) voices_data = response.json().get("voices", [])
formatted_list = "" formatted_list = ""
@ -60,7 +57,7 @@ async def list_11l_voices():
formatted_list += f"{name}: `{id}`\n" formatted_list += f"{name}: `{id}`\n"
except Exception as e: except Exception as e:
L.ERR(f"Error determining voice ID: {str(e)}") logger.error(f"Error determining voice ID: {str(e)}")
return PlainTextResponse(formatted_list, status_code=200) return PlainTextResponse(formatted_list, status_code=200)
@ -70,18 +67,18 @@ async def select_voice(voice_name: str) -> str:
try: try:
# Case Insensitive comparison # Case Insensitive comparison
voice_name_lower = voice_name.lower() voice_name_lower = voice_name.lower()
L.DEBUG(f"Looking for {voice_name_lower}") logger.debug(f"Looking for {voice_name_lower}")
for item in Dir.data.tts.voices.iterdir(): for item in VOICE_DIR.iterdir():
L.DEBUG(f"Checking {item.name.lower()}") logger.debug(f"Checking {item.name.lower()}")
if item.name.lower() == f"{voice_name_lower}.wav": if item.name.lower() == f"{voice_name_lower}.wav":
L.DEBUG(f"select_voice received query to use voice: {voice_name}. Found {item} inside {Dir.data.tts.voices}.") logger.debug(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.")
return str(item) return str(item)
L.ERR(f"Voice file not found") logger.error(f"Voice file not found")
raise HTTPException(status_code=404, detail="Voice file not found") raise HTTPException(status_code=404, detail="Voice file not found")
except Exception as e: except Exception as e:
L.ERR(f"Voice file not found: {str(e)}") logger.error(f"Voice file not found: {str(e)}")
return None return None
@ -116,8 +113,8 @@ async def generate_speech_endpoint(
else: else:
return await generate_speech(bg_tasks, text_content, voice, voice_file, model, speed, podcast) return await generate_speech(bg_tasks, text_content, voice, voice_file, model, speed, podcast)
except Exception as e: except Exception as e:
L.ERR(f"Error in TTS: {str(e)}") logger.error(f"Error in TTS: {str(e)}")
L.ERR(traceback.format_exc()) logger.error(traceback.format_exc())
raise HTTPException(status_code=666, detail="error in TTS") raise HTTPException(status_code=666, detail="error in TTS")
async def generate_speech( async def generate_speech(
@ -131,7 +128,7 @@ async def generate_speech(
title: str = None, title: str = None,
output_dir = None output_dir = None
) -> str: ) -> str:
output_dir = Path(output_dir) if output_dir else TTS.data.tts.outputs output_dir = Path(output_dir) if output_dir else TTS_OUTPUT_DIR
if not output_dir.exists(): if not output_dir.exists():
output_dir.mkdir(parents=True) output_dir.mkdir(parents=True)
@ -140,17 +137,17 @@ async def generate_speech(
title = title if title else "TTS audio" title = title if title else "TTS audio"
output_path = output_dir / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')} {title}.wav" output_path = output_dir / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')} {title}.wav"
if model == "eleven_turbo_v2": if model == "eleven_turbo_v2":
L.INFO("Using ElevenLabs.") logger.info("Using ElevenLabs.")
audio_file_path = await elevenlabs_tts(model, text, voice, title, output_dir) audio_file_path = await elevenlabs_tts(model, text, voice, title, output_dir)
else: # if model == "xtts": else: # if model == "xtts":
L.INFO("Using XTTS2") logger.info("Using XTTS2")
audio_file_path = await local_tts(text, speed, voice, voice_file, podcast, bg_tasks, title, output_path) audio_file_path = await local_tts(text, speed, voice, voice_file, podcast, bg_tasks, title, output_path)
#else: #else:
# raise HTTPException(status_code=400, detail="Invalid model specified") # raise HTTPException(status_code=400, detail="Invalid model specified")
if podcast == True: if podcast == True:
podcast_path = TTS.podcast_dir / audio_file_path.name podcast_path = Path(PODCAST_DIR) / audio_file_path.name
L.DEBUG(f"Podcast path: {podcast_path}") logger.debug(f"Podcast path: {podcast_path}")
shutil.copy(str(audio_file_path), str(podcast_path)) shutil.copy(str(audio_file_path), str(podcast_path))
bg_tasks.add_task(os.remove, str(audio_file_path)) bg_tasks.add_task(os.remove, str(audio_file_path))
return str(podcast_path) return str(podcast_path)
@ -158,7 +155,7 @@ async def generate_speech(
return str(audio_file_path) return str(audio_file_path)
except Exception as e: except Exception as e:
L.ERR(f"Failed to generate speech: {str(e)}") logger.error(f"Failed to generate speech: {str(e)}")
raise HTTPException(status_code=500, detail=f"Failed to generate speech: {str(e)}") raise HTTPException(status_code=500, detail=f"Failed to generate speech: {str(e)}")
@ -174,7 +171,7 @@ async def get_model(voice: str = None, voice_file: UploadFile = None):
raise HTTPException(status_code=400, detail="No model or voice specified") raise HTTPException(status_code=400, detail="No model or voice specified")
async def determine_voice_id(voice_name: str) -> str: async def determine_voice_id(voice_name: str) -> str:
L.DEBUG(f"Searching for voice id for {voice_name}") logger.debug(f"Searching for voice id for {voice_name}")
hardcoded_voices = { hardcoded_voices = {
"alloy": "E3A1KVbKoWSIKSZwSUsW", "alloy": "E3A1KVbKoWSIKSZwSUsW",
@ -191,23 +188,23 @@ async def determine_voice_id(voice_name: str) -> str:
if voice_name in hardcoded_voices: if voice_name in hardcoded_voices:
voice_id = hardcoded_voices[voice_name] voice_id = hardcoded_voices[voice_name]
L.DEBUG(f"Found voice ID - {voice_id}") logger.debug(f"Found voice ID - {voice_id}")
return voice_id return voice_id
L.DEBUG(f"Requested voice not among the hardcoded options.. checking with 11L next.") logger.debug(f"Requested voice not among the hardcoded options.. checking with 11L next.")
url = "https://api.elevenlabs.io/v1/voices" url = "https://api.elevenlabs.io/v1/voices"
headers = {"xi-api-key": TTS.elevenlabs.api_key} headers = {"xi-api-key": ELEVENLABS_API_KEY}
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
try: try:
response = await client.get(url, headers=headers) response = await client.get(url, headers=headers)
L.DEBUG(f"Response: {response}") logger.debug(f"Response: {response}")
if response.status_code == 200: if response.status_code == 200:
voices_data = response.json().get("voices", []) voices_data = response.json().get("voices", [])
for voice in voices_data: for voice in voices_data:
if voice_name == voice["voice_id"] or voice_name == voice["name"]: if voice_name == voice["voice_id"] or voice_name == voice["name"]:
return voice["voice_id"] return voice["voice_id"]
except Exception as e: except Exception as e:
L.ERR(f"Error determining voice ID: {str(e)}") logger.error(f"Error determining voice ID: {str(e)}")
# as a last fallback, rely on David Attenborough # as a last fallback, rely on David Attenborough
return "b42GBisbu9r5m5n6pHF7" return "b42GBisbu9r5m5n6pHF7"
@ -222,10 +219,10 @@ async def elevenlabs_tts(model: str, input_text: str, voice: str, title: str = N
"text": input_text, "text": input_text,
"model_id": model "model_id": model
} }
headers = {"Content-Type": "application/json", "xi-api-key": TTS.elevenlabs.api_key} headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}
async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client: # 5 minutes timeout async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client: # 5 minutes timeout
response = await client.post(url, json=payload, headers=headers) response = await client.post(url, json=payload, headers=headers)
output_dir = output_dir if output_dir else TTS.podcast_dir output_dir = output_dir if output_dir else TTS_OUTPUT_DIR
title = title if title else dt_datetime.now().strftime("%Y%m%d%H%M%S") title = title if title else dt_datetime.now().strftime("%Y%m%d%H%M%S")
filename = f"{sanitize_filename(title)}.mp3" filename = f"{sanitize_filename(title)}.mp3"
file_path = Path(output_dir) / filename file_path = Path(output_dir) / filename
@ -236,6 +233,9 @@ async def elevenlabs_tts(model: str, input_text: str, voice: str, title: str = N
else: else:
raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API") raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")
async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> str: async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> str:
if file: if file:
return (await file.read()).decode("utf-8").strip() return (await file.read()).decode("utf-8").strip()
@ -244,17 +244,20 @@ async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> s
else: else:
raise HTTPException(status_code=400, detail="No text provided") raise HTTPException(status_code=400, detail="No text provided")
async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str: async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str:
if voice: if voice:
L.DEBUG(f"Looking for voice: {voice}") logger.debug(f"Looking for voice: {voice}")
selected_voice = await select_voice(voice) selected_voice = await select_voice(voice)
return selected_voice return selected_voice
elif voice_file and isinstance(voice_file, UploadFile): elif voice_file and isinstance(voice_file, UploadFile):
Dir.data.tts.voices.mkdir(exist_ok=True) VOICE_DIR.mkdir(exist_ok=True)
content = await voice_file.read() content = await voice_file.read()
checksum = hashlib.md5(content).hexdigest() checksum = hashlib.md5(content).hexdigest()
existing_file = Dir.data.tts.voices / voice_file.filename existing_file = VOICE_DIR / voice_file.filename
if existing_file.is_file(): if existing_file.is_file():
with open(existing_file, 'rb') as f: with open(existing_file, 'rb') as f:
existing_checksum = hashlib.md5(f.read()).hexdigest() existing_checksum = hashlib.md5(f.read()).hexdigest()
@ -266,7 +269,7 @@ async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None)
counter = 1 counter = 1
new_file = existing_file new_file = existing_file
while new_file.is_file(): while new_file.is_file():
new_file = Dir.data.tts.voices / f"{base_name}{counter:02}.wav" new_file = VOICE_DIR / f"{base_name}{counter:02}.wav"
counter += 1 counter += 1
with open(new_file, 'wb') as f: with open(new_file, 'wb') as f:
@ -274,8 +277,8 @@ async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None)
return str(new_file) return str(new_file)
else: else:
L.DEBUG(f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {TTS.xtts.voice}") logger.debug(f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}")
selected_voice = await select_voice(TTS.xtts.voice) selected_voice = await select_voice(DEFAULT_VOICE)
return selected_voice return selected_voice
@ -296,7 +299,7 @@ async def local_tts(
datetime_str = dt_datetime.now().strftime("%Y%m%d%H%M%S") datetime_str = dt_datetime.now().strftime("%Y%m%d%H%M%S")
title = sanitize_filename(title) if title else "Audio" title = sanitize_filename(title) if title else "Audio"
filename = f"{datetime_str}_{title}.wav" filename = f"{datetime_str}_{title}.wav"
file_path = Dir.data.tts.outputs / filename file_path = TTS_OUTPUT_DIR / filename
# Ensure the parent directory exists # Ensure the parent directory exists
file_path.parent.mkdir(parents=True, exist_ok=True) file_path.parent.mkdir(parents=True, exist_ok=True)
@ -304,15 +307,15 @@ async def local_tts(
voice_file_path = await get_voice_file_path(voice, voice_file) voice_file_path = await get_voice_file_path(voice, voice_file)
# Initialize TTS model in a separate thread # Initialize TTS model in a separate thread
XTTS = await asyncio.to_thread(XTTSv2, model_name=MODEL_NAME) XTTS = await asyncio.to_thread(TTS, model_name=MODEL_NAME)
await asyncio.to_thread(XTTS.to, DEVICE) await asyncio.to_thread(XTTS.to, DEVICE)
segments = split_text(text_content) segments = split_text(text_content)
combined_audio = AudioSegment.silent(duration=0) combined_audio = AudioSegment.silent(duration=0)
for i, segment in enumerate(segments): for i, segment in enumerate(segments):
segment_file_path = Dir.data.tts.segments / f"segment_{i}.wav" segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav"
L.DEBUG(f"Segment file path: {segment_file_path}") logger.debug(f"Segment file path: {segment_file_path}")
# Run TTS in a separate thread # Run TTS in a separate thread
await asyncio.to_thread( await asyncio.to_thread(
@ -323,7 +326,7 @@ async def local_tts(
speaker_wav=[voice_file_path], speaker_wav=[voice_file_path],
language="en" language="en"
) )
L.DEBUG(f"Segment file generated: {segment_file_path}") logger.debug(f"Segment file generated: {segment_file_path}")
# Load and combine audio in a separate thread # Load and combine audio in a separate thread
segment_audio = await asyncio.to_thread(AudioSegment.from_wav, str(segment_file_path)) segment_audio = await asyncio.to_thread(AudioSegment.from_wav, str(segment_file_path))
@ -334,7 +337,7 @@ async def local_tts(
# Export the combined audio in a separate thread # Export the combined audio in a separate thread
if podcast: if podcast:
podcast_file_path = Path(TTS.podcast_dir) / file_path.name podcast_file_path = Path(PODCAST_DIR) / file_path.name
await asyncio.to_thread(combined_audio.export, podcast_file_path, format="wav") await asyncio.to_thread(combined_audio.export, podcast_file_path, format="wav")
await asyncio.to_thread(combined_audio.export, file_path, format="wav") await asyncio.to_thread(combined_audio.export, file_path, format="wav")
@ -362,7 +365,7 @@ async def stream_tts(text_content: str, speed: float, voice: str, voice_file) ->
async def generate_tts(text: str, speed: float, voice_file_path: str) -> str: async def generate_tts(text: str, speed: float, voice_file_path: str) -> str:
output_dir = tempfile.mktemp(suffix=".wav", dir=tempfile.gettempdir()) output_dir = tempfile.mktemp(suffix=".wav", dir=tempfile.gettempdir())
XTTS = XTTSv2(model_name=MODEL_NAME).to(DEVICE) XTTS = TTS(model_name=MODEL_NAME).to(DEVICE)
XTTS.tts_to_file(text=text, speed=speed, file_path=output_dir, speaker_wav=[voice_file_path], language="en") XTTS.tts_to_file(text=text, speed=speed, file_path=output_dir, speaker_wav=[voice_file_path], language="en")
return output_dir return output_dir
@ -375,7 +378,7 @@ async def get_audio_stream(model: str, input_text: str, voice: str):
"text": input_text, "text": input_text,
"model_id": "eleven_turbo_v2" "model_id": "eleven_turbo_v2"
} }
headers = {"Content-Type": "application/json", "xi-api-key": TTS.elevenlabs.api_key} headers = {"Content-Type": "application/json", "xi-api-key": ELEVENLABS_API_KEY}
response = requests.post(url, json=payload, headers=headers) response = requests.post(url, json=payload, headers=headers)
if response.status_code == 200: if response.status_code == 200:
@ -398,7 +401,7 @@ def split_text(text, target_length=35, max_length=50):
if segment_length + len(sentence_words) > max_length: if segment_length + len(sentence_words) > max_length:
segments.append(' '.join(current_segment)) segments.append(' '.join(current_segment))
L.DEBUG(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}") logger.debug(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}")
current_segment = [sentence] current_segment = [sentence]
else: else:
@ -406,7 +409,7 @@ def split_text(text, target_length=35, max_length=50):
if current_segment: if current_segment:
segments.append(' '.join(current_segment)) segments.append(' '.join(current_segment))
L.DEBUG(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}") logger.debug(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}")
return segments return segments
@ -418,7 +421,7 @@ def clean_text_for_tts(text: str) -> str:
text = re.sub(r'\s+', ' ', text).strip() text = re.sub(r'\s+', ' ', text).strip()
return text return text
else: else:
L.DEBUG(f"No text received.") logger.debug(f"No text received.")
@ -428,7 +431,7 @@ def copy_to_podcast_dir(file_path):
file_name = Path(file_path).name file_name = Path(file_path).name
# Construct the destination path in the PODCAST_DIR # Construct the destination path in the PODCAST_DIR
destination_path = TTS.podcast_dir / file_name destination_path = Path(PODCAST_DIR) / file_name
# Copy the file to the PODCAST_DIR # Copy the file to the PODCAST_DIR
shutil.copy(file_path, destination_path) shutil.copy(file_path, destination_path)

View file

@ -16,6 +16,7 @@ from sijapi.utilities import haversine
from sijapi.routers import loc from sijapi.routers import loc
weather = APIRouter() weather = APIRouter()
logger = L.get_module_logger("weather")
@weather.get("/weather/refresh", response_class=JSONResponse) @weather.get("/weather/refresh", response_class=JSONResponse)
async def get_refreshed_weather( async def get_refreshed_weather(
@ -34,7 +35,7 @@ async def get_refreshed_weather(
tz = await GEO.tz_at(lat, lon) tz = await GEO.tz_at(lat, lon)
date_time = await loc.dt(date, tz) date_time = await loc.dt(date, tz)
L.DEBUG(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather") logger.debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
day = await get_weather(date_time, lat, lon, force_refresh=True) day = await get_weather(date_time, lat, lon, force_refresh=True)
day_str = str(day) day_str = str(day)
return JSONResponse(content={"weather": day_str}, status_code=200) return JSONResponse(content={"weather": day_str}, status_code=200)
@ -43,18 +44,18 @@ async def get_refreshed_weather(
return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code) return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code)
except Exception as e: except Exception as e:
L.ERR(f"Error in note_weather_get: {str(e)}") logger.error(f"Error in note_weather_get: {str(e)}")
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
async def get_weather(date_time: dt_datetime, latitude: float, longitude: float, force_refresh: bool = False): async def get_weather(date_time: dt_datetime, latitude: float, longitude: float, force_refresh: bool = False):
L.DEBUG(f"Called get_weather with lat: {latitude}, lon: {longitude}, date_time: {date_time}") logger.debug(f"Called get_weather with lat: {latitude}, lon: {longitude}, date_time: {date_time}")
L.WARN(f"Using {date_time} as our datetime in get_weather.") logger.warn(f"Using {date_time} as our datetime in get_weather.")
fetch_new_data = True fetch_new_data = True
if force_refresh == False: if force_refresh == False:
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude) daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
if daily_weather_data: if daily_weather_data:
try: try:
L.DEBUG(f"Daily weather data from db: {daily_weather_data}") logger.debug(f"Daily weather data from db: {daily_weather_data}")
last_updated = str(daily_weather_data['DailyWeather'].get('last_updated')) last_updated = str(daily_weather_data['DailyWeather'].get('last_updated'))
last_updated = await loc.dt(last_updated) last_updated = await loc.dt(last_updated)
stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location')) stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location'))
@ -65,67 +66,67 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
hourly_weather = daily_weather_data.get('HourlyWeather') hourly_weather = daily_weather_data.get('HourlyWeather')
L.DEBUG(f"Hourly: {hourly_weather}") logger.debug(f"Hourly: {hourly_weather}")
L.DEBUG(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\n") logger.debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\n")
request_haversine = haversine(latitude, longitude, stored_lat, stored_lon) request_haversine = haversine(latitude, longitude, stored_lat, stored_lon)
L.DEBUG(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}") logger.debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")
if last_updated and (date_time <= dt_datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0: if last_updated and (date_time <= dt_datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0:
L.DEBUG(f"We can use existing data... :')") logger.debug(f"We can use existing data... :')")
fetch_new_data = False fetch_new_data = False
except Exception as e: except Exception as e:
L.ERR(f"Error in get_weather: {e}") logger.error(f"Error in get_weather: {e}")
if fetch_new_data: if fetch_new_data:
L.DEBUG(f"We require new data!") logger.debug(f"We require new data!")
request_date_str = date_time.strftime("%Y-%m-%d") request_date_str = date_time.strftime("%Y-%m-%d")
L.WARN(f"Using {date_time.strftime('%Y-%m-%d')} as our datetime for fetching new data.") logger.warn(f"Using {date_time.strftime('%Y-%m-%d')} as our datetime for fetching new data.")
url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}" url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"
try: try:
async with AsyncClient() as client: async with AsyncClient() as client:
response = await client.get(url) response = await client.get(url)
if response.status_code == 200: if response.status_code == 200:
L.DEBUG(f"Successfully obtained data from VC...") logger.debug(f"Successfully obtained data from VC...")
try: try:
weather_data = response.json() weather_data = response.json()
store_result = await store_weather_to_db(date_time, weather_data) store_result = await store_weather_to_db(date_time, weather_data)
if store_result == "SUCCESS": if store_result == "SUCCESS":
L.DEBUG(f"New weather data for {request_date_str} stored in database...") logger.debug(f"New weather data for {request_date_str} stored in database...")
else: else:
L.ERR(f"Failed to store weather data for {request_date_str} in database! {store_result}") logger.error(f"Failed to store weather data for {request_date_str} in database! {store_result}")
L.DEBUG(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}") logger.debug(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}")
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude) daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
if daily_weather_data is not None: if daily_weather_data is not None:
return daily_weather_data return daily_weather_data
else: else:
raise HTTPException(status_code=500, detail="Weather data was not properly stored.") raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
except Exception as e: except Exception as e:
L.ERR(f"Problem parsing VC response or storing data: {e}") logger.error(f"Problem parsing VC response or storing data: {e}")
raise HTTPException(status_code=500, detail="Weather data was not properly stored.") raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
else: else:
L.ERR(f"Failed to fetch weather data: {response.status_code}, {response.text}") logger.error(f"Failed to fetch weather data: {response.status_code}, {response.text}")
except Exception as e: except Exception as e:
L.ERR(f"Exception during API call: {e}") logger.error(f"Exception during API call: {e}")
return daily_weather_data return daily_weather_data
async def store_weather_to_db(date_time: dt_datetime, weather_data: dict): async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
L.WARN(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in store_weather_to_db") logger.warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in store_weather_to_db")
async with DB.get_connection() as conn: async with DB.get_connection() as conn:
try: try:
day_data = weather_data.get('days')[0] day_data = weather_data.get('days')[0]
L.DEBUG(f"RAW DAY_DATA: {day_data}") logger.debug(f"RAW DAY_DATA: {day_data}")
# Handle preciptype and stations as PostgreSQL arrays # Handle preciptype and stations as PostgreSQL arrays
preciptype_array = day_data.get('preciptype', []) or [] preciptype_array = day_data.get('preciptype', []) or []
stations_array = day_data.get('stations', []) or [] stations_array = day_data.get('stations', []) or []
date_str = date_time.strftime("%Y-%m-%d") date_str = date_time.strftime("%Y-%m-%d")
L.WARN(f"Using {date_str} in our query in store_weather_to_db.") logger.warn(f"Using {date_str} in our query in store_weather_to_db.")
# Get location details from weather data if available # Get location details from weather data if available
longitude = weather_data.get('longitude') longitude = weather_data.get('longitude')
@ -134,11 +135,11 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
elevation = await GEO.elevation(latitude, longitude) elevation = await GEO.elevation(latitude, longitude)
location_point = f"POINTZ({longitude} {latitude} {elevation})" if longitude and latitude and elevation else None location_point = f"POINTZ({longitude} {latitude} {elevation})" if longitude and latitude and elevation else None
L.WARN(f"Uncorrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}") logger.warn(f"Uncorrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}")
day_data['datetime'] = await loc.dt(day_data.get('datetimeEpoch')) day_data['datetime'] = await loc.dt(day_data.get('datetimeEpoch'))
day_data['sunrise'] = await loc.dt(day_data.get('sunriseEpoch')) day_data['sunrise'] = await loc.dt(day_data.get('sunriseEpoch'))
day_data['sunset'] = await loc.dt(day_data.get('sunsetEpoch')) day_data['sunset'] = await loc.dt(day_data.get('sunsetEpoch'))
L.WARN(f"Corrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}") logger.warn(f"Corrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}")
daily_weather_params = ( daily_weather_params = (
day_data.get('sunrise'), day_data.get('sunriseEpoch'), day_data.get('sunrise'), day_data.get('sunriseEpoch'),
@ -162,7 +163,7 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
location_point location_point
) )
except Exception as e: except Exception as e:
L.ERR(f"Failed to prepare database query in store_weather_to_db! {e}") logger.error(f"Failed to prepare database query in store_weather_to_db! {e}")
try: try:
daily_weather_query = ''' daily_weather_query = '''
@ -182,7 +183,7 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
daily_weather_id = await conn.fetchval(daily_weather_query, *daily_weather_params) daily_weather_id = await conn.fetchval(daily_weather_query, *daily_weather_params)
if 'hours' in day_data: if 'hours' in day_data:
L.DEBUG(f"Processing hours now...") logger.debug(f"Processing hours now...")
for hour_data in day_data['hours']: for hour_data in day_data['hours']:
try: try:
await asyncio.sleep(0.01) await asyncio.sleep(0.01)
@ -228,22 +229,22 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
''' '''
async with conn.transaction(): async with conn.transaction():
hourly_weather_id = await conn.fetchval(hourly_weather_query, *hourly_weather_params) hourly_weather_id = await conn.fetchval(hourly_weather_query, *hourly_weather_params)
L.DEBUG(f"Done processing hourly_weather_id {hourly_weather_id}") logger.debug(f"Done processing hourly_weather_id {hourly_weather_id}")
except Exception as e: except Exception as e:
L.ERR(f"EXCEPTION: {e}") logger.error(f"EXCEPTION: {e}")
except Exception as e: except Exception as e:
L.ERR(f"EXCEPTION: {e}") logger.error(f"EXCEPTION: {e}")
return "SUCCESS" return "SUCCESS"
except Exception as e: except Exception as e:
L.ERR(f"Error in dailyweather storage: {e}") logger.error(f"Error in dailyweather storage: {e}")
async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude: float): async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude: float):
L.WARN(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in get_weather_from_db.") logger.warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in get_weather_from_db.")
async with DB.get_connection() as conn: async with DB.get_connection() as conn:
query_date = date_time.date() query_date = date_time.date()
try: try:
@ -259,19 +260,19 @@ async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude
daily_weather_record = await conn.fetchrow(query, query_date, longitude, latitude, longitude, latitude) daily_weather_record = await conn.fetchrow(query, query_date, longitude, latitude, longitude, latitude)
if daily_weather_record is None: if daily_weather_record is None:
L.DEBUG(f"No daily weather data retrieved from database.") logger.debug(f"No daily weather data retrieved from database.")
return None return None
# Convert asyncpg.Record to a mutable dictionary # Convert asyncpg.Record to a mutable dictionary
daily_weather_data = dict(daily_weather_record) daily_weather_data = dict(daily_weather_record)
# L.DEBUG(f"Daily weather data prior to tz corrections: {daily_weather_data}") # logger.debug(f"Daily weather data prior to tz corrections: {daily_weather_data}")
# Now we can modify the dictionary # Now we can modify the dictionary
# tz = await GEO.tz_at(latitude, longitude) # tz = await GEO.tz_at(latitude, longitude)
# daily_weather_data['datetime'] = await loc.dt(daily_weather_data.get('datetime'), tz) # daily_weather_data['datetime'] = await loc.dt(daily_weather_data.get('datetime'), tz)
# daily_weather_data['sunrise'] = await loc.dt(daily_weather_data.get('sunrise'), tz) # daily_weather_data['sunrise'] = await loc.dt(daily_weather_data.get('sunrise'), tz)
# daily_weather_data['sunset'] = await loc.dt(daily_weather_data.get('sunset'), tz) # daily_weather_data['sunset'] = await loc.dt(daily_weather_data.get('sunset'), tz)
# L.DEBUG(f"Daily weather data after tz corrections: {daily_weather_data}") # logger.debug(f"Daily weather data after tz corrections: {daily_weather_data}")
# Query to get hourly weather data # Query to get hourly weather data
query = ''' query = '''
@ -287,14 +288,14 @@ async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude
# hour_data['datetime'] = await loc.dt(hour_data.get('datetime'), tz) # hour_data['datetime'] = await loc.dt(hour_data.get('datetime'), tz)
hourly_weather_data.append(hour_data) hourly_weather_data.append(hour_data)
# L.DEBUG(f"Hourly weather data after tz corrections: {hourly_weather_data}") # logger.debug(f"Hourly weather data after tz corrections: {hourly_weather_data}")
day = { day = {
'DailyWeather': daily_weather_data, 'DailyWeather': daily_weather_data,
'HourlyWeather': hourly_weather_data, 'HourlyWeather': hourly_weather_data,
} }
# L.DEBUG(f"day: {day}") # logger.debug(f"day: {day}")
return day return day
except Exception as e: except Exception as e:
L.ERR(f"Unexpected error occurred: {e}") logger.error(f"Unexpected error occurred: {e}")

View file

@ -29,6 +29,7 @@ from fastapi import Depends, HTTPException, Request, UploadFile
from fastapi.security.api_key import APIKeyHeader from fastapi.security.api_key import APIKeyHeader
from sijapi import L, API, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_PATH_LENGTH, ARCHIVE_DIR from sijapi import L, API, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_PATH_LENGTH, ARCHIVE_DIR
logger = L.get_module_logger('utilities')
api_key_header = APIKeyHeader(name="Authorization", auto_error=False) api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
@ -117,7 +118,7 @@ def assemble_journal_path(date_time: datetime, subdir: str = None, filename: str
relative_path = relative_path / filename relative_path = relative_path / filename
else: else:
L.DEBUG(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.") logger.debug(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.")
return None, None return None, None
absolute_path = OBSIDIAN_VAULT_DIR / relative_path absolute_path = OBSIDIAN_VAULT_DIR / relative_path
@ -166,14 +167,14 @@ def get_extension(file):
return file_extension return file_extension
except Exception as e: except Exception as e:
L.ERR(f"Unable to get extension of {file}") logger.error(f"Unable to get extension of {file}")
raise e raise e
def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LENGTH): def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LENGTH):
"""Sanitize a string to be used as a safe filename while protecting the file extension.""" """Sanitize a string to be used as a safe filename while protecting the file extension."""
L.DEBUG(f"Filename before sanitization: {text}") logger.debug(f"Filename before sanitization: {text}")
text = re.sub(r'\s+', ' ', text) text = re.sub(r'\s+', ' ', text)
sanitized = re.sub(ALLOWED_FILENAME_CHARS, '', text) sanitized = re.sub(ALLOWED_FILENAME_CHARS, '', text)
@ -185,7 +186,7 @@ def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LE
base_name = base_name[:max_base_length - 5].rstrip() base_name = base_name[:max_base_length - 5].rstrip()
final_filename = base_name + extension final_filename = base_name + extension
L.DEBUG(f"Filename after sanitization: {final_filename}") logger.debug(f"Filename after sanitization: {final_filename}")
return final_filename return final_filename
@ -195,16 +196,16 @@ def check_file_name(file_name, max_length=255):
needs_sanitization = False needs_sanitization = False
if len(file_name) > max_length: if len(file_name) > max_length:
L.DEBUG(f"Filename exceeds maximum length of {max_length}: {file_name}") logger.debug(f"Filename exceeds maximum length of {max_length}: {file_name}")
needs_sanitization = True needs_sanitization = True
if re.search(ALLOWED_FILENAME_CHARS, file_name): if re.search(ALLOWED_FILENAME_CHARS, file_name):
L.DEBUG(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}") logger.debug(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}")
needs_sanitization = True needs_sanitization = True
if re.search(r'\s{2,}', file_name): if re.search(r'\s{2,}', file_name):
L.DEBUG(f"Filename contains multiple consecutive spaces: {file_name}") logger.debug(f"Filename contains multiple consecutive spaces: {file_name}")
needs_sanitization = True needs_sanitization = True
if file_name != file_name.strip(): if file_name != file_name.strip():
L.DEBUG(f"Filename has leading or trailing spaces: {file_name}") logger.debug(f"Filename has leading or trailing spaces: {file_name}")
needs_sanitization = True needs_sanitization = True
return needs_sanitization return needs_sanitization
@ -247,13 +248,13 @@ async def ocr_pdf(file_path: str) -> str:
texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, image) for image in images)) texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, image) for image in images))
return ' '.join(texts) return ' '.join(texts)
except Exception as e: except Exception as e:
L.ERR(f"Error during OCR: {str(e)}") logger.error(f"Error during OCR: {str(e)}")
return "" return ""
async def extract_text_from_pdf(file_path: str) -> str: async def extract_text_from_pdf(file_path: str) -> str:
if not await is_valid_pdf(file_path): if not await is_valid_pdf(file_path):
L.ERR(f"Invalid PDF file: {file_path}") logger.error(f"Invalid PDF file: {file_path}")
return "" return ""
text = '' text = ''
@ -271,7 +272,7 @@ async def extract_text_from_pdf(file_path: str) -> str:
if text and not should_use_ocr(text, num_pages): if text and not should_use_ocr(text, num_pages):
return clean_text(text) return clean_text(text)
except Exception as e: except Exception as e:
L.ERR(f"Error extracting text with PyPDF2: {str(e)}") logger.error(f"Error extracting text with PyPDF2: {str(e)}")
# If PyPDF2 extraction fails or is insufficient, fall back to pdfminer.six # If PyPDF2 extraction fails or is insufficient, fall back to pdfminer.six
try: try:
@ -279,10 +280,10 @@ async def extract_text_from_pdf(file_path: str) -> str:
if text_pdfminer and not should_use_ocr(text_pdfminer, num_pages): if text_pdfminer and not should_use_ocr(text_pdfminer, num_pages):
return clean_text(text_pdfminer) return clean_text(text_pdfminer)
except Exception as e: except Exception as e:
L.ERR(f"Error extracting text with pdfminer.six: {e}") logger.error(f"Error extracting text with pdfminer.six: {e}")
# If both methods fail or are deemed insufficient, use OCR as the last resort # If both methods fail or are deemed insufficient, use OCR as the last resort
L.DEBUG("Falling back to OCR for text extraction...") logger.debug("Falling back to OCR for text extraction...")
return await ocr_pdf(file_path) return await ocr_pdf(file_path)
async def is_valid_pdf(file_path: str) -> bool: async def is_valid_pdf(file_path: str) -> bool:
@ -291,12 +292,12 @@ async def is_valid_pdf(file_path: str) -> bool:
kind = filetype.guess(file_path) kind = filetype.guess(file_path)
return kind.mime == 'application/pdf' return kind.mime == 'application/pdf'
except Exception as e: except Exception as e:
L.ERR(f"Error checking file type: {e}") logger.error(f"Error checking file type: {e}")
return False return False
async def extract_text_from_pdf(file_path: str) -> str: async def extract_text_from_pdf(file_path: str) -> str:
if not await is_valid_pdf(file_path): if not await is_valid_pdf(file_path):
L.ERR(f"Invalid PDF file: {file_path}") logger.error(f"Invalid PDF file: {file_path}")
return "" return ""
text = '' text = ''
@ -308,23 +309,23 @@ async def extract_text_from_pdf(file_path: str) -> str:
if text.strip(): # Successfully extracted text if text.strip(): # Successfully extracted text
return clean_text(text) return clean_text(text)
except Exception as e: except Exception as e:
L.ERR(f"Error extracting text with PyPDF2: {str(e)}") logger.error(f"Error extracting text with PyPDF2: {str(e)}")
try: try:
text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path) text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
if text_pdfminer.strip(): # Successfully extracted text if text_pdfminer.strip(): # Successfully extracted text
return clean_text(text_pdfminer) return clean_text(text_pdfminer)
except Exception as e: except Exception as e:
L.ERR(f"Error extracting text with pdfminer.six: {str(e)}") logger.error(f"Error extracting text with pdfminer.six: {str(e)}")
# Fall back to OCR # Fall back to OCR
L.DEBUG("Falling back to OCR for text extraction...") logger.debug("Falling back to OCR for text extraction...")
try: try:
images = convert_from_path(file_path) images = convert_from_path(file_path)
ocr_texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, img) for img in images)) ocr_texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, img) for img in images))
return ' '.join(ocr_texts).strip() return ' '.join(ocr_texts).strip()
except Exception as e: except Exception as e:
L.ERR(f"OCR failed: {str(e)}") logger.error(f"OCR failed: {str(e)}")
return "" return ""
async def extract_text_from_docx(file_path: str) -> str: async def extract_text_from_docx(file_path: str) -> str:
@ -427,7 +428,7 @@ def encode_image_to_base64(image_path):
base64_str = base64.b64encode(byte_data).decode('utf-8') base64_str = base64.b64encode(byte_data).decode('utf-8')
return base64_str return base64_str
else: else:
L.DEBUG(f"Error: File does not exist at {image_path}") logger.debug(f"Error: File does not exist at {image_path}")
def resize_and_convert_image(image_path, max_size=2160, quality=80): def resize_and_convert_image(image_path, max_size=2160, quality=80):
with Image.open(image_path) as img: with Image.open(image_path) as img:
@ -471,5 +472,5 @@ async def run_ssh_command(server, command):
ssh.close() ssh.close()
return output, error return output, error
except Exception as e: except Exception as e:
L.ERR(f"SSH command failed for server {server.id}: {str(e)}") logger.error(f"SSH command failed for server {server.id}: {str(e)}")
raise raise