Major update to database and logging methods

This commit is contained in:
sanj 2024-08-12 10:30:45 -07:00
parent ee6ee1ed87
commit b60e60ec1e
49 changed files with 46694 additions and 2266 deletions

View file

@ -2,44 +2,41 @@
import os import os
from pathlib import Path from pathlib import Path
import ipaddress
import multiprocessing
from dotenv import load_dotenv from dotenv import load_dotenv
from dateutil import tz from .logs import L, get_logger
from pathlib import Path
from .classes import Logger, Configuration, APIConfig, Database, DirConfig, Geocoder
# INITIALization
BASE_DIR = Path(__file__).resolve().parent BASE_DIR = Path(__file__).resolve().parent
CONFIG_DIR = BASE_DIR / "config" CONFIG_DIR = BASE_DIR / "config"
ENV_PATH = CONFIG_DIR / ".env" ENV_PATH = CONFIG_DIR / ".env"
load_dotenv(ENV_PATH) load_dotenv(ENV_PATH)
LOGS_DIR = BASE_DIR / "logs" LOGS_DIR = BASE_DIR / "logs"
os.makedirs(LOGS_DIR, exist_ok=True) os.makedirs(LOGS_DIR, exist_ok=True)
L = Logger("Central", LOGS_DIR) L.init('sys', LOGS_DIR)
l = get_logger("init")
import ipaddress
import multiprocessing
from dateutil import tz
from pathlib import Path
from .database import Database
from .classes import Config, SysConfig, DirConfig, Geocoder
# API essentials # API essentials
API = APIConfig.load('sys', 'secrets') Sys = SysConfig.init('sys', 'secrets')
Dir = DirConfig.load('dirs') Dir = DirConfig.init('dirs')
Db = Database.load('sys') l.debug(f"Dir configuration initialized: {Dir}")
l.debug(f"ROUTER path: {Dir.ROUTER}")
Db = Database.init('db')
# HOST = f"{API.BIND}:{API.PORT}" Img = Config.init('img', 'secrets', Dir)
# LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost'] Llm = Config.init('llm', 'secrets', Dir)
# SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255') News = Config.init('news', 'secrets', Dir)
Archivist = Config.init('archivist', 'secrets', Dir)
MAX_CPU_CORES = min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count()/2))), multiprocessing.cpu_count()) Scrape = Config.init('scrape', 'secrets', Dir)
Serve = Config.init('serve', 'secrets', Dir)
IMG = Configuration.load('img', 'secrets', Dir) Tts = Config.init('tts', 'secrets', Dir)
Llm = Configuration.load('llm', 'secrets', Dir)
News = Configuration.load('news', 'secrets', Dir)
Archivist = Configuration.load('archivist', 'secrets', Dir)
Scrape = Configuration.load('scrape', 'secrets', Dir)
Serve = Configuration.load('serve', 'secrets', Dir)
Tts = Configuration.load('tts', 'secrets', Dir)
# Directories & general paths # Directories & general paths
ROUTER_DIR = BASE_DIR / "routers"
DATA_DIR = BASE_DIR / "data" DATA_DIR = BASE_DIR / "data"
os.makedirs(DATA_DIR, exist_ok=True) os.makedirs(DATA_DIR, exist_ok=True)
ALERTS_DIR = DATA_DIR / "alerts" ALERTS_DIR = DATA_DIR / "alerts"
@ -172,7 +169,7 @@ CF_IP = DATA_DIR / "cf_ip.txt" # to be deprecated soon
CF_DOMAINS_PATH = DATA_DIR / "cf_domains.json" # to be deprecated soon CF_DOMAINS_PATH = DATA_DIR / "cf_domains.json" # to be deprecated soon
# Caddy - not fully implemented # Caddy - not fully implemented
API.URL = os.getenv("API.URL") Sys.URL = os.getenv("Sys.URL")
CADDY_SERVER = os.getenv('CADDY_SERVER', None) CADDY_SERVER = os.getenv('CADDY_SERVER', None)
CADDYFILE_PATH = os.getenv("CADDYFILE_PATH", "") if CADDY_SERVER is not None else None CADDYFILE_PATH = os.getenv("CADDYFILE_PATH", "") if CADDY_SERVER is not None else None
CADDY_API_KEY = os.getenv("CADDY_API_KEY") CADDY_API_KEY = os.getenv("CADDY_API_KEY")

View file

@ -1,81 +1,86 @@
#!/Users/sij/miniforge3/envs/api/bin/python #!/Users/sij/miniforge3/envs/api/bin/python
#__main__.py #__main__.py
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from fastapi import FastAPI, Request, HTTPException, Response from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.base import BaseHTTPMiddleware from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import ClientDisconnect
from hypercorn.asyncio import serve from hypercorn.asyncio import serve
from hypercorn.config import Config as HypercornConfig from hypercorn.config import Config as HypercornConfig
import sys import sys
import os import os
import traceback import traceback
import asyncio import asyncio
import httpx
import argparse
import json
import ipaddress import ipaddress
import importlib import importlib
from dotenv import load_dotenv
from pathlib import Path from pathlib import Path
from datetime import datetime
import argparse import argparse
from . import L, API, Db, ROUTER_DIR from . import Sys, Db, Dir
from .logs import L, get_logger
parser = argparse.ArgumentParser(description='Personal API.') def parse_args():
parser.add_argument('--log', type=str, default='INFO', help='Set overall log level (e.g., DEBUG, INFO, WARNING)') parser = argparse.ArgumentParser(description='Personal API.')
parser.add_argument('--debug', nargs='+', default=[], help='Set DEBUG log level for specific modules') parser.add_argument('--log', type=str, default='INFO',
parser.add_argument('--test', type=str, help='Load only the specified module.') choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
args = parser.parse_args() help='Set overall log level (e.g., DEBUG, INFO, WARNING)')
parser.add_argument('--debug', nargs='+', default=[],
help='Set DEBUG log level for specific modules')
parser.add_argument('--info', nargs='+', default=[],
help='Set INFO log level for specific modules')
parser.add_argument('--test', type=str, help='Load only the specified module.')
return parser.parse_args()
args = parse_args()
# Setup logging
L.setup_from_args(args) L.setup_from_args(args)
print(f"Debug modules after setup: {L.debug_modules}") l = get_logger("main")
l.info(f"Logging initialized. Debug modules: {L.debug_modules}")
l.info(f"Command line arguments: {args}")
logger = L.get_module_logger("main") l.debug(f"Current working directory: {os.getcwd()}")
def debug(text: str): logger.debug(text) l.debug(f"__file__ path: {__file__}")
def info(text: str): logger.info(text) l.debug(f"Absolute path of __file__: {os.path.abspath(__file__)}")
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
# Startup # Startup
crit("sijapi launched") l.critical("sijapi launched")
info(f"Arguments: {args}") l.info(f"Arguments: {args}")
# Log the router directory path
l.debug(f"Router directory path: {Dir.ROUTER.absolute()}")
l.debug(f"Router directory exists: {Dir.ROUTER.exists()}")
l.debug(f"Router directory is a directory: {Dir.ROUTER.is_dir()}")
l.debug(f"Contents of router directory: {list(Dir.ROUTER.iterdir())}")
# Load routers # Load routers
if args.test: if args.test:
load_router(args.test) load_router(args.test)
else: else:
for module_name in API.MODULES.__fields__: for module_name in Sys.MODULES.__fields__:
if getattr(API.MODULES, module_name): if getattr(Sys.MODULES, module_name):
load_router(module_name) load_router(module_name)
try: try:
# Initialize sync structures on all databases
# await API.initialize_sync()
await Db.initialize_engines() await Db.initialize_engines()
except Exception as e: except Exception as e:
crit(f"Error during startup: {str(e)}") l.critical(f"Error during startup: {str(e)}")
crit(f"Traceback: {traceback.format_exc()}") l.critical(f"Traceback: {traceback.format_exc()}")
try: try:
yield # This is where the app runs yield # This is where the app runs
finally: finally:
# Shutdown # Shutdown
crit("Shutting down...") l.critical("Shutting down...")
try: try:
await asyncio.wait_for(API.close_db_pools(), timeout=20) await asyncio.wait_for(Db.close(), timeout=20)
crit("Database pools closed.") l.critical("Database pools closed.")
except asyncio.TimeoutError: except asyncio.TimeoutError:
crit("Timeout while closing database pools.") l.critical("Timeout while closing database pools.")
except Exception as e: except Exception as e:
crit(f"Error during shutdown: {str(e)}") l.critical(f"Error during shutdown: {str(e)}")
crit(f"Traceback: {traceback.format_exc()}") l.critical(f"Traceback: {traceback.format_exc()}")
app = FastAPI(lifespan=lifespan) app = FastAPI(lifespan=lifespan)
@ -87,86 +92,83 @@ app.add_middleware(
allow_headers=['*'], allow_headers=['*'],
) )
class SimpleAPIKeyMiddleware(BaseHTTPMiddleware): class SimpleAPIKeyMiddleware(BaseHTTPMiddleware):
async def dispatch(self, request: Request, call_next): async def dispatch(self, request: Request, call_next):
client_ip = ipaddress.ip_address(request.client.host) client_ip = ipaddress.ip_address(request.client.host)
if request.method == "OPTIONS": if request.method == "OPTIONS":
# Allow CORS preflight requests # Allow CORS preflight requests
return JSONResponse(status_code=200) return JSONResponse(status_code=200)
if request.url.path not in API.PUBLIC: if request.url.path not in Sys.PUBLIC:
trusted_subnets = [ipaddress.ip_network(subnet) for subnet in API.TRUSTED_SUBNETS] trusted_subnets = [ipaddress.ip_network(subnet) for subnet in Sys.TRUSTED_SUBNETS]
if not any(client_ip in subnet for subnet in trusted_subnets): if not any(client_ip in subnet for subnet in trusted_subnets):
api_key_header = request.headers.get("Authorization") api_key_header = request.headers.get("Authorization")
api_key_query = request.query_params.get("api_key") api_key_query = request.query_params.get("api_key")
# Convert API.KEYS to lowercase for case-insensitive comparison # Convert Sys.KEYS to lowercase for case-insensitive comparison
api_keys_lower = [key.lower() for key in API.KEYS] api_keys_lower = [key.lower() for key in Sys.KEYS]
debug(f"API.KEYS (lowercase): {api_keys_lower}") l.debug(f"Sys.KEYS (lowercase): {api_keys_lower}")
if api_key_header: if api_key_header:
api_key_header = api_key_header.lower().split("bearer ")[-1] api_key_header = api_key_header.lower().split("bearer ")[-1]
debug(f"API key provided in header: {api_key_header}") l.debug(f"API key provided in header: {api_key_header}")
if api_key_query: if api_key_query:
api_key_query = api_key_query.lower() api_key_query = api_key_query.lower()
debug(f"API key provided in query: {api_key_query}") l.debug(f"API key provided in query: {api_key_query}")
if api_key_header.lower() not in api_keys_lower and api_key_query.lower() not in api_keys_lower: if (api_key_header is None or api_key_header.lower() not in api_keys_lower) and \
err(f"Invalid API key provided by a requester.") (api_key_query is None or api_key_query.lower() not in api_keys_lower):
l.error(f"Invalid API key provided by a requester.")
if api_key_header: if api_key_header:
debug(f"Invalid API key in header: {api_key_header}") l.debug(f"Invalid API key in header: {api_key_header}")
if api_key_query: if api_key_query:
debug(f"Invalid API key in query: {api_key_query}") l.debug(f"Invalid API key in query: {api_key_query}")
return JSONResponse( return JSONResponse(
status_code=401, status_code=401,
content={"detail": "Invalid or missing API key"} content={"detail": "Invalid or missing API key"}
) )
else: else:
if api_key_header.lower() in api_keys_lower: if api_key_header and api_key_header.lower() in api_keys_lower:
debug(f"Valid API key provided in header: {api_key_header}") l.debug(f"Valid API key provided in header: {api_key_header}")
if api_key_query and api_key_query.lower() in api_keys_lower: if api_key_query and api_key_query.lower() in api_keys_lower:
debug(f"Valid API key provided in query: {api_key_query}") l.debug(f"Valid API key provided in query: {api_key_query}")
response = await call_next(request) response = await call_next(request)
return response return response
# Add the middleware to your FastAPI app # Add the middleware to your FastAPI app
app.add_middleware(SimpleAPIKeyMiddleware) app.add_middleware(SimpleAPIKeyMiddleware)
@app.exception_handler(HTTPException) @app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException): async def http_exception_handler(request: Request, exc: HTTPException):
err(f"HTTP Exception: {exc.status_code} - {exc.detail}") l.error(f"HTTP Exception: {exc.status_code} - {exc.detail}")
err(f"Request: {request.method} {request.url}") l.error(f"Request: {request.method} {request.url}")
return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail}) return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
@app.middleware("http") @app.middleware("http")
async def handle_exception_middleware(request: Request, call_next): async def handle_exception_middleware(request: Request, call_next):
try: try:
response = await call_next(request) response = await call_next(request)
return response return response
except Exception as exc: except Exception as exc:
err(f"Unhandled exception in request: {request.method} {request.url}") l.error(f"Unhandled exception in request: {request.method} {request.url}")
err(f"Exception: {str(exc)}") l.error(f"Exception: {str(exc)}")
err(f"Traceback: {traceback.format_exc()}") l.error(f"Traceback: {traceback.format_exc()}")
return JSONResponse( return JSONResponse(
status_code=500, status_code=500,
content={"detail": "Internal Server Error"} content={"detail": "Internal Server Error"}
) )
@app.post("/sync/pull") @app.post("/sync/pull")
async def pull_changes(): async def pull_changes():
info(f"Received request to /sync/pull") l.info(f"Received request to /sync/pull")
try: try:
await API.add_primary_keys_to_local_tables() await Sys.add_primary_keys_to_local_tables()
await API.add_primary_keys_to_remote_tables() await Sys.add_primary_keys_to_remote_tables()
try: try:
source = await API.get_most_recent_source() source = await Sys.get_most_recent_source()
if source: if source:
# Pull changes from the source # Pull changes from the source
total_changes = await API.pull_changes(source) total_changes = await Sys.pull_changes(source)
return JSONResponse(content={ return JSONResponse(content={
"status": "success", "status": "success",
@ -179,39 +181,48 @@ async def pull_changes():
"status": "info", "status": "info",
"message": "No instances with more recent data found or all instances are offline." "message": "No instances with more recent data found or all instances are offline."
}) })
except Exception as e: except Exception as e:
err(f"Error in /sync/pull: {str(e)}") l.error(f"Error in /sync/pull: {str(e)}")
err(f"Traceback: {traceback.format_exc()}") l.error(f"Traceback: {traceback.format_exc()}")
raise HTTPException(status_code=500, detail=f"Error during pull: {str(e)}") raise HTTPException(status_code=500, detail=f"Error during pull: {str(e)}")
finally: finally:
info(f"Finished processing /sync/pull request") l.info(f"Finished processing /sync/pull request")
except Exception as e: except Exception as e:
err(f"Error while ensuring primary keys to tables: {str(e)}") l.error(f"Error while ensuring primary keys to tables: {str(e)}")
err(f"Traceback: {traceback.format_exc()}") l.error(f"Traceback: {traceback.format_exc()}")
raise HTTPException(status_code=500, detail=f"Error during primary key insurance: {str(e)}") raise HTTPException(status_code=500, detail=f"Error during primary key insurance: {str(e)}")
def load_router(router_name): def load_router(router_name):
router_file = ROUTER_DIR / f'{router_name}.py' router_logger = get_logger(f"router.{router_name}")
module_logger = L.get_module_logger(router_name) router_logger.debug(f"Attempting to load {router_name.capitalize()}...")
module_logger.debug(f"Attempting to load {router_name.capitalize()}...")
# Log the full path being checked
router_file = Dir.ROUTER / f'{router_name}.py'
router_logger.debug(f"Checking for router file at: {router_file.absolute()}")
if router_file.exists(): if router_file.exists():
router_logger.debug(f"Router file found: {router_file}")
module_path = f'sijapi.routers.{router_name}' module_path = f'sijapi.routers.{router_name}'
router_logger.debug(f"Attempting to import module: {module_path}")
try: try:
module = importlib.import_module(module_path) module = importlib.import_module(module_path)
router_logger.debug(f"Module imported successfully: {module}")
router = getattr(module, router_name) router = getattr(module, router_name)
router_logger.debug(f"Router object retrieved: {router}")
app.include_router(router) app.include_router(router)
router_logger.info(f"Router {router_name} loaded successfully")
except (ImportError, AttributeError) as e: except (ImportError, AttributeError) as e:
module_logger.critical(f"Failed to load router {router_name}: {e}") router_logger.critical(f"Failed to load router {router_name}: {e}")
router_logger.debug(f"Current working directory: {os.getcwd()}")
router_logger.debug(f"Python path: {sys.path}")
else: else:
module_logger.error(f"Router file for {router_name} does not exist.") router_logger.error(f"Router file for {router_name} does not exist at {router_file.absolute()}")
router_logger.debug(f"Contents of router directory: {list(Dir.ROUTER.iterdir())}")
def main(argv): def main(argv):
config = HypercornConfig() config = HypercornConfig()
config.bind = [API.BIND] config.bind = [Sys.BIND]
config.startup_timeout = 300 # 5 minutes config.startup_timeout = 300 # 5 minutes
config.shutdown_timeout = 15 # 15 seconds config.shutdown_timeout = 15 # 15 seconds
asyncio.run(serve(app, config)) asyncio.run(serve(app, config))

File diff suppressed because it is too large Load diff

View file

@ -56,8 +56,8 @@
#─── notes: ────────────────────────────────────────────────────────────────────── #─── notes: ──────────────────────────────────────────────────────────────────────
# #
# HOST_NET† and HOST_PORT comprise HOST and determine the ip and port the server binds to. # HOST_NET† and HOST_PORT comprise HOST and determine the ip and port the server binds to.
# API.URL is used to assemble URLs, e.g. in the MS authentication flow and for serving images generated on the img router. # Sys.URL is used to assemble URLs, e.g. in the MS authentication flow and for serving images generated on the img router.
# API.URL should match the base URL used to access sijapi sans endpoint, e.g. http://localhost:4444 or https://api.sij.ai # Sys.URL should match the base URL used to access sijapi sans endpoint, e.g. http://localhost:4444 or https://api.sij.ai
# #
# † Take care here! Please ensure you understand the implications of setting HOST_NET to anything besides 127.0.0.1, and configure your firewall and router appropriately if you do. Setting HOST_NET to 0.0.0.0, for instance, opens sijapi to any device the server running it is accessible to — including potentially frightening internet randos (depending how your firewall, router, and NAT are configured). # † Take care here! Please ensure you understand the implications of setting HOST_NET to anything besides 127.0.0.1, and configure your firewall and router appropriately if you do. Setting HOST_NET to 0.0.0.0, for instance, opens sijapi to any device the server running it is accessible to — including potentially frightening internet randos (depending how your firewall, router, and NAT are configured).
# #
@ -159,7 +159,7 @@ UNLOADED=ig
# apps that together make SetApp an incredible value for macOS users!) # apps that together make SetApp an incredible value for macOS users!)
# #
# tts: designed for use with coqui — $ pip install coqui — and/or the # tts: designed for use with coqui — $ pip install coqui — and/or the
# ElevenLabs API. # ElevenLabs API.
# #
# weather: requires a VisualCrossing API key and is designed for (but doesn't # weather: requires a VisualCrossing API key and is designed for (but doesn't
# itself strictly require) Postgresql with the PostGIS extension; # itself strictly require) Postgresql with the PostGIS extension;

View file

@ -0,0 +1,69 @@
POOL:
- ts_id: 'server1'
ts_ip: '192.168.0.10'
app_port: 4444
db_port: 5432
db_name: mydb
db_user: dbuser
db_pass: 'password123'
ssh_port: 22
ssh_user: sshuser
ssh_pass: 'password456'
path: '/Users/sij/workshop/sijapi'
tmux: '/opt/homebrew/bin/tmux'
tailscale: '/usr/local/bin/tailscale'
conda: '/Users/sij/miniforge3/bin/mamba'
conda_env: '/Users/sij/miniforge3/envs/sijapi'
vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
- ts_id: 'server2'
ts_ip: '192.168.0.11'
app_port: 4444
db_port: 5432
db_name: mydb
db_user: dbuser
db_pass: 'password123'
ssh_port: 22
ssh_user: sshuser
ssh_pass: 'password456'
path: '/Users/sij/workshop/sijapi'
tmux: '/opt/homebrew/bin/tmux'
tailscale: '/usr/local/bin/tailscale'
conda: '/Users/sij/miniforge3/bin/mamba'
conda_env: '/Users/sij/miniforge3/envs/sijapi'
vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
- ts_id: 'server3'
ts_ip: '192.168.0.12'
app_port: 4444
db_port: 5432
db_name: mydb
db_user: dbuser
db_pass: 'password123'
ssh_port: 22
ssh_user: sshuser
ssh_pass: 'password456'
path: '/Users/sij/workshop/sijapi'
tmux: '/opt/homebrew/bin/tmux'
tailscale: '/usr/local/bin/tailscale'
conda: '/Users/sij/miniforge3/bin/mamba'
conda_env: '/Users/sij/miniforge3/envs/sijapi'
vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
TABLES:
locations:
primary_key: id
use_guid: true
dailyweather:
primary_key: id
use_guid: true
hourlyweather:
primary_key: id
use_guid: true
click_logs:
primary_key: id
use_guid: true
short_urls:
primary_key: id
use_guid: true

View file

@ -0,0 +1,13 @@
custom_locations:
- name: Echo Valley Ranch
latitude: 42.8098216
longitude: -123.049396
radius: 2
layers:
- url: "https://gis.blm.gov/arcgis/rest/services/Cadastral/BLM_Natl_PLSS_CadNSDI/MapServer/1/query"
table_name: "public.plss_townships"
layer_name: "Townships"
- url: "https://gis.blm.gov/arcgis/rest/services/Cadastral/BLM_Natl_PLSS_CadNSDI/MapServer/2/query"
table_name: "public.plss_sections"
layer_name: "Sections"

View file

@ -1,4 +0,0 @@
- name: Echo Valley Ranch
latitude: 42.8098216
longitude: -123.049396
radius: 1.5

View file

@ -1,9 +1,10 @@
# config/sys.yaml
# Primary configuration file # Primary configuration file
HOST: "0.0.0.0"
HOST: '0.0.0.0'
PORT: 4444 PORT: 4444
BIND: '{{ HOST }}:{{ PORT }}' BIND: "{{ HOST }}:{{ PORT }}"
URL: 'https://api.example.com' URL: "https://api.sij.ai"
MAX_CPU_CORES: 7
PUBLIC: PUBLIC:
- /id - /id
@ -15,20 +16,21 @@ PUBLIC:
- /cd/alert - /cd/alert
TRUSTED_SUBNETS: TRUSTED_SUBNETS:
- 127.0.0.1/32 - "127.0.0.1/32"
- 10.13.37.0/24 - "10.13.37.0/24"
- 100.64.64.0/24
SUBNET_BROADCAST: "10.255.255.255"
MODULES: MODULES:
archivist: on archivist: off
asr: on asr: on
cal: on cal: off
cf: off cf: off
dist: off dist: off
email: on email: off
gis: on gis: on
ig: off ig: off
img: on img: off
llm: on llm: on
news: on news: on
note: on note: on
@ -36,77 +38,54 @@ MODULES:
scrape: on scrape: on
serve: on serve: on
sys: on sys: on
timing: on timing: off
tts: on tts: on
weather: on weather: on
POOL: LOGS:
- ts_id: 'server1' default: info
ts_ip: '192.168.0.10' init: debug
app_port: 4444 classes: debug
db_port: 5432 database: debug
db_name: mydb serialization: debug
db_user: dbuser utilities: debug
db_pass: 'password123' logs: debug
ssh_port: 22 main: debug
ssh_user: sshuser archivist: info
ssh_pass: 'password456' asr: info
path: '/Users/sij/workshop/sijapi' cal: info
tmux: '/opt/homebrew/bin/tmux' cf: info
tailscale: '/usr/local/bin/tailscale' dist: info
conda: '/Users/sij/miniforge3/bin/mamba' email: info
conda_env: '/Users/sij/miniforge3/envs/sijapi' gis: debug
vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals' ig: info
vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn' img: debug
- ts_id: 'server2' llm: debug
ts_ip: '192.168.0.11' news: debug
app_port: 4444 note: debug
db_port: 5432 rag: debug
db_name: mydb scrape: debug
db_user: dbuser serve: info
db_pass: 'password123' sys: debug
ssh_port: 22 timing: warn
ssh_user: sshuser tts: info
ssh_pass: 'password456' weather: info
path: '/Users/sij/workshop/sijapi'
tmux: '/opt/homebrew/bin/tmux'
tailscale: '/usr/local/bin/tailscale'
conda: '/Users/sij/miniforge3/bin/mamba'
conda_env: '/Users/sij/miniforge3/envs/sijapi'
vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
- ts_id: 'server3'
ts_ip: '192.168.0.12'
app_port: 4444
db_port: 5432
db_name: mydb
db_user: dbuser
db_pass: 'password123'
ssh_port: 22
ssh_user: sshuser
ssh_pass: 'password456'
path: '/Users/sij/workshop/sijapi'
tmux: '/opt/homebrew/bin/tmux'
tailscale: '/usr/local/bin/tailscale'
conda: '/Users/sij/miniforge3/bin/mamba'
conda_env: '/Users/sij/miniforge3/envs/sijapi'
vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
EXTENSIONS: EXTENSIONS:
pgp: on archivist: off
archivist: on
courtlistener: off courtlistener: off
elevenlabs: on
macnotify: on macnotify: on
pgp: on
shellfish: on shellfish: on
xtts: off
url_shortener: off url_shortener: off
TZ: 'America/Los_Angeles'
KEYS: KEYS:
- 'sk-YOUR_FIRST_API_KEY' - "sk-NhrtQwCHNdK5sRZC"
- 'sk-YOUR_SECOND_API_KEY' - "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"
- 'sk-YOUR_THIRD_API_KEY'
TZ: "America/Los_Angeles"
GARBAGE: GARBAGE:
COLLECTION_INTERVAL: 60 * 60 COLLECTION_INTERVAL: 60 * 60

324
sijapi/database.py Normal file
View file

@ -0,0 +1,324 @@
# database.py
import json
import yaml
import time
import aiohttp
import asyncio
from datetime import datetime as dt_datetime, date
from tqdm.asyncio import tqdm
import reverse_geocoder as rg
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, TypeVar, ClassVar
from dotenv import load_dotenv
from pydantic import BaseModel, Field, create_model, PrivateAttr
from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager
from datetime import datetime, timedelta, timezone
from zoneinfo import ZoneInfo
from srtm import get_data
import os
import sys
from loguru import logger
from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy.orm import sessionmaker, declarative_base
from sqlalchemy.exc import OperationalError
from sqlalchemy import Column, Integer, String, DateTime, JSON, Text, select, func
from sqlalchemy.dialects.postgresql import JSONB
from urllib.parse import urljoin
import hashlib
import random
from .logs import get_logger
from .serialization import json_dumps, json_serial, serialize
# Module-level logger obtained from the project's logging helper.
l = get_logger(__name__)
# Declarative base for the ORM models defined in this module.
Base = declarative_base()
# Paths are resolved relative to this file, not the current working directory.
BASE_DIR = Path(__file__).resolve().parent
CONFIG_DIR = BASE_DIR / "config"
ENV_PATH = CONFIG_DIR / ".env"
# Load config/.env into the environment before TS_ID is read on the next line.
load_dotenv(ENV_PATH)
# Identifier of this server; None when TS_ID is not set in the environment.
TS_ID = os.environ.get('TS_ID')
class QueryTracking(Base):
    """ORM model for the ``query_tracking`` table.

    ``Database.write`` adds one row per write query it executes, and
    ``Database.update_query_checksum`` later fills in ``result_checksum``.
    """
    __tablename__ = 'query_tracking'

    id = Column(Integer, primary_key=True)
    # Id of the server that issued the query (Database.write stores its local_ts_id here).
    ts_id = Column(String, nullable=False)
    # Raw SQL text of the write.
    query = Column(Text, nullable=False)
    # Bind parameters of the write, as produced by json_dumps() in Database.write.
    args = Column(JSONB)
    # Filled in by the database server at insert time.
    executed_at = Column(DateTime(timezone=True), server_default=func.now())
    # Callable default: every new row gets its own empty dict instead of all
    # rows sharing (and potentially mutating) one dict literal.
    completed_by = Column(JSONB, default=dict)
    result_checksum = Column(String)
class Database:
@classmethod
def init(cls, config_name: str):
    """Alternate constructor: build an instance for the named configuration.

    Args:
        config_name: Passed unchanged to ``__init__``.

    Returns:
        A new instance of ``cls``.
    """
    instance = cls(config_name)
    return instance
def __init__(self, config_path: str):
self.config = self.load_config(config_path)
self.engines: Dict[str, Any] = {}
self.sessions: Dict[str, Any] = {}
self.online_servers: set = set()
self.local_ts_id = self.get_local_ts_id()
def load_config(self, config_path: str) -> Dict[str, Any]:
    """Read ``<config_path>.yaml`` from the package's config directory.

    Args:
        config_path: Despite the name, this is the bare config name; the
            ``.yaml`` suffix and the directory are added here.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file.

    Raises:
        OSError: if the file cannot be opened.
    """
    base_path = Path(__file__).parent.parent
    full_path = base_path / "sijapi" / "config" / f"{config_path}.yaml"
    # Explicit UTF-8: without it open() decodes with the locale's preferred
    # encoding, so the same config could load differently per machine.
    with open(full_path, 'r', encoding='utf-8') as file:
        config = yaml.safe_load(file)
    return config
def get_local_ts_id(self) -> str:
    """Return this server's id from the ``TS_ID`` environment variable.

    The environment is consulted on every call; the result is ``None`` when
    the variable is not set.
    """
    ts_id = os.getenv('TS_ID')
    return ts_id
async def initialize_engines(self):
    """Create an async engine and session factory for every POOL entry.

    Each entry of ``self.config['POOL']`` is registered in ``self.engines``
    and ``self.sessions`` under its ``ts_id``. Afterwards, if the local
    server has a session factory, the tables declared on ``Base`` are
    created on it. Engine-creation and table-creation failures are logged,
    not raised.

    Credentials and host values are taken literally from the config; they
    must not be percent-encoded there.
    """
    # Local import: URL ships with the SQLAlchemy package this module already uses.
    from sqlalchemy.engine import URL

    for db_info in self.config['POOL']:
        # Assemble the URL from components instead of string interpolation so
        # that a user name or password containing '@', ':', '/' or '%' cannot
        # be misparsed as URL structure. str() mirrors the coercion the
        # previous f-string applied to non-string YAML values.
        url = URL.create(
            "postgresql+asyncpg",
            username=str(db_info['db_user']),
            password=str(db_info['db_pass']),
            host=str(db_info['ts_ip']),
            port=db_info['db_port'],
            database=str(db_info['db_name']),
        )
        try:
            engine = create_async_engine(url, pool_pre_ping=True, pool_size=5, max_overflow=10)
            self.engines[db_info['ts_id']] = engine
            self.sessions[db_info['ts_id']] = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
            l.info(f"Initialized engine and session for {db_info['ts_id']}")
        except Exception as e:
            l.error(f"Failed to initialize engine for {db_info['ts_id']}: {str(e)}")

    if self.local_ts_id not in self.sessions:
        l.error(f"Failed to initialize session for local server {self.local_ts_id}")
    else:
        try:
            # Create tables if they don't exist
            async with self.engines[self.local_ts_id].begin() as conn:
                await conn.run_sync(Base.metadata.create_all)
            l.info(f"Initialized tables for local server {self.local_ts_id}")
        except Exception as e:
            l.error(f"Failed to create tables for local server {self.local_ts_id}: {str(e)}")
async def get_online_servers(self) -> List[str]:
    """Probe every registered engine and report which ones answer.

    Each engine in ``self.engines`` is sent ``SELECT 1``, one after another.
    A server whose probe raises ``OperationalError`` is left out; any other
    exception propagates. ``self.online_servers`` is replaced with the
    result as a set.

    Returns:
        The ts_ids that responded, in registry order.
    """
    reachable: List[str] = []
    for server_id in self.engines:
        candidate = self.engines[server_id]
        try:
            async with candidate.connect() as connection:
                await connection.execute(text("SELECT 1"))
                reachable.append(server_id)
        except OperationalError:
            # Unreachable server: skip it and keep probing the rest.
            continue
    self.online_servers = set(reachable)
    return reachable
async def read(self, query: str, **kwargs):
    """Run a query on the local server and return its rows as dictionaries.

    Args:
        query: SQL text; wrapped in ``text()`` before execution.
        **kwargs: Bind parameters for the query.

    Returns:
        One ``{column: value}`` dict per row (an empty list when there are
        no rows), or ``None`` when the local server has no session factory
        or the query fails. Failures are logged rather than raised.
    """
    local_id = self.local_ts_id
    if local_id not in self.sessions:
        l.error(f"No session found for local server {self.local_ts_id}. Database may not be properly initialized.")
        return None

    async with self.sessions[local_id]() as session:
        try:
            outcome = await session.execute(text(query), kwargs)
            fetched = outcome.fetchall()
            if not fetched:
                return []
            # Pair each row's values with the result's column names.
            names = outcome.keys()
            return [dict(zip(names, record)) for record in fetched]
        except Exception as e:
            l.error(f"Failed to execute read query: {str(e)}")
            return None
async def write(self, query: str, **kwargs):
    """Execute a write on the local server, track it, and fan it out.

    Steps, in order: serialize the bind parameters, execute the query, add a
    ``QueryTracking`` row, commit, compute and store a checksum, then start
    one background ``_replicate_write`` task per other online server.

    Args:
        query: SQL text; wrapped in ``text()`` before execution.
        **kwargs: Bind parameters; each value is passed through
            ``serialize`` before execution.

    Returns:
        The result object of the local execution, or ``None`` when the local
        server has no session factory or any step fails (failures are logged
        with a traceback, not raised).
    """
    # Local import: the error path below needs traceback, which this module
    # does not import at the top of the file.
    import traceback

    if self.local_ts_id not in self.sessions:
        l.error(f"No session found for local server {self.local_ts_id}. Database may not be properly initialized.")
        return None

    # Bound before the try so the error handler can always log it, even when
    # serialize() itself is what raised.
    serialized_kwargs = None

    async with self.sessions[self.local_ts_id]() as session:
        try:
            serialized_kwargs = {key: serialize(value) for key, value in kwargs.items()}

            # Execute the write query
            result = await session.execute(text(query), serialized_kwargs)

            # Log the query (use json_dumps for logging purposes)
            new_query = QueryTracking(
                ts_id=self.local_ts_id,
                query=query,
                args=json_dumps(kwargs)  # Use original kwargs for logging
            )
            session.add(new_query)
            # Flush so the database assigns new_query.id before the commit.
            await session.flush()
            query_id = new_query.id

            await session.commit()
            l.info(f"Successfully executed write query: {query[:50]}...")

            # NOTE(review): _local_compute_checksum executes `query` again in
            # a fresh session — confirm that re-running a write there is intended.
            checksum = await self._local_compute_checksum(query, serialized_kwargs)

            # Update query_tracking with checksum
            await self.update_query_checksum(query_id, checksum)

            # Replicate to online servers
            online_servers = await self.get_online_servers()
            # The event loop keeps only weak references to tasks, so hold a
            # strong reference until each replication task finishes.
            background = getattr(self, "_replication_tasks", None)
            if background is None:
                background = self._replication_tasks = set()
            for ts_id in online_servers:
                if ts_id != self.local_ts_id:
                    task = asyncio.create_task(
                        self._replicate_write(ts_id, query_id, query, serialized_kwargs, checksum)
                    )
                    background.add(task)
                    task.add_done_callback(background.discard)

            return result  # Return the CursorResult

        except Exception as e:
            l.error(f"Failed to execute write query: {str(e)}")
            l.error(f"Query: {query}")
            l.error(f"Kwargs: {kwargs}")
            l.error(f"Serialized kwargs: {serialized_kwargs}")
            l.error(f"Traceback: {traceback.format_exc()}")
            return None
async def get_primary_server(self) -> str:
    """Fetch the primary server's id from the ``/id`` endpoint.

    Sends a GET to ``/id`` resolved against ``self.config['URL']``.

    Returns:
        The response body with surrounding whitespace stripped on HTTP 200;
        ``None`` on any other status or on an ``aiohttp.ClientError`` (both
        are logged).
    """
    endpoint = urljoin(self.config['URL'], '/id')
    async with aiohttp.ClientSession() as http:
        try:
            async with http.get(endpoint) as response:
                if response.status != 200:
                    l.error(f"Failed to get primary server. Status: {response.status}")
                    return None
                body = await response.text()
                return body.strip()
        except aiohttp.ClientError as e:
            l.error(f"Error connecting to load balancer: {str(e)}")
            return None
async def get_checksum_server(self) -> dict:
    """Choose the POOL entry of the server that should compute a checksum.

    A random online server other than the primary is preferred. When no
    such server exists, the primary's own POOL entry is returned.

    Returns:
        The chosen entry from ``self.config['POOL']``.

    Raises:
        RuntimeError: if the fallback is needed but the primary id (which
            may be ``None``) matches no POOL entry.
    """
    primary_ts_id = await self.get_primary_server()
    online_servers = await self.get_online_servers()
    pool = self.config['POOL']

    checksum_servers = [
        server for server in pool
        if server['ts_id'] in online_servers and server['ts_id'] != primary_ts_id
    ]
    if checksum_servers:
        return random.choice(checksum_servers)

    primary_entry = next((server for server in pool if server['ts_id'] == primary_ts_id), None)
    if primary_entry is None:
        # A bare next() would raise StopIteration here, which a coroutine
        # surfaces as an uninformative "coroutine raised StopIteration"
        # RuntimeError; raise the same exception type with a usable message.
        raise RuntimeError(f"Primary server {primary_ts_id!r} is not listed in the configured POOL")
    return primary_entry
async def _local_compute_checksum(self, query: str, params: dict):
    """Execute *query* on the local server and return an MD5 hex digest of the outcome.

    The statement runs in a fresh local session that this method never
    commits. When the result returns rows, the fetched rows are hashed;
    otherwise the row count, the query text and the stringified params are
    concatenated and hashed.
    """
    async with self.sessions[self.local_ts_id]() as session:
        outcome = await session.execute(text(query), params)
        if not outcome.returns_rows:
            payload = str(outcome.rowcount) + query + str(params)
        else:
            payload = outcome.fetchall()
        return hashlib.md5(str(payload).encode()).hexdigest()
async def _delegate_compute_checksum(self, server: Dict[str, Any], query: str, params: dict):
    """Ask another server to compute the checksum, falling back to the local one.

    POSTs ``{"query": ..., "params": ...}`` to the server's
    ``/sync/checksum`` endpoint and returns the ``checksum`` field of the
    JSON reply. On a non-200 status or an ``aiohttp.ClientError`` the
    failure is logged and the checksum is computed locally instead.
    """
    endpoint = f"http://{server['ts_ip']}:{server['app_port']}/sync/checksum"
    payload = {"query": query, "params": params}
    async with aiohttp.ClientSession() as http:
        try:
            async with http.post(endpoint, json=payload) as reply:
                if reply.status != 200:
                    l.error(f"Failed to get checksum from {server['ts_id']}. Status: {reply.status}")
                    return await self._local_compute_checksum(query, params)
                body = await reply.json()
                return body['checksum']
        except aiohttp.ClientError as e:
            l.error(f"Error connecting to {server['ts_id']} for checksum: {str(e)}")
            return await self._local_compute_checksum(query, params)
async def update_query_checksum(self, query_id: int, checksum: str):
    """Store *checksum* in ``query_tracking.result_checksum`` for row *query_id* on the local server."""
    statement = text("UPDATE query_tracking SET result_checksum = :checksum WHERE id = :id")
    bindings = {"checksum": checksum, "id": query_id}
    async with self.sessions[self.local_ts_id]() as session:
        await session.execute(statement, bindings)
        await session.commit()
async def _replicate_write(self, ts_id: str, query_id: int, query: str, params: dict, expected_checksum: str):
    """Replay a write on server *ts_id* and record it as completed.

    The query is executed in a session for *ts_id*, verified against
    *expected_checksum*, committed, and only then marked completed in the
    local tracking table. Any failure is logged and swallowed so that one
    bad replica does not affect the others; without a commit the replica
    session is discarded when it closes.
    """
    try:
        async with self.sessions[ts_id]() as session:
            await session.execute(text(query), params)
            # NOTE(review): this checksum is computed against the LOCAL
            # server, not against ts_id, so it does not actually validate
            # the replica's result - confirm whether that is intended.
            actual_checksum = await self._local_compute_checksum(query, params)
            if actual_checksum != expected_checksum:
                raise ValueError(f"Checksum mismatch on {ts_id}")
            await session.commit()
        # Mark as completed only after the replica commit has succeeded;
        # otherwise a failed commit would leave the query flagged as
        # replicated while the data never reached ts_id.
        await self.mark_query_completed(query_id, ts_id)
        l.info(f"Successfully replicated write to {ts_id}")
    except Exception as e:
        l.error(f"Failed to replicate write on {ts_id}: {str(e)}")
async def mark_query_completed(self, query_id: int, ts_id: str):
    """Record on the local server that *ts_id* has executed tracked query *query_id*.

    Does nothing when no ``QueryTracking`` row with that id exists.
    """
    async with self.sessions[self.local_ts_id]() as session:
        query = await session.get(QueryTracking, query_id)
        if query:
            # Assign a NEW dict rather than mutating the loaded one in place.
            # Presumably completed_by is a plain JSON column without mutation
            # tracking; in that case reassigning the same (mutated) object
            # compares equal to the "old" value and no UPDATE is emitted.
            completed_by = dict(query.completed_by or {})
            completed_by[ts_id] = True
            query.completed_by = completed_by
            await session.commit()
async def sync_local_server(self):
    """Replay tracked queries that the local server has not executed yet.

    Finds the highest tracked id already marked as completed by the local
    server, then executes every later query in id order, verifying each one
    against its stored checksum. A failure on one query is logged and does
    not stop the loop. The session is committed once at the end.
    """
    async with self.sessions[self.local_ts_id]() as session:
        last_synced = await session.execute(
            text("SELECT MAX(id) FROM query_tracking WHERE completed_by ? :ts_id"),
            {"ts_id": self.local_ts_id}
        )
        last_synced_id = last_synced.scalar() or 0
        unexecuted = await session.execute(
            text("SELECT * FROM query_tracking WHERE id > :last_id ORDER BY id"),
            {"last_id": last_synced_id}
        )
        # Materialize the rows before issuing further statements on the
        # same session, and count successes ourselves: rowcount is not
        # meaningful for a SELECT.
        pending = unexecuted.fetchall()
        executed = 0
        for query in pending:
            try:
                params = json.loads(query.args)
                await session.execute(text(query.query), params)
                actual_checksum = await self._local_compute_checksum(query.query, params)
                if actual_checksum != query.result_checksum:
                    raise ValueError(f"Checksum mismatch for query ID {query.id}")
                await self.mark_query_completed(query.id, self.local_ts_id)
                executed += 1
            except Exception as e:
                l.error(f"Failed to execute query ID {query.id} during local sync: {str(e)}")
        await session.commit()
        l.info(f"Local server sync completed. Executed {executed} queries.")
async def purge_completed_queries(self):
    """Delete tracked queries up to the newest one completed by every pool member.

    Collects the ``ts_id`` of each entry in ``self.config['POOL']`` and
    removes all ``query_tracking`` rows whose id is at most the highest id
    whose ``completed_by`` contains all of those ids. The number of deleted
    rows is logged.
    """
    async with self.sessions[self.local_ts_id]() as session:
        pool_ids = [member['ts_id'] for member in self.config['POOL']]
        statement = text("""
            DELETE FROM query_tracking
            WHERE id <= (
                SELECT MAX(id)
                FROM query_tracking
                WHERE completed_by ?& :ts_ids
            )
        """)
        outcome = await session.execute(statement, {"ts_ids": pool_ids})
        await session.commit()
        l.info(f"Purged {outcome.rowcount} completed queries.")
async def close(self):
    """Dispose of every engine in ``self.engines``, one after another."""
    engines = self.engines.values()
    for current in engines:
        await current.dispose()

237
sijapi/helpers/CaPLSS.py Normal file
View file

@ -0,0 +1,237 @@
#!/usr/bin/env python3
import requests
import json
import time
import os
import subprocess
import sys
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from datetime import datetime
# Environment variables for database connection.
# Each setting falls back to the literal default shown when the variable is unset.
# NOTE(review): the DB_PASSWORD fallback is a real-looking credential committed to
# source control - consider rotating it and requiring the variable to be set.
DB_NAME = os.getenv('DB_NAME', 'sij')
DB_USER = os.getenv('DB_USER', 'sij')
DB_PASSWORD = os.getenv('DB_PASSWORD', 'Synchr0!')
DB_HOST = os.getenv('DB_HOST', 'localhost')
DB_PORT = os.getenv('DB_PORT', '5432')
def get_feature_count(url):
    """Return the number of features the query endpoint at *url* reports.

    Sends a count-only query (``where=1=1``) with a 30 second timeout and
    up to 10 transport-level retries on 500/502/503/504 responses.

    Returns:
        The ``count`` field of the JSON reply, or 0 when it is absent.

    Raises:
        requests.exceptions.HTTPError: on a non-success status.
    """
    count_query = {'where': '1=1', 'returnCountOnly': 'true', 'f': 'json'}
    retry_policy = Retry(total=10, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
    with requests.Session() as http:
        http.mount("https://", HTTPAdapter(max_retries=retry_policy))
        reply = http.get(url, params=count_query, timeout=30)
        reply.raise_for_status()
        payload = reply.json()
    return payload.get('count', 0)
def fetch_features(url, offset, num, max_retries=5):
    """Fetch one page of features from the query endpoint at *url*.

    Args:
        url: Query endpoint of the layer.
        offset: Index of the first record to return (``resultOffset``).
        num: Maximum number of records to return (``resultRecordCount``).
        max_retries: Number of attempts made by this function, on top of
            the transport-level retries configured on the session.

    Returns:
        The decoded JSON reply.

    Raises:
        requests.exceptions.RequestException: if the last attempt fails.
    """
    params = {
        'where': '1=1',
        'outFields': '*',
        'geometryPrecision': 6,
        'outSR': 4326,
        'f': 'json',
        'resultOffset': offset,
        'resultRecordCount': num
    }
    # The retry policy and session do not change between attempts, so build
    # them once instead of on every iteration.
    retries = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
    with requests.Session() as session:
        session.mount("https://", HTTPAdapter(max_retries=retries))
        for attempt in range(max_retries):
            try:
                response = session.get(url, params=params, timeout=30)
                response.raise_for_status()
                return response.json()
            except requests.exceptions.RequestException as e:
                print(f"Error fetching features (attempt {attempt + 1}/{max_retries}): {e}")
                if attempt == max_retries - 1:
                    raise
                time.sleep(5 * (attempt + 1))  # Linear backoff: 5s, 10s, 15s, ...
def download_layer(layer_num, layer_name):
    """Download every feature of one PLSS layer into ``~/data/PLSS_<layer_name>.geojson``.

    Features are fetched in pages of 1000. Progress is checkpointed to a
    temporary JSON file so an interrupted download can resume; the final
    file is written under a temporary name and renamed into place so a
    failed write never leaves a truncated ``.geojson`` behind.

    Args:
        layer_num: Numeric layer id on the BLM MapServer.
        layer_name: Name used in file names and progress messages.

    Returns:
        Path of the written GeoJSON file, or ``None`` if the download or
        the final write failed (partial data is kept in the temp file).
    """
    base_dir = os.path.expanduser('~/data')
    os.makedirs(base_dir, exist_ok=True)
    file_path = os.path.join(base_dir, f'PLSS_{layer_name}.geojson')
    temp_file_path = os.path.join(base_dir, f'PLSS_{layer_name}_temp.json')
    partial_file_path = file_path + '.part'
    url = f"https://gis.blm.gov/arcgis/rest/services/Cadastral/BLM_Natl_PLSS_CadNSDI/MapServer/{layer_num}/query"
    total_count = get_feature_count(url)
    print(f"Total {layer_name} features: {total_count}")
    batch_size = 1000
    chunk_size = 10000  # Checkpoint to file roughly every 10,000 features
    offset = 0
    all_features = []
    # Check if temporary file exists and load its content
    if os.path.exists(temp_file_path):
        try:
            with open(temp_file_path, 'r') as f:
                all_features = json.load(f)
            offset = len(all_features)
            print(f"Resuming download from offset {offset}")
        except json.JSONDecodeError:
            print("Error reading temporary file. Starting download from the beginning.")
            offset = 0
            all_features = []
    last_saved = len(all_features)
    try:
        while offset < total_count:
            print(f"Fetching {layer_name} features {offset} to {offset + batch_size}...")
            data = fetch_features(url, offset, batch_size)
            new_features = data.get('features', [])
            if not new_features:
                break
            all_features.extend(new_features)
            offset += len(new_features)
            # Progress indicator
            progress = offset / total_count
            bar_length = 30
            filled_length = int(bar_length * progress)
            bar = '=' * filled_length + '-' * (bar_length - filled_length)
            print(f'\rProgress: [{bar}] {progress:.1%} ({offset}/{total_count} features)', end='', flush=True)
            # Checkpoint once at least chunk_size new features have arrived.
            # Testing for an exact multiple would never fire if the server
            # returns pages that are not a divisor of chunk_size.
            if len(all_features) - last_saved >= chunk_size:
                with open(temp_file_path, 'w') as f:
                    json.dump(all_features, f)
                last_saved = len(all_features)
            time.sleep(1)
        print(f"\nTotal {layer_name} features fetched: {len(all_features)}")
        # Write final GeoJSON file under a temporary name first
        with open(partial_file_path, 'w') as f:
            f.write('{"type": "FeatureCollection", "features": [\n')
            for i, feature in enumerate(all_features):
                # NOTE(review): with f=json the service presumably returns
                # Esri-style geometry rather than GeoJSON geometry objects -
                # verify the output is accepted by the importer.
                geojson_feature = {
                    "type": "Feature",
                    "properties": feature['attributes'],
                    # A feature without geometry becomes a null geometry
                    # instead of aborting the whole export with a KeyError.
                    "geometry": feature.get('geometry')
                }
                json.dump(geojson_feature, f)
                if i < len(all_features) - 1:
                    f.write(',\n')
            f.write('\n]}')
        os.replace(partial_file_path, file_path)
        print(f"GeoJSON file saved as '{file_path}'")
        # Remove temporary file
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
        return file_path
    except Exception as e:
        print(f"\nError during download: {e}")
        # Actually persist what was fetched since the last checkpoint, so
        # the message below is true and the next run can resume from here.
        if len(all_features) != last_saved:
            try:
                with open(temp_file_path, 'w') as f:
                    json.dump(all_features, f)
            except OSError as save_error:
                print(f"Could not save partial data: {save_error}")
        print(f"Partial data saved in {temp_file_path}")
        return None
def check_postgres_connection():
    """Return True if ``psql`` can connect with the configured settings and run ``SELECT 1``.

    Returns False when psql exits with an error or is not installed.
    """
    # Hand the configured password to psql; without it psql can only
    # authenticate through ~/.pgpass or trust auth and otherwise fails.
    env = {**os.environ, 'PGPASSWORD': DB_PASSWORD}
    try:
        subprocess.run(['psql', '-h', DB_HOST, '-p', DB_PORT, '-U', DB_USER, '-d', DB_NAME, '-c', 'SELECT 1;'],
                       check=True, capture_output=True, text=True, env=env)
        return True
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False
def check_postgis_extension():
    """Return True if the ``postgis`` extension is installed in the configured database.

    Returns False when it is missing, when psql exits with an error, or
    when psql is not installed.
    """
    env = {**os.environ, 'PGPASSWORD': DB_PASSWORD}
    try:
        # -t -A prints bare values only, so the output is exactly "1" when
        # the extension exists instead of a table that must be searched.
        result = subprocess.run(['psql', '-h', DB_HOST, '-p', DB_PORT, '-U', DB_USER, '-d', DB_NAME,
                                 '-t', '-A',
                                 '-c', "SELECT 1 FROM pg_extension WHERE extname = 'postgis';"],
                                check=True, capture_output=True, text=True, env=env)
        return result.stdout.strip() == '1'
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False
def create_postgis_extension():
    """Create the ``postgis`` extension in the configured database.

    Exits the process with status 1 if psql reports an error.
    """
    env = {**os.environ, 'PGPASSWORD': DB_PASSWORD}
    try:
        # ON_ERROR_STOP makes psql exit non-zero when the statement fails,
        # so check=True actually detects a failed CREATE EXTENSION.
        subprocess.run(['psql', '-h', DB_HOST, '-p', DB_PORT, '-U', DB_USER, '-d', DB_NAME,
                        '-v', 'ON_ERROR_STOP=1',
                        '-c', "CREATE EXTENSION IF NOT EXISTS postgis;"],
                       check=True, capture_output=True, text=True, env=env)
        print("PostGIS extension created successfully.")
    except subprocess.CalledProcessError as e:
        print(f"Error creating PostGIS extension: {e}")
        sys.exit(1)
def import_to_postgis(file_path, table_name):
    """Load a GeoJSON file into a PostGIS table with ``ogr2ogr``, replacing the table.

    Exits the process with status 1 if PostgreSQL is unreachable; creates
    the PostGIS extension first when it is missing. An ogr2ogr failure is
    printed but not raised.

    Args:
        file_path: Path of the GeoJSON file to import.
        table_name: Destination table (``-nln``), overwritten if present.
    """
    if not check_postgres_connection():
        print("Error: Unable to connect to PostgreSQL. Please check your connection settings.")
        sys.exit(1)
    if not check_postgis_extension():
        print("PostGIS extension not found. Attempting to create it...")
        create_postgis_extension()
    # Pass the password through the environment instead of the connection
    # string: it stays out of the process list and out of the "Command that
    # failed" message below, and cannot break the string if it has spaces.
    ogr2ogr_command = [
        'ogr2ogr',
        '-f', 'PostgreSQL',
        f'PG:dbname={DB_NAME} user={DB_USER} host={DB_HOST} port={DB_PORT}',
        file_path,
        '-nln', table_name,
        '-overwrite'
    ]
    env = {**os.environ, 'PGPASSWORD': DB_PASSWORD}
    try:
        subprocess.run(ogr2ogr_command, check=True, capture_output=True, text=True, env=env)
        print(f"Data successfully imported into PostGIS table: {table_name}")
    except subprocess.CalledProcessError as e:
        print(f"Error importing data into PostGIS: {e}")
        print(f"Command that failed: {e.cmd}")
        print(f"Error output: {e.stderr}")
def check_ogr2ogr():
    """Return True if ``ogr2ogr --version`` runs successfully, False otherwise."""
    probe = ['ogr2ogr', '--version']
    try:
        subprocess.run(probe, check=True, capture_output=True, text=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        # Either the tool exited with an error or it is not installed.
        return False
    return True
def main():
    """Download (if not already on disk) and import the Townships and Sections layers.

    Exits with status 1 when ogr2ogr is unavailable. Request errors and any
    other exception raised while processing are printed, not re-raised.
    """
    if not check_ogr2ogr():
        for hint in (
            "Error: ogr2ogr not found. Please install GDAL/OGR tools.",
            "On Debian: sudo apt-get install gdal-bin",
            "On macOS with Homebrew: brew install gdal",
        ):
            print(hint)
        sys.exit(1)
    # (layer number, layer name, destination table), processed in order.
    layers = (
        (1, "Townships", "public.plss_townships"),
        (2, "Sections", "public.plss_sections"),
    )
    try:
        for layer_num, layer_name, table_name in layers:
            geojson_path = os.path.expanduser(f'~/data/PLSS_{layer_name}.geojson')
            if not os.path.exists(geojson_path):
                geojson_path = download_layer(layer_num, layer_name)
            if geojson_path:
                import_to_postgis(geojson_path, table_name)
            else:
                print(f"Failed to download {layer_name} data. Skipping import.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
if __name__ == "__main__":
main()

View file

@ -1,8 +1,12 @@
# CaPLSS_downloader_and_importer.py
import requests import requests
import json import json
import time import time
import os import os
import subprocess import subprocess
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
def get_feature_count(url): def get_feature_count(url):
params = { params = {
@ -10,11 +14,17 @@ def get_feature_count(url):
'returnCountOnly': 'true', 'returnCountOnly': 'true',
'f': 'json' 'f': 'json'
} }
response = requests.get(url, params=params) retries = Retry(total=10, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retries)
session = requests.Session()
session.mount("https://", adapter)
response = session.get(url, params=params, timeout=15) # Add timeout parameter
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
return data.get('count', 0) return data.get('count', 0)
def fetch_features(url, offset, num): def fetch_features(url, offset, num):
params = { params = {
'where': '1=1', 'where': '1=1',
@ -70,8 +80,14 @@ def download_layer(layer_num, layer_name):
"features": geojson_features "features": geojson_features
} }
# Define a base directory that exists on both macOS and Debian
base_dir = os.path.expanduser('~/data')
os.makedirs(base_dir, exist_ok=True) # Create the directory if it doesn't exist
# Use os.path.join to construct the file path
file_path = os.path.join(base_dir, f'PLSS_{layer_name}.geojson')
# Save to file # Save to file
file_path = f'/Users/sij/workshop/sijapi/sijapi/data/PLSS_{layer_name}.geojson'
with open(file_path, 'w') as f: with open(file_path, 'w') as f:
json.dump(full_geojson, f) json.dump(full_geojson, f)

View file

@ -0,0 +1 @@
{"offset": 50000}

71
sijapi/helpers/db.py Normal file
View file

@ -0,0 +1,71 @@
import asyncio
import asyncpg
import yaml
from pathlib import Path
async def load_config():
    """Parse ``config/db.yaml`` (two directories above this file's folder) and return its content."""
    package_dir = Path(__file__).parent.parent
    with open(package_dir / 'config' / 'db.yaml', 'r') as handle:
        parsed = yaml.safe_load(handle)
    return parsed
async def add_foreign_key_constraint(conn):
    """Link ``click_logs.short_code`` to ``short_urls.short_code`` with a cascading foreign key.

    Makes ``short_code`` NOT NULL in both tables, ensures the unique
    constraint on ``short_urls.short_code`` exists, then adds the foreign
    key if it is not already present. All statements run in one
    transaction, so a failure leaves the schema untouched, and the function
    can be re-run safely.

    Args:
        conn: An open asyncpg connection.
    """
    async with conn.transaction():
        # Ensure short_code is not null in both tables
        await conn.execute("""
            ALTER TABLE short_urls
            ALTER COLUMN short_code SET NOT NULL;
        """)
        await conn.execute("""
            ALTER TABLE click_logs
            ALTER COLUMN short_code SET NOT NULL;
        """)
        # Add unique constraint to short_urls.short_code if it doesn't exist
        await conn.execute("""
            DO $$
            BEGIN
                IF NOT EXISTS (
                    SELECT 1
                    FROM pg_constraint
                    WHERE conname = 'short_urls_short_code_key'
                ) THEN
                    ALTER TABLE short_urls
                    ADD CONSTRAINT short_urls_short_code_key UNIQUE (short_code);
                END IF;
            END $$;
        """)
        # Add foreign key constraint, guarded the same way as the unique
        # constraint so a second run does not fail with "already exists".
        await conn.execute("""
            DO $$
            BEGIN
                IF NOT EXISTS (
                    SELECT 1
                    FROM pg_constraint
                    WHERE conname = 'fk_click_logs_short_urls'
                ) THEN
                    ALTER TABLE click_logs
                    ADD CONSTRAINT fk_click_logs_short_urls
                    FOREIGN KEY (short_code)
                    REFERENCES short_urls(short_code)
                    ON DELETE CASCADE;
                END IF;
            END $$;
        """)
    print("Foreign key constraint added successfully.")
async def main():
    """Connect to the first server in the configured pool and add the foreign key constraint.

    Errors raised while altering the schema are printed; the connection is
    always closed.
    """
    settings = await load_config()
    server = settings['POOL'][0]  # sij-mbp16
    conn = await asyncpg.connect(
        database=server['db_name'],
        user=server['db_user'],
        password=server['db_pass'],
        host=server['ts_ip'],
        port=server['db_port'],
    )
    try:
        await add_foreign_key_constraint(conn)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
    finally:
        await conn.close()
if __name__ == "__main__":
asyncio.run(main())

View file

@ -0,0 +1,89 @@
import psycopg2
from psycopg2 import sql
def connect_to_db():
    """Open a psycopg2 connection to the project database.

    Settings come from the ``DB_NAME``, ``DB_USER``, ``DB_PASSWORD`` and
    ``DB_HOST`` environment variables; each falls back to the value that
    was previously hard-coded, so existing setups keep working.
    """
    import os  # local import: this script does not import os at module level

    return psycopg2.connect(
        dbname=os.getenv('DB_NAME', 'sij'),
        user=os.getenv('DB_USER', 'sij'),
        # NOTE(review): the fallback is a credential committed to source
        # control - consider rotating it and requiring DB_PASSWORD.
        password=os.getenv('DB_PASSWORD', 'Synchr0!'),
        host=os.getenv('DB_HOST', 'localhost')  # Adjust if your database is not on localhost
    )
def get_table_info(conn):
    """Collect primary and foreign key information for every table in the public schema.

    Returns:
        A dict mapping each table name to
        ``{'primary_keys': [...], 'foreign_keys': [...]}``.
    """
    with conn.cursor() as cur:
        # Get all tables in the public schema
        cur.execute("""
            SELECT table_name
            FROM information_schema.tables
            WHERE table_schema = 'public'
        """)
        table_names = [row[0] for row in cur.fetchall()]
        summary = {}
        for name in table_names:
            # Primary keys are looked up before foreign keys for each table.
            primary = get_primary_keys(cur, name)
            foreign = get_foreign_keys(cur, name)
            summary[name] = {'primary_keys': primary, 'foreign_keys': foreign}
        return summary
def get_primary_keys(cur, table_name):
    """Return the names of the primary key columns of *table_name*.

    Args:
        cur: Open database cursor.
        table_name: Table to inspect; it is cast to ``regclass`` by the query.
    """
    primary_key_query = """
        SELECT a.attname
        FROM pg_index i
        JOIN pg_attribute a ON a.attrelid = i.indrelid
        AND a.attnum = ANY(i.indkey)
        WHERE i.indrelid = %s::regclass
        AND i.indisprimary
    """
    cur.execute(primary_key_query, (table_name,))
    names = []
    for record in cur.fetchall():
        names.append(record[0])
    return names
def get_foreign_keys(cur, table_name):
    """Return the foreign key constraints declared on *table_name*.

    Each row is ``(constraint_name, column_name, foreign_table_name,
    foreign_column_name)`` as produced by the information_schema query.
    """
    foreign_key_query = """
        SELECT
        tc.constraint_name,
        kcu.column_name,
        ccu.table_name AS foreign_table_name,
        ccu.column_name AS foreign_column_name
        FROM
        information_schema.table_constraints AS tc
        JOIN information_schema.key_column_usage AS kcu
        ON tc.constraint_name = kcu.constraint_name
        AND tc.table_schema = kcu.table_schema
        JOIN information_schema.constraint_column_usage AS ccu
        ON ccu.constraint_name = tc.constraint_name
        AND ccu.table_schema = tc.table_schema
        WHERE tc.constraint_type = 'FOREIGN KEY' AND tc.table_name=%s
    """
    bindings = (table_name,)
    cur.execute(foreign_key_query, bindings)
    rows = cur.fetchall()
    return rows
def main():
    """Print the primary and foreign keys of every public table as a Markdown-style report.

    Database errors and unexpected errors are printed, not re-raised.
    """
    from contextlib import closing  # local import: not imported at module level

    try:
        # psycopg2's own `with conn:` only ends the transaction and leaves
        # the connection open; closing() guarantees it is released.
        with closing(connect_to_db()) as conn:
            table_info = get_table_info(conn)
        for table_name, info in table_info.items():
            print(f"\n## Table: {table_name}")
            print("\nPrimary Keys:")
            if info['primary_keys']:
                for pk in info['primary_keys']:
                    print(f"- {pk}")
            else:
                print("- No primary keys found")
            print("\nForeign Keys:")
            if info['foreign_keys']:
                for fk in info['foreign_keys']:
                    print(f"- {fk[1]} -> {fk[2]}.{fk[3]} (Constraint: {fk[0]})")
            else:
                print("- No foreign keys found")
    except psycopg2.Error as e:
        print(f"Database error: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
if __name__ == "__main__":
main()

83
sijapi/helpers/db_repl.py Normal file
View file

@ -0,0 +1,83 @@
import yaml
import subprocess
import os
import sys
def load_config():
    """Parse ``../config/sys.yaml`` relative to this script and return its content.

    The path is resolved from the script's own location rather than the
    current working directory, so the script works wherever it is launched.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(script_dir, '..', 'config', 'sys.yaml')
    with open(config_path, 'r') as file:
        return yaml.safe_load(file)
def run_command(command):
    """Run *command* through the shell and return ``(returncode, stdout, stderr)``.

    Both output streams are captured in full and decoded to ``str``.
    """
    finished = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out_text = finished.stdout.decode()
    err_text = finished.stderr.decode()
    return finished.returncode, out_text, err_text
def pg_dump(host, port, db_name, user, password, tables):
    """Dump *tables* from a database with ``pg_dump -c --no-owner``.

    The command is run as an argument list with the password supplied via
    ``PGPASSWORD`` in the child's environment, so no value is interpreted
    by a shell or exposed in the process list.

    Returns:
        ``(returncode, stdout, stderr)`` with both streams decoded. If
        ``pg_dump`` is not installed, returns ``(127, '', <message>)``.
    """
    command = ['pg_dump', '-h', str(host), '-p', str(port), '-U', str(user), '-d', str(db_name)]
    for table in tables:
        command += ['-t', table]
    command += ['-c', '--no-owner']
    env = {**os.environ, 'PGPASSWORD': str(password)}
    try:
        completed = subprocess.run(command, env=env, capture_output=True)
    except FileNotFoundError as exc:
        # Mirror the shell's "command not found" exit status.
        return 127, '', str(exc)
    return completed.returncode, completed.stdout.decode(), completed.stderr.decode()
def pg_restore(host, port, db_name, user, password, dump_data):
    """Feed SQL *dump_data* to ``psql`` on the target database.

    The command is run as an argument list with the password supplied via
    ``PGPASSWORD`` in the child's environment, so no value is interpreted
    by a shell or exposed in the process list.

    Returns:
        ``(returncode, stdout, stderr)`` with both streams decoded. If
        ``psql`` is not installed, returns ``(127, '', <message>)``.
    """
    command = ['psql', '-h', str(host), '-p', str(port), '-U', str(user), '-d', str(db_name)]
    env = {**os.environ, 'PGPASSWORD': str(password)}
    try:
        completed = subprocess.run(command, env=env, input=dump_data.encode(), capture_output=True)
    except FileNotFoundError as exc:
        # Mirror the shell's "command not found" exit status.
        return 127, '', str(exc)
    return completed.returncode, completed.stdout.decode(), completed.stderr.decode()
def check_postgres_version(host, port, user, password):
    """Return the output of ``SELECT version();`` on the server, or an error description.

    The command is run as an argument list with the password supplied via
    ``PGPASSWORD`` in the child's environment rather than a shell string.

    Returns:
        The stripped psql output on success, otherwise a string starting
        with ``"Error checking version: "``.
    """
    command = ['psql', '-h', str(host), '-p', str(port), '-U', str(user), '-c', 'SELECT version();']
    env = {**os.environ, 'PGPASSWORD': str(password)}
    try:
        completed = subprocess.run(command, env=env, capture_output=True)
    except FileNotFoundError as exc:
        return f"Error checking version: {exc}"
    if completed.returncode == 0:
        return completed.stdout.decode().strip()
    return f"Error checking version: {completed.stderr.decode()}"
def replicate_databases():
    """Copy a fixed set of tables from the first pool server to all the others.

    The source is dumped once and the same dump is restored on every
    target, so all targets receive an identical snapshot. A failed dump
    aborts the run; a failed restore is reported and the next target is
    still attempted.
    """
    config = load_config()
    pool = config['POOL']
    tables_to_replicate = ['click_logs', 'dailyweather', 'hourlyweather', 'locations', 'short_urls']
    source_db = pool[0]
    target_dbs = pool[1:]
    # Check source database version
    source_version = check_postgres_version(source_db['ts_ip'], source_db['db_port'], source_db['db_user'], source_db['db_pass'])
    print(f"Source database version: {source_version}")
    if not target_dbs:
        return
    # Perform the dump once; it does not depend on the target.
    returncode, dump_data, stderr = pg_dump(
        source_db['ts_ip'],
        source_db['db_port'],
        source_db['db_name'],
        source_db['db_user'],
        source_db['db_pass'],
        tables_to_replicate
    )
    if returncode != 0:
        print(f"Error during dump: {stderr}")
        return
    for target_db in target_dbs:
        print(f"\nReplicating to {target_db['ts_id']}...")
        # Check target database version
        target_version = check_postgres_version(target_db['ts_ip'], target_db['db_port'], target_db['db_user'], target_db['db_pass'])
        print(f"Target database version: {target_version}")
        # Perform restore
        returncode, stdout, stderr = pg_restore(
            target_db['ts_ip'],
            target_db['db_port'],
            target_db['db_name'],
            target_db['db_user'],
            target_db['db_pass'],
            dump_data
        )
        if returncode != 0:
            print(f"Error during restore: {stderr}")
        else:
            print(f"Replication to {target_db['ts_id']} completed successfully.")
if __name__ == "__main__":
replicate_databases()

76
sijapi/helpers/db_replicator.py Executable file
View file

@ -0,0 +1,76 @@
#!/usr/bin/env python3
import os
import yaml
import subprocess
def load_config():
    """Load ``sys.yaml`` and ``gis.yaml`` from the ``config`` directory next to this script's parent.

    Returns:
        A ``(sys_config, gis_config)`` tuple of the parsed YAML documents.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    parsed = []
    # sys.yaml is read first, then gis.yaml.
    for filename in ('sys.yaml', 'gis.yaml'):
        with open(os.path.join(here, '..', 'config', filename), 'r') as handle:
            parsed.append(yaml.safe_load(handle))
    sys_config, gis_config = parsed
    return sys_config, gis_config
def replicate_table(source, targets, table_name):
    """Dump *table_name* from *source* and recreate it on every server in *targets*.

    The dump goes to a private temporary file that is removed even when a
    dump or restore fails.

    Args:
        source: Pool entry of the server to dump from.
        targets: Pool entries of the servers to restore to.
        table_name: Table to replicate; dropped on each target first.

    Raises:
        subprocess.CalledProcessError: if pg_dump or psql exits non-zero.
    """
    import tempfile  # local import: not imported at module level

    print(f"Replicating {table_name}")
    # Dump the table from the source
    dump_command = [
        'pg_dump',
        '-h', source['ts_ip'],
        '-p', str(source['db_port']),
        '-U', source['db_user'],
        '-d', source['db_name'],
        '-t', table_name,
        '--no-owner',
        '--no-acl'
    ]
    env = os.environ.copy()
    env['PGPASSWORD'] = source['db_pass']
    # A temp file avoids clobbering "<table>.sql" in the working directory
    # and does not depend on that directory being writable.
    fd, dump_path = tempfile.mkstemp(prefix='replicate_', suffix='.sql')
    try:
        with os.fdopen(fd, 'w') as f:
            subprocess.run(dump_command, env=env, stdout=f, check=True)
        # Restore the table to each target
        for target in targets:
            print(f"Replicating to {target['ts_id']}")
            restore_command = [
                'psql',
                '-h', target['ts_ip'],
                '-p', str(target['db_port']),
                '-U', target['db_user'],
                '-d', target['db_name'],
                # Without ON_ERROR_STOP psql exits 0 even when statements in
                # the script fail, which would make check=True useless.
                '-v', 'ON_ERROR_STOP=1',
                '-c', f"DROP TABLE IF EXISTS {table_name} CASCADE;",
                '-f', dump_path
            ]
            env = os.environ.copy()
            env['PGPASSWORD'] = target['db_pass']
            subprocess.run(restore_command, env=env, check=True)
    finally:
        # Clean up the dump file
        os.remove(dump_path)
def main():
    """Replicate every GIS layer table from the first pool server to the remaining ones."""
    sys_config, gis_config = load_config()
    pool = sys_config['POOL']
    source_server, target_servers = pool[0], pool[1:]
    # Collect all table names up front, then replicate them in config order.
    table_names = []
    for layer in gis_config['layers']:
        table_names.append(layer['table_name'])
    for name in table_names:
        replicate_table(source_server, target_servers, name)
    print("Replication complete!")
if __name__ == "__main__":
main()

42510
sijapi/helpers/locations.sql Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,12 @@
import asyncio import asyncio
from pathlib import Path from pathlib import Path
from sijapi import L, EMAIL_CONFIG, EMAIL_LOGS from sijapi import EMAIL_CONFIG, EMAIL_LOGS
from sijapi.classes import EmailAccount from sijapi.utilities import EmailAccount
from sijapi.routers import email from sijapi.routers import email
from sijapi.logs import get_logger
l = get_logger(__name__)
async def initialize_log_files(): async def initialize_log_files():
summarized_log = EMAIL_LOGS / "summarized.txt" summarized_log = EMAIL_LOGS / "summarized.txt"
@ -11,13 +15,13 @@ async def initialize_log_files():
for log_file in [summarized_log, autoresponded_log, diagnostic_log]: for log_file in [summarized_log, autoresponded_log, diagnostic_log]:
log_file.parent.mkdir(parents=True, exist_ok=True) log_file.parent.mkdir(parents=True, exist_ok=True)
log_file.write_text("") log_file.write_text("")
L.DEBUG(f"Log files initialized: {summarized_log}, {autoresponded_log}, {diagnostic_log}") l.debug(f"Log files initialized: {summarized_log}, {autoresponded_log}, {diagnostic_log}")
return summarized_log, autoresponded_log, diagnostic_log return summarized_log, autoresponded_log, diagnostic_log
async def process_all_emails(account: EmailAccount, summarized_log: Path, autoresponded_log: Path, diagnostic_log: Path): async def process_all_emails(account: EmailAccount, summarized_log: Path, autoresponded_log: Path, diagnostic_log: Path):
try: try:
with email.get_imap_connection(account) as inbox: with email.get_imap_connection(account) as inbox:
L.DEBUG(f"Connected to {account.name}, processing all emails...") l.debug(f"Connected to {account.name}, processing all emails...")
all_messages = inbox.messages() all_messages = inbox.messages()
unread_messages = set(uid for uid, _ in inbox.messages(unread=True)) unread_messages = set(uid for uid, _ in inbox.messages(unread=True))
@ -41,15 +45,15 @@ async def process_all_emails(account: EmailAccount, summarized_log: Path, autore
with open(log_file, 'a') as f: with open(log_file, 'a') as f:
f.write(f"{id_str}\n") f.write(f"{id_str}\n")
L.INFO(f"Processed {processed_count} non-unread emails for account {account.name}") l.info(f"Processed {processed_count} non-unread emails for account {account.name}")
except Exception as e: except Exception as e:
L.logger.error(f"An error occurred while processing emails for account {account.name}: {e}") l.logger.error(f"An error occurred while processing emails for account {account.name}: {e}")
async def main(): async def main():
email_accounts = email.load_email_accounts(EMAIL_CONFIG) email_accounts = email.load_email_accounts(EMAIL_CONFIG)
summarized_log, autoresponded_log, diagnostic_log = await initialize_log_files() summarized_log, autoresponded_log, diagnostic_log = await initialize_log_files()
L.DEBUG(f"Processing {len(email_accounts)} email accounts") l.debug(f"Processing {len(email_accounts)} email accounts")
tasks = [process_all_emails(account, summarized_log, autoresponded_log, diagnostic_log) for account in email_accounts] tasks = [process_all_emails(account, summarized_log, autoresponded_log, diagnostic_log) for account in email_accounts]
await asyncio.gather(*tasks) await asyncio.gather(*tasks)
@ -57,7 +61,7 @@ async def main():
# Final verification # Final verification
with open(summarized_log, 'r') as f: with open(summarized_log, 'r') as f:
final_count = len(f.readlines()) final_count = len(f.readlines())
L.INFO(f"Final non-unread email count: {final_count}") l.info(f"Final non-unread email count: {final_count}")
if __name__ == "__main__": if __name__ == "__main__":
asyncio.run(main()) asyncio.run(main())

View file

@ -0,0 +1,191 @@
import psycopg2
from psycopg2 import sql
import sys
def connect_to_db():
    """Open a psycopg2 connection to the project database.

    Settings come from the ``DB_NAME``, ``DB_USER``, ``DB_PASSWORD`` and
    ``DB_HOST`` environment variables; each falls back to the value that
    was previously hard-coded, so existing setups keep working.
    """
    import os  # local import: this script does not import os at module level

    return psycopg2.connect(
        dbname=os.getenv('DB_NAME', 'sij'),
        user=os.getenv('DB_USER', 'sij'),
        # NOTE(review): the fallback is a credential committed to source
        # control - consider rotating it and requiring DB_PASSWORD.
        password=os.getenv('DB_PASSWORD', 'Synchr0!'),
        host=os.getenv('DB_HOST', 'localhost')
    )
def get_tables(cur):
    """Return the names of the base tables in the public schema that should be migrated.

    The query excludes names matching ``%_uuid`` or ``%_orig`` as well as
    ``spatial_ref_sys``.
    """
    table_query = """
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'public' AND table_type = 'BASE TABLE'
        AND table_name NOT LIKE '%_uuid' AND table_name NOT LIKE '%_orig'
        AND table_name != 'spatial_ref_sys'
    """
    cur.execute(table_query)
    names = []
    for record in cur.fetchall():
        names.append(record[0])
    return names
def get_columns(cur, table_name):
    """Return the column definitions of *table_name* in the public schema, in ordinal order.

    Each row is ``(column_name, udt_name, is_nullable, column_default,
    character_maximum_length, numeric_precision, numeric_scale)``.
    """
    # Restrict to the public schema, matching the other catalogue queries in
    # this script; otherwise a same-named table in another schema would
    # contribute its columns too.
    cur.execute("""
        SELECT column_name, udt_name,
        is_nullable, column_default,
        character_maximum_length, numeric_precision, numeric_scale
        FROM information_schema.columns
        WHERE table_name = %s AND table_schema = 'public'
        ORDER BY ordinal_position
    """, (table_name,))
    return cur.fetchall()
def get_constraints(cur, table_name):
    """Return the constraints on *table_name* whose namespace is ``public``.

    Each row is ``(conname, contype, definition)`` where the definition
    comes from ``pg_get_constraintdef``.
    """
    constraint_query = """
        SELECT conname, contype, pg_get_constraintdef(c.oid)
        FROM pg_constraint c
        JOIN pg_namespace n ON n.oid = c.connamespace
        WHERE conrelid = %s::regclass
        AND n.nspname = 'public'
    """
    bindings = (table_name,)
    cur.execute(constraint_query, bindings)
    found = cur.fetchall()
    return found
def drop_table_if_exists(cur, table_name):
    """Drop *table_name* (with CASCADE) if it exists; the name is quoted as an identifier."""
    quoted_name = sql.Identifier(table_name)
    statement = sql.SQL("DROP TABLE IF EXISTS {} CASCADE").format(quoted_name)
    cur.execute(statement)
def create_uuid_table(cur, old_table, new_table):
    """Create *new_table* as a copy of *old_table*'s structure with a UUID key.

    Any existing *new_table* is dropped first. An ``id`` column, if the old
    table has one, becomes ``UUID PRIMARY KEY DEFAULT gen_random_uuid()``;
    otherwise a ``uuid`` column with that default is appended. All other
    columns keep their type, nullability and default (except sequence
    defaults), and the old table's constraint definitions are re-applied.
    """
    drop_table_if_exists(cur, new_table)
    columns = get_columns(cur, old_table)
    constraints = get_constraints(cur, old_table)
    column_defs = []
    has_id_column = any(col[0] == 'id' for col in columns)
    for col in columns:
        col_name, udt_name, is_nullable, default, max_length, precision, scale = col
        if col_name == 'id' and has_id_column:
            column_defs.append(sql.SQL("{} UUID PRIMARY KEY DEFAULT gen_random_uuid()").format(sql.Identifier(col_name)))
        else:
            type_sql = sql.SQL("{}").format(sql.Identifier(udt_name))
            # A length wins over precision/scale; the latter is only added
            # when both values are truthy, so a scale of 0 adds nothing.
            if max_length:
                type_sql = sql.SQL("{}({})").format(type_sql, sql.Literal(max_length))
            elif precision and scale:
                type_sql = sql.SQL("{}({},{})").format(type_sql, sql.Literal(precision), sql.Literal(scale))
            column_def = sql.SQL("{} {}").format(sql.Identifier(col_name), type_sql)
            if is_nullable == 'NO':
                column_def = sql.SQL("{} NOT NULL").format(column_def)
            if default and 'nextval' not in default: # Skip auto-increment defaults
                # The catalogue's default expression is spliced in as raw SQL.
                column_def = sql.SQL("{} DEFAULT {}").format(column_def, sql.SQL(default))
            column_defs.append(column_def)
    constraint_defs = []
    for constraint in constraints:
        conname, contype, condef = constraint
        if contype != 'p' or not has_id_column: # Keep primary key if there's no id column
            # NOTE(review): foreign key definitions are copied verbatim, so
            # they presumably still reference the original tables - confirm
            # they remain valid once id columns become UUID.
            constraint_defs.append(sql.SQL(condef))
    if not has_id_column:
        column_defs.append(sql.SQL("uuid UUID DEFAULT gen_random_uuid()"))
    query = sql.SQL("CREATE TABLE {} ({})").format(
        sql.Identifier(new_table),
        sql.SQL(", ").join(column_defs + constraint_defs)
    )
    cur.execute(query)
def migrate_data(cur, old_table, new_table):
    """Copy all rows from *old_table* into *new_table*, generating a fresh UUID per row.

    When the old table has an ``id`` column its values are not copied; the
    new ``id`` is filled with ``gen_random_uuid()``. Otherwise every column
    is copied and the generated value goes into the extra ``uuid`` column.
    """
    source_columns = [col[0] for col in get_columns(cur, old_table)]
    if 'id' in source_columns:
        source_columns.remove('id')
        target_columns = ['id'] + source_columns
        template = "INSERT INTO {} ({}) SELECT gen_random_uuid(), {} FROM {}"
    else:
        target_columns = source_columns + ['uuid']
        template = "INSERT INTO {} ({}) SELECT {}, gen_random_uuid() FROM {}"
    statement = sql.SQL(template).format(
        sql.Identifier(new_table),
        sql.SQL(", ").join(map(sql.Identifier, target_columns)),
        sql.SQL(", ").join(map(sql.Identifier, source_columns)),
        sql.Identifier(old_table)
    )
    cur.execute(statement)
def update_foreign_keys(cur, tables):
    """Rewrite foreign key columns in each ``<table>_uuid`` table to the new ids.

    For every foreign key constraint on the original tables, the constraint
    definition text is parsed to find the local column, the referenced
    table and the referenced column, and an UPDATE maps old referenced
    values to ``id`` values of ``<referenced_table>_uuid``.
    """
    for table in tables:
        constraints = get_constraints(cur, table)
        for constraint in constraints:
            conname, contype, condef = constraint
            if contype == 'f': # Foreign key constraint
                # Text between 'REFERENCES ' and the next '(' is the table;
                # the first and second parenthesised groups are the local
                # and referenced column lists respectively.
                # NOTE(review): a multi-column key yields "a, b" here, which
                # is then quoted as ONE identifier - verify only
                # single-column foreign keys exist.
                referenced_table = condef.split('REFERENCES ')[1].split('(')[0].strip()
                referenced_column = condef.split('(')[2].split(')')[0].strip()
                local_column = condef.split('(')[1].split(')')[0].strip()
                # NOTE(review): the join matches new_table.<ref_column> to
                # old.<ref_column>; if the referenced column is `id`, the new
                # table presumably holds freshly generated UUIDs there, so
                # this may match nothing - confirm against migrate_data.
                cur.execute(sql.SQL("""
UPDATE {table_uuid}
SET {local_column} = subquery.new_id::text::{local_column_type}
FROM (
SELECT old.{ref_column} AS old_id, new_table.id AS new_id
FROM {ref_table} old
JOIN public.{ref_table_uuid} new_table ON new_table.{ref_column}::text = old.{ref_column}::text
) AS subquery
WHERE {local_column}::text = subquery.old_id::text
""").format(
                    table_uuid=sql.Identifier(f"{table}_uuid"),
                    local_column=sql.Identifier(local_column),
                    local_column_type=sql.SQL(get_column_type(cur, f"{table}_uuid", local_column)),
                    ref_column=sql.Identifier(referenced_column),
                    ref_table=sql.Identifier(referenced_table),
                    ref_table_uuid=sql.Identifier(f"{referenced_table}_uuid")
                ))
def get_column_type(cur, table_name, column_name):
    """Return the ``data_type`` that information_schema reports for one column.

    Takes the first matching row; fails with a ``TypeError`` when the query
    finds no such column.
    """
    type_query = """
        SELECT data_type
        FROM information_schema.columns
        WHERE table_name = %s AND column_name = %s
    """
    cur.execute(type_query, (table_name, column_name))
    first_row = cur.fetchone()
    return first_row[0]
def rename_tables(cur, tables):
    """Swap each migrated table into place.

    For every name: drop any leftover ``<name>_orig``, rename ``<name>`` to
    ``<name>_orig``, then rename ``<name>_uuid`` to ``<name>``.
    """
    rename_template = sql.SQL("ALTER TABLE IF EXISTS {} RENAME TO {}")
    for name in tables:
        backup_name = f"{name}_orig"
        drop_table_if_exists(cur, backup_name)
        # Order matters: the original must move aside before the UUID
        # table can take its name.
        renames = (
            (name, backup_name),
            (f"{name}_uuid", name),
        )
        for current, target in renames:
            cur.execute(rename_template.format(sql.Identifier(current), sql.Identifier(target)))
def main():
    """Migrate every eligible public table to UUID keys in a single transaction.

    Steps: create the ``*_uuid`` tables, copy the data, update foreign key
    references, swap the tables into place, commit. Any error is printed
    and the transaction rolled back; the connection is always closed.
    """
    # Bound before the try so the handlers below can tell whether a
    # connection exists; previously a failed connect raised NameError in
    # the except block and hid the real error.
    conn = None
    try:
        conn = connect_to_db()
        with conn.cursor() as cur:
            tables = get_tables(cur)
            # Create new UUID tables
            for table in tables:
                print(f"Creating UUID table for {table}...")
                create_uuid_table(cur, table, f"{table}_uuid")
            # Migrate data
            for table in tables:
                print(f"Migrating data for {table}...")
                migrate_data(cur, table, f"{table}_uuid")
            # Update foreign keys
            print("Updating foreign key references...")
            update_foreign_keys(cur, tables)
            # Rename tables
            print("Renaming tables...")
            rename_tables(cur, tables)
        conn.commit()
        print("Migration completed successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")
        if conn is not None:
            conn.rollback()
    finally:
        if conn is not None:
            conn.close()
if __name__ == "__main__":
main()

366
sijapi/helpers/plss.py Normal file
View file

@ -0,0 +1,366 @@
#!/usr/bin/env python3
import requests
import json
import time
import os
import subprocess
import sys
import yaml
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import argparse
import psycopg2
from psycopg2.extras import execute_values
def load_config():
    """Read the system and GIS configuration files that live in ``../config``.

    Returns:
        ``(sys_config, gis_config)`` parsed from ``sys.yaml`` and ``gis.yaml``.
    """
    base = os.path.dirname(os.path.abspath(__file__))

    def read_yaml(filename):
        # Parse one YAML file from the config directory.
        with open(os.path.join(base, '..', 'config', filename), 'r') as stream:
            return yaml.safe_load(stream)

    sys_config = read_yaml('sys.yaml')
    gis_config = read_yaml('gis.yaml')
    return sys_config, gis_config
def get_db_config(sys_config):
    """Extract connection settings for the first server in ``sys_config['POOL']``.

    Returns:
        A dict with ``DB_NAME``, ``DB_USER``, ``DB_PASSWORD``, ``DB_HOST``
        and ``DB_PORT`` (the port converted with ``str``), or an empty dict
        when the pool is missing or empty.
    """
    pool = sys_config.get('POOL', [])
    if not pool:
        return {}
    first = pool[0]
    # Output key -> key in the pool entry.
    key_map = {
        'DB_NAME': 'db_name',
        'DB_USER': 'db_user',
        'DB_PASSWORD': 'db_pass',
        'DB_HOST': 'ts_ip',
    }
    settings = {target: first.get(source) for target, source in key_map.items()}
    settings['DB_PORT'] = str(first.get('db_port'))
    return settings
def get_feature_count(url):
    """Return how many features the layer's query endpoint at *url* holds.

    Performs a count-only query with a 30 second timeout; the session
    retries up to 10 times on 500/502/503/504 responses.

    Returns:
        The ``count`` value of the JSON reply, defaulting to 0.

    Raises:
        requests.exceptions.HTTPError: on a non-success status.
    """
    count_params = dict(where='1=1', returnCountOnly='true', f='json')
    policy = Retry(total=10, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
    with requests.Session() as client:
        client.mount("https://", HTTPAdapter(max_retries=policy))
        answer = client.get(url, params=count_params, timeout=30)
        answer.raise_for_status()
        body = answer.json()
    return body.get('count', 0)
def fetch_features(url, offset, num, max_retries=5):
    """Fetch one page of features from a layer's query endpoint.

    The request asks for all fields, geometry in SRID 4326 with six decimals
    of precision, ordered by ``OBJECTID`` so that offsets are stable.

    Args:
        url: Query URL of the layer.
        offset: Index of the first record to return (``resultOffset``).
        num: Maximum number of records to return (``resultRecordCount``).
        max_retries: Number of attempts made by this function, on top of the
            per-request retries performed by the mounted adapter.

    Returns:
        The decoded JSON reply.

    Raises:
        requests.exceptions.RequestException: When the final attempt fails.
    """
    params = {
        'where': '1=1',
        'outFields': '*',
        'geometryPrecision': 6,
        'outSR': 4326,
        'f': 'json',
        'resultOffset': offset,
        'resultRecordCount': num,
        'orderByFields': 'OBJECTID'
    }
    # The retry policy and session are identical for every attempt, so build
    # them once instead of once per loop iteration.
    retries = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
    with requests.Session() as session:
        session.mount("https://", HTTPAdapter(max_retries=retries))
        for attempt in range(max_retries):
            try:
                response = session.get(url, params=params, timeout=30)
                response.raise_for_status()
                return response.json()
            except requests.exceptions.RequestException as e:
                print(f"Error fetching features (attempt {attempt + 1}/{max_retries}): {e}")
                if attempt == max_retries - 1:
                    raise
                # Linear backoff: 5s, 10s, 15s, ... between attempts.
                time.sleep(5 * (attempt + 1))
def create_table(db_config, table_name, gis_config):
    """Create ``table_name`` if it does not exist yet.

    Column names and types are derived from the attributes of the layer's
    first feature: ``int`` values become INTEGER, ``float`` values DOUBLE
    PRECISION and everything else TEXT. Attribute names are lower-cased, with
    ``.`` replaced by ``_`` and ``()`` removed. An ``id`` serial key and a
    ``geom`` polygon column (SRID 4326) are always added.

    Database errors are printed rather than raised; the connection is always
    closed.

    Args:
        db_config: Mapping with ``DB_NAME``, ``DB_USER``, ``DB_PASSWORD``,
            ``DB_HOST`` and ``DB_PORT``.
        table_name: Target table, optionally schema-qualified.
        gis_config: Parsed GIS configuration; its ``layers`` list supplies the
            URL of the layer whose ``table_name`` matches.
    """
    conn = psycopg2.connect(
        dbname=db_config['DB_NAME'],
        user=db_config['DB_USER'],
        password=db_config['DB_PASSWORD'],
        host=db_config['DB_HOST'],
        port=db_config['DB_PORT']
    )
    try:
        with conn.cursor() as cur:
            # Bind the name as a parameter so quoting inside it cannot break
            # the existence check.
            cur.execute("SELECT to_regclass(%s)", (table_name,))
            if cur.fetchone()[0] is None:
                # The table doesn't exist: model it on the first feature.
                url = next(layer['url'] for layer in gis_config['layers'] if layer['table_name'] == table_name)
                first_feature = fetch_features(url, 0, 1)['features'][0]
                columns = []
                column_names = set()
                for attr, value in first_feature['attributes'].items():
                    column_name = attr.lower().replace('.', '_').replace('()', '')
                    column_names.add(column_name)
                    if isinstance(value, int):
                        columns.append(f'"{column_name}" INTEGER')
                    elif isinstance(value, float):
                        columns.append(f'"{column_name}" DOUBLE PRECISION')
                    else:
                        columns.append(f'"{column_name}" TEXT')
                create_sql = f"""
                    CREATE TABLE {table_name} (
                        id SERIAL PRIMARY KEY,
                        geom GEOMETRY(Polygon, 4326),
                        {', '.join(columns)}
                    )
                """
                cur.execute(create_sql)
                # Index plssid only when the layer actually has that attribute;
                # otherwise the failing CREATE INDEX would abort table creation.
                if 'plssid' in column_names:
                    cur.execute(f'CREATE INDEX idx_{table_name.split(".")[-1]}_plssid ON {table_name}("plssid")')
                else:
                    print(f"No plssid column in {table_name}; skipping index creation")
                print(f"Created table: {table_name}")
            else:
                print(f"Table {table_name} already exists")
        conn.commit()
    except psycopg2.Error as e:
        print(f"Error creating table {table_name}: {e}")
    finally:
        conn.close()
def insert_features_to_db(features, table_name, db_config):
    """Insert a batch of features into ``table_name``.

    The table's columns (except ``id``) are read from
    ``information_schema.columns`` and each feature is turned into one row.
    The ``geom`` column receives a WKT polygon built from the first ring of
    the feature's geometry only; further rings are not used. Features lacking
    geometry or attributes are reported and skipped.

    Any error is printed and the transaction rolled back; nothing is raised
    to the caller. The connection is always closed.

    Args:
        features: List of feature dicts with ``geometry`` and ``attributes``.
        table_name: Target table, optionally schema-qualified.
        db_config: Mapping with ``DB_NAME``, ``DB_USER``, ``DB_PASSWORD``,
            ``DB_HOST`` and ``DB_PORT``.
    """
    conn = psycopg2.connect(
        dbname=db_config['DB_NAME'],
        user=db_config['DB_USER'],
        password=db_config['DB_PASSWORD'],
        host=db_config['DB_HOST'],
        port=db_config['DB_PORT']
    )
    try:
        with conn.cursor() as cur:
            # Look the columns up with a bound parameter instead of building
            # the SQL text from the table name.
            cur.execute(
                "SELECT column_name FROM information_schema.columns "
                "WHERE table_name = %s ORDER BY ordinal_position",
                (table_name.split('.')[-1],)
            )
            db_columns = [row[0] for row in cur.fetchall() if row[0] != 'id']

            column_list = ', '.join(f'"{col}"' for col in db_columns)
            sql = f"""
                INSERT INTO {table_name} ({column_list})
                VALUES %s
            """
            # One placeholder per column for execute_values.
            template = f"({', '.join(['%s' for _ in db_columns])})"

            values = []
            for feature in features:
                geom = feature.get('geometry')
                attrs = feature.get('attributes')
                if geom and attrs:
                    rings = geom['rings'][0]
                    wkt = f"POLYGON(({','.join([f'{x} {y}' for x, y in rings])}))"
                    # Index the attributes by the same normalisation used when
                    # the columns were created, so mixed-case names such as
                    # 'Shape.STArea()' map back without special cases.
                    normalized = {
                        name.lower().replace('.', '_').replace('()', ''): value
                        for name, value in attrs.items()
                    }
                    row = []
                    for col in db_columns:
                        if col == 'geom':
                            row.append(wkt)
                        elif col in normalized:
                            row.append(normalized[col])
                        else:
                            # Fall back to the plain upper-case lookup.
                            row.append(attrs.get(col.upper()))
                    values.append(tuple(row))
                else:
                    print(f"Skipping invalid feature: {feature}")

            if values:
                execute_values(cur, sql, values, template=template, page_size=100)
                print(f"Inserted {len(values)} features")
            else:
                print("No valid features to insert")
        conn.commit()
    except Exception as e:
        print(f"Error inserting features: {e}")
        print(f"First feature for debugging: {features[0] if features else 'No features'}")
        conn.rollback()
    finally:
        conn.close()
def download_and_import_layer(layer_config, db_config, gis_config, force_refresh):
    """Download one layer page by page and load it into its table.

    The import resumes at the number of rows already present in the table.
    When the table already holds as many rows as the service reports and no
    refresh is forced, nothing is done. A failing batch is reported and
    skipped by advancing the offset by ``batch_size``.

    Args:
        layer_config: Layer entry with ``url``, ``layer_name``, ``table_name``,
            ``batch_size`` and ``delay`` (milliseconds between batches).
        db_config: Database connection settings.
        gis_config: Full GIS configuration, passed on to ``create_table``.
        force_refresh: When true, drop and recreate the table first.
    """
    url = layer_config['url']
    layer_name = layer_config['layer_name']
    table_name = layer_config['table_name']
    batch_size = layer_config['batch_size']
    delay = layer_config['delay'] / 1000  # milliseconds -> seconds

    total_count = get_feature_count(url)
    print(f"Total {layer_name} features: {total_count}")

    # Compare with what is already stored.
    existing_count = get_existing_record_count(db_config, table_name)
    if not force_refresh and existing_count == total_count:
        print(f"Table {table_name} already contains all {total_count} features. Skipping.")
        return

    if force_refresh:
        delete_existing_table(db_config, table_name)
        existing_count = 0
    if existing_count == 0:
        # Covers both a forced refresh and a table that is empty or missing.
        create_table(db_config, table_name, gis_config)

    offset = existing_count
    started = time.time()
    try:
        while offset < total_count:
            batch_started = time.time()
            print(f"Fetching {layer_name} features {offset} to {offset + batch_size}...")
            try:
                batch = fetch_features(url, offset, batch_size).get('features', [])
                if not batch:
                    break
                insert_features_to_db(batch, table_name, db_config)
                offset += len(batch)
                print(f"Batch processed in {time.time() - batch_started:.2f} seconds")

                # Progress indicator
                progress = offset / total_count
                bar_length = 30
                filled_length = int(bar_length * progress)
                bar = '=' * filled_length + '-' * (bar_length - filled_length)
                print(f'\rProgress: [{bar}] {progress:.1%} ({offset}/{total_count} features)', end='', flush=True)

                time.sleep(delay)
            except Exception as e:
                print(f"\nError processing batch starting at offset {offset}: {e}")
                print("Continuing with next batch...")
                offset += batch_size

        print(f"\nTotal {layer_name} features fetched and imported: {offset}")
        print(f"Total time: {time.time() - started:.2f} seconds")
    except Exception as e:
        print(f"\nError during download and import: {e}")
        print(f"Last successful offset: {offset}")
def get_existing_record_count(db_config, table_name):
    """Return the number of rows in ``table_name``.

    Any ``psycopg2.Error`` raised while counting (for example because the
    table does not exist) is reported as 0 rows. The connection is always
    closed.

    Args:
        db_config: Mapping with ``DB_NAME``, ``DB_USER``, ``DB_PASSWORD``,
            ``DB_HOST`` and ``DB_PORT``.
        table_name: Table to count, optionally schema-qualified.
    """
    connect_kwargs = {
        'dbname': db_config['DB_NAME'],
        'user': db_config['DB_USER'],
        'password': db_config['DB_PASSWORD'],
        'host': db_config['DB_HOST'],
        'port': db_config['DB_PORT'],
    }
    connection = psycopg2.connect(**connect_kwargs)
    try:
        with connection.cursor() as cursor:
            cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
            return cursor.fetchone()[0]
    except psycopg2.Error:
        return 0
    finally:
        connection.close()
def delete_existing_table(db_config, table_name):
    """Drop ``table_name`` and its ``plssid`` index if they exist.

    Database errors are printed rather than raised; the connection is always
    closed.

    Args:
        db_config: Mapping with ``DB_NAME``, ``DB_USER``, ``DB_PASSWORD``,
            ``DB_HOST`` and ``DB_PORT``.
        table_name: Table to drop, optionally schema-qualified.
    """
    connect_kwargs = {
        'dbname': db_config['DB_NAME'],
        'user': db_config['DB_USER'],
        'password': db_config['DB_PASSWORD'],
        'host': db_config['DB_HOST'],
        'port': db_config['DB_PORT'],
    }
    connection = psycopg2.connect(**connect_kwargs)
    try:
        with connection.cursor() as cursor:
            # Index first, then the table itself.
            statements = (
                f"DROP INDEX IF EXISTS idx_{table_name.split('.')[-1]}_plssid",
                f"DROP TABLE IF EXISTS {table_name} CASCADE",
            )
            for statement in statements:
                cursor.execute(statement)
        connection.commit()
        print(f"Deleted existing table and index: {table_name}")
    except psycopg2.Error as e:
        print(f"Error deleting table {table_name}: {e}")
    finally:
        connection.close()
def check_postgres_connection(db_config):
    """Report whether ``psql`` can run ``SELECT 1`` on the configured database.

    Args:
        db_config: Mapping with ``DB_HOST``, ``DB_PORT``, ``DB_USER`` and
            ``DB_NAME``; ``DB_PASSWORD`` is used when present.

    Returns:
        bool: True when ``psql`` exits successfully; False when it fails or
        cannot be started at all.
    """
    env = os.environ.copy()
    password = db_config.get('DB_PASSWORD')
    if password is not None:
        # psql takes no password argument; PGPASSWORD supplies the configured
        # one so the check uses the same credentials as the psycopg2 calls.
        env['PGPASSWORD'] = str(password)
    try:
        subprocess.run(['psql',
                        '-h', db_config['DB_HOST'],
                        '-p', db_config['DB_PORT'],
                        '-U', db_config['DB_USER'],
                        '-d', db_config['DB_NAME'],
                        '-c', 'SELECT 1;'],
                       check=True, capture_output=True, text=True, env=env)
        return True
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a psql binary that is missing or not executable.
        return False
def check_postgis_extension(db_config):
    """Report whether the ``postgis`` extension is installed in the database.

    Args:
        db_config: Mapping with ``DB_HOST``, ``DB_PORT``, ``DB_USER`` and
            ``DB_NAME``; ``DB_PASSWORD`` is used when present.

    Returns:
        bool: True when the ``pg_extension`` lookup returns a row; False when
        it returns none, when ``psql`` fails, or when it cannot be started.
    """
    env = os.environ.copy()
    password = db_config.get('DB_PASSWORD')
    if password is not None:
        # Supply the configured password so psql does not have to prompt.
        env['PGPASSWORD'] = str(password)
    try:
        result = subprocess.run(['psql',
                                 '-h', db_config['DB_HOST'],
                                 '-p', db_config['DB_PORT'],
                                 '-U', db_config['DB_USER'],
                                 '-d', db_config['DB_NAME'],
                                 # Tuples-only, unaligned output: just the value.
                                 '-t', '-A',
                                 '-c', "SELECT 1 FROM pg_extension WHERE extname = 'postgis';"],
                                check=True, capture_output=True, text=True, env=env)
        # Compare the bare value instead of searching decorated output for '1'.
        return result.stdout.strip() == '1'
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a psql binary that is missing or not executable.
        return False
def create_postgis_extension(db_config):
    """Run ``CREATE EXTENSION IF NOT EXISTS postgis`` through ``psql``.

    On failure the error is printed and the process exits with status 1.

    Args:
        db_config: Mapping with ``DB_HOST``, ``DB_PORT``, ``DB_USER`` and
            ``DB_NAME``; ``DB_PASSWORD`` is used when present.
    """
    env = os.environ.copy()
    password = db_config.get('DB_PASSWORD')
    if password is not None:
        # Supply the configured password so psql does not have to prompt.
        env['PGPASSWORD'] = str(password)
    try:
        subprocess.run(['psql',
                        '-h', db_config['DB_HOST'],
                        '-p', db_config['DB_PORT'],
                        '-U', db_config['DB_USER'],
                        '-d', db_config['DB_NAME'],
                        '-c', "CREATE EXTENSION IF NOT EXISTS postgis;"],
                       check=True, capture_output=True, text=True, env=env)
        print("PostGIS extension created successfully.")
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a psql binary that is missing or not executable.
        print(f"Error creating PostGIS extension: {e}")
        sys.exit(1)
def main():
    """Command-line entry point: import every configured layer.

    ``--force-refresh`` with no names forces a refresh of all layers;
    with names, only the named layers are processed, each with a forced
    refresh. Without the option every layer is processed and none is forced.
    """
    parser = argparse.ArgumentParser(description="Download and import PLSS data")
    parser.add_argument("--force-refresh", nargs='*', help="Force refresh of specified layers or all if none specified")
    args = parser.parse_args()

    sys_config, gis_config = load_config()
    db_config = get_db_config(sys_config)
    if not db_config:
        # get_db_config returns {} when POOL is missing or empty; stop here
        # rather than failing later with a KeyError.
        print("Error: No database configured in POOL. Please check your connection settings.")
        sys.exit(1)

    if not check_postgres_connection(db_config):
        print("Error: Unable to connect to PostgreSQL. Please check your connection settings.")
        sys.exit(1)
    if not check_postgis_extension(db_config):
        print("PostGIS extension not found. Attempting to create it...")
        create_postgis_extension(db_config)

    requested = args.force_refresh
    # The option is None when absent and [] when given without names. Its mere
    # presence means "force", so that a bare --force-refresh refreshes all
    # layers as the help text promises.
    force_refresh = requested is not None
    try:
        for layer in gis_config['layers']:
            if not requested or layer['layer_name'] in requested:
                download_and_import_layer(layer, db_config, gis_config, force_refresh)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
if __name__ == "__main__":
main()

View file

@ -0,0 +1,101 @@
import asyncio
import asyncpg
import yaml
from pathlib import Path
import subprocess
async def load_config():
    """Load ``config/db.yaml`` from the directory above this script's folder.

    Returns:
        The document parsed by ``yaml.safe_load``.
    """
    config_dir = Path(__file__).parent.parent / 'config'
    with open(config_dir / 'db.yaml', 'r') as handle:
        parsed = yaml.safe_load(handle)
    return parsed
async def get_table_size(conn, table_name):
    """Return the row count of ``table_name`` as reported by ``conn.fetchval``."""
    count_query = f"SELECT COUNT(*) FROM {table_name}"
    row_count = await conn.fetchval(count_query)
    return row_count
async def check_postgres_version(conn):
    """Return the result of ``SELECT version()`` on ``conn``."""
    version_string = await conn.fetchval("SELECT version()")
    return version_string
async def replicate_table(source, target, table_name):
    """Copy one table from ``source`` to ``target`` with pg_dump and psql.

    The table is dumped from the source, dropped on the target, restored from
    the dump, and the row counts of both sides are compared. Errors during
    these steps are printed, not raised; both connections are always closed.

    Args:
        source: POOL entry of the source server (``ts_id``, ``ts_ip``,
            ``db_port``, ``db_name``, ``db_user``, ``db_pass``).
        target: POOL entry of the target server, same keys.
        table_name: Name of the table to replicate.
    """
    import os  # function-scope import: this script does not import os at the top

    def connection_kwargs(server):
        # asyncpg.connect() does not accept the POOL key names, so translate
        # them to database/user/password/host/port.
        return {
            'database': server['db_name'],
            'user': server['db_user'],
            'password': server['db_pass'],
            'host': server['ts_ip'],
            'port': server['db_port'],
        }

    def client_env(server):
        # Extend the current environment instead of replacing it; dropping
        # PATH would keep pg_dump/psql from being found outside the default
        # search path.
        return {**os.environ, 'PGPASSWORD': str(server['db_pass'])}

    print(f"Replicating {table_name} from {source['ts_id']} to {target['ts_id']}")
    source_conn = await asyncpg.connect(**connection_kwargs(source))
    try:
        target_conn = await asyncpg.connect(**connection_kwargs(target))
    except Exception:
        # Do not leak the source connection when the target is unreachable.
        await source_conn.close()
        raise

    try:
        source_version = await check_postgres_version(source_conn)
        target_version = await check_postgres_version(target_conn)
        print(f"Source database version: {source_version}")
        print(f"Target database version: {target_version}")

        table_size = await get_table_size(source_conn, table_name)
        print(f"Table size: {table_size} rows")

        # Dump the table
        dump_command = [
            'pg_dump',
            '-h', source['ts_ip'],
            '-p', str(source['db_port']),
            '-U', source['db_user'],
            '-d', source['db_name'],
            '-t', table_name,
            '--no-owner',
            '--no-acl'
        ]
        dump_result = subprocess.run(dump_command, env=client_env(source), capture_output=True, text=True)
        if dump_result.returncode != 0:
            raise Exception(f"Dump failed: {dump_result.stderr}")
        print("Dump completed successfully")

        # Drop the table on the target; the restore recreates it
        await target_conn.execute(f"DROP TABLE IF EXISTS {table_name} CASCADE")
        print(f"Dropped table {table_name} on target")

        # Restore the table
        restore_command = [
            'psql',
            '-h', target['ts_ip'],
            '-p', str(target['db_port']),
            '-U', target['db_user'],
            '-d', target['db_name'],
            # Without ON_ERROR_STOP psql exits 0 even when statements fail,
            # so the return code check below would never fire.
            '-v', 'ON_ERROR_STOP=1',
        ]
        restore_result = subprocess.run(restore_command, input=dump_result.stdout, env=client_env(target), capture_output=True, text=True)
        if restore_result.returncode != 0:
            raise Exception(f"Restore failed: {restore_result.stderr}")
        print(f"Table {table_name} restored successfully")

        # Verify the number of rows in the target table
        target_size = await get_table_size(target_conn, table_name)
        if target_size == table_size:
            print(f"Replication successful. {target_size} rows copied.")
        else:
            print(f"Warning: Source had {table_size} rows, but target has {target_size} rows.")
    except Exception as e:
        print(f"An error occurred while replicating {table_name}: {str(e)}")
    finally:
        await source_conn.close()
        await target_conn.close()
async def main():
    """Replicate a fixed list of tables from the first POOL server to the rest."""
    pool = (await load_config())['POOL']
    source_server = pool[0]    # sij-mbp16
    target_servers = pool[1:]  # sij-vm and sij-vps

    tables_to_replicate = [
        'click_logs',
        'dailyweather',
        'hourlyweather',
        'locations',
        'short_urls',
    ]

    # Tables are handled one at a time, each pushed to every target in turn.
    for table_name in tables_to_replicate:
        for target_server in target_servers:
            await replicate_table(source_server, target_server, table_name)

    print("All replications completed!")
if __name__ == "__main__":
asyncio.run(main())

132
sijapi/helpers/repl.py Normal file
View file

@ -0,0 +1,132 @@
#!/usr/bin/env python3
import os
import yaml
import subprocess
import time
from tqdm import tqdm
def load_config():
    """Read the system and GIS YAML configuration files.

    Both files are looked up in the ``config`` directory one level above the
    directory that contains this script.

    Returns:
        tuple: ``(sys_config, gis_config)`` as parsed by ``yaml.safe_load``.
    """
    here = os.path.dirname(os.path.abspath(__file__))

    def _read_yaml(filename):
        # Each file is opened, parsed and closed before the next one is read.
        with open(os.path.join(here, '..', 'config', filename), 'r') as handle:
            return yaml.safe_load(handle)

    sys_config = _read_yaml('sys.yaml')
    gis_config = _read_yaml('gis.yaml')
    return sys_config, gis_config
def get_table_size(server, table_name):
    """Count the rows of ``table_name`` on ``server`` by running ``psql``.

    Args:
        server: POOL entry with ``ts_ip``, ``db_port``, ``db_user``,
            ``db_name`` and ``db_pass``.
        table_name: Table to count.

    Returns:
        int: The count parsed from psql's tuples-only output.

    Raises:
        subprocess.CalledProcessError: When psql exits with a non-zero status.
    """
    psql_env = dict(os.environ, PGPASSWORD=server['db_pass'])
    count_sql = f"SELECT COUNT(*) FROM {table_name}"
    argv = ['psql']
    argv += ['-h', server['ts_ip']]
    argv += ['-p', str(server['db_port'])]
    argv += ['-U', server['db_user']]
    argv += ['-d', server['db_name']]
    argv += ['-t']  # tuples only, so stdout is just the number
    argv += ['-c', count_sql]

    completed = subprocess.run(argv, env=psql_env, capture_output=True, text=True, check=True)
    return int(completed.stdout.strip())
def replicate_table(source, targets, table_name):
    """Dump ``table_name`` from ``source`` and restore it on every target.

    The dump is written to ``<table_name>.sql`` in the current directory and
    removed again when the function finishes, whether or not it succeeded.
    On each target the table and its ``_id_seq`` sequence are dropped before
    the dump is replayed with psql.

    Args:
        source: POOL entry of the source server.
        targets: Iterable of POOL entries to restore to.
        table_name: Table to replicate.

    Raises:
        subprocess.CalledProcessError: When counting, dumping or dropping
            fails. A failed restore is printed instead.
    """
    print(f"Replicating {table_name}")

    # Get table size for progress bar
    table_size = get_table_size(source, table_name)
    print(f"Table size: {table_size} rows")

    dump_path = f"{table_name}.sql"

    # Dump the table from the source
    dump_command = [
        'pg_dump',
        '-h', source['ts_ip'],
        '-p', str(source['db_port']),
        '-U', source['db_user'],
        '-d', source['db_name'],
        '-t', table_name,
        '--no-owner',
        '--no-acl'
    ]
    env = os.environ.copy()
    env['PGPASSWORD'] = source['db_pass']

    print("Dumping table...")
    try:
        with open(dump_path, 'w') as f:
            subprocess.run(dump_command, env=env, stdout=f, check=True)
        print("Dump complete")

        # Restore the table to each target
        for target in targets:
            print(f"Replicating to {target['ts_id']}")

            # Drop table and its sequence
            drop_commands = [
                f"DROP TABLE IF EXISTS {table_name} CASCADE;",
                f"DROP SEQUENCE IF EXISTS {table_name}_id_seq CASCADE;"
            ]
            restore_command = [
                'psql',
                '-h', target['ts_ip'],
                '-p', str(target['db_port']),
                '-U', target['db_user'],
                '-d', target['db_name'],
                # Without ON_ERROR_STOP psql exits 0 even when statements
                # fail, so a broken restore would be reported as a success.
                '-v', 'ON_ERROR_STOP=1',
            ]
            env = os.environ.copy()
            env['PGPASSWORD'] = target['db_pass']

            # Execute drop commands
            for cmd in drop_commands:
                print(f"Executing: {cmd}")
                subprocess.run(restore_command + ['-c', cmd], env=env, check=True)

            # Restore the table
            print("Restoring table...")
            # psql prints command tags such as "COPY 123" on stdout. Merge
            # stderr into it and read that single pipe, so progress is seen
            # and no pipe is left undrained.
            process = subprocess.Popen(restore_command + ['-f', dump_path], env=env,
                                       stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                                       universal_newlines=True)
            pbar = tqdm(total=table_size, desc="Copying rows")
            for line in process.stdout:
                parts = line.split()
                if len(parts) == 2 and parts[0] == "COPY" and parts[1].isdigit():
                    copied_rows = int(parts[1])
                    pbar.update(copied_rows - pbar.n)
                print(line, end='')  # Print all output for visibility
            pbar.close()
            process.wait()

            if process.returncode != 0:
                # psql's error text was already echoed by the loop above.
                print(f"Error occurred during restoration to {target['ts_id']}")
            else:
                print(f"Restoration to {target['ts_id']} completed successfully")
    finally:
        # Clean up the dump file, also when a step above raised
        if os.path.exists(dump_path):
            os.remove(dump_path)

    print(f"Replication of {table_name} completed")
def main():
    """Replicate every table named in the GIS layer list.

    The first POOL entry is the source; all remaining entries are targets.
    """
    sys_config, gis_config = load_config()

    pool = sys_config['POOL']
    source_server = pool[0]
    target_servers = pool[1:]

    # Collect all table names up front, then replicate them in order.
    tables = []
    for layer in gis_config['layers']:
        tables.append(layer['table_name'])

    for table in tables:
        replicate_table(source_server, target_servers, table)

    print("All replications completed!")
if __name__ == "__main__":
main()

90
sijapi/helpers/repl.sh Executable file
View file

@ -0,0 +1,90 @@
#!/bin/bash
# Configuration
# Source server: the database every table is dumped from.
# NOTE(review): the passwords below are stored in plain text in a tracked,
# executable script. Consider reading them from the environment or ~/.pgpass
# and rotating the committed value.
SOURCE_HOST="100.64.64.20"
SOURCE_PORT="5432"
SOURCE_DB="sij"
SOURCE_USER="sij"
SOURCE_PASS="Synchr0!"
# Target servers
# Each entry is "name:host:port:database:user:password"; replicate_to_target
# splits it on ':' in that order.
declare -a TARGETS=(
"sij-vm:100.64.64.11:5432:sij:sij:Synchr0!"
"sij-vps:100.64.64.15:5432:sij:sij:Synchr0!"
)
# Tables to replicate
TABLES=("dailyweather" "hourlyweather" "short_urls" "click_logs" "locations")
# PostgreSQL binaries
# Absolute paths into a Postgres.app installation; adjust when running elsewhere.
PSQL="/Applications/Postgres.app/Contents/Versions/latest/bin/psql"
PG_DUMP="/Applications/Postgres.app/Contents/Versions/latest/bin/pg_dump"
# Run one SQL statement with psql and display its results.
# Arguments: host port database user password sql
# Returns psql's exit status.
run_sql() {
    local host=$1
    local port=$2
    local db=$3
    local user=$4
    local pass=$5
    local sql=$6
    # Quote every expansion so values containing spaces or glob characters
    # reach psql as single, unmodified arguments.
    PGPASSWORD="$pass" "$PSQL" -h "$host" -p "$port" -U "$user" -d "$db" -c "$sql"
}
# Replicate every table in TABLES from the source server to one target.
# Argument: a "name:host:port:database:user:password" entry from TARGETS.
# Per-table failures are reported and the loop moves on to the next table.
replicate_to_target() {
    local target_info=$1
    # Keep the parsed fields and loop variables local to this call.
    local target_name target_host target_port target_db target_user target_pass
    local table dump_file
    IFS=':' read -r target_name target_host target_port target_db target_user target_pass <<< "$target_info"

    echo "Replicating to $target_name ($target_host)"

    # Check source tables
    echo "Checking source tables:"
    for table in "${TABLES[@]}"; do
        run_sql "$SOURCE_HOST" "$SOURCE_PORT" "$SOURCE_DB" "$SOURCE_USER" "$SOURCE_PASS" "SELECT COUNT(*) FROM $table;"
    done

    # Dump and restore each table
    for table in "${TABLES[@]}"; do
        echo "Replicating $table"
        dump_file="${table}_dump.sql"

        # Dump table
        if ! PGPASSWORD="$SOURCE_PASS" "$PG_DUMP" -h "$SOURCE_HOST" -p "$SOURCE_PORT" -U "$SOURCE_USER" -d "$SOURCE_DB" -t "$table" --no-owner --no-acl > "$dump_file"; then
            echo "Error dumping $table"
            # Do not leave a partial dump behind.
            rm -f "$dump_file"
            continue
        fi

        # Drop the table on the target; the restore recreates it
        run_sql "$target_host" "$target_port" "$target_db" "$target_user" "$target_pass" "DROP TABLE IF EXISTS $table CASCADE; "

        # Restore table. Without ON_ERROR_STOP psql exits 0 even when
        # statements in the file fail, so success would always be reported.
        if PGPASSWORD="$target_pass" "$PSQL" -h "$target_host" -p "$target_port" -U "$target_user" -d "$target_db" -v ON_ERROR_STOP=1 -f "$dump_file"; then
            echo "$table replicated successfully"
        else
            echo "Error restoring $table"
        fi

        # Clean up dump file
        rm -f "$dump_file"
    done

    # Verify replication
    echo "Verifying replication:"
    for table in "${TABLES[@]}"; do
        echo "Checking $table on target:"
        run_sql "$target_host" "$target_port" "$target_db" "$target_user" "$target_pass" "SELECT COUNT(*) FROM $table;"
    done
}
# Main replication process
# Targets are handled one after another; each call dumps, restores and
# verifies every table in TABLES for that target.
for target in "${TARGETS[@]}"; do
replicate_to_target "$target"
done
echo "Replication completed"

View file

@ -0,0 +1,125 @@
import asyncio
import asyncpg
import yaml
from pathlib import Path
import subprocess
import sys
import os
async def load_config():
    """Load ``config/db.yaml`` from the directory above this script's folder.

    Returns:
        The document parsed by ``yaml.safe_load``.
    """
    config_dir = Path(__file__).parent.parent / 'config'
    with open(config_dir / 'db.yaml', 'r') as handle:
        parsed = yaml.safe_load(handle)
    return parsed
async def check_table_existence(conn, tables):
    """Print, for each table, whether it exists in the ``public`` schema.

    Args:
        conn: Connection whose ``fetchval`` runs the lookup.
        tables: Iterable of table names to check.
    """
    lookup_sql = f"""
        SELECT EXISTS (
            SELECT FROM information_schema.tables
            WHERE table_schema = 'public'
            AND table_name = $1
        )
    """
    for table in tables:
        found = await conn.fetchval(lookup_sql, table)
        status = 'exists' if found else 'does not exist'
        print(f"Table {table} {status} in the database.")
async def check_user_permissions(conn, tables):
    """Print, for each table, whether the current user may SELECT from it.

    Args:
        conn: Connection whose ``fetchval`` runs the privilege check.
        tables: Iterable of table names to check.
    """
    privilege_sql = f"""
        SELECT has_table_privilege(current_user, $1, 'SELECT')
    """
    for table in tables:
        allowed = await conn.fetchval(privilege_sql, table)
        verdict = 'has' if allowed else 'does not have'
        print(f"User {verdict} SELECT permission on table {table}.")
async def replicate_tables(source, target, tables):
    """Dump ``tables`` from ``source`` into one file and replay it on ``target``.

    Before dumping, the existence of each table and the SELECT privilege on
    it are printed for the source. The dump is written to ``dump.sql`` in the
    current directory and removed when the function finishes. Errors during
    the process are printed, not raised; both connections are always closed.

    Args:
        source: POOL entry of the source server (``ts_id``, ``ts_ip``,
            ``db_port``, ``db_name``, ``db_user``, ``db_pass``).
        target: POOL entry of the target server, same keys.
        tables: List of table names to replicate.
    """
    print(f"Replicating tables from {source['ts_id']} to {target['ts_id']}")

    # asyncpg keyword -> POOL entry key
    conn_params = {
        'database': 'db_name',
        'user': 'db_user',
        'password': 'db_pass',
        'host': 'ts_ip',
        'port': 'db_port'
    }
    source_conn = await asyncpg.connect(**{k: source[v] for k, v in conn_params.items()})
    try:
        target_conn = await asyncpg.connect(**{k: target[v] for k, v in conn_params.items()})
    except Exception:
        # Do not leak the source connection when the target is unreachable.
        await source_conn.close()
        raise

    dump_file = 'dump.sql'
    try:
        source_version = await source_conn.fetchval("SELECT version()")
        target_version = await target_conn.fetchval("SELECT version()")
        print(f"Source database version: {source_version}")
        print(f"Target database version: {target_version}")

        print("Checking table existence in source database:")
        await check_table_existence(source_conn, tables)
        print("\nChecking user permissions in source database:")
        await check_user_permissions(source_conn, tables)

        # pg_dump needs one "-t <name>" argument pair per table. Joining the
        # names into a single string hands it one bogus pattern, because no
        # shell is involved to split it again.
        table_args = []
        for table in tables:
            table_args += ['-t', table]

        # Dump all tables to a file
        dump_command = [
            '/Applications/Postgres.app/Contents/Versions/latest/bin/pg_dump',
            '-h', source['ts_ip'],
            '-p', str(source['db_port']),
            '-U', source['db_user'],
            '-d', source['db_name'],
            *table_args,
            '--no-owner',
            '--no-acl',
            '-f', dump_file
        ]
        # Extend the current environment instead of replacing it.
        env = {**os.environ, 'PGPASSWORD': str(source['db_pass'])}
        print(f"\nExecuting dump command: {' '.join(dump_command)}")
        dump_result = subprocess.run(dump_command, env=env, capture_output=True, text=True)
        if dump_result.returncode != 0:
            print(f"Dump stderr: {dump_result.stderr}")
            raise Exception(f"Dump failed: {dump_result.stderr}")
        print("Dump completed successfully.")

        # Restore from the dump file
        restore_command = [
            '/Applications/Postgres.app/Contents/Versions/latest/bin/psql',
            '-h', target['ts_ip'],
            '-p', str(target['db_port']),
            '-U', target['db_user'],
            '-d', target['db_name'],
            # Without ON_ERROR_STOP psql exits 0 even when statements fail,
            # so the return code check below would never fire.
            '-v', 'ON_ERROR_STOP=1',
            '-f', dump_file
        ]
        env = {**os.environ, 'PGPASSWORD': str(target['db_pass'])}
        print(f"\nExecuting restore command: {' '.join(restore_command)}")
        restore_result = subprocess.run(restore_command, env=env, capture_output=True, text=True)
        if restore_result.returncode != 0:
            print(f"Restore stderr: {restore_result.stderr}")
            raise Exception(f"Restore failed: {restore_result.stderr}")
        print("Restore completed successfully.")
    except Exception as e:
        print(f"An error occurred during replication: {str(e)}")
        print("Exception details:", sys.exc_info())
    finally:
        # Clean up the dump file, also after a failure
        if os.path.exists(dump_file):
            os.remove(dump_file)
        await source_conn.close()
        await target_conn.close()
async def main():
    """Replicate a fixed list of tables from the first POOL server to the rest."""
    pool = (await load_config())['POOL']
    source_server = pool[0]    # sij-mbp16
    target_servers = pool[1:]  # sij-vm and sij-vps

    tables_to_replicate = [
        'dailyweather',
        'hourlyweather',
        'short_urls',
        'click_logs',
        'locations',
    ]

    # Every target receives the complete table list in a single call.
    for target_server in target_servers:
        await replicate_tables(source_server, target_server, tables_to_replicate)

    print("All replications completed!")
if __name__ == "__main__":
asyncio.run(main())

File diff suppressed because it is too large Load diff

View file

@ -12,7 +12,7 @@ import sys
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def load_config(): def load_config():
config_path = Path(__file__).parent.parent / 'config' / 'api.yaml' config_path = Path(__file__).parent.parent / 'config' / 'sys.yaml'
with open(config_path, 'r') as file: with open(config_path, 'r') as file:
return yaml.safe_load(file) return yaml.safe_load(file)

110
sijapi/logs.py Normal file
View file

@ -0,0 +1,110 @@
# logs.py
import os
import sys
from pathlib import Path
import yaml
from loguru import logger as loguru_logger
from typing import Union, Optional
class LogLevels:
    """Registry of per-module log levels with a fallback default level."""

    def __init__(self):
        self.levels = {}             # module name -> level name
        self.default_level = "INFO"  # used for modules without an entry
        self.HOME = Path.home()

    def init(self, yaml_path: Union[str, Path]):
        """Load levels from the ``LOGS`` section of a YAML file.

        The ``default`` key of that section becomes the default level; every
        other key is stored as a per-module level.

        Args:
            yaml_path: File name or path, resolved with ``_resolve_path``
                against the ``config`` directory.

        Raises:
            Exception: Any error while reading or parsing is logged and
                re-raised unchanged.
        """
        resolved = self._resolve_path(yaml_path, 'config')
        try:
            with resolved.open('r') as handle:
                config_data = yaml.safe_load(handle)
            logs_config = config_data.get('LOGS', {})
            self.default_level = logs_config.get('default', "INFO")
            self.levels = {
                module: level
                for module, level in logs_config.items()
                if module != 'default'
            }
            loguru_logger.info(f"Loaded log levels configuration from {resolved}")
        except Exception as e:
            loguru_logger.error(f"Error loading log levels configuration: {str(e)}")
            raise

    def _resolve_path(self, path: Union[str, Path], default_dir: str) -> Path:
        """Turn a bare name or relative path into a full path.

        A value without a file suffix is treated as a name and mapped to
        ``<root>/sijapi/<default_dir>/<name>.yaml``; a relative path with a
        suffix is placed under ``<root>``; an absolute path is returned as is.
        ``<root>`` is the parent of the directory containing this file.
        """
        project_root = Path(__file__).parent.parent
        candidate = Path(path)
        if not candidate.suffix:
            return project_root / 'sijapi' / default_dir / f"{candidate.name}.yaml"
        if candidate.is_absolute():
            return candidate
        return project_root / candidate

    def set_level(self, module, level):
        """Set the level for one module."""
        self.levels[module] = level

    def set_default_level(self, level):
        """Set the level used for modules without their own entry."""
        self.default_level = level

    def get_level(self, module):
        """Return the module's level, or the default when none is set."""
        if module in self.levels:
            return self.levels[module]
        return self.default_level
class Logger:
    """Wrapper around the shared loguru logger with per-module level filtering."""

    def __init__(self, name):
        self.name = name
        self.logger = loguru_logger
        self.debug_modules = set()
        self.log_levels = LogLevels()
        self.logs_dir = None  # set by init(); None means "not initialized"

    def init(self, yaml_path: Union[str, Path], logs_dir: Path):
        """Load the level configuration and install the log sinks.

        All previously registered sinks are removed. A rotating ``app.log``
        file in ``logs_dir`` receives everything from DEBUG upwards; stdout
        is additionally filtered by the per-module levels.

        Args:
            yaml_path: Configuration file name or path, passed to
                ``LogLevels.init``.
            logs_dir: Directory for ``app.log``; created when missing.
        """
        self.log_levels.init(yaml_path)
        self.logs_dir = logs_dir
        os.makedirs(self.logs_dir, exist_ok=True)

        # Set up initial logging configuration
        self.logger.remove()
        log_format = "{time:YYYY-MM-DD HH:mm:ss} - {name} - <level>{level: <8}</level> - <level>{message}</level>"
        self.logger.add(self.logs_dir / 'app.log', rotation="2 MB", level="DEBUG", format=log_format)
        self.logger.add(sys.stdout, level="DEBUG", format=log_format, colorize=True,
                        filter=self._level_filter)

    def setup_from_args(self, args):
        """Apply command-line overrides to the level configuration.

        Args:
            args: Parsed arguments. ``debug`` and ``info`` are iterables of
                module names, ``log`` is a default level name; each may be
                missing or None.

        Raises:
            ValueError: When ``init()`` has not been called yet.
        """
        if not self.logs_dir:
            raise ValueError("Logger not initialized. Call init() before setup_from_args().")

        # Treat a missing or None option as "nothing requested" instead of
        # failing while iterating over it.
        debug_modules = getattr(args, 'debug', None) or []
        info_modules = getattr(args, 'info', None) or []
        default_level = getattr(args, 'log', None)

        # Update log levels based on command line arguments
        for module in debug_modules:
            self.log_levels.set_level(module, "DEBUG")
        for module in info_modules:
            self.log_levels.set_level(module, "INFO")
        if default_level:
            self.log_levels.set_default_level(default_level.upper())

        # Set debug modules
        self.debug_modules = set(debug_modules)

        # Custom color and style mappings
        self.logger.level("CRITICAL", color="<yellow><bold><MAGENTA>")
        self.logger.level("ERROR", color="<red><bold>")
        self.logger.level("WARNING", color="<yellow><bold>")
        self.logger.level("DEBUG", color="<green><bold>")

        self.logger.info(f"Debug modules: {self.debug_modules}")
        self.logger.info(f"Log levels: {self.log_levels.levels}")
        self.logger.info(f"Default log level: {self.log_levels.default_level}")

    def _level_filter(self, record):
        """Sink filter: pass records at or above the level of their module.

        The name attached by ``get_logger`` through ``bind(name=...)`` is
        stored under ``record["extra"]`` and is used when a level is
        configured for it; otherwise the lookup falls back to
        ``record["name"]``.
        """
        bound_name = (record.get("extra") or {}).get("name")
        if bound_name in self.log_levels.levels:
            module_level = self.log_levels.get_level(bound_name)
        else:
            module_level = self.log_levels.get_level(record["name"])
        return record["level"].no >= self.logger.level(module_level).no

    def get_logger(self, module_name):
        """Return the shared logger with ``name=module_name`` bound to it."""
        level = self.log_levels.get_level(module_name)
        self.logger.debug(f"Creating logger for {module_name} with level {level}")
        return self.logger.bind(name=module_name)
# Shared Logger instance for the whole package
L = Logger("Central")


# Convenience accessor for module-specific loggers
def get_logger(module_name):
    """Return a logger for ``module_name`` obtained from the shared ``L``."""
    module_logger = L.get_logger(module_name)
    return module_logger

View file

@ -29,18 +29,14 @@ from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry from urllib3.util.retry import Retry
from datetime import datetime as dt_datetime from datetime import datetime as dt_datetime
from better_profanity import profanity from better_profanity import profanity
from sijapi.logs import get_logger
from sijapi.utilities import html_to_markdown, sanitize_filename, assemble_journal_path, assemble_archive_path, contains_profanity, is_ad_or_tracker, initialize_adblock_rules, contains_blacklisted_word from sijapi.utilities import html_to_markdown, sanitize_filename, assemble_journal_path, assemble_archive_path, contains_profanity, is_ad_or_tracker, initialize_adblock_rules, contains_blacklisted_word
from sijapi import L, API, Archivist, BLOCKLISTS_DIR, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR from sijapi import Sys, Archivist, BLOCKLISTS_DIR, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR
from sijapi.logs import get_logger
l = get_logger(__name__)
archivist = APIRouter() archivist = APIRouter()
logger = L.get_module_logger("news")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
adblock_rules = initialize_adblock_rules(BLOCKLISTS_DIR) adblock_rules = initialize_adblock_rules(BLOCKLISTS_DIR)
@archivist.post("/archive") @archivist.post("/archive")
@ -51,11 +47,11 @@ async def archive_post(
encoding: str = Form('utf-8') encoding: str = Form('utf-8')
): ):
if not url: if not url:
warn(f"No URL provided to /archive endpoint.") l.warning(f"No URL provided to /archive endpoint.")
raise HTTPException(status_code=400, detail="URL is required") raise HTTPException(status_code=400, detail="URL is required")
if is_ad_or_tracker(url, adblock_rules): if is_ad_or_tracker(url, adblock_rules):
debug(f"Skipping likely ad or tracker URL: {url}") l.debug(f"Skipping likely ad or tracker URL: {url}")
raise HTTPException(status_code=400, detail="URL is likely an ad or tracker") raise HTTPException(status_code=400, detail="URL is likely an ad or tracker")
markdown_filename = await process_archive(url, title, encoding, source) markdown_filename = await process_archive(url, title, encoding, source)
@ -70,7 +66,7 @@ async def process_archive(
# Check URL against blacklist # Check URL against blacklist
if contains_blacklisted_word(url, Archivist.blacklist): if contains_blacklisted_word(url, Archivist.blacklist):
info(f"Not archiving {url} due to blacklisted word in URL") l.info(f"Not archiving {url} due to blacklisted word in URL")
return None return None
timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M') timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M')
@ -82,13 +78,13 @@ async def process_archive(
# Check content for profanity # Check content for profanity
if contains_profanity(content, threshold=0.01, custom_words=Archivist.blacklist): if contains_profanity(content, threshold=0.01, custom_words=Archivist.blacklist):
info(f"Not archiving {url} due to profanity in content") l.info(f"Not archiving {url} due to profanity in content")
return None return None
try: try:
markdown_path, relative_path = assemble_archive_path(filename=readable_title, extension=".md") markdown_path, relative_path = assemble_archive_path(filename=readable_title, extension=".md")
except Exception as e: except Exception as e:
warn(f"Failed to assemble archive path for {url}: {str(e)}") l.warning(f"Failed to assemble archive path for {url}: {str(e)}")
return None return None
markdown_content = f"---\n" markdown_content = f"---\n"
@ -105,8 +101,8 @@ async def process_archive(
markdown_path.parent.mkdir(parents=True, exist_ok=True) markdown_path.parent.mkdir(parents=True, exist_ok=True)
with open(markdown_path, 'w', encoding=encoding) as md_file: with open(markdown_path, 'w', encoding=encoding) as md_file:
md_file.write(markdown_content) md_file.write(markdown_content)
debug(f"Successfully saved to {markdown_path}") l.debug(f"Successfully saved to {markdown_path}")
return markdown_path return markdown_path
except Exception as e: except Exception as e:
warn(f"Failed to write markdown file: {str(e)}") l.warning(f"Failed to write markdown file: {str(e)}")
return None return None

View file

@ -13,15 +13,10 @@ from fastapi import APIRouter, HTTPException, Form, UploadFile, File, Background
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from typing import Optional from typing import Optional
from sijapi import L, ASR_DIR, WHISPER_CPP_MODELS, WHISPER_CPP_DIR, MAX_CPU_CORES from sijapi import Sys, ASR_DIR, WHISPER_CPP_MODELS, WHISPER_CPP_DIR
from sijapi.logs import get_logger
l = get_logger(__name__)
asr = APIRouter() asr = APIRouter()
logger = L.get_module_logger("asr")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
transcription_results = {} transcription_results = {}
class TranscribeParams(BaseModel): class TranscribeParams(BaseModel):
@ -84,13 +79,13 @@ async def transcribe_endpoint(
async def transcribe_audio(file_path, params: TranscribeParams): async def transcribe_audio(file_path, params: TranscribeParams):
debug(f"Transcribing audio file from {file_path}...") l.debug(f"Transcribing audio file from {file_path}...")
file_path = await convert_to_wav(file_path) file_path = await convert_to_wav(file_path)
model = params.model if params.model in WHISPER_CPP_MODELS else 'small' model = params.model if params.model in WHISPER_CPP_MODELS else 'small'
model_path = WHISPER_CPP_DIR / 'models' / f'ggml-{model}.bin' model_path = WHISPER_CPP_DIR / 'models' / f'ggml-{model}.bin'
command = [str(WHISPER_CPP_DIR / 'build' / 'bin' / 'main')] command = [str(WHISPER_CPP_DIR / 'build' / 'bin' / 'main')]
command.extend(['-m', str(model_path)]) command.extend(['-m', str(model_path)])
command.extend(['-t', str(max(1, min(params.threads or MAX_CPU_CORES, MAX_CPU_CORES)))]) command.extend(['-t', str(max(1, min(params.threads or Sys.MAX_CPU_CORES, Sys.MAX_CPU_CORES)))])
command.extend(['-np']) command.extend(['-np'])
if params.split_on_word: if params.split_on_word:
@ -121,11 +116,11 @@ async def transcribe_audio(file_path, params: TranscribeParams):
command.extend(['--dtw', params.dtw]) command.extend(['--dtw', params.dtw])
command.extend(['-f', file_path]) command.extend(['-f', file_path])
debug(f"Command: {command}") l.debug(f"Command: {command}")
# Create a unique ID for this transcription job # Create a unique ID for this transcription job
job_id = str(uuid.uuid4()) job_id = str(uuid.uuid4())
debug(f"Created job ID: {job_id}") l.debug(f"Created job ID: {job_id}")
# Store the job status # Store the job status
transcription_results[job_id] = {"status": "processing", "result": None} transcription_results[job_id] = {"status": "processing", "result": None}
@ -137,20 +132,20 @@ async def transcribe_audio(file_path, params: TranscribeParams):
poll_interval = 10 # 10 seconds poll_interval = 10 # 10 seconds
start_time = asyncio.get_event_loop().time() start_time = asyncio.get_event_loop().time()
debug(f"Starting to poll for job {job_id}") l.debug(f"Starting to poll for job {job_id}")
try: try:
while asyncio.get_event_loop().time() - start_time < max_wait_time: while asyncio.get_event_loop().time() - start_time < max_wait_time:
job_status = transcription_results.get(job_id, {}) job_status = transcription_results.get(job_id, {})
debug(f"Current status for job {job_id}: {job_status['status']}") l.debug(f"Current status for job {job_id}: {job_status['status']}")
if job_status["status"] == "completed": if job_status["status"] == "completed":
info(f"Transcription completed for job {job_id}") l.info(f"Transcription completed for job {job_id}")
return job_id # This is the only change return job_id # This is the only change
elif job_status["status"] == "failed": elif job_status["status"] == "failed":
err(f"Transcription failed for job {job_id}: {job_status.get('error', 'Unknown error')}") l.error(f"Transcription failed for job {job_id}: {job_status.get('error', 'Unknown error')}")
raise Exception(f"Transcription failed: {job_status.get('error', 'Unknown error')}") raise Exception(f"Transcription failed: {job_status.get('error', 'Unknown error')}")
await asyncio.sleep(poll_interval) await asyncio.sleep(poll_interval)
err(f"Transcription timed out for job {job_id}") l.error(f"Transcription timed out for job {job_id}")
raise TimeoutError("Transcription timed out") raise TimeoutError("Transcription timed out")
finally: finally:
# Ensure the task is cancelled if we exit the loop # Ensure the task is cancelled if we exit the loop
@ -160,20 +155,20 @@ async def transcribe_audio(file_path, params: TranscribeParams):
async def process_transcription(command, file_path, job_id):
    """Run one transcription job and record its outcome in ``transcription_results``.

    Awaits ``run_transcription(command, file_path)`` and stores either
    ``{"status": "completed", "result": ...}`` or
    ``{"status": "failed", "error": ...}`` under ``job_id``. The file at
    ``file_path`` is deleted afterwards regardless of the outcome.

    Args:
        command: Argument list forwarded to ``run_transcription``.
        file_path: Path of the audio file being transcribed; removed when the job ends.
        job_id: Key under which the job's status is stored.
    """
    try:
        l.debug(f"Starting transcription process for job {job_id}")
        result = await run_transcription(command, file_path)
        transcription_results[job_id] = {"status": "completed", "result": result}
        l.debug(f"Transcription completed for job {job_id}")
    except Exception as e:
        l.error(f"Transcription failed for job {job_id}: {str(e)}")
        transcription_results[job_id] = {"status": "failed", "error": str(e)}
    finally:
        # Clean up the temporary file. The job status is already recorded at this
        # point, so a failed delete is logged instead of being raised out of the task.
        try:
            os.remove(file_path)
        except OSError as cleanup_error:
            l.warning(f"Could not remove temporary file for job {job_id}: {cleanup_error}")
        else:
            l.debug(f"Cleaned up temporary file for job {job_id}")
async def run_transcription(command, file_path): async def run_transcription(command, file_path):
debug(f"Running transcription command: {' '.join(command)}") l.debug(f"Running transcription command: {' '.join(command)}")
proc = await asyncio.create_subprocess_exec( proc = await asyncio.create_subprocess_exec(
*command, *command,
stdout=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE,
@ -182,9 +177,9 @@ async def run_transcription(command, file_path):
stdout, stderr = await proc.communicate() stdout, stderr = await proc.communicate()
if proc.returncode != 0: if proc.returncode != 0:
error_message = f"Error running command: {stderr.decode()}" error_message = f"Error running command: {stderr.decode()}"
err(error_message) l.error(error_message)
raise Exception(error_message) raise Exception(error_message)
debug("Transcription command completed successfully") l.debug("Transcription command completed successfully")
return stdout.decode().strip() return stdout.decode().strip()
async def convert_to_wav(file_path: str): async def convert_to_wav(file_path: str):

View file

@ -17,45 +17,42 @@ import threading
from typing import Dict, List, Any from typing import Dict, List, Any
from datetime import datetime, timedelta from datetime import datetime, timedelta
from sijapi import L, ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH from sijapi import ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
from sijapi.routers import gis from sijapi.routers import gis
from sijapi.logs import get_logger
l = get_logger(__name__)
cal = APIRouter() cal = APIRouter()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token") oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token")
timeout = httpx.Timeout(12) timeout = httpx.Timeout(12)
logger = L.get_module_logger("cal")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
if MS365_TOGGLE is True: if MS365_TOGGLE is True:
crit(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.") l.critical(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.")
@cal.get("/o365/login")
async def login():
    """Redirect the caller to the Microsoft 365 OAuth2 authorization endpoint.

    Returns:
        A ``RedirectResponse`` to ``{MS365_AUTHORITY_URL}/oauth2/v2.0/authorize``
        carrying the client id, redirect URI and requested scopes as query
        parameters.

    Raises:
        HTTPException: 500 when ``MS365_SCOPE`` is empty.
    """
    # Function-scope import so this block does not depend on the file's import header.
    from urllib.parse import urlencode

    l.debug("Received request to /o365/login")
    l.debug(f"SCOPE: {MS365_SCOPE}")
    if not MS365_SCOPE:
        l.error("No scopes defined for authorization.")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="No scopes defined for authorization."
        )

    # Percent-encode every parameter instead of interpolating raw values, so a
    # redirect URI or scope containing reserved characters ('&', '=', '#', ...)
    # cannot corrupt the query string.
    # NOTE(review): assumes MS365_REDIRECT_PATH is stored un-encoded - confirm.
    query = urlencode({
        "client_id": MS365_CLIENT_ID,
        "response_type": "code",
        "redirect_uri": MS365_REDIRECT_PATH,
        # Scopes are joined with spaces; urlencode renders each space as '+',
        # matching the separator the hand-built URL used.
        "scope": " ".join(MS365_SCOPE),
    })
    authorization_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/authorize?{query}"
    l.info(f"Redirecting to authorization URL: {authorization_url}")
    return RedirectResponse(authorization_url)
@cal.get("/o365/oauth_redirect") @cal.get("/o365/oauth_redirect")
async def oauth_redirect(code: str = None, error: str = None): async def oauth_redirect(code: str = None, error: str = None):
debug(f"Received request to /o365/oauth_redirect") l.debug(f"Received request to /o365/oauth_redirect")
if error: if error:
err(f"OAuth2 Error: {error}") l.error(f"OAuth2 Error: {error}")
raise HTTPException( raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, detail="OAuth2 Error" status_code=status.HTTP_400_BAD_REQUEST, detail="OAuth2 Error"
) )
info(f"Requesting token with authorization code: {code}") l.info(f"Requesting token with authorization code: {code}")
token_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token" token_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token"
data = { data = {
"client_id": MS365_CLIENT_ID, "client_id": MS365_CLIENT_ID,
@ -66,15 +63,15 @@ if MS365_TOGGLE is True:
} }
async with httpx.AsyncClient(timeout=timeout) as client: async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.post(token_url, data=data) response = await client.post(token_url, data=data)
debug(f"Token endpoint response status code: {response.status_code}") l.debug(f"Token endpoint response status code: {response.status_code}")
info(f"Token endpoint response text: {response.text}") l.info(f"Token endpoint response text: {response.text}")
result = response.json() result = response.json()
if 'access_token' in result: if 'access_token' in result:
await save_token(result) await save_token(result)
info("Access token obtained successfully") l.info("Access token obtained successfully")
return {"message": "Access token stored successfully"} return {"message": "Access token stored successfully"}
else: else:
crit(f"Failed to obtain access token. Response: {result}") l.critical(f"Failed to obtain access token. Response: {result}")
raise HTTPException( raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to obtain access token" detail="Failed to obtain access token"
@ -82,7 +79,7 @@ if MS365_TOGGLE is True:
@cal.get("/o365/me") @cal.get("/o365/me")
async def read_items(): async def read_items():
debug(f"Received request to /o365/me") l.debug(f"Received request to /o365/me")
token = await load_token() token = await load_token()
if not token: if not token:
raise HTTPException( raise HTTPException(
@ -95,10 +92,10 @@ if MS365_TOGGLE is True:
response = await client.get(graph_url, headers=headers) response = await client.get(graph_url, headers=headers)
if response.status_code == 200: if response.status_code == 200:
user = response.json() user = response.json()
info(f"User retrieved: {user}") l.info(f"User retrieved: {user}")
return user return user
else: else:
err("Invalid or expired token") l.error("Invalid or expired token")
raise HTTPException( raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid or expired token", detail="Invalid or expired token",
@ -106,14 +103,14 @@ if MS365_TOGGLE is True:
) )
async def save_token(token):
    """Stamp ``token`` with an absolute expiry and write it to ``MS365_TOKEN_PATH`` as JSON.

    ``token["expires_at"]`` is set in place to the current Unix time plus
    ``token["expires_in"]`` before the mapping is serialized. Saving is
    best-effort: any failure is logged and swallowed, and nothing is returned.

    Args:
        token: Token response mapping; must contain ``expires_in``.
    """
    try:
        # Log field names only: the values include the access/refresh tokens,
        # which must not be written to log files.
        l.debug(f"Saving token with fields: {list(token)}")
        token["expires_at"] = int(time.time()) + token["expires_in"]
        with open(MS365_TOKEN_PATH, "w") as file:
            json.dump(token, file)
        l.debug(f"Saved token to {MS365_TOKEN_PATH}")
    except Exception as e:
        l.error(f"Failed to save token: {e}")
async def load_token(): async def load_token():
if os.path.exists(MS365_TOKEN_PATH): if os.path.exists(MS365_TOKEN_PATH):
@ -121,21 +118,21 @@ if MS365_TOGGLE is True:
with open(MS365_TOKEN_PATH, "r") as file: with open(MS365_TOKEN_PATH, "r") as file:
token = json.load(file) token = json.load(file)
except FileNotFoundError: except FileNotFoundError:
err("Token file not found.") l.error("Token file not found.")
return None return None
except json.JSONDecodeError: except json.JSONDecodeError:
err("Failed to decode token JSON") l.error("Failed to decode token JSON")
return None return None
if token: if token:
token["expires_at"] = int(time.time()) + token["expires_in"] token["expires_at"] = int(time.time()) + token["expires_in"]
debug(f"Loaded token: {token}") # Add this line to log the loaded token l.debug(f"Loaded token: {token}") # Add this line to log the loaded token
return token return token
else: else:
debug("No token found.") l.debug("No token found.")
return None return None
else: else:
err(f"No file found at {MS365_TOKEN_PATH}") l.error(f"No file found at {MS365_TOKEN_PATH}")
return None return None
@ -165,39 +162,39 @@ if MS365_TOGGLE is True:
response = await client.post(token_url, data=data) response = await client.post(token_url, data=data)
result = response.json() result = response.json()
if "access_token" in result: if "access_token" in result:
info("Access token refreshed successfully") l.info("Access token refreshed successfully")
return result return result
else: else:
err("Failed to refresh access token") l.error("Failed to refresh access token")
return None return None
async def refresh_token(): async def refresh_token():
token = await load_token() token = await load_token()
if not token: if not token:
err("No token found in storage") l.error("No token found in storage")
raise HTTPException( raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, status_code=status.HTTP_401_UNAUTHORIZED,
detail="No token found", detail="No token found",
) )
if 'refresh_token' not in token: if 'refresh_token' not in token:
err("Refresh token not found in the loaded token") l.error("Refresh token not found in the loaded token")
raise HTTPException( raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, status_code=status.HTTP_401_UNAUTHORIZED,
detail="Refresh token not found", detail="Refresh token not found",
) )
refresh_token = token['refresh_token'] refresh_token = token['refresh_token']
debug("Found refresh token, attempting to refresh access token") l.debug("Found refresh token, attempting to refresh access token")
new_token = await get_new_token_with_refresh_token(refresh_token) new_token = await get_new_token_with_refresh_token(refresh_token)
if new_token: if new_token:
await save_token(new_token) await save_token(new_token)
info("Token refreshed and saved successfully") l.info("Token refreshed and saved successfully")
else: else:
err("Failed to refresh token") l.error("Failed to refresh token")
raise HTTPException( raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to refresh token", detail="Failed to refresh token",
@ -218,7 +215,7 @@ if ICAL_TOGGLE is True:
calendar_identifiers = { calendar_identifiers = {
calendar.title() : calendar.calendarIdentifier() for calendar in all_calendars calendar.title() : calendar.calendarIdentifier() for calendar in all_calendars
} }
debug(f"{calendar_identifiers}") l.debug(f"{calendar_identifiers}")
return calendar_identifiers return calendar_identifiers
def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]: def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]:
@ -230,7 +227,7 @@ if ICAL_TOGGLE is True:
def completion_handler(granted, error): def completion_handler(granted, error):
if error is not None: if error is not None:
err(f"Error: {error}") l.error(f"Error: {error}")
access_granted.append(granted) access_granted.append(granted)
with access_granted_condition: with access_granted_condition:
access_granted_condition.notify() access_granted_condition.notify()
@ -242,11 +239,11 @@ if ICAL_TOGGLE is True:
if access_granted: if access_granted:
return access_granted[0] return access_granted[0]
else: else:
err("Request access timed out or failed") l.error("Request access timed out or failed")
return False return False
if not request_access(): if not request_access():
err("Access to calendar data was not granted") l.error("Access to calendar data was not granted")
return [] return []
ns_start_date = datetime_to_nsdate(start_date) ns_start_date = datetime_to_nsdate(start_date)
@ -336,7 +333,7 @@ async def get_ms365_events(start_date: datetime, end_date: datetime):
response = await client.get(graph_url, headers=headers) response = await client.get(graph_url, headers=headers)
if response.status_code != 200: if response.status_code != 200:
err("Failed to retrieve events from Microsoft 365") l.error("Failed to retrieve events from Microsoft 365")
raise HTTPException( raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to retrieve events", detail="Failed to retrieve events",
@ -352,33 +349,33 @@ async def parse_calendar_for_day(range_start: datetime, range_end: datetime, eve
event_list = [] event_list = []
for event in events: for event in events:
info(f"Event: {event}") l.info(f"Event: {event}")
start_str = event.get('start') start_str = event.get('start')
end_str = event.get('end') end_str = event.get('end')
if isinstance(start_str, dict): if isinstance(start_str, dict):
start_str = start_str.get('dateTime') start_str = start_str.get('dateTime')
else: else:
info(f"Start date string not a dict") l.info(f"Start date string not a dict")
if isinstance(end_str, dict): if isinstance(end_str, dict):
end_str = end_str.get('dateTime') end_str = end_str.get('dateTime')
else: else:
info(f"End date string not a dict") l.info(f"End date string not a dict")
try: try:
start_date = await gis.dt(start_str) if start_str else None start_date = await gis.dt(start_str) if start_str else None
except (ValueError, TypeError) as e: except (ValueError, TypeError) as e:
err(f"Invalid start date format: {start_str}, error: {e}") l.error(f"Invalid start date format: {start_str}, error: {e}")
continue continue
try: try:
end_date = await gis.dt(end_str) if end_str else None end_date = await gis.dt(end_str) if end_str else None
except (ValueError, TypeError) as e: except (ValueError, TypeError) as e:
err(f"Invalid end date format: {end_str}, error: {e}") l.error(f"Invalid end date format: {end_str}, error: {e}")
continue continue
debug(f"Comparing {start_date} with range {range_start} to {range_end}") l.debug(f"Comparing {start_date} with range {range_start} to {range_end}")
if start_date: if start_date:
# Ensure start_date is timezone-aware # Ensure start_date is timezone-aware
@ -410,11 +407,11 @@ async def parse_calendar_for_day(range_start: datetime, range_end: datetime, eve
"busy": event.get('showAs', '') in ['busy', 'tentative'], "busy": event.get('showAs', '') in ['busy', 'tentative'],
"all_day": event.get('isAllDay', False) "all_day": event.get('isAllDay', False)
} }
info(f"Event_data: {event_data}") l.info(f"Event_data: {event_data}")
event_list.append(event_data) event_list.append(event_data)
else: else:
debug(f"Event outside of specified range: {start_date} to {end_date}") l.debug(f"Event outside of specified range: {start_date} to {end_date}")
else: else:
err(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}") l.error(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}")
return event_list return event_list

View file

@ -7,19 +7,15 @@ from fastapi import APIRouter, HTTPException
from pydantic import BaseModel from pydantic import BaseModel
from fastapi.responses import PlainTextResponse, JSONResponse from fastapi.responses import PlainTextResponse, JSONResponse
from typing import Optional from typing import Optional
from sijapi import L, CF_TOKEN, CADDYFILE_PATH, CF_API_BASE_URL, CF_IP
import httpx import httpx
import asyncio import asyncio
from asyncio import sleep from asyncio import sleep
import os import os
from sijapi import CF_TOKEN, CADDYFILE_PATH, CF_API_BASE_URL, CF_IP
from sijapi.logs import get_logger
l = get_logger(__name__)
cf = APIRouter() cf = APIRouter()
logger = L.get_module_logger("cal")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
class DNSRecordRequest(BaseModel): class DNSRecordRequest(BaseModel):
full_domain: str full_domain: str
@ -77,7 +73,7 @@ async def retry_request(url, headers, max_retries=5, backoff_factor=1):
response.raise_for_status() response.raise_for_status()
return response return response
except (httpx.HTTPError, httpx.ConnectTimeout) as e: except (httpx.HTTPError, httpx.ConnectTimeout) as e:
err(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...") l.error(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
await sleep(backoff_factor * (2 ** retry)) await sleep(backoff_factor * (2 ** retry))
raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request") raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request")

View file

@ -22,19 +22,15 @@ import ssl
import yaml import yaml
from typing import List, Dict, Optional, Set from typing import List, Dict, Optional, Set
from datetime import datetime as dt_datetime from datetime import datetime as dt_datetime
from sijapi import L, Dir, EMAIL_CONFIG, EMAIL_LOGS from sijapi import Dir, Tts, EMAIL_CONFIG, EMAIL_LOGS
from sijapi.routers import gis, img, tts, llm from sijapi.routers import gis, img, tts, llm
from sijapi.utilities import clean_text, assemble_journal_path, extract_text, prefix_lines from sijapi.utilities import clean_text, assemble_journal_path, extract_text, prefix_lines
from sijapi.classes import EmailAccount, IMAPConfig, SMTPConfig, IncomingEmail, EmailContact, AutoResponder from sijapi.classes import EmailAccount, IMAPConfig, SMTPConfig, IncomingEmail, EmailContact, AutoResponder
from sijapi.logs import get_logger
l = get_logger(__name__)
email = APIRouter() email = APIRouter()
logger = L.get_module_logger("email")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
def load_email_accounts(yaml_path: str) -> List[EmailAccount]: def load_email_accounts(yaml_path: str) -> List[EmailAccount]:
with open(yaml_path, 'r') as file: with open(yaml_path, 'r') as file:
@ -60,36 +56,36 @@ def get_smtp_connection(autoresponder: AutoResponder):
if smtp_config.encryption == 'SSL': if smtp_config.encryption == 'SSL':
try: try:
debug(f"Attempting SSL connection to {smtp_config.host}:{smtp_config.port}") l.debug(f"Attempting SSL connection to {smtp_config.host}:{smtp_config.port}")
return SMTP_SSL(smtp_config.host, smtp_config.port, context=context) return SMTP_SSL(smtp_config.host, smtp_config.port, context=context)
except ssl.SSLError as e: except ssl.SSLError as e:
err(f"SSL connection failed: {str(e)}") l.error(f"SSL connection failed: {str(e)}")
# If SSL fails, try TLS # If SSL fails, try TLS
try: try:
debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}") l.debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
smtp = SMTP(smtp_config.host, smtp_config.port) smtp = SMTP(smtp_config.host, smtp_config.port)
smtp.starttls(context=context) smtp.starttls(context=context)
return smtp return smtp
except Exception as e: except Exception as e:
err(f"STARTTLS connection failed: {str(e)}") l.error(f"STARTTLS connection failed: {str(e)}")
raise raise
elif smtp_config.encryption == 'STARTTLS': elif smtp_config.encryption == 'STARTTLS':
try: try:
debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}") l.debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
smtp = SMTP(smtp_config.host, smtp_config.port) smtp = SMTP(smtp_config.host, smtp_config.port)
smtp.starttls(context=context) smtp.starttls(context=context)
return smtp return smtp
except Exception as e: except Exception as e:
err(f"STARTTLS connection failed: {str(e)}") l.error(f"STARTTLS connection failed: {str(e)}")
raise raise
else: else:
try: try:
debug(f"Attempting unencrypted connection to {smtp_config.host}:{smtp_config.port}") l.debug(f"Attempting unencrypted connection to {smtp_config.host}:{smtp_config.port}")
return SMTP(smtp_config.host, smtp_config.port) return SMTP(smtp_config.host, smtp_config.port)
except Exception as e: except Exception as e:
err(f"Unencrypted connection failed: {str(e)}") l.error(f"Unencrypted connection failed: {str(e)}")
raise raise
async def send_response(to_email: str, subject: str, body: str, profile: AutoResponder, image_attachment: Path = None) -> bool: async def send_response(to_email: str, subject: str, body: str, profile: AutoResponder, image_attachment: Path = None) -> bool:
@ -106,20 +102,20 @@ async def send_response(to_email: str, subject: str, body: str, profile: AutoRes
img = MIMEImage(img_file.read(), name=os.path.basename(image_attachment)) img = MIMEImage(img_file.read(), name=os.path.basename(image_attachment))
message.attach(img) message.attach(img)
debug(f"Sending auto-response to {to_email} concerning {subject} from account {profile.name}...") l.debug(f"Sending auto-response to {to_email} concerning {subject} from account {profile.name}...")
server = get_smtp_connection(profile) server = get_smtp_connection(profile)
debug(f"SMTP connection established: {type(server)}") l.debug(f"SMTP connection established: {type(server)}")
server.login(profile.smtp.username, profile.smtp.password) server.login(profile.smtp.username, profile.smtp.password)
server.send_message(message) server.send_message(message)
info(f"Auto-response sent to {to_email} concerning {subject} from account {profile.name}!") l.info(f"Auto-response sent to {to_email} concerning {subject} from account {profile.name}!")
return True return True
except Exception as e: except Exception as e:
err(f"Error in preparing/sending auto-response from account {profile.name}: {str(e)}") l.error(f"Error in preparing/sending auto-response from account {profile.name}: {str(e)}")
err(f"SMTP details - Host: {profile.smtp.host}, Port: {profile.smtp.port}, Encryption: {profile.smtp.encryption}") l.error(f"SMTP details - Host: {profile.smtp.host}, Port: {profile.smtp.port}, Encryption: {profile.smtp.encryption}")
err(traceback.format_exc()) l.error(traceback.format_exc())
return False return False
finally: finally:
@ -127,7 +123,7 @@ async def send_response(to_email: str, subject: str, body: str, profile: AutoRes
try: try:
server.quit() server.quit()
except Exception as e: except Exception as e:
err(f"Error closing SMTP connection: {str(e)}") l.error(f"Error closing SMTP connection: {str(e)}")
def clean_email_content(html_content): def clean_email_content(html_content):
@ -163,10 +159,10 @@ async def process_account_archival(account: EmailAccount):
while True: while True:
try: try:
processed_uids = await load_processed_uids(summarized_log) processed_uids = await load_processed_uids(summarized_log)
debug(f"{len(processed_uids)} emails marked as already summarized are being ignored.") l.debug(f"{len(processed_uids)} emails marked as already summarized are being ignored.")
with get_imap_connection(account) as inbox: with get_imap_connection(account) as inbox:
unread_messages = inbox.messages(unread=True) unread_messages = inbox.messages(unread=True)
debug(f"There are {len(unread_messages)} unread messages.") l.debug(f"There are {len(unread_messages)} unread messages.")
for uid, message in unread_messages: for uid, message in unread_messages:
uid_str = uid.decode() if isinstance(uid, bytes) else str(uid) uid_str = uid.decode() if isinstance(uid, bytes) else str(uid)
if uid_str not in processed_uids: if uid_str not in processed_uids:
@ -186,13 +182,13 @@ async def process_account_archival(account: EmailAccount):
save_success = await save_email(md_path, md_content) save_success = await save_email(md_path, md_content)
if save_success: if save_success:
await save_processed_uid(summarized_log, account.name, uid_str) await save_processed_uid(summarized_log, account.name, uid_str)
info(f"Summarized email: {uid_str}") l.info(f"Summarized email: {uid_str}")
else: else:
warn(f"Failed to summarize {this_email.subject}") l.warning(f"Failed to summarize {this_email.subject}")
# else: # else:
# debug(f"Skipping {uid_str} because it was already processed.") # l.debug(f"Skipping {uid_str} because it was already processed.")
except Exception as e: except Exception as e:
err(f"An error occurred during summarization for account {account.name}: {e}") l.error(f"An error occurred during summarization for account {account.name}: {e}")
await asyncio.sleep(account.refresh) await asyncio.sleep(account.refresh)
@ -240,7 +236,7 @@ tags:
return markdown_content return markdown_content
except Exception as e: except Exception as e:
err(f"Exception: {e}") l.error(f"Exception: {e}")
return False return False
@ -249,15 +245,15 @@ async def save_email(md_path, md_content):
with open(md_path, 'w', encoding='utf-8') as md_file: with open(md_path, 'w', encoding='utf-8') as md_file:
md_file.write(md_content) md_file.write(md_content)
debug(f"Saved markdown to {md_path}") l.debug(f"Saved markdown to {md_path}")
return True return True
except Exception as e: except Exception as e:
err(f"Failed to save email: {e}") l.error(f"Failed to save email: {e}")
return False return False
def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount) -> List[AutoResponder]: def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount) -> List[AutoResponder]:
debug(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"") l.debug(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"")
def matches_list(item: str, this_email: IncomingEmail) -> bool: def matches_list(item: str, this_email: IncomingEmail) -> bool:
if '@' in item: if '@' in item:
return item in this_email.sender return item in this_email.sender
@ -268,12 +264,12 @@ def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount
whitelist_match = not profile.whitelist or any(matches_list(item, this_email) for item in profile.whitelist) whitelist_match = not profile.whitelist or any(matches_list(item, this_email) for item in profile.whitelist)
blacklist_match = any(matches_list(item, this_email) for item in profile.blacklist) blacklist_match = any(matches_list(item, this_email) for item in profile.blacklist)
if whitelist_match and not blacklist_match: if whitelist_match and not blacklist_match:
debug(f"We have a match for {whitelist_match} and no blacklist matches.") l.debug(f"We have a match for {whitelist_match} and no blacklist matches.")
matching_profiles.append(profile) matching_profiles.append(profile)
elif whitelist_match and blacklist_match: elif whitelist_match and blacklist_match:
debug(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}") l.debug(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}")
else: else:
debug(f"No whitelist or blacklist matches.") l.debug(f"No whitelist or blacklist matches.")
return matching_profiles return matching_profiles
@ -284,31 +280,31 @@ async def process_account_autoresponding(account: EmailAccount):
while True: while True:
try: try:
processed_uids = await load_processed_uids(EMAIL_AUTORESPONSE_LOG) processed_uids = await load_processed_uids(EMAIL_AUTORESPONSE_LOG)
debug(f"{len(processed_uids)} emails marked as already responded to are being ignored.") l.debug(f"{len(processed_uids)} emails marked as already responded to are being ignored.")
with get_imap_connection(account) as inbox: with get_imap_connection(account) as inbox:
unread_messages = inbox.messages(unread=True) unread_messages = inbox.messages(unread=True)
debug(f"There are {len(unread_messages)} unread messages.") l.debug(f"There are {len(unread_messages)} unread messages.")
for uid, message in unread_messages: for uid, message in unread_messages:
uid_str = uid.decode() if isinstance(uid, bytes) else str(uid) uid_str = uid.decode() if isinstance(uid, bytes) else str(uid)
if uid_str not in processed_uids: if uid_str not in processed_uids:
await autorespond_single_email(message, uid_str, account, EMAIL_AUTORESPONSE_LOG) await autorespond_single_email(message, uid_str, account, EMAIL_AUTORESPONSE_LOG)
else: else:
debug(f"Skipping {uid_str} because it was already processed.") l.debug(f"Skipping {uid_str} because it was already processed.")
except Exception as e: except Exception as e:
err(f"An error occurred during auto-responding for account {account.name}: {e}") l.error(f"An error occurred during auto-responding for account {account.name}: {e}")
await asyncio.sleep(account.refresh) await asyncio.sleep(account.refresh)
async def autorespond_single_email(message, uid_str: str, account: EmailAccount, log_file: Path): async def autorespond_single_email(message, uid_str: str, account: EmailAccount, log_file: Path):
this_email = await create_incoming_email(message) this_email = await create_incoming_email(message)
debug(f"Evaluating {this_email.subject} for autoresponse-worthiness...") l.debug(f"Evaluating {this_email.subject} for autoresponse-worthiness...")
matching_profiles = get_matching_autoresponders(this_email, account) matching_profiles = get_matching_autoresponders(this_email, account)
debug(f"Matching profiles: {matching_profiles}") l.debug(f"Matching profiles: {matching_profiles}")
for profile in matching_profiles: for profile in matching_profiles:
response_body = await generate_response(this_email, profile, account) response_body = await generate_response(this_email, profile, account)
@ -318,16 +314,16 @@ async def autorespond_single_email(message, uid_str: str, account: EmailAccount,
jpg_path = await img.workflow(profile.image_prompt, earlyout=False, downscale_to_fit=True) if profile.image_prompt else None jpg_path = await img.workflow(profile.image_prompt, earlyout=False, downscale_to_fit=True) if profile.image_prompt else None
success = await send_response(this_email.sender, subject, response_body, profile, jpg_path) success = await send_response(this_email.sender, subject, response_body, profile, jpg_path)
if success: if success:
warn(f"Auto-responded to email: {this_email.subject}") l.warning(f"Auto-responded to email: {this_email.subject}")
await save_processed_uid(log_file, account.name, uid_str) await save_processed_uid(log_file, account.name, uid_str)
else: else:
warn(f"Failed to send auto-response to {this_email.subject}") l.warning(f"Failed to send auto-response to {this_email.subject}")
else: else:
warn(f"Unable to generate auto-response for {this_email.subject}") l.warning(f"Unable to generate auto-response for {this_email.subject}")
async def generate_response(this_email: IncomingEmail, profile: AutoResponder, account: EmailAccount) -> Optional[str]: async def generate_response(this_email: IncomingEmail, profile: AutoResponder, account: EmailAccount) -> Optional[str]:
info(f"Generating auto-response to {this_email.subject} with profile: {profile.name}") l.info(f"Generating auto-response to {this_email.subject} with profile: {profile.name}")
now = await gis.dt(dt_datetime.now()) now = await gis.dt(dt_datetime.now())
then = await gis.dt(this_email.datetime_received) then = await gis.dt(this_email.datetime_received)
@ -345,7 +341,7 @@ Respond on behalf of {account.fullname}, who is unable to respond personally bec
try: try:
response = await llm.query_ollama(usr_prompt, sys_prompt, profile.ollama_model, 400) response = await llm.query_ollama(usr_prompt, sys_prompt, profile.ollama_model, 400)
debug(f"query_ollama response: {response}") l.debug(f"query_ollama response: {response}")
if isinstance(response, dict) and "message" in response and "content" in response["message"]: if isinstance(response, dict) and "message" in response and "content" in response["message"]:
response = response["message"]["content"] response = response["message"]["content"]
@ -353,7 +349,7 @@ Respond on behalf of {account.fullname}, who is unable to respond personally bec
return response + "\n\n" return response + "\n\n"
except Exception as e: except Exception as e:
err(f"Error generating auto-response: {str(e)}") l.error(f"Error generating auto-response: {str(e)}")
return None return None

View file

@ -33,29 +33,25 @@ from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
from sijapi import ( from sijapi import (
L, API, Serve, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY, Sys, Serve, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY,
COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR, COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR,
MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, IMG_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, IMG_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR
) )
from sijapi.classes import WidgetUpdate from sijapi.classes import WidgetUpdate
from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path
from sijapi.routers import gis from sijapi.routers import gis
from sijapi.logs import get_logger
l = get_logger(__name__)
forward = APIRouter() forward = APIRouter()
logger = L.get_module_logger("email")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
async def forward_traffic(reader: asyncio.StreamReader, writer: asyncio.StreamWriter, destination: str): async def forward_traffic(reader: asyncio.StreamReader, writer: asyncio.StreamWriter, destination: str):
try: try:
dest_host, dest_port = destination.split(':') dest_host, dest_port = destination.split(':')
dest_port = int(dest_port) dest_port = int(dest_port)
except ValueError: except ValueError:
warn(f"Invalid destination format: {destination}. Expected 'host:port'.") l.warning(f"Invalid destination format: {destination}. Expected 'host:port'.")
writer.close() writer.close()
await writer.wait_closed() await writer.wait_closed()
return return
@ -63,7 +59,7 @@ async def forward_traffic(reader: asyncio.StreamReader, writer: asyncio.StreamWr
try: try:
dest_reader, dest_writer = await asyncio.open_connection(dest_host, dest_port) dest_reader, dest_writer = await asyncio.open_connection(dest_host, dest_port)
except Exception as e: except Exception as e:
warn(f"Failed to connect to destination {destination}: {str(e)}") l.warning(f"Failed to connect to destination {destination}: {str(e)}")
writer.close() writer.close()
await writer.wait_closed() await writer.wait_closed()
return return
@ -77,7 +73,7 @@ async def forward_traffic(reader: asyncio.StreamReader, writer: asyncio.StreamWr
dst.write(data) dst.write(data)
await dst.drain() await dst.drain()
except Exception as e: except Exception as e:
warn(f"Error in forwarding: {str(e)}") l.warning(f"Error in forwarding: {str(e)}")
finally: finally:
dst.close() dst.close()
await dst.wait_closed() await dst.wait_closed()
@ -110,7 +106,7 @@ async def start_port_forwarding():
for rule in Serve.forwarding_rules: for rule in Serve.forwarding_rules:
asyncio.create_task(start_server(rule.source, rule.destination)) asyncio.create_task(start_server(rule.source, rule.destination))
else: else:
warn("No forwarding rules found in the configuration.") l.warning("No forwarding rules found in the configuration.")
@forward.get("/forward_status") @forward.get("/forward_status")

View file

@ -11,6 +11,8 @@ import json
import yaml import yaml
import jwt import jwt
from sijapi import GHOST_API_KEY, GHOST_API_URL from sijapi import GHOST_API_KEY, GHOST_API_URL
from sijapi.logs import get_logger
l = get_logger(__name__)
ghost = APIRouter() ghost = APIRouter()

View file

@ -16,17 +16,14 @@ from folium.plugins import Fullscreen, MiniMap, MousePosition, Geocoder, Draw, M
from zoneinfo import ZoneInfo from zoneinfo import ZoneInfo
from dateutil.parser import parse as dateutil_parse from dateutil.parser import parse as dateutil_parse
from typing import Optional, List, Union from typing import Optional, List, Union
from sijapi import L, API, Db, TZ, GEO from sijapi import Sys, Db, TZ, GEO
from sijapi.classes import Location from sijapi.classes import Location
from sijapi.utilities import haversine, assemble_journal_path, json_serial from sijapi.utilities import haversine, assemble_journal_path
from sijapi.serialization import json_dumps
from sijapi.logs import get_logger
l = get_logger(__name__)
gis = APIRouter() gis = APIRouter()
logger = L.get_module_logger("gis")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
async def dt( async def dt(
date_time: Union[str, int, datetime], date_time: Union[str, int, datetime],
@ -36,12 +33,12 @@ async def dt(
# Convert integer (epoch time) to UTC datetime # Convert integer (epoch time) to UTC datetime
if isinstance(date_time, int): if isinstance(date_time, int):
date_time = datetime.fromtimestamp(date_time, tz=timezone.utc) date_time = datetime.fromtimestamp(date_time, tz=timezone.utc)
debug(f"Converted epoch time {date_time} to UTC datetime object.") l.debug(f"Converted epoch time {date_time} to UTC datetime object.")
# Convert string to datetime if necessary # Convert string to datetime if necessary
elif isinstance(date_time, str): elif isinstance(date_time, str):
date_time = dateutil_parse(date_time) date_time = dateutil_parse(date_time)
debug(f"Converted string '{date_time}' to datetime object.") l.debug(f"Converted string '{date_time}' to datetime object.")
if not isinstance(date_time, datetime): if not isinstance(date_time, datetime):
raise ValueError(f"Input must be a string, integer (epoch time), or datetime object. What we received: {date_time}, type {type(date_time)}") raise ValueError(f"Input must be a string, integer (epoch time), or datetime object. What we received: {date_time}, type {type(date_time)}")
@ -49,7 +46,7 @@ async def dt(
# Ensure the datetime is timezone-aware (UTC if not specified) # Ensure the datetime is timezone-aware (UTC if not specified)
if date_time.tzinfo is None: if date_time.tzinfo is None:
date_time = date_time.replace(tzinfo=timezone.utc) date_time = date_time.replace(tzinfo=timezone.utc)
debug("Added UTC timezone to naive datetime.") l.debug("Added UTC timezone to naive datetime.")
# Handle provided timezone # Handle provided timezone
if tz is not None: if tz is not None:
@ -57,12 +54,12 @@ async def dt(
if tz == "local": if tz == "local":
last_loc = await get_timezone_without_timezone(date_time) last_loc = await get_timezone_without_timezone(date_time)
tz = await GEO.tz_at(last_loc.latitude, last_loc.longitude) tz = await GEO.tz_at(last_loc.latitude, last_loc.longitude)
debug(f"Using local timezone: {tz}") l.debug(f"Using local timezone: {tz}")
else: else:
try: try:
tz = ZoneInfo(tz) tz = ZoneInfo(tz)
except Exception as e: except Exception as e:
err(f"Invalid timezone string '{tz}'. Error: {e}") l.error(f"Invalid timezone string '{tz}'. Error: {e}")
raise ValueError(f"Invalid timezone string: {tz}") raise ValueError(f"Invalid timezone string: {tz}")
elif isinstance(tz, ZoneInfo): elif isinstance(tz, ZoneInfo):
pass # tz is already a ZoneInfo object pass # tz is already a ZoneInfo object
@ -71,14 +68,14 @@ async def dt(
# Convert to the provided or determined timezone # Convert to the provided or determined timezone
date_time = date_time.astimezone(tz) date_time = date_time.astimezone(tz)
debug(f"Converted datetime to timezone: {tz}") l.debug(f"Converted datetime to timezone: {tz}")
return date_time return date_time
except ValueError as e: except ValueError as e:
err(f"Error in dt: {e}") l.error(f"Error in dt: {e}")
raise raise
except Exception as e: except Exception as e:
err(f"Unexpected error in dt: {e}") l.error(f"Unexpected error in dt: {e}")
raise ValueError(f"Failed to process datetime: {e}") raise ValueError(f"Failed to process datetime: {e}")
@ -112,12 +109,12 @@ async def get_timezone_without_timezone(date_time):
async def get_last_location() -> Optional[Location]: async def get_last_location() -> Optional[Location]:
query_datetime = datetime.now(TZ) query_datetime = datetime.now(TZ)
debug(f"Query_datetime: {query_datetime}") l.debug(f"Query_datetime: {query_datetime}")
this_location = await fetch_last_location_before(query_datetime) this_location = await fetch_last_location_before(query_datetime)
if this_location: if this_location:
debug(f"location: {this_location}") l.debug(f"location: {this_location}")
return this_location return this_location
return None return None
@ -164,15 +161,15 @@ Generate a heatmap for the given date range and save it as a PNG file using Foli
m.save(str(output_path)) m.save(str(output_path))
info(f"Heatmap saved as PNG: {output_path}") l.info(f"Heatmap saved as PNG: {output_path}")
return output_path return output_path
except Exception as e: except Exception as e:
err(f"Error saving heatmap: {str(e)}") l.error(f"Error saving heatmap: {str(e)}")
raise raise
except Exception as e: except Exception as e:
err(f"Error generating heatmap: {str(e)}") l.error(f"Error generating heatmap: {str(e)}")
raise raise
async def generate_map(start_date: datetime, end_date: datetime, max_points: int): async def generate_map(start_date: datetime, end_date: datetime, max_points: int):
@ -180,7 +177,7 @@ async def generate_map(start_date: datetime, end_date: datetime, max_points: int
if not locations: if not locations:
raise HTTPException(status_code=404, detail="No locations found for the given date range") raise HTTPException(status_code=404, detail="No locations found for the given date range")
info(f"Found {len(locations)} locations for the given date range") l.info(f"Found {len(locations)} locations for the given date range")
if len(locations) > max_points: if len(locations) > max_points:
locations = random.sample(locations, max_points) locations = random.sample(locations, max_points)
@ -291,6 +288,7 @@ map.on(L.Draw.Event.CREATED, function (event) {
return m.get_root().render() return m.get_root().render()
async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int, datetime, None] = None) -> List[Location]: async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int, datetime, None] = None) -> List[Location]:
start_datetime = await dt(start) start_datetime = await dt(start)
if end is None: if end is None:
@ -301,7 +299,7 @@ async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int,
if start_datetime.time() == datetime.min.time() and end_datetime.time() == datetime.min.time(): if start_datetime.time() == datetime.min.time() and end_datetime.time() == datetime.min.time():
end_datetime = await dt(end_datetime.replace(hour=23, minute=59, second=59)) end_datetime = await dt(end_datetime.replace(hour=23, minute=59, second=59))
debug(f"Fetching locations between {start_datetime} and {end_datetime}") l.debug(f"Fetching locations between {start_datetime} and {end_datetime}")
query = ''' query = '''
SELECT id, datetime, SELECT id, datetime,
@ -315,9 +313,12 @@ async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int,
ORDER BY datetime DESC ORDER BY datetime DESC
''' '''
locations = await Db.execute_read(query, start_datetime=start_datetime.replace(tzinfo=None), end_datetime=end_datetime.replace(tzinfo=None)) try:
locations = await Db.read(query, start_datetime=start_datetime, end_datetime=end_datetime)
debug(f"Range locations query returned: {locations}") l.debug(f"Range locations query returned: {locations}")
except Exception as e:
l.error(f"Error executing range locations query: {str(e)}")
locations = []
if not locations and (end is None or start_datetime.date() == end_datetime.date()): if not locations and (end is None or start_datetime.date() == end_datetime.date()):
fallback_query = ''' fallback_query = '''
@ -332,12 +333,19 @@ async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int,
ORDER BY datetime DESC ORDER BY datetime DESC
LIMIT 1 LIMIT 1
''' '''
location_data = await Db.execute_read(fallback_query, start_datetime=start_datetime.replace(tzinfo=None)) try:
debug(f"Fallback query returned: {location_data}") location_data = await Db.read(fallback_query, start_datetime=start_datetime)
if location_data: l.debug(f"Fallback query returned: {location_data}")
locations = location_data if location_data:
locations = location_data
except Exception as e:
l.error(f"Error executing fallback locations query: {str(e)}")
locations = []
debug(f"Locations found: {locations}") l.debug(f"Locations found: {locations}")
if not locations:
return []
# Sort location_data based on the datetime field in descending order # Sort location_data based on the datetime field in descending order
sorted_locations = sorted(locations, key=lambda x: x['datetime'], reverse=True) sorted_locations = sorted(locations, key=lambda x: x['datetime'], reverse=True)
@ -366,11 +374,13 @@ async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int,
return location_objects if location_objects else [] return location_objects if location_objects else []
async def fetch_last_location_before(datetime: datetime) -> Optional[Location]: async def fetch_last_location_before(datetime: datetime) -> Optional[Location]:
try: try:
datetime = await dt(datetime) datetime = await dt(datetime)
debug(f"Fetching last location before {datetime}") l.debug(f"Fetching last location before {datetime}")
query = ''' query = '''
SELECT id, datetime, SELECT id, datetime,
@ -385,16 +395,16 @@ async def fetch_last_location_before(datetime: datetime) -> Optional[Location]:
LIMIT 1 LIMIT 1
''' '''
location_data = await Db.execute_read(query, datetime=datetime.replace(tzinfo=None)) location_data = await Db.read(query, datetime=datetime)
if location_data: if location_data:
debug(f"Last location found: {location_data[0]}") l.debug(f"Last location found: {location_data[0]}")
return Location(**location_data[0]) return Location(**location_data[0])
else: else:
debug("No location found before the specified datetime") l.debug("No location found before the specified datetime")
return None return None
except Exception as e: except Exception as e:
error(f"Error fetching last location: {str(e)}") l.error(f"Error fetching last location: {str(e)}")
return None return None
@ -452,9 +462,9 @@ async def post_location(location: Location):
'country': location.country 'country': location.country
} }
await Db.execute_write(query, **params) await Db.write(query, **params)
info(f"Successfully posted location: {location.latitude}, {location.longitude}, {location.elevation} on {localized_datetime}") l.info(f"Successfully posted location: {location.latitude}, {location.longitude}, {location.elevation} on {localized_datetime}")
# Create a serializable version of params for the return value # Create a serializable version of params for the return value
serializable_params = { serializable_params = {
@ -463,15 +473,15 @@ async def post_location(location: Location):
} }
return serializable_params return serializable_params
except Exception as e: except Exception as e:
err(f"Error posting location {e}") l.error(f"Error posting location {e}")
err(traceback.format_exc()) l.error(traceback.format_exc())
return None return None
async def get_date_range(): async def get_date_range():
query = "SELECT MIN(datetime) as min_date, MAX(datetime) as max_date FROM locations" query = "SELECT MIN(datetime) as min_date, MAX(datetime) as max_date FROM locations"
row = await Db.execute_read(query) row = await Db.read(query)
if row and row[0]['min_date'] and row[0]['max_date']: if row and row[0]['min_date'] and row[0]['max_date']:
return row[0]['min_date'], row[0]['max_date'] return row[0]['min_date'], row[0]['max_date']
else: else:
@ -498,26 +508,26 @@ async def post_locate_endpoint(locations: Union[Location, List[Location]]):
"device_name": "Unknown", "device_name": "Unknown",
"device_os": "Unknown" "device_os": "Unknown"
} }
debug(f"Location received for processing: {lcn}") l.debug(f"Location received for processing: {lcn}")
geocoded_locations = await GEO.code(locations) geocoded_locations = await GEO.code(locations)
responses = [] responses = []
if isinstance(geocoded_locations, List): if isinstance(geocoded_locations, List):
for location in geocoded_locations: for location in geocoded_locations:
debug(f"Final location to be submitted to database: {location}") l.debug(f"Final location to be submitted to database: {location}")
location_entry = await post_location(location) location_entry = await post_location(location)
if location_entry: if location_entry:
responses.append({"location_data": location_entry}) responses.append({"location_data": location_entry})
else: else:
warn(f"Posting location to database appears to have failed.") l.warning(f"Posting location to database appears to have failed.")
else: else:
debug(f"Final location to be submitted to database: {geocoded_locations}") l.debug(f"Final location to be submitted to database: {geocoded_locations}")
location_entry = await post_location(geocoded_locations) location_entry = await post_location(geocoded_locations)
if location_entry: if location_entry:
responses.append({"location_data": location_entry}) responses.append({"location_data": location_entry})
else: else:
warn(f"Posting location to database appears to have failed.") l.warning(f"Posting location to database appears to have failed.")
return {"message": "Locations and weather updated", "results": responses} return {"message": "Locations and weather updated", "results": responses}
@ -540,7 +550,7 @@ async def get_locate(datetime_str: str, all: bool = False):
try: try:
date_time = await dt(datetime_str) date_time = await dt(datetime_str)
except ValueError as e: except ValueError as e:
err(f"Invalid datetime string provided: {datetime_str}") l.error(f"Invalid datetime string provided: {datetime_str}")
return ["ERROR: INVALID DATETIME PROVIDED. USE YYYYMMDDHHmmss or YYYYMMDD format."] return ["ERROR: INVALID DATETIME PROVIDED. USE YYYYMMDDHHmmss or YYYYMMDD format."]
locations = await fetch_locations(date_time) locations = await fetch_locations(date_time)
@ -565,6 +575,6 @@ async def generate_map_endpoint(
except ValueError: except ValueError:
raise HTTPException(status_code=400, detail="Invalid date format") raise HTTPException(status_code=400, detail="Invalid date format")
info(f"Generating map for {start_date} to {end_date}") l.info(f"Generating map for {start_date} to {end_date}")
html_content = await generate_map(start_date, end_date, max_points) html_content = await generate_map(start_date, end_date, max_points)
return HTMLResponse(content=html_content) return HTMLResponse(content=html_content)

View file

@ -36,19 +36,16 @@ import json
from ollama import Client as oLlama from ollama import Client as oLlama
from sijapi.routers.img import img from sijapi.routers.img import img
from dotenv import load_dotenv from dotenv import load_dotenv
from sijapi import L, COMFYUI_DIR
import io import io
from io import BytesIO from io import BytesIO
import base64 import base64
from sijapi import COMFYUI_DIR
from sijapi.logs import get_logger
l = get_logger(__name__)
ig = APIRouter() ig = APIRouter()
logger = L.get_module_logger("ig")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
class IG_Request(BaseModel): class IG_Request(BaseModel):
file: Optional[UploadFile] = None # upload a particular file to Instagram file: Optional[UploadFile] = None # upload a particular file to Instagram
@ -862,16 +859,16 @@ async def ig_flow_endpoint(new_session: bool = False):
time_remaining = 30 - (time_since_rollover % 30) time_remaining = 30 - (time_since_rollover % 30)
if time_remaining < 4: if time_remaining < 4:
logger.debug("Too close to end of TOTP counter. Waiting.") logger.l.debug("Too close to end of TOTP counter. Waiting.")
sleepupto(5, 5) sleepupto(5, 5)
if not new_session and os.path.exists(IG_SESSION_PATH): if not new_session and os.path.exists(IG_SESSION_PATH):
cl.load_settings(IG_SESSION_PATH) cl.load_settings(IG_SESSION_PATH)
logger.debug("Loaded past session.") logger.l.debug("Loaded past session.")
elif new_session and cl.login(IG_USERNAME, IG_PASSWORD, verification_code=TOTP.now()): elif new_session and cl.login(IG_USERNAME, IG_PASSWORD, verification_code=TOTP.now()):
cl.dump_settings(IG_SESSION_PATH) cl.dump_settings(IG_SESSION_PATH)
logger.debug("Logged in and saved new session.") logger.l.debug("Logged in and saved new session.")
else: else:
raise Exception(f"Failed to login as {IG_USERNAME}.") raise Exception(f"Failed to login as {IG_USERNAME}.")

View file

@ -18,15 +18,12 @@ import random
import os import os
import asyncio import asyncio
from sijapi.routers.llm import query_ollama from sijapi.routers.llm import query_ollama
from sijapi import API, L, COMFYUI_URL, COMFYUI_OUTPUT_DIR, IMG_CONFIG_PATH, IMG_DIR, IMG_WORKFLOWS_DIR from sijapi import Sys, COMFYUI_URL, COMFYUI_OUTPUT_DIR, IMG_CONFIG_PATH, IMG_DIR, IMG_WORKFLOWS_DIR
from sijapi.logs import get_logger
l = get_logger(__name__)
img = APIRouter() img = APIRouter()
logger = L.get_module_logger("img")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
CLIENT_ID = str(uuid.uuid4()) CLIENT_ID = str(uuid.uuid4())
@ -73,12 +70,12 @@ async def workflow(prompt: str, scene: str = None, size: str = None, earlyout: s
scene_workflow = random.choice(scene_data['workflows']) scene_workflow = random.choice(scene_data['workflows'])
if size: if size:
debug(f"Specified size: {size}") l.debug(f"Specified size: {size}")
size = size if size else scene_workflow.get('size', '1024x1024') size = size if size else scene_workflow.get('size', '1024x1024')
width, height = map(int, size.split('x')) width, height = map(int, size.split('x'))
debug(f"Parsed width: {width}; parsed height: {height}") l.debug(f"Parsed width: {width}; parsed height: {height}")
workflow_path = Path(IMG_WORKFLOWS_DIR) / scene_workflow['workflow'] workflow_path = Path(IMG_WORKFLOWS_DIR) / scene_workflow['workflow']
workflow_data = json.loads(workflow_path.read_text()) workflow_data = json.loads(workflow_path.read_text())
@ -92,22 +89,22 @@ async def workflow(prompt: str, scene: str = None, size: str = None, earlyout: s
} }
saved_file_key = await update_prompt_and_get_key(workflow=workflow_data, post=post, positive=image_concept) saved_file_key = await update_prompt_and_get_key(workflow=workflow_data, post=post, positive=image_concept)
info(f"Saved file key: {saved_file_key}") l.info(f"Saved file key: {saved_file_key}")
prompt_id = await queue_prompt(workflow_data) prompt_id = await queue_prompt(workflow_data)
info(f"Prompt ID: {prompt_id}") l.info(f"Prompt ID: {prompt_id}")
max_size = max(width, height) if downscale_to_fit else None max_size = max(width, height) if downscale_to_fit else None
destination_path = Path(destination_path).with_suffix(".jpg") if destination_path else IMG_DIR / f"{prompt_id}.jpg" destination_path = Path(destination_path).with_suffix(".jpg") if destination_path else IMG_DIR / f"{prompt_id}.jpg"
if earlyout: if earlyout:
asyncio.create_task(generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path)) asyncio.create_task(generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path))
debug(f"Returning {destination_path}") l.debug(f"Returning {destination_path}")
return destination_path return destination_path
else: else:
await generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path) await generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path)
debug(f"Returning {destination_path}") l.debug(f"Returning {destination_path}")
return destination_path return destination_path
@ -118,16 +115,16 @@ async def generate_and_save_image(prompt_id, saved_file_key, max_size, destinati
jpg_file_path = await save_as_jpg(image_data, prompt_id, quality=90, max_size=max_size, destination_path=destination_path) jpg_file_path = await save_as_jpg(image_data, prompt_id, quality=90, max_size=max_size, destination_path=destination_path)
if Path(jpg_file_path) != Path(destination_path): if Path(jpg_file_path) != Path(destination_path):
err(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}") l.error(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}")
except Exception as e: except Exception as e:
err(f"Error in generate_and_save_image: {e}") l.error(f"Error in generate_and_save_image: {e}")
return None return None
def get_web_path(file_path: Path) -> str: def get_web_path(file_path: Path) -> str:
uri = file_path.relative_to(IMG_DIR) uri = file_path.relative_to(IMG_DIR)
web_path = f"{API.URL}/img/{uri}" web_path = f"{Sys.URL}/img/{uri}"
return web_path return web_path
@ -143,7 +140,7 @@ async def poll_status(prompt_id):
status_data = await response.json() status_data = await response.json()
job_data = status_data.get(prompt_id, {}) job_data = status_data.get(prompt_id, {})
if job_data.get("status", {}).get("completed", False): if job_data.get("status", {}).get("completed", False):
info(f"{prompt_id} completed in {elapsed_time} seconds.") l.info(f"{prompt_id} completed in {elapsed_time} seconds.")
return job_data return job_data
await asyncio.sleep(1) await asyncio.sleep(1)
@ -194,7 +191,7 @@ async def save_as_jpg(image_data, prompt_id, max_size = None, quality = 100, des
return str(destination_path_jpg) return str(destination_path_jpg)
except Exception as e: except Exception as e:
err(f"Error processing image: {e}") l.error(f"Error processing image: {e}")
return None return None
@ -210,11 +207,11 @@ def set_presets(workflow_data, preset_values):
if 'inputs' in workflow_data.get(preset_node, {}): if 'inputs' in workflow_data.get(preset_node, {}):
workflow_data[preset_node]['inputs'][preset_key] = preset_value workflow_data[preset_node]['inputs'][preset_key] = preset_value
else: else:
debug("Node not found in workflow_data") l.debug("Node not found in workflow_data")
else: else:
debug("Required data missing in preset_values") l.debug("Required data missing in preset_values")
else: else:
debug("No preset_values found") l.debug("No preset_values found")
def get_return_path(destination_path): def get_return_path(destination_path):
@ -229,7 +226,7 @@ def get_scene(scene):
IMG_CONFIG = yaml.safe_load(IMG_CONFIG_file) IMG_CONFIG = yaml.safe_load(IMG_CONFIG_file)
for scene_data in IMG_CONFIG['scenes']: for scene_data in IMG_CONFIG['scenes']:
if scene_data['scene'] == scene: if scene_data['scene'] == scene:
debug(f"Found scene for \"{scene}\".") l.debug(f"Found scene for \"{scene}\".")
return scene_data return scene_data
return None return None
@ -249,11 +246,11 @@ def get_matching_scene(prompt):
max_count = count max_count = count
scene_data = sc scene_data = sc
if scene_data: if scene_data:
debug(f"Found better-matching scene: the prompt contains {max_count} words that match triggers for {scene_data.get('name')}!") l.debug(f"Found better-matching scene: the prompt contains {max_count} words that match triggers for {scene_data.get('name')}!")
if scene_data: if scene_data:
return scene_data return scene_data
else: else:
debug(f"No matching scenes found, falling back to default scene.") l.debug(f"No matching scenes found, falling back to default scene.")
return IMG_CONFIG['scenes'][0] return IMG_CONFIG['scenes'][0]
@ -272,11 +269,11 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0):
for attempt in range(retries): for attempt in range(retries):
try: try:
with socket.create_connection(("127.0.0.1", 8188), timeout=2): with socket.create_connection(("127.0.0.1", 8188), timeout=2):
info("ComfyUI is already running.") l.info("ComfyUI is already running.")
return return
except (socket.timeout, ConnectionRefusedError): except (socket.timeout, ConnectionRefusedError):
if attempt == 0: # Only try to start ComfyUI on the first failed attempt if attempt == 0: # Only try to start ComfyUI on the first failed attempt
warn("ComfyUI is not running. Starting it now...") l.warning("ComfyUI is not running. Starting it now...")
try: try:
tmux_command = ( tmux_command = (
"tmux split-window -h " "tmux split-window -h "
@ -285,14 +282,14 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0):
"python main.py; exec $SHELL\"" "python main.py; exec $SHELL\""
) )
subprocess.Popen(tmux_command, shell=True) subprocess.Popen(tmux_command, shell=True)
info("ComfyUI started in a new tmux session.") l.info("ComfyUI started in a new tmux session.")
except Exception as e: except Exception as e:
raise RuntimeError(f"Error starting ComfyUI: {e}") raise RuntimeError(f"Error starting ComfyUI: {e}")
warn(f"Attempt {attempt + 1}/{retries} failed. Waiting {timeout} seconds before retrying...") l.warning(f"Attempt {attempt + 1}/{retries} failed. Waiting {timeout} seconds before retrying...")
await asyncio.sleep(timeout) await asyncio.sleep(timeout)
crit(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.") l.critical(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
raise RuntimeError(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.") raise RuntimeError(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
@ -398,13 +395,13 @@ Even more important, it finds and returns the key to the filepath where the file
workflow[key] = random.randint(1000000000000, 9999999999999) workflow[key] = random.randint(1000000000000, 9999999999999)
elif key in ["width", "max_width", "scaled_width", "height", "max_height", "scaled_height", "side_length", "size", "value", "dimension", "dimensions", "long", "long_side", "short", "short_side", "length"]: elif key in ["width", "max_width", "scaled_width", "height", "max_height", "scaled_height", "side_length", "size", "value", "dimension", "dimensions", "long", "long_side", "short", "short_side", "length"]:
debug(f"Got a hit for a dimension: {key} {value}") l.debug(f"Got a hit for a dimension: {key} {value}")
if value == 1023: if value == 1023:
workflow[key] = post.get("width", 1024) workflow[key] = post.get("width", 1024)
debug(f"Set {key} to {workflow[key]}.") l.debug(f"Set {key} to {workflow[key]}.")
elif value == 1025: elif value == 1025:
workflow[key] = post.get("height", 1024) workflow[key] = post.get("height", 1024)
debug(f"Set {key} to {workflow[key]}.") l.debug(f"Set {key} to {workflow[key]}.")
update_recursive(workflow) update_recursive(workflow)
return found_key[0] return found_key[0]

View file

@ -26,18 +26,14 @@ import tempfile
import shutil import shutil
import html2text import html2text
import markdown import markdown
from sijapi import L, Llm, LLM_SYS_MSG, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MODEL from sijapi import Llm, LLM_SYS_MSG, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MODEL
from sijapi.utilities import convert_to_unix_time, sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension from sijapi.utilities import convert_to_unix_time, sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension
from sijapi.routers import tts from sijapi.routers import tts
from sijapi.routers.asr import transcribe_audio from sijapi.routers.asr import transcribe_audio
from sijapi.logs import get_logger
l = get_logger(__name__)
llm = APIRouter() llm = APIRouter()
logger = L.get_module_logger("llm")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
VISION_MODELS = ["llava-phi3", "moondream", "llava", "llava-llama3", "llava:34b", "llava:13b-v1.5-q8_0"] VISION_MODELS = ["llava-phi3", "moondream", "llava", "llava-llama3", "llava:34b", "llava:13b-v1.5-q8_0"]
@ -93,13 +89,13 @@ async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, model: str = Llm.chat.m
LLM = Ollama() LLM = Ollama()
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens}) response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
debug(response) l.debug(response)
if "message" in response: if "message" in response:
if "content" in response["message"]: if "content" in response["message"]:
content = response["message"]["content"] content = response["message"]["content"]
return content return content
else: else:
debug("No choices found in response") l.debug("No choices found in response")
return None return None
async def query_ollama_multishot( async def query_ollama_multishot(
@ -120,12 +116,12 @@ async def query_ollama_multishot(
LLM = Ollama() LLM = Ollama()
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens}) response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
debug(response) l.debug(response)
if "message" in response and "content" in response["message"]: if "message" in response and "content" in response["message"]:
return response["message"]["content"] return response["message"]["content"]
else: else:
debug("No content found in response") l.debug("No content found in response")
return None return None
@ -144,21 +140,21 @@ async def chat_completions(request: Request):
raise HTTPException(status_code=400, detail="Message data is required in the request body.") raise HTTPException(status_code=400, detail="Message data is required in the request body.")
requested_model = body.get('model', 'default-model') requested_model = body.get('model', 'default-model')
debug(f"Requested model: {requested_model}") l.debug(f"Requested model: {requested_model}")
stream = body.get('stream') stream = body.get('stream')
token_limit = body.get('max_tokens') or body.get('num_predict') token_limit = body.get('max_tokens') or body.get('num_predict')
# Check if the most recent message contains an image_url # Check if the most recent message contains an image_url
recent_message = messages[-1] recent_message = messages[-1]
if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')): if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')):
debug("Processing as a vision request") l.debug("Processing as a vision request")
model = "llava" model = "llava"
debug(f"Using model: {model}") l.debug(f"Using model: {model}")
return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json") return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json")
else: else:
debug("Processing as a standard request") l.debug("Processing as a standard request")
model = requested_model model = requested_model
debug(f"Using model: {model}") l.debug(f"Using model: {model}")
if stream: if stream:
return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json") return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json")
else: else:
@ -283,17 +279,17 @@ async def generate_messages(messages: list, model: str = "llama3"):
def is_model_available(model_name): def is_model_available(model_name):
model_data = OllamaList() model_data = OllamaList()
available_models = [model['name'] for model in model_data['models']] available_models = [model['name'] for model in model_data['models']]
debug(f"Available models: {available_models}") # Log using the configured LOGGER l.debug(f"Available models: {available_models}") # Log using the configured LOGGER
matching_models = [model for model in available_models if model.startswith(model_name + ':') or model == model_name] matching_models = [model for model in available_models if model.startswith(model_name + ':') or model == model_name]
if len(matching_models) == 1: if len(matching_models) == 1:
debug(f"Unique match found: {matching_models[0]}") l.debug(f"Unique match found: {matching_models[0]}")
return True return True
elif len(matching_models) > 1: elif len(matching_models) > 1:
err(f"Ambiguous match found, models: {matching_models}") l.error(f"Ambiguous match found, models: {matching_models}")
return True return True
else: else:
err(f"No match found for model: {model_name}") l.error(f"No match found for model: {model_name}")
return False return False
@ -416,12 +412,12 @@ def query_gpt4(llmPrompt: List = [], system_msg: str = "", user_msg: str = "", m
if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"): if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
return first_choice.message.content return first_choice.message.content
else: else:
debug("No content attribute in the first choice's message") l.debug("No content attribute in the first choice's message")
debug(f"No content found in message string: {response.choices}") l.debug(f"No content found in message string: {response.choices}")
debug("Trying again!") l.debug("Trying again!")
query_gpt4(messages, max_tokens) query_gpt4(messages, max_tokens)
else: else:
debug(f"No content found in message string: {response}") l.debug(f"No content found in message string: {response}")
return "" return ""
def llava(image_base64, prompt): def llava(image_base64, prompt):
@ -431,7 +427,7 @@ def llava(image_base64, prompt):
prompt = f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}", prompt = f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}",
images = [image_base64] images = [image_base64]
) )
debug(response) l.debug(response)
return "" if "pass" in response["response"].lower() else response["response"] return "" if "pass" in response["response"].lower() else response["response"]
def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150): def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150):
@ -462,7 +458,7 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
comment_content = first_choice.message.content comment_content = first_choice.message.content
if "PASS" in comment_content: if "PASS" in comment_content:
return "" return ""
debug(f"Generated comment: {comment_content}") l.debug(f"Generated comment: {comment_content}")
response_2 = VISION_LLM.chat.completions.create( response_2 = VISION_LLM.chat.completions.create(
model="gpt-4-vision-preview", model="gpt-4-vision-preview",
@ -500,15 +496,15 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
first_choice = response_2.choices[0] first_choice = response_2.choices[0]
if first_choice.message and first_choice.message.content: if first_choice.message and first_choice.message.content:
final_content = first_choice.message.content final_content = first_choice.message.content
debug(f"Generated comment: {final_content}") l.debug(f"Generated comment: {final_content}")
if "PASS" in final_content: if "PASS" in final_content:
return "" return ""
else: else:
return final_content return final_content
debug("Vision response did not contain expected data.") l.debug("Vision response did not contain expected data.")
debug(f"Vision response: {response_1}") l.debug(f"Vision response: {response_1}")
asyncio.sleep(15) asyncio.sleep(15)
try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens) try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens)
@ -566,7 +562,7 @@ async def summarize_tts_endpoint(
) )
except Exception as e: except Exception as e:
err(f"Error in summarize_tts_endpoint: {str(e)}") l.error(f"Error in summarize_tts_endpoint: {str(e)}")
return JSONResponse( return JSONResponse(
status_code=400, status_code=400,
content={"error": str(e)} content={"error": str(e)}
@ -593,7 +589,7 @@ async def summarize_tts(
bg_tasks = BackgroundTasks() bg_tasks = BackgroundTasks()
model = await tts.get_model(voice) model = await tts.get_model(voice)
final_output_path = await tts.generate_speech(bg_tasks, summarized_text, voice, model=model, speed=speed, podcast=podcast, title=filename) final_output_path = await tts.generate_speech(bg_tasks, summarized_text, voice, model=model, speed=speed, podcast=podcast, title=filename)
debug(f"summary_tts completed with final_output_path: {final_output_path}") l.debug(f"summary_tts completed with final_output_path: {final_output_path}")
return final_output_path return final_output_path
@ -609,10 +605,10 @@ def split_text_into_chunks(text: str) -> List[str]:
sentences = re.split(r'(?<=[.!?])\s+', text) sentences = re.split(r'(?<=[.!?])\s+', text)
words = text.split() words = text.split()
total_words = len(words) total_words = len(words)
debug(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.") l.debug(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.")
max_words_per_chunk = int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW) max_words_per_chunk = int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW)
debug(f"Maximum words per chunk: {max_words_per_chunk}") l.debug(f"Maximum words per chunk: {max_words_per_chunk}")
chunks = [] chunks = []
current_chunk = [] current_chunk = []
@ -632,7 +628,7 @@ def split_text_into_chunks(text: str) -> List[str]:
if current_chunk: if current_chunk:
chunks.append(' '.join(current_chunk)) chunks.append(' '.join(current_chunk))
debug(f"Split text into {len(chunks)} chunks.") l.debug(f"Split text into {len(chunks)} chunks.")
return chunks return chunks
@ -644,7 +640,7 @@ def calculate_max_tokens(text: str) -> int:
async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_tasks: BackgroundTasks = None) -> str: async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_tasks: BackgroundTasks = None) -> str:
info(f"Attempting to extract text from file: {file}") l.info(f"Attempting to extract text from file: {file}")
try: try:
if isinstance(file, UploadFile): if isinstance(file, UploadFile):
@ -667,7 +663,7 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
_, file_ext = os.path.splitext(file_path) _, file_ext = os.path.splitext(file_path)
file_ext = file_ext.lower() file_ext = file_ext.lower()
info(f"File extension: {file_ext}") l.info(f"File extension: {file_ext}")
if file_ext == '.pdf': if file_ext == '.pdf':
text_content = await extract_text_from_pdf(file_path) text_content = await extract_text_from_pdf(file_path)
@ -694,7 +690,7 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
return text_content return text_content
except Exception as e: except Exception as e:
err(f"Error extracting text: {str(e)}") l.error(f"Error extracting text: {str(e)}")
raise ValueError(f"Error extracting text: {str(e)}") raise ValueError(f"Error extracting text: {str(e)}")
@ -703,17 +699,17 @@ async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_
chunked_text = split_text_into_chunks(text) chunked_text = split_text_into_chunks(text)
total_parts = len(chunked_text) total_parts = len(chunked_text)
debug(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}") l.debug(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}")
total_words_count = sum(len(chunk.split()) for chunk in chunked_text) total_words_count = sum(len(chunk.split()) for chunk in chunked_text)
debug(f"Total words count: {total_words_count}") l.debug(f"Total words count: {total_words_count}")
total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW)) total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW))
debug(f"Total tokens count: {total_tokens_count}") l.debug(f"Total tokens count: {total_tokens_count}")
total_summary_length = length_override if length_override else total_tokens_count // length_quotient total_summary_length = length_override if length_override else total_tokens_count // length_quotient
debug(f"Total summary length: {total_summary_length}") l.debug(f"Total summary length: {total_summary_length}")
corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT) corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT)
debug(f"Corrected total summary length: {corrected_total_summary_length}") l.debug(f"Corrected total summary length: {corrected_total_summary_length}")
summaries = await asyncio.gather(*[ summaries = await asyncio.gather(*[
process_chunk(instruction, chunk, i+1, total_parts, LLM=LLM) process_chunk(instruction, chunk, i+1, total_parts, LLM=LLM)
@ -724,21 +720,21 @@ async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_
summaries = [f"\n\n\nPART {i+1} of {total_parts}:\n\n{summary}" for i, summary in enumerate(summaries)] summaries = [f"\n\n\nPART {i+1} of {total_parts}:\n\n{summary}" for i, summary in enumerate(summaries)]
concatenated_summary = ' '.join(summaries) concatenated_summary = ' '.join(summaries)
debug(f"Concatenated summary: {concatenated_summary}") l.debug(f"Concatenated summary: {concatenated_summary}")
debug(f"Concatenated summary length: {len(concatenated_summary.split())}") l.debug(f"Concatenated summary length: {len(concatenated_summary.split())}")
if total_parts > 1: if total_parts > 1:
debug(f"Processing the concatenated_summary to smooth the edges...") l.debug(f"Processing the concatenated_summary to smooth the edges...")
concatenated_instruct = f"The following text consists of the concatenated {total_parts} summaries of {total_parts} parts of a single document that had to be split for processing. Reword it for clarity and flow as a single cohesive summary, understanding that it all relates to a single document, but that document likely consists of multiple parts potentially from multiple authors. Do not shorten it and do not omit content, simply smooth out the edges between the parts." concatenated_instruct = f"The following text consists of the concatenated {total_parts} summaries of {total_parts} parts of a single document that had to be split for processing. Reword it for clarity and flow as a single cohesive summary, understanding that it all relates to a single document, but that document likely consists of multiple parts potentially from multiple authors. Do not shorten it and do not omit content, simply smooth out the edges between the parts."
final_summary = await process_chunk(concatenated_instruct, concatenated_summary, 1, 1, length_ratio=1, LLM=LLM) final_summary = await process_chunk(concatenated_instruct, concatenated_summary, 1, 1, length_ratio=1, LLM=LLM)
debug(f"Final summary length: {len(final_summary.split())}") l.debug(f"Final summary length: {len(final_summary.split())}")
return final_summary return final_summary
else: else:
return concatenated_summary return concatenated_summary
async def process_chunk(instruction: str, text: str, part: int, total_parts: int, length_ratio: float = None, LLM: Ollama = None) -> str: async def process_chunk(instruction: str, text: str, part: int, total_parts: int, length_ratio: float = None, LLM: Ollama = None) -> str:
# debug(f"Processing chunk: {text}") # l.debug(f"Processing chunk: {text}")
LLM = LLM if LLM else Ollama() LLM = LLM if LLM else Ollama()
words_count = len(text.split()) words_count = len(text.split())
@ -748,14 +744,14 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int
max_tokens = min(tokens_count // summary_length_ratio, SUMMARY_CHUNK_SIZE) max_tokens = min(tokens_count // summary_length_ratio, SUMMARY_CHUNK_SIZE)
max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH) max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH)
debug(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}") l.debug(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}")
if part and total_parts > 1: if part and total_parts > 1:
prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}" prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}"
else: else:
prompt = f"{instruction}:\n\n{text}" prompt = f"{instruction}:\n\n{text}"
info(f"Starting LLM.generate for part {part} of {total_parts}") l.info(f"Starting LLM.generate for part {part} of {total_parts}")
response = await LLM.generate( response = await LLM.generate(
model=SUMMARY_MODEL, model=SUMMARY_MODEL,
prompt=prompt, prompt=prompt,
@ -764,8 +760,8 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int
) )
text_response = response['response'] text_response = response['response']
info(f"Completed LLM.generate for part {part} of {total_parts}") l.info(f"Completed LLM.generate for part {part} of {total_parts}")
debug(f"Result: {text_response}") l.debug(f"Result: {text_response}")
return text_response return text_response
async def title_and_summary(extracted_text: str): async def title_and_summary(extracted_text: str):

View file

@ -18,17 +18,13 @@ from markdownify import markdownify as md
from better_profanity import profanity from better_profanity import profanity
from fastapi import APIRouter, BackgroundTasks, UploadFile, Form, HTTPException, Query, Path as FastAPIPath from fastapi import APIRouter, BackgroundTasks, UploadFile, Form, HTTPException, Query, Path as FastAPIPath
from pathlib import Path from pathlib import Path
from sijapi import L, Archivist, News, Tts, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR from sijapi import Archivist, News, Tts, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR
from sijapi.utilities import html_to_markdown, download_file, sanitize_filename, assemble_journal_path, assemble_archive_path, contains_profanity, is_ad_or_tracker from sijapi.utilities import html_to_markdown, download_file, sanitize_filename, assemble_journal_path, assemble_archive_path, contains_profanity, is_ad_or_tracker
from sijapi.routers import gis, llm, tts, note from sijapi.routers import gis, llm, tts, note
from sijapi.logs import get_logger
l = get_logger(__name__)
news = APIRouter() news = APIRouter()
logger = L.get_module_logger("news")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
@news.post("/clip") @news.post("/clip")
@ -87,7 +83,7 @@ async def handle_tts(bg_tasks: BackgroundTasks, article: Article, title: str, tt
return f"![[{Path(audio_path).name}]]" return f"![[{Path(audio_path).name}]]"
except HTTPException as e: except HTTPException as e:
err(f"Failed to generate TTS: {str(e)}") l.error(f"Failed to generate TTS: {str(e)}")
return None return None
@ -99,7 +95,7 @@ def get_banner_markdown(image_url: str) -> str:
banner_image = download_file(image_url, Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR) banner_image = download_file(image_url, Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR)
return f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" if banner_image else '' return f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" if banner_image else ''
except Exception as e: except Exception as e:
err(f"Failed to download banner image: {str(e)}") l.error(f"Failed to download banner image: {str(e)}")
return '' return ''
@ -109,7 +105,7 @@ async def save_markdown_file(filename: str, content: str):
async def process_news_site(site, bg_tasks: BackgroundTasks): async def process_news_site(site, bg_tasks: BackgroundTasks):
info(f"Downloading articles from {site.name}...") l.info(f"Downloading articles from {site.name}...")
earliest_date = dt_datetime.now().date() - timedelta(days=site.days_back) earliest_date = dt_datetime.now().date() - timedelta(days=site.days_back)
@ -131,9 +127,9 @@ async def process_news_site(site, bg_tasks: BackgroundTasks):
results = await asyncio.gather(*tasks) results = await asyncio.gather(*tasks)
articles_downloaded = sum(results) articles_downloaded = sum(results)
info(f"Downloaded {articles_downloaded} articles from {site.name}") l.info(f"Downloaded {articles_downloaded} articles from {site.name}")
except Exception as e: except Exception as e:
err(f"Error processing {site.name}: {str(e)}") l.error(f"Error processing {site.name}: {str(e)}")
async def download_and_save_article(article, site_name, earliest_date, bg_tasks: BackgroundTasks, tts_mode: str = "off", voice: str = Tts.elevenlabs.default): async def download_and_save_article(article, site_name, earliest_date, bg_tasks: BackgroundTasks, tts_mode: str = "off", voice: str = Tts.elevenlabs.default):
@ -147,7 +143,7 @@ async def download_and_save_article(article, site_name, earliest_date, bg_tasks:
return await process_and_save_article(bg_tasks, url, None, tts_mode, voice, site_name=site_name) return await process_and_save_article(bg_tasks, url, None, tts_mode, voice, site_name=site_name)
except Exception as e: except Exception as e:
err(f"Error processing article from {article.url}: {str(e)}") l.error(f"Error processing article from {article.url}: {str(e)}")
return False return False
@ -186,16 +182,16 @@ async def process_and_save_article(
return f"Successfully saved: {relative_path}" return f"Successfully saved: {relative_path}"
except Exception as e: except Exception as e:
err(f"Failed to handle final markdown content preparation and/or saving to daily note; {e}") l.error(f"Failed to handle final markdown content preparation and/or saving to daily note; {e}")
except Exception as e: except Exception as e:
err(f"Failed to handle TTS: {e}") l.error(f"Failed to handle TTS: {e}")
except Exception as e: except Exception as e:
err(f"Failed to generate title, file paths, and summary: {e}") l.error(f"Failed to generate title, file paths, and summary: {e}")
except Exception as e: except Exception as e:
err(f"Failed to fetch and parse article {url}: {str(e)}") l.error(f"Failed to fetch and parse article {url}: {str(e)}")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))

View file

@ -17,28 +17,25 @@ from dateutil.parser import parse as dateutil_parse
from fastapi import HTTPException, status from fastapi import HTTPException, status
from pathlib import Path from pathlib import Path
from fastapi import APIRouter, Query, HTTPException from fastapi import APIRouter, Query, HTTPException
from sijapi import API, L, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, GEO from sijapi import Sys, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, GEO
from sijapi.routers import asr, cal, gis, img, llm, serve, timing, tts, weather from sijapi.routers import asr, cal, gis, img, llm, serve, timing, tts, weather
from sijapi.utilities import assemble_journal_path, convert_to_12_hour_format, sanitize_filename, convert_degrees_to_cardinal, check_file_name, HOURLY_COLUMNS_MAPPING from sijapi.utilities import assemble_journal_path, convert_to_12_hour_format, sanitize_filename, convert_degrees_to_cardinal, check_file_name, HOURLY_COLUMNS_MAPPING
from sijapi.classes import Location from sijapi.classes import Location
from sijapi.logs import get_logger
l = get_logger(__name__)
note = APIRouter() note = APIRouter()
logger = L.get_module_logger("note")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
@note.post("/note/add") @note.post("/note/add")
async def note_add_endpoint(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), source: Optional[str] = Form(None), bg_tasks: BackgroundTasks = None): async def note_add_endpoint(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), source: Optional[str] = Form(None), bg_tasks: BackgroundTasks = None):
debug(f"Received request on /note/add...") l.debug(f"Received request on /note/add...")
if not file and not text: if not file and not text:
warn(f"... without any file or text!") l.warning(f"... without any file or text!")
raise HTTPException(status_code=400, detail="Either text or a file must be provided") raise HTTPException(status_code=400, detail="Either text or a file must be provided")
else: else:
result = await process_for_daily_note(file, text, source, bg_tasks) result = await process_for_daily_note(file, text, source, bg_tasks)
info(f"Result on /note/add: {result}") l.info(f"Result on /note/add: {result}")
return JSONResponse({"message": "Note added successfully", "entry": result}, status_code=201) return JSONResponse({"message": "Note added successfully", "entry": result}, status_code=201)
@ -47,7 +44,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
transcription_entry = "" transcription_entry = ""
file_entry = "" file_entry = ""
if file: if file:
debug("File received...") l.debug("File received...")
file_content = await file.read() file_content = await file.read()
audio_io = BytesIO(file_content) audio_io = BytesIO(file_content)
@ -55,18 +52,18 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
guessed_type = mimetypes.guess_type(file.filename) guessed_type = mimetypes.guess_type(file.filename)
file_type = guessed_type[0] if guessed_type[0] else "application/octet-stream" file_type = guessed_type[0] if guessed_type[0] else "application/octet-stream"
debug(f"Processing as {file_type}...") l.debug(f"Processing as {file_type}...")
# Extract the main type (e.g., 'audio', 'image', 'video') # Extract the main type (e.g., 'audio', 'image', 'video')
main_type = file_type.split('/')[0] main_type = file_type.split('/')[0]
subdir = main_type.title() if main_type else "Documents" subdir = main_type.title() if main_type else "Documents"
absolute_path, relative_path = assemble_journal_path(now, subdir=subdir, filename=file.filename) absolute_path, relative_path = assemble_journal_path(now, subdir=subdir, filename=file.filename)
debug(f"Destination path: {absolute_path}") l.debug(f"Destination path: {absolute_path}")
with open(absolute_path, 'wb') as f: with open(absolute_path, 'wb') as f:
f.write(file_content) f.write(file_content)
debug(f"Processing {f.name}...") l.debug(f"Processing {f.name}...")
if main_type == 'audio': if main_type == 'audio':
transcription = await asr.transcribe_audio(file_path=absolute_path, params=asr.TranscribeParams(model="small-en", language="en", threads=6)) transcription = await asr.transcribe_audio(file_path=absolute_path, params=asr.TranscribeParams(model="small-en", language="en", threads=6))
@ -77,7 +74,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
file_entry = f"[Source]({relative_path})" file_entry = f"[Source]({relative_path})"
text_entry = text if text else "" text_entry = text if text else ""
debug(f"transcription: {transcription_entry}\nfile_entry: {file_entry}\ntext_entry: {text_entry}") l.debug(f"transcription: {transcription_entry}\nfile_entry: {file_entry}\ntext_entry: {text_entry}")
return await add_to_daily_note(transcription_entry, file_entry, text_entry, now) return await add_to_daily_note(transcription_entry, file_entry, text_entry, now)
@ -169,7 +166,7 @@ added: {timestamp}
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]" obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
body += f"{obsidian_link}\n\n" body += f"{obsidian_link}\n\n"
except Exception as e: except Exception as e:
err(f"Failed in the TTS portion of clipping: {e}") l.error(f"Failed in the TTS portion of clipping: {e}")
body += f"> [!summary]+\n" body += f"> [!summary]+\n"
body += f"> {summary}\n\n" body += f"> {summary}\n\n"
@ -182,12 +179,12 @@ added: {timestamp}
with open(markdown_filename, 'w', encoding=encoding) as md_file: with open(markdown_filename, 'w', encoding=encoding) as md_file:
md_file.write(markdown_content) md_file.write(markdown_content)
info(f"Successfully saved to {markdown_filename}") l.info(f"Successfully saved to {markdown_filename}")
return markdown_filename return markdown_filename
except Exception as e: except Exception as e:
err(f"Failed to clip: {str(e)}") l.error(f"Failed to clip: {str(e)}")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
@ -199,7 +196,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False):
if check_file_name(filename): if check_file_name(filename):
file_path = Path(dirpath) / filename file_path = Path(dirpath) / filename
impermissible_files.append(file_path) impermissible_files.append(file_path)
debug(f"Impermissible file found: {file_path}") l.debug(f"Impermissible file found: {file_path}")
# Sanitize the file name # Sanitize the file name
new_filename = sanitize_filename(filename) new_filename = sanitize_filename(filename)
@ -217,7 +214,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False):
# Rename the file # Rename the file
if rename: if rename:
os.rename(file_path, new_file_path) os.rename(file_path, new_file_path)
debug(f"Renamed: {file_path} -> {new_file_path}") l.debug(f"Renamed: {file_path} -> {new_file_path}")
return impermissible_files return impermissible_files
@ -256,18 +253,18 @@ async def build_daily_note_getpoint():
path = await build_daily_note(date_time, loc.latitude, loc.longitude) path = await build_daily_note(date_time, loc.latitude, loc.longitude)
path_str = str(path) path_str = str(path)
info(f"Successfully created daily note at {path_str}") l.info(f"Successfully created daily note at {path_str}")
return JSONResponse(content={"path": path_str}, status_code=200) return JSONResponse(content={"path": path_str}, status_code=200)
except ValueError as ve: except ValueError as ve:
error_msg = f"Value Error in build_daily_note_getpoint: {str(ve)}" error_msg = f"Value Error in build_daily_note_getpoint: {str(ve)}"
err(error_msg) l.error(error_msg)
raise HTTPException(status_code=400, detail=error_msg) raise HTTPException(status_code=400, detail=error_msg)
except Exception as e: except Exception as e:
error_msg = f"Unexpected error in build_daily_note_getpoint: {str(e)}" error_msg = f"Unexpected error in build_daily_note_getpoint: {str(e)}"
err(error_msg) l.error(error_msg)
err(f"Traceback: {traceback.format_exc()}") l.error(f"Traceback: {traceback.format_exc()}")
raise HTTPException(status_code=500, detail="An unexpected error occurred") raise HTTPException(status_code=500, detail="An unexpected error occurred")
@ -287,7 +284,7 @@ async def build_daily_note_endpoint(
else: else:
raise ValueError("Location is not provided or invalid.") raise ValueError("Location is not provided or invalid.")
except (ValueError, AttributeError, TypeError) as e: except (ValueError, AttributeError, TypeError) as e:
warn(f"Falling back to localized datetime due to error: {e}") l.warning(f"Falling back to localized datetime due to error: {e}")
try: try:
date_time = await gis.dt(date_str) date_time = await gis.dt(date_str)
places = await gis.fetch_locations(date_time) places = await gis.fetch_locations(date_time)
@ -307,7 +304,7 @@ async def build_daily_note(date_time: dt_datetime, lat: float = None, lon: float
Obsidian helper. Takes a datetime and creates a new daily note. Note: it uses the sijapi configuration file to place the daily note and does NOT presently interface with Obsidian's daily note or periodic notes extensions. It is your responsibility to ensure they match. Obsidian helper. Takes a datetime and creates a new daily note. Note: it uses the sijapi configuration file to place the daily note and does NOT presently interface with Obsidian's daily note or periodic notes extensions. It is your responsibility to ensure they match.
''' '''
absolute_path, _ = assemble_journal_path(date_time) absolute_path, _ = assemble_journal_path(date_time)
debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in build_daily_note.") l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in build_daily_note.")
formatted_day = date_time.strftime("%A %B %d, %Y") # Monday May 27, 2024 formatting formatted_day = date_time.strftime("%A %B %d, %Y") # Monday May 27, 2024 formatting
day_before = (date_time - timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-26 Sunday formatting day_before = (date_time - timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-26 Sunday formatting
day_after = (date_time + timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-28 Tuesday formatting day_after = (date_time + timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-28 Tuesday formatting
@ -396,7 +393,7 @@ async def update_frontmatter_endpoint(date: str, key: str, value: str):
async def update_frontmatter(date_time: dt_datetime, key: str, value: str): async def update_frontmatter(date_time: dt_datetime, key: str, value: str):
file_path, relative_path = assemble_journal_path(date_time) file_path, relative_path = assemble_journal_path(date_time)
if not file_path.exists(): if not file_path.exists():
crit(f"Markdown file not found at {file_path}") l.critical(f"Markdown file not found at {file_path}")
raise HTTPException(status_code=404, detail="Markdown file not found.") raise HTTPException(status_code=404, detail="Markdown file not found.")
with open(file_path, "r", encoding="utf-8") as file: with open(file_path, "r", encoding="utf-8") as file:
@ -430,9 +427,9 @@ async def banner_endpoint(dt: str, location: str = None, forecast: str = None, m
''' '''
Endpoint (POST) that generates a new banner image for the Obsidian daily note for a specified date, taking into account optional additional information, then updates the frontmatter if necessary. Endpoint (POST) that generates a new banner image for the Obsidian daily note for a specified date, taking into account optional additional information, then updates the frontmatter if necessary.
''' '''
debug(f"banner_endpoint requested with date: {dt} ({type(dt)})") l.debug(f"banner_endpoint requested with date: {dt} ({type(dt)})")
date_time = await gis.dt(dt) date_time = await gis.dt(dt)
debug(f"date_time after localization: {date_time} ({type(date_time)})") l.debug(f"date_time after localization: {date_time} ({type(date_time)})")
context = await generate_context(dt, location, forecast, mood, other_context) context = await generate_context(dt, location, forecast, mood, other_context)
jpg_path = await generate_banner(date_time, location, mood=mood, other_context=other_context) jpg_path = await generate_banner(date_time, location, mood=mood, other_context=other_context)
return jpg_path return jpg_path
@ -449,10 +446,10 @@ async def generate_banner(dt, location: Location = None, forecast: str = None, m
forecast = await update_dn_weather(date_time, False, location.latitude, location.longitude) forecast = await update_dn_weather(date_time, False, location.latitude, location.longitude)
prompt = await generate_context(date_time, location, forecast, mood, other_context) prompt = await generate_context(date_time, location, forecast, mood, other_context)
debug(f"Prompt: {prompt}") l.debug(f"Prompt: {prompt}")
final_path = await img.workflow(prompt, scene=OBSIDIAN_BANNER_SCENE, destination_path=destination_path) final_path = await img.workflow(prompt, scene=OBSIDIAN_BANNER_SCENE, destination_path=destination_path)
if not str(local_path) in str(final_path): if not str(local_path) in str(final_path):
info(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}") l.info(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}")
jpg_embed = f"\"![[{local_path}]]\"" jpg_embed = f"\"![[{local_path}]]\""
await update_frontmatter(date_time, "banner", jpg_embed) await update_frontmatter(date_time, "banner", jpg_embed)
return local_path return local_path
@ -481,7 +478,7 @@ async def generate_context(date_time, location: Location, forecast: str, mood: s
if geocoded_location.display_name or geocoded_location.city or geocoded_location.country: if geocoded_location.display_name or geocoded_location.city or geocoded_location.country:
return await generate_context(date_time, geocoded_location, forecast, mood, other_context) return await generate_context(date_time, geocoded_location, forecast, mood, other_context)
else: else:
warn(f"Failed to get a useable location for purposes of generating a banner, but we'll generate one anyway.") l.warning(f"Failed to get a useable location for purposes of generating a banner, but we'll generate one anyway.")
elif location and isinstance(location, str): elif location and isinstance(location, str):
display_name = f"Location: {location}\n" display_name = f"Location: {location}\n"
else: else:
@ -549,8 +546,8 @@ async def note_weather_get(
force_refresh_weather = refresh == "True" force_refresh_weather = refresh == "True"
try: try:
date_time = dt_datetime.now() if date == "0" else await gis.dt(date) date_time = dt_datetime.now() if date == "0" else await gis.dt(date)
debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in note_weather_get.") l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in note_weather_get.")
debug(f"date: {date} .. date_time: {date_time}") l.debug(f"date: {date} .. date_time: {date_time}")
content = await update_dn_weather(date_time, force_refresh_weather) #, lat, lon) content = await update_dn_weather(date_time, force_refresh_weather) #, lat, lon)
return JSONResponse(content={"forecast": content}, status_code=200) return JSONResponse(content={"forecast": content}, status_code=200)
@ -558,68 +555,68 @@ async def note_weather_get(
return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code) return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code)
except Exception as e: except Exception as e:
err(f"Error in note_weather_get: {str(e)}") l.error(f"Error in note_weather_get: {str(e)}")
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
@note.post("/update/note/{date}")
async def post_update_daily_weather_and_calendar_and_timeslips(date: str, refresh: str="False") -> PlainTextResponse:
    """Refresh the weather, events and timeslips sections of one daily note.

    Args:
        date: Date string taken from the URL path; it is resolved to a
            datetime by ``gis.dt``.
        refresh: The literal string ``"True"`` forces a weather refresh; any
            other value (including ``"true"``) does not.

    Returns:
        A Markdown link labelled "Refresh" that points back at this endpoint
        for the resolved date.
    """
    # NOTE(review): the annotation says PlainTextResponse but a bare str is
    # returned -- confirm which response class the route is meant to use.
    date_time = await gis.dt(date)
    l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in post_update_daily_weather_and_calendar_and_timeslips.")
    force_refresh_weather = refresh == "True"
    # Each step below is awaited in turn, in this order.
    await update_dn_weather(date_time, force_refresh_weather)
    await update_daily_note_events(date_time)
    await build_daily_timeslips(date_time)
    # The closing ")" was missing, leaving the Markdown link unterminated.
    return f"[Refresh]({Sys.URL}/update/note/{date_time.strftime('%Y-%m-%d')})"
async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False, lat: float = None, lon: float = None): async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False, lat: float = None, lon: float = None):
debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.") l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.")
try: try:
if lat and lon: if lat and lon:
place = await GEO.code((lat, lon)) place = await GEO.code((lat, lon))
else: else:
debug(f"Updating weather for {date_time}") l.debug(f"Updating weather for {date_time}")
places = await gis.fetch_locations(date_time) places = await gis.fetch_locations(date_time)
place = places[0] place = places[0]
lat = place.latitude lat = place.latitude
lon = place.longitude lon = place.longitude
debug(f"lat: {lat}, lon: {lon}, place: {place}") l.debug(f"lat: {lat}, lon: {lon}, place: {place}")
city = GEO.find_override_location(lat, lon) city = GEO.find_override_location(lat, lon)
if city: if city:
info(f"Using override location: {city}") l.info(f"Using override location: {city}")
else: else:
if place.city and place.city != "": if place.city and place.city != "":
city = place.city city = place.city
info(f"City in data: {city}") l.info(f"City in data: {city}")
else: else:
location = await GEO.code((lat, lon)) location = await GEO.code((lat, lon))
debug(f"location: {location}") l.debug(f"location: {location}")
city = location.name city = location.name
city = city if city else location.city city = city if city else location.city
city = city if city else location.house_number + ' ' + location.road city = city if city else location.house_number + ' ' + location.road
debug(f"City geocoded: {city}") l.debug(f"City geocoded: {city}")
# Assemble journal path # Assemble journal path
absolute_path, relative_path = assemble_journal_path(date_time, filename="Weather", extension=".md", no_timestamp = True) absolute_path, relative_path = assemble_journal_path(date_time, filename="Weather", extension=".md", no_timestamp = True)
debug(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}") l.debug(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}")
try: try:
debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather") l.debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
day = await weather.get_weather(date_time, lat, lon, force_refresh) day = await weather.get_weather(date_time, lat, lon, force_refresh)
debug(f"day information obtained from get_weather: {day}") l.debug(f"day information obtained from get_weather: {day}")
if day: if day:
DailyWeather = day.get('DailyWeather') DailyWeather = day.get('DailyWeather')
HourlyWeather = day.get('HourlyWeather') HourlyWeather = day.get('HourlyWeather')
if DailyWeather: if DailyWeather:
# debug(f"Day: {DailyWeather}") # l.debug(f"Day: {DailyWeather}")
icon = DailyWeather.get('icon') icon = DailyWeather.get('icon')
debug(f"Icon: {icon}") l.debug(f"Icon: {icon}")
weather_icon, admonition = get_icon_and_admonition(icon) if icon else (":LiSunMoon:", "ad-weather") weather_icon, admonition = get_icon_and_admonition(icon) if icon else (":LiSunMoon:", "ad-weather")
@ -688,38 +685,38 @@ async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False,
detailed_forecast += assemble_hourly_data_table(times, condition_symbols, temps, winds) detailed_forecast += assemble_hourly_data_table(times, condition_symbols, temps, winds)
detailed_forecast += f"```\n\n" detailed_forecast += f"```\n\n"
debug(f"Detailed forecast: {detailed_forecast}.") l.debug(f"Detailed forecast: {detailed_forecast}.")
with open(absolute_path, 'w', encoding='utf-8') as note_file: with open(absolute_path, 'w', encoding='utf-8') as note_file:
note_file.write(detailed_forecast) note_file.write(detailed_forecast)
debug(f"Operation complete.") l.debug(f"Operation complete.")
return narrative return narrative
else: else:
err(f"Failed to get DailyWeather from day: {day}") l.error(f"Failed to get DailyWeather from day: {day}")
else: else:
err(f"Failed to get day") l.error(f"Failed to get day")
raise HTTPException(status_code=500, detail="Failed to retrieve weather data") raise HTTPException(status_code=500, detail="Failed to retrieve weather data")
except HTTPException as e: except HTTPException as e:
err(f"HTTP error: {e}") l.error(f"HTTP error: {e}")
err(traceback.format_exc()) l.error(traceback.format_exc())
raise e raise e
except Exception as e: except Exception as e:
err(f"Error: {e}") l.error(f"Error: {e}")
err(traceback.format_exc()) l.error(traceback.format_exc())
raise HTTPException(status_code=999, detail=f"Error: {e}") raise HTTPException(status_code=999, detail=f"Error: {e}")
except ValueError as ve: except ValueError as ve:
err(f"Value error in update_dn_weather: {str(ve)}") l.error(f"Value error in update_dn_weather: {str(ve)}")
err(traceback.format_exc()) l.error(traceback.format_exc())
raise HTTPException(status_code=400, detail=f"Value error: {str(ve)}") raise HTTPException(status_code=400, detail=f"Value error: {str(ve)}")
except Exception as e: except Exception as e:
err(f"Error in update_dn_weather: {str(e)}") l.error(f"Error in update_dn_weather: {str(e)}")
err(traceback.format_exc()) l.error(traceback.format_exc())
raise HTTPException(status_code=500, detail=f"Error in update_dn_weather: {str(e)}") raise HTTPException(status_code=500, detail=f"Error in update_dn_weather: {str(e)}")
@ -728,8 +725,8 @@ def format_hourly_time(hour):
hour_12 = convert_to_12_hour_format(hour.get("datetime")) hour_12 = convert_to_12_hour_format(hour.get("datetime"))
return hour_12 return hour_12
except Exception as e: except Exception as e:
err(f"Error in format_hourly_time: {str(e)}") l.error(f"Error in format_hourly_time: {str(e)}")
err(traceback.format_exc()) l.error(traceback.format_exc())
return "" return ""
@ -740,7 +737,7 @@ def format_hourly_icon(hour, sunrise, sunset):
precip = hour.get('precip', float(0.0)) precip = hour.get('precip', float(0.0))
precip_prob = hour.get('precipprob', float(0.0)) precip_prob = hour.get('precipprob', float(0.0))
debug(f"precip: {precip}, prob: {precip_prob}") l.debug(f"precip: {precip}, prob: {precip_prob}")
sp_str = None sp_str = None
@ -764,8 +761,8 @@ def format_hourly_icon(hour, sunrise, sunset):
return formatted return formatted
except Exception as e: except Exception as e:
err(f"Error in format_hourly_special: {str(e)}") l.error(f"Error in format_hourly_special: {str(e)}")
err(traceback.format_exc()) l.error(traceback.format_exc())
return "" return ""
@ -774,8 +771,8 @@ def format_hourly_temperature(hour):
temp_str = f"{hour.get('temp', '')}˚ F" temp_str = f"{hour.get('temp', '')}˚ F"
return temp_str return temp_str
except Exception as e: except Exception as e:
err(f"Error in format_hourly_temperature: {str(e)}") l.error(f"Error in format_hourly_temperature: {str(e)}")
err(traceback.format_exc()) l.error(traceback.format_exc())
return "" return ""
@ -786,8 +783,8 @@ def format_hourly_wind(hour):
wind_str = f"{str(windspeed)}:LiWind: {winddir}" wind_str = f"{str(windspeed)}:LiWind: {winddir}"
return wind_str return wind_str
except Exception as e: except Exception as e:
err(f"Error in format_hourly_wind: {str(e)}") l.error(f"Error in format_hourly_wind: {str(e)}")
err(traceback.format_exc()) l.error(traceback.format_exc())
return "" return ""
def assemble_hourly_data_table(times, condition_symbols, temps, winds): def assemble_hourly_data_table(times, condition_symbols, temps, winds):
@ -800,7 +797,7 @@ def assemble_hourly_data_table(times, condition_symbols, temps, winds):
def get_icon_and_admonition(icon_str) -> Tuple: def get_icon_and_admonition(icon_str) -> Tuple:
debug(f"Received request for emoji {icon_str}") l.debug(f"Received request for emoji {icon_str}")
if icon_str.startswith(":") and icon_str.endswith(":"): if icon_str.startswith(":") and icon_str.endswith(":"):
return icon_str return icon_str
@ -891,7 +888,7 @@ async def format_events_as_markdown(event_data: Dict[str, Union[str, List[Dict[s
total_events = len(event_data["events"]) total_events = len(event_data["events"])
event_markdown = f"```ad-events" event_markdown = f"```ad-events"
for event in event_data["events"]: for event in event_data["events"]:
debug(f"event busy status: {event['busy']}; all day status: {event['all_day']}") l.debug(f"event busy status: {event['busy']}; all day status: {event['all_day']}")
if not event['name'].startswith('TC '): if not event['name'].startswith('TC '):
url = f"hook://ical/eventID={event['uid']}calendarID=17" url = f"hook://ical/eventID={event['uid']}calendarID=17"
if event['url']: if event['url']:
@ -960,18 +957,18 @@ async def note_events_endpoint(date: str = Query(None)):
async def update_daily_note_events(date_time: dt_datetime): async def update_daily_note_events(date_time: dt_datetime):
debug(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}") l.debug(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}")
try: try:
events = await cal.get_events(date_time, date_time) events = await cal.get_events(date_time, date_time)
debug(f"Raw events: {events}") l.debug(f"Raw events: {events}")
event_data = { event_data = {
"date": date_time.strftime('%Y-%m-%d'), "date": date_time.strftime('%Y-%m-%d'),
"events": events "events": events
} }
events_markdown = await format_events_as_markdown(event_data) events_markdown = await format_events_as_markdown(event_data)
debug(f"Markdown events: {events_markdown}") l.debug(f"Markdown events: {events_markdown}")
absolute_path, _ = assemble_journal_path(date_time, filename="Events", extension=".md", no_timestamp = True) absolute_path, _ = assemble_journal_path(date_time, filename="Events", extension=".md", no_timestamp = True)
debug(f"Writing events to file: {absolute_path}") l.debug(f"Writing events to file: {absolute_path}")
with open(absolute_path, 'w', encoding='utf-8') as note_file: with open(absolute_path, 'w', encoding='utf-8') as note_file:
note_file.write(events_markdown) note_file.write(events_markdown)
@ -979,5 +976,5 @@ async def update_daily_note_events(date_time: dt_datetime):
return events_markdown return events_markdown
except Exception as e: except Exception as e:
err(f"Error processing events: {e}") l.error(f"Error processing events: {e}")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))

View file

@ -5,15 +5,11 @@ NOTES: Haven't yet decided if this should depend on the Obsidian and Chat module
#routers/rag.py #routers/rag.py
from fastapi import APIRouter from fastapi import APIRouter
from sijapi import L from sijapi.logs import get_logger
l = get_logger(__name__)
rag = APIRouter() rag = APIRouter()
logger = L.get_module_logger("rag")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
rag.get("/rag/search") rag.get("/rag/search")
async def rag_search_endpoint(query: str, scope: str): async def rag_search_endpoint(query: str, scope: str):

View file

@ -15,14 +15,9 @@ from bs4 import BeautifulSoup
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from pathlib import Path from pathlib import Path
from sijapi import Scrape, L, Dir from sijapi import Scrape,Dir
from sijapi.logs import get_logger
logger = L.get_module_logger('scrape') l = get_logger(__name__)
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
scrape = APIRouter() scrape = APIRouter()
@ -31,24 +26,24 @@ Dir.DATA = Path(Dir.DATA).expanduser()
def save_to_json(data: List[Dict], output_file: str):
    """Write *data* as 2-space-indented JSON to ``Dir.DATA / output_file``.

    Parent directories of the target are created when missing, and the file
    is opened in write mode, so existing content is replaced.
    """
    destination = Dir.DATA / output_file
    l.info(f"Saving data to {destination}")
    # Make sure the directory chain exists before opening the file.
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open('w') as handle:
        json.dump(data, handle, indent=2)
    l.info(f"Data saved successfully to {destination}")
def load_from_json(output_file: str) -> List[Dict]:
    """Read and parse the JSON file at ``Dir.DATA / output_file``.

    Returns:
        The decoded JSON content, or an empty list when the file does not
        exist (a warning is logged in that case). Any other error, such as
        malformed JSON, propagates to the caller.
    """
    source = Dir.DATA / output_file
    l.info(f"Loading data from {source}")
    try:
        handle = open(source, 'r')
    except FileNotFoundError:
        # A missing file is treated as "nothing stored yet".
        l.warning(f"File {source} not found")
        return []
    with handle:
        return json.load(handle)
async def fetch_content(config: Any) -> str: async def fetch_content(config: Any) -> str:
info(f"Fetching content from {config.url}") l.info(f"Fetching content from {config.url}")
if config.content.js_render: if config.content.js_render:
return await fetch_with_selenium(config.url) return await fetch_with_selenium(config.url)
@ -63,7 +58,7 @@ async def fetch_content(config: Any) -> str:
elif config.content.type == 'txt': elif config.content.type == 'txt':
return await response.text() return await response.text()
else: else:
warn(f"Unsupported content type: {config.content.type}") l.warning(f"Unsupported content type: {config.content.type}")
return await response.text() return await response.text()
async def fetch_with_selenium(url: str) -> str: async def fetch_with_selenium(url: str) -> str:
@ -92,7 +87,7 @@ async def handle_json(response):
return await response.json() return await response.json()
def apply_processing_step(data: Any, step: Any) -> Any: def apply_processing_step(data: Any, step: Any) -> Any:
info(f"Applying processing step: {step.type}") l.info(f"Applying processing step: {step.type}")
if step.type == 'regex_split': if step.type == 'regex_split':
return re.split(step.pattern, data)[1:] return re.split(step.pattern, data)[1:]
elif step.type == 'keyword_filter': elif step.type == 'keyword_filter':
@ -101,11 +96,11 @@ def apply_processing_step(data: Any, step: Any) -> Any:
if isinstance(data, list): if isinstance(data, list):
return [apply_regex_extract(item, step.extractions) for item in data] return [apply_regex_extract(item, step.extractions) for item in data]
return apply_regex_extract(data, step.extractions) return apply_regex_extract(data, step.extractions)
debug(f"Unknown processing step type: {step.type}") l.debug(f"Unknown processing step type: {step.type}")
return data return data
def apply_regex_extract(text: str, extractions: List[Any]) -> Dict: def apply_regex_extract(text: str, extractions: List[Any]) -> Dict:
debug(f"Applying regex extraction on text of length {len(text)}") l.debug(f"Applying regex extraction on text of length {len(text)}")
result = {} result = {}
for extraction in extractions: for extraction in extractions:
extraction_dict = extraction.dict() if hasattr(extraction, 'dict') else extraction extraction_dict = extraction.dict() if hasattr(extraction, 'dict') else extraction
@ -122,11 +117,11 @@ def apply_regex_extract(text: str, extractions: List[Any]) -> Dict:
else: else:
result[extraction_dict['name']] = matches[-1].strip() # Take the last match result[extraction_dict['name']] = matches[-1].strip() # Take the last match
debug(f"Extracted {len(result)} items") l.debug(f"Extracted {len(result)} items")
return result return result
def apply_post_processing(data: List[Dict], post_processing: List[Any]) -> List[Dict]: def apply_post_processing(data: List[Dict], post_processing: List[Any]) -> List[Dict]:
info("Applying post-processing steps") l.info("Applying post-processing steps")
for step in post_processing: for step in post_processing:
if step.type == 'custom': if step.type == 'custom':
data = globals()[step.function](data) data = globals()[step.function](data)
@ -137,7 +132,7 @@ def data_has_changed(new_data: List[Dict], old_data: List[Dict]) -> bool:
@scrape.get("/scrape/{config_name}") @scrape.get("/scrape/{config_name}")
async def scrape_site(config_name: str): async def scrape_site(config_name: str):
info(f"Starting scrape operation for {config_name}") l.info(f"Starting scrape operation for {config_name}")
if not hasattr(Scrape, 'configurations'): if not hasattr(Scrape, 'configurations'):
# If 'configurations' doesn't exist, assume the entire Scrape object is the configuration # If 'configurations' doesn't exist, assume the entire Scrape object is the configuration
@ -162,14 +157,14 @@ async def scrape_site(config_name: str):
if data_has_changed(processed_data, previous_data): if data_has_changed(processed_data, previous_data):
save_to_json(processed_data, output_file) save_to_json(processed_data, output_file)
info("Scrape completed with updates") l.info("Scrape completed with updates")
return {"message": "Site updated", "data": processed_data} return {"message": "Site updated", "data": processed_data}
else: else:
info("Scrape completed with no updates") l.info("Scrape completed with no updates")
return {"message": "No updates", "data": processed_data} return {"message": "No updates", "data": processed_data}
def apply_post_processing(data: List[Dict], post_processing: List[Any]) -> List[Dict]: def apply_post_processing(data: List[Dict], post_processing: List[Any]) -> List[Dict]:
info("Applying post-processing steps") l.info("Applying post-processing steps")
for step in post_processing: for step in post_processing:
if step.type == 'regex_extract': if step.type == 'regex_extract':
for entry in data: for entry in data:

View file

@ -33,20 +33,15 @@ from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
from sijapi import ( from sijapi import (
L, API, Serve, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY, Sys, Serve, Db, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY,
COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR, COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR,
MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, IMG_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, IMG_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR
) )
from sijapi.classes import WidgetUpdate from sijapi.classes import WidgetUpdate
from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path
from sijapi.routers import gis from sijapi.routers import gis
from sijapi.logs import get_logger
logger = L.get_module_logger("serve") l = get_logger(__name__)
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.err(text)
def crit(text: str): logger.critical(text)
serve = APIRouter() serve = APIRouter()
templates = Jinja2Templates(directory=Path(__file__).parent.parent / "sites") templates = Jinja2Templates(directory=Path(__file__).parent.parent / "sites")
@ -85,13 +80,13 @@ async def get_file_endpoint(file_path: str):
date_time = await gis.dt(file_path); date_time = await gis.dt(file_path);
absolute_path, local_path = assemble_journal_path(date_time, no_timestamp = True) absolute_path, local_path = assemble_journal_path(date_time, no_timestamp = True)
except ValueError as e: except ValueError as e:
debug(f"Unable to parse {file_path} as a date, now trying to use it as a local path") l.debug(f"Unable to parse {file_path} as a date, now trying to use it as a local path")
absolute_path = OBSIDIAN_VAULT_DIR / file_path absolute_path = OBSIDIAN_VAULT_DIR / file_path
if not absolute_path.suffix: if not absolute_path.suffix:
absolute_path = Path(absolute_path.with_suffix(".md")) absolute_path = Path(absolute_path.with_suffix(".md"))
if not absolute_path.is_file(): if not absolute_path.is_file():
warn(f"{absolute_path} is not a valid file it seems.") l.warning(f"{absolute_path} is not a valid file it seems.")
elif absolute_path.suffix == '.md': elif absolute_path.suffix == '.md':
try: try:
with open(absolute_path, 'r', encoding='utf-8') as file: with open(absolute_path, 'r', encoding='utf-8') as file:
@ -125,11 +120,11 @@ async def hook_alert(request: Request):
async def notify(alert: str): async def notify(alert: str):
fail = True fail = True
try: try:
if API.EXTENSIONS.shellfish: if Sys.EXTENSIONS.shellfish:
await notify_shellfish(alert) await notify_shellfish(alert)
fail = False fail = False
if API.EXTENSIONS.macnotify: if Sys.EXTENSIONS.macnotify:
if TS_ID == MAC_ID: if TS_ID == MAC_ID:
await notify_local(alert) await notify_local(alert)
fail = False fail = False
@ -140,10 +135,10 @@ async def notify(alert: str):
fail = True fail = True
if fail == False: if fail == False:
info(f"Delivered alert: {alert}") l.info(f"Delivered alert: {alert}")
return {"message": alert} return {"message": alert}
else: else:
crit(f"Failed to deliver alert: {alert}") l.critical(f"Failed to deliver alert: {alert}")
return {"message": f"Failed to deliver alert: {alert}"} return {"message": f"Failed to deliver alert: {alert}"}
async def notify_local(message: str): async def notify_local(message: str):
@ -165,7 +160,7 @@ async def notify_remote(host: str, message: str, username: str = None, password:
ssh.close() ssh.close()
if API.EXTENSIONS.shellfish: if Sys.EXTENSIONS.shellfish:
async def notify_shellfish(alert: str): async def notify_shellfish(alert: str):
key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b" key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b"
user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm" user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm"
@ -250,14 +245,14 @@ if API.EXTENSIONS.shellfish:
return result.stdout return result.stdout
if API.EXTENSIONS.courtlistener: if Sys.EXTENSIONS.courtlistener:
with open(CASETABLE_PATH, 'r') as file: with open(CASETABLE_PATH, 'r') as file:
CASETABLE = json.load(file) CASETABLE = json.load(file)
@serve.post("/cl/search") @serve.post("/cl/search")
async def hook_cl_search(request: Request, bg_tasks: BackgroundTasks): async def hook_cl_search(request: Request, bg_tasks: BackgroundTasks):
client_ip = request.client.host client_ip = request.client.host
debug(f"Received request from IP: {client_ip}") l.debug(f"Received request from IP: {client_ip}")
data = await request.json() data = await request.json()
payload = data['payload'] payload = data['payload']
results = data['payload']['results'] results = data['payload']['results']
@ -275,7 +270,7 @@ if API.EXTENSIONS.courtlistener:
@serve.post("/cl/docket") @serve.post("/cl/docket")
async def hook_cl_docket(request: Request): async def hook_cl_docket(request: Request):
client_ip = request.client.host client_ip = request.client.host
debug(f"Received request from IP: {client_ip}") l.debug(f"Received request from IP: {client_ip}")
data = await request.json() data = await request.json()
await cl_docket(data, client_ip) await cl_docket(data, client_ip)
@ -312,14 +307,14 @@ if API.EXTENSIONS.courtlistener:
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response: async with session.get(url, headers=headers) as response:
if response.status == 200: if response.status == 200:
debug(f"Fetching CourtListener docket information for {docket}...") l.debug(f"Fetching CourtListener docket information for {docket}...")
data = await response.json() data = await response.json()
court_docket = data['results'][0]['docket_number_core'] court_docket = data['results'][0]['docket_number_core']
court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number
case_name = data['results'][0]['case_name'] case_name = data['results'][0]['case_name']
debug(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.") l.debug(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
else: else:
debug("Failed to fetch data from CourtListener API.") l.debug("Failed to fetch data from CourtListener API.")
court_docket = 'NoCourtDocket' court_docket = 'NoCourtDocket'
case_name = 'NoCaseName' case_name = 'NoCaseName'
@ -329,12 +324,12 @@ if API.EXTENSIONS.courtlistener:
if filepath_ia: if filepath_ia:
file_url = filepath_ia file_url = filepath_ia
debug(f"Found IA file at {file_url}.") l.debug(f"Found IA file at {file_url}.")
elif filepath_local: elif filepath_local:
file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}" file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
debug(f"Found local file at {file_url}.") l.debug(f"Found local file at {file_url}.")
else: else:
debug(f"No file URL found in filepath_ia or filepath_local for one of the documents.") l.debug(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
continue continue
document_number = document.get('document_number', 'NoDocumentNumber') document_number = document.get('document_number', 'NoDocumentNumber')
@ -345,7 +340,7 @@ if API.EXTENSIONS.courtlistener:
target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name
target_path.parent.mkdir(parents=True, exist_ok=True) target_path.parent.mkdir(parents=True, exist_ok=True)
await cl_download_file(file_url, target_path, session) await cl_download_file(file_url, target_path, session)
debug(f"Downloaded {file_name} to {target_path}") l.debug(f"Downloaded {file_name} to {target_path}")
def cl_case_details(docket): def cl_case_details(docket):
@ -360,18 +355,18 @@ if API.EXTENSIONS.courtlistener:
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36' 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
} }
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
debug(f"Attempting to download {url} to {path}.") l.debug(f"Attempting to download {url} to {path}.")
try: try:
async with session.get(url, headers=headers, allow_redirects=True) as response: async with session.get(url, headers=headers, allow_redirects=True) as response:
if response.status == 403: if response.status == 403:
err(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.") l.error(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
return return
response.raise_for_status() response.raise_for_status()
# Check if the response content type is a PDF # Check if the response content type is a PDF
content_type = response.headers.get('Content-Type') content_type = response.headers.get('Content-Type')
if content_type != 'application/pdf': if content_type != 'application/pdf':
err(f"Invalid content type: {content_type}. Skipping download.") l.error(f"Invalid content type: {content_type}. Skipping download.")
return return
# Create an in-memory buffer to store the downloaded content # Create an in-memory buffer to store the downloaded content
@ -386,7 +381,7 @@ if API.EXTENSIONS.courtlistener:
try: try:
PdfReader(buffer) PdfReader(buffer)
except Exception as e: except Exception as e:
err(f"Invalid PDF content: {str(e)}. Skipping download.") l.error(f"Invalid PDF content: {str(e)}. Skipping download.")
return return
# If the PDF is valid, write the content to the file on disk # If the PDF is valid, write the content to the file on disk
@ -395,7 +390,7 @@ if API.EXTENSIONS.courtlistener:
file.write(buffer.getvalue()) file.write(buffer.getvalue())
except Exception as e: except Exception as e:
err(f"Error downloading file: {str(e)}") l.error(f"Error downloading file: {str(e)}")
async def cl_search_process_result(result): async def cl_search_process_result(result):
@ -404,7 +399,7 @@ if API.EXTENSIONS.courtlistener:
court_id = result.get('court_id') court_id = result.get('court_id')
case_name_short = result.get('caseNameShort') case_name_short = result.get('caseNameShort')
case_name = result.get('caseName') case_name = result.get('caseName')
debug(f"Received payload for case {case_name} ({court_id}) and download url {download_url}") l.debug(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")
court_folder = court_id court_folder = court_id
@ -418,9 +413,9 @@ if API.EXTENSIONS.courtlistener:
target_path.parent.mkdir(parents=True, exist_ok=True) target_path.parent.mkdir(parents=True, exist_ok=True)
await cl_download_file(download_url, target_path, session) await cl_download_file(download_url, target_path, session)
debug(f"Downloaded {file_name} to {target_path}") l.debug(f"Downloaded {file_name} to {target_path}")
if API.EXTENSIONS.url_shortener: if Sys.EXTENSIONS.url_shortener:
@serve.get("/s", response_class=HTMLResponse)
async def shortener_form(request: Request):
    """Serve the URL-shortener form page.

    Renders the ``shortener.html`` template with the incoming request as
    the only context entry.
    """
    context = {"request": request}
    return templates.TemplateResponse("shortener.html", context)
@ -433,7 +428,7 @@ if API.EXTENSIONS.url_shortener:
if len(custom_code) != 3 or not custom_code.isalnum(): if len(custom_code) != 3 or not custom_code.isalnum():
return templates.TemplateResponse("shortener.html", {"request": request, "error": "Custom code must be 3 alphanumeric characters"}) return templates.TemplateResponse("shortener.html", {"request": request, "error": "Custom code must be 3 alphanumeric characters"})
existing = await API.execute_read_query('SELECT 1 FROM short_urls WHERE short_code = $1', custom_code, table_name="short_urls") existing = await Db.execute_read('SELECT 1 FROM short_urls WHERE short_code = $1', custom_code, table_name="short_urls")
if existing: if existing:
return templates.TemplateResponse("shortener.html", {"request": request, "error": "Custom code already in use"}) return templates.TemplateResponse("shortener.html", {"request": request, "error": "Custom code already in use"})
@ -441,13 +436,13 @@ if API.EXTENSIONS.url_shortener:
else: else:
chars = string.ascii_letters + string.digits chars = string.ascii_letters + string.digits
while True: while True:
debug(f"FOUND THE ISSUE") l.debug(f"FOUND THE ISSUE")
short_code = ''.join(random.choice(chars) for _ in range(3)) short_code = ''.join(random.choice(chars) for _ in range(3))
existing = await API.execute_read_query('SELECT 1 FROM short_urls WHERE short_code = $1', short_code, table_name="short_urls") existing = await Db.execute_read('SELECT 1 FROM short_urls WHERE short_code = $1', short_code, table_name="short_urls")
if not existing: if not existing:
break break
await API.execute_write_query( await Db.execute_write(
'INSERT INTO short_urls (short_code, long_url) VALUES ($1, $2)', 'INSERT INTO short_urls (short_code, long_url) VALUES ($1, $2)',
short_code, long_url, short_code, long_url,
table_name="short_urls" table_name="short_urls"
@ -459,7 +454,7 @@ if API.EXTENSIONS.url_shortener:
@serve.get("/{short_code}") @serve.get("/{short_code}")
async def redirect_short_url(short_code: str): async def redirect_short_url(short_code: str):
results = await API.execute_read_query( results = await Db.execute_read(
'SELECT long_url FROM short_urls WHERE short_code = $1', 'SELECT long_url FROM short_urls WHERE short_code = $1',
short_code, short_code,
table_name="short_urls" table_name="short_urls"
@ -474,7 +469,7 @@ if API.EXTENSIONS.url_shortener:
raise HTTPException(status_code=404, detail="Long URL not found") raise HTTPException(status_code=404, detail="Long URL not found")
# Increment click count (you may want to do this asynchronously) # Increment click count (you may want to do this asynchronously)
await API.execute_write_query( await Db.execute_write(
'INSERT INTO click_logs (short_code, clicked_at) VALUES ($1, $2)', 'INSERT INTO click_logs (short_code, clicked_at) VALUES ($1, $2)',
short_code, datetime.now(), short_code, datetime.now(),
table_name="click_logs" table_name="click_logs"
@ -485,7 +480,7 @@ if API.EXTENSIONS.url_shortener:
@serve.get("/analytics/{short_code}") @serve.get("/analytics/{short_code}")
async def get_analytics(short_code: str): async def get_analytics(short_code: str):
url_info = await API.execute_read_query( url_info = await Db.execute_read(
'SELECT long_url, created_at FROM short_urls WHERE short_code = $1', 'SELECT long_url, created_at FROM short_urls WHERE short_code = $1',
short_code, short_code,
table_name="short_urls" table_name="short_urls"
@ -493,13 +488,13 @@ if API.EXTENSIONS.url_shortener:
if not url_info: if not url_info:
raise HTTPException(status_code=404, detail="Short URL not found") raise HTTPException(status_code=404, detail="Short URL not found")
click_count = await API.execute_read_query( click_count = await Db.execute_read(
'SELECT COUNT(*) FROM click_logs WHERE short_code = $1', 'SELECT COUNT(*) FROM click_logs WHERE short_code = $1',
short_code, short_code,
table_name="click_logs" table_name="click_logs"
) )
clicks = await API.execute_read_query( clicks = await Db.execute_read(
'SELECT clicked_at, ip_address, user_agent FROM click_logs WHERE short_code = $1 ORDER BY clicked_at DESC LIMIT 100', 'SELECT clicked_at, ip_address, user_agent FROM click_logs WHERE short_code = $1 ORDER BY clicked_at DESC LIMIT 100',
short_code, short_code,
table_name="click_logs" table_name="click_logs"

View file

@ -8,15 +8,12 @@ import httpx
import socket import socket
from fastapi import APIRouter from fastapi import APIRouter
from tailscale import Tailscale from tailscale import Tailscale
from sijapi import L, API, TS_ID from sijapi import Sys, TS_ID
from sijapi.logs import get_logger
l = get_logger(__name__)
sys = APIRouter()
sys = APIRouter(tags=["public", "trusted", "private"])
logger = L.get_module_logger("health")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
@sys.get("/health") @sys.get("/health")
def get_health(): def get_health():
@ -28,7 +25,7 @@ def get_health() -> str:
@sys.get("/routers")
def get_routers() -> list[str]:
    """Return the names of the currently active router modules.

    Walks the attributes stored on ``Sys.MODULES`` and keeps every name
    whose value is truthy.

    The return annotation is ``list[str]`` because the function returns a
    list; FastAPI derives the response model from this annotation, so the
    former ``-> str`` did not describe the payload actually returned.
    """
    return [
        module
        for module, is_active in Sys.MODULES.__dict__.items()
        if is_active
    ]
@sys.get("/ip") @sys.get("/ip")
@ -36,7 +33,7 @@ def get_local_ip():
"""Get the server's local IP address.""" """Get the server's local IP address."""
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try: try:
s.connect((f'{API.SUBNET_BROADCAST}', 1)) s.connect((f'{Sys.SUBNET_BROADCAST}', 1))
IP = s.getsockname()[0] IP = s.getsockname()[0]
except Exception: except Exception:
IP = '127.0.0.1' IP = '127.0.0.1'
@ -54,7 +51,7 @@ async def get_wan_ip():
wan_info = response.json() wan_info = response.json()
return wan_info.get('ip', 'Unavailable') return wan_info.get('ip', 'Unavailable')
except Exception as e: except Exception as e:
err(f"Error fetching WAN IP: {e}") l.error(f"Error fetching WAN IP: {e}")
return "Unavailable" return "Unavailable"
@sys.get("/ts_ip") @sys.get("/ts_ip")

View file

@ -27,17 +27,12 @@ from typing import Optional, List, Dict, Union, Tuple
from collections import defaultdict from collections import defaultdict
from dotenv import load_dotenv from dotenv import load_dotenv
from traceback import format_exc from traceback import format_exc
from sijapi import L, TIMING_API_KEY, TIMING_API_URL from sijapi import TIMING_API_KEY, TIMING_API_URL
from sijapi.routers import gis from sijapi.routers import gis
from sijapi.logs import get_logger
l = get_logger(__name__)
timing = APIRouter(tags=["private"]) timing = APIRouter(tags=["private"])
logger = L.get_module_logger("timing")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
script_directory = os.path.dirname(os.path.abspath(__file__)) script_directory = os.path.dirname(os.path.abspath(__file__))
@ -67,17 +62,17 @@ async def post_time_entry_to_timing(entry: Dict):
'Accept': 'application/json', 'Accept': 'application/json',
'X-Time-Zone': 'America/Los_Angeles' 'X-Time-Zone': 'America/Los_Angeles'
} }
debug(f"Received entry: {entry}") l.debug(f"Received entry: {entry}")
response = None # Initialize response response = None # Initialize response
try: try:
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
response = await client.post(url, headers=headers, json=entry) response = await client.post(url, headers=headers, json=entry)
response.raise_for_status() # This will only raise for 4xx and 5xx responses response.raise_for_status() # This will only raise for 4xx and 5xx responses
except httpx.HTTPStatusError as exc: except httpx.HTTPStatusError as exc:
debug(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}") l.debug(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}")
raise HTTPException(status_code=exc.response.status_code, detail=str(exc.response.text)) raise HTTPException(status_code=exc.response.status_code, detail=str(exc.response.text))
except Exception as exc: except Exception as exc:
debug(f"General exception caught: {exc}") l.debug(f"General exception caught: {exc}")
raise HTTPException(status_code=500, detail="An unexpected error occurred") raise HTTPException(status_code=500, detail="An unexpected error occurred")
if response: if response:

View file

@ -26,17 +26,12 @@ import tempfile
import random import random
import re import re
import os import os
from sijapi import L, API, Dir, Tts, TTS_SEGMENTS_DIR, VOICE_DIR, TTS_OUTPUT_DIR from sijapi import Sys, Dir, Tts, TTS_SEGMENTS_DIR, VOICE_DIR, TTS_OUTPUT_DIR
from sijapi.utilities import sanitize_filename from sijapi.utilities import sanitize_filename
from sijapi.logs import get_logger
l = get_logger(__name__)
### INITIALIZATIONS ### tts = APIRouter()
tts = APIRouter(tags=["trusted", "private"])
logger = L.get_module_logger("tts")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
DEVICE = torch.device('cpu') DEVICE = torch.device('cpu')
@ -53,7 +48,7 @@ async def list_11l_voices():
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
try: try:
response = await client.get(url, headers=headers) response = await client.get(url, headers=headers)
debug(f"Response: {response}") l.debug(f"Response: {response}")
if response.status_code == 200: if response.status_code == 200:
voices_data = response.json().get("voices", []) voices_data = response.json().get("voices", [])
formatted_list = "" formatted_list = ""
@ -63,7 +58,7 @@ async def list_11l_voices():
formatted_list += f"{name}: `{id}`\n" formatted_list += f"{name}: `{id}`\n"
except Exception as e: except Exception as e:
err(f"Error determining voice ID: {e}") l.error(f"Error determining voice ID: {e}")
return PlainTextResponse(formatted_list, status_code=200) return PlainTextResponse(formatted_list, status_code=200)
@ -73,18 +68,18 @@ async def select_voice(voice_name: str) -> str:
try: try:
# Case Insensitive comparison # Case Insensitive comparison
voice_name_lower = voice_name.lower() voice_name_lower = voice_name.lower()
debug(f"Looking for {voice_name_lower}") l.debug(f"Looking for {voice_name_lower}")
for item in VOICE_DIR.iterdir(): for item in VOICE_DIR.iterdir():
debug(f"Checking {item.name.lower()}") l.debug(f"Checking {item.name.lower()}")
if item.name.lower() == f"{voice_name_lower}.wav": if item.name.lower() == f"{voice_name_lower}.wav":
debug(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.") l.debug(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.")
return item return item
err(f"Voice file not found") l.error(f"Voice file not found")
raise HTTPException(status_code=404, detail="Voice file not found") raise HTTPException(status_code=404, detail="Voice file not found")
except Exception as e: except Exception as e:
err(f"Voice file not found: {e}") l.error(f"Voice file not found: {e}")
return None return None
@ -119,51 +114,51 @@ async def generate_speech_endpoint(
else: else:
return await generate_speech(bg_tasks, text_content, voice, voice_file, model, speed, podcast) return await generate_speech(bg_tasks, text_content, voice, voice_file, model, speed, podcast)
except Exception as e: except Exception as e:
err(f"Error in TTS: {e}") l.error(f"Error in TTS: {e}")
err(traceback.format_exc()) l.error(traceback.format_exc())
raise HTTPException(status_code=666, detail="error in TTS") raise HTTPException(status_code=666, detail="error in TTS")
async def determine_voice_id(voice_name: str) -> str:
    """Resolve a voice name (or raw voice id) to an ElevenLabs voice id.

    Resolution order:

    1. The ``elevenlabs.voices.<voice_name>`` entry of the ``Tts`` config.
    2. The ElevenLabs ``/v1/voices`` listing, matching either the exact
       ``voice_id`` or the voice ``name`` case-insensitively.
    3. The configured default voice (``Tts.elevenlabs.default``).
    4. The first voice present in ``Tts.elevenlabs.voices``.

    Any exception raised while querying the API is logged and swallowed so
    that the configured fallbacks still apply.

    Args:
        voice_name: Configured voice key, ElevenLabs voice name, or voice id.

    Returns:
        The voice id to use for the ElevenLabs text-to-speech request.
    """
    l.debug(f"Searching for voice id for {voice_name}")
    l.debug(f"Tts.elevenlabs.voices: {Tts.elevenlabs.voices}")

    # Check if the voice is in the configured voices
    if voice_name and Tts.has_key(f'elevenlabs.voices.{voice_name}'):
        voice_id = Tts.get_value(f'elevenlabs.voices.{voice_name}')
        l.debug(f"Found voice ID in config - {voice_id}")
        return voice_id

    # The API key is deliberately kept out of this message: it is a secret
    # and must not end up in log files.
    l.debug("Requested voice not among the voices specified in config/tts.yaml. Checking with ElevenLabs API.")
    url = "https://api.elevenlabs.io/v1/voices"
    headers = {"xi-api-key": Tts.elevenlabs.key}
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(url, headers=headers)
            l.debug(f"Response status: {response.status_code}")
            if response.status_code == 200:
                voices_data = response.json().get("voices", [])
                for voice in voices_data:
                    if voice_name == voice["voice_id"] or (voice_name and voice_name.lower() == voice["name"].lower()):
                        l.debug(f"Found voice ID from API - {voice['voice_id']}")
                        return voice["voice_id"]
            else:
                l.error(f"Failed to get voices from ElevenLabs API. Status code: {response.status_code}")
                l.error(f"Response content: {response.text}")
        except Exception as e:
            # Best effort: fall through to the configured default below.
            l.error(f"Error determining voice ID: {e}")

    l.warning(f"Voice '{voice_name}' not found; using the default specified in config/tts.yaml: {Tts.elevenlabs.default}")
    if Tts.has_key(f'elevenlabs.voices.{Tts.elevenlabs.default}'):
        return Tts.get_value(f'elevenlabs.voices.{Tts.elevenlabs.default}')

    l.error(f"Default voice '{Tts.elevenlabs.default}' not found in configuration. Using first available voice.")
    first_voice = next(iter(vars(Tts.elevenlabs.voices)))
    return Tts.get_value(f'elevenlabs.voices.{first_voice}')
async def elevenlabs_tts(model: str, input_text: str, voice: Optional[str], title: str = None, output_dir: str = None): async def elevenlabs_tts(model: str, input_text: str, voice: Optional[str], title: str = None, output_dir: str = None):
if getattr(API.EXTENSIONS, 'elevenlabs', False): if getattr(Sys.EXTENSIONS, 'elevenlabs', False):
voice_id = await determine_voice_id(voice) voice_id = await determine_voice_id(voice)
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}" url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
@ -187,11 +182,11 @@ async def elevenlabs_tts(model: str, input_text: str, voice: Optional[str], titl
raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API") raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")
except Exception as e: except Exception as e:
err(f"Error from Elevenlabs API: {e}") l.error(f"Error from Elevenlabs API: {e}")
raise HTTPException(status_code=500, detail=f"Error from ElevenLabs API: {e}") raise HTTPException(status_code=500, detail=f"Error from ElevenLabs API: {e}")
else: else:
warn(f"elevenlabs_tts called but ElevenLabs module is not enabled in config.") l.warning(f"elevenlabs_tts called but ElevenLabs module is not enabled in config.")
raise HTTPException(status_code=400, detail="ElevenLabs TTS is not enabled") raise HTTPException(status_code=400, detail="ElevenLabs TTS is not enabled")
async def generate_speech( async def generate_speech(
@ -205,13 +200,13 @@ async def generate_speech(
title: str = None, title: str = None,
output_dir = None, output_dir = None,
) -> str: ) -> str:
debug(f"Entering generate_speech function") l.debug(f"Entering generate_speech function")
debug(f"API.EXTENSIONS: {API.EXTENSIONS}") l.debug(f"Sys.EXTENSIONS: {Sys.EXTENSIONS}")
debug(f"Type of API.EXTENSIONS: {type(API.EXTENSIONS)}") l.debug(f"Type of Sys.EXTENSIONS: {type(Sys.EXTENSIONS)}")
debug(f"Dir of API.EXTENSIONS: {dir(API.EXTENSIONS)}") l.debug(f"Dir of Sys.EXTENSIONS: {dir(Sys.EXTENSIONS)}")
debug(f"Tts config: {Tts}") l.debug(f"Tts config: {Tts}")
debug(f"Type of Tts: {type(Tts)}") l.debug(f"Type of Tts: {type(Tts)}")
debug(f"Dir of Tts: {dir(Tts)}") l.debug(f"Dir of Tts: {dir(Tts)}")
use_output_dir = Path(output_dir) if output_dir else TTS_OUTPUT_DIR use_output_dir = Path(output_dir) if output_dir else TTS_OUTPUT_DIR
@ -222,26 +217,26 @@ async def generate_speech(
title = title if title else "TTS audio" title = title if title else "TTS audio"
output_path = use_output_dir / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')} {title}.wav" output_path = use_output_dir / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')} {title}.wav"
debug(f"Model: {model}") l.debug(f"Model: {model}")
debug(f"Voice: {voice}") l.debug(f"Voice: {voice}")
debug(f"Tts.elevenlabs: {Tts.elevenlabs}") l.debug(f"Tts.elevenlabs: {Tts.elevenlabs}")
if model == "eleven_turbo_v2" and getattr(API.EXTENSIONS, 'elevenlabs', False): if model == "eleven_turbo_v2" and getattr(Sys.EXTENSIONS, 'elevenlabs', False):
info("Using ElevenLabs.") l.info("Using ElevenLabs.")
audio_file_path = await elevenlabs_tts(model, text, voice, title, use_output_dir) audio_file_path = await elevenlabs_tts(model, text, voice, title, use_output_dir)
elif getattr(API.EXTENSIONS, 'xtts', False): elif getattr(Sys.EXTENSIONS, 'xtts', False):
info("Using XTTS2") l.info("Using XTTS2")
audio_file_path = await local_tts(text, speed, voice, voice_file, podcast, bg_tasks, title, output_path) audio_file_path = await local_tts(text, speed, voice, voice_file, podcast, bg_tasks, title, output_path)
else: else:
err(f"No TTS module enabled!") l.error(f"No TTS module enabled!")
raise ValueError("No TTS module enabled") raise ValueError("No TTS module enabled")
if not audio_file_path: if not audio_file_path:
raise ValueError("TTS generation failed: audio_file_path is empty or None") raise ValueError("TTS generation failed: audio_file_path is empty or None")
elif audio_file_path.exists(): elif audio_file_path.exists():
info(f"Saved to {audio_file_path}") l.info(f"Saved to {audio_file_path}")
else: else:
warn(f"No file exists at {audio_file_path}") l.warning(f"No file exists at {audio_file_path}")
if podcast: if podcast:
podcast_path = Dir.PODCAST / audio_file_path.name podcast_path = Dir.PODCAST / audio_file_path.name
@ -249,18 +244,18 @@ async def generate_speech(
if podcast_path != audio_file_path: if podcast_path != audio_file_path:
shutil.copy(audio_file_path, podcast_path) shutil.copy(audio_file_path, podcast_path)
if podcast_path.exists(): if podcast_path.exists():
info(f"Saved to podcast path: {podcast_path}") l.info(f"Saved to podcast path: {podcast_path}")
else: else:
warn(f"Podcast mode enabled, but failed to save to {podcast_path}") l.warning(f"Podcast mode enabled, but failed to save to {podcast_path}")
if output_dir and Path(output_dir) == use_output_dir: if output_dir and Path(output_dir) == use_output_dir:
debug(f"Keeping {audio_file_path} because it was specified") l.debug(f"Keeping {audio_file_path} because it was specified")
else: else:
info(f"Podcast mode enabled and output_dir not specified so we will remove {audio_file_path}") l.info(f"Podcast mode enabled and output_dir not specified so we will remove {audio_file_path}")
bg_tasks.add_task(os.remove, audio_file_path) bg_tasks.add_task(os.remove, audio_file_path)
else: else:
warn(f"Podcast path is the same as audio file path. Using existing file.") l.warning(f"Podcast path is the same as audio file path. Using existing file.")
return podcast_path return podcast_path
@ -268,20 +263,20 @@ async def generate_speech(
except Exception as e: except Exception as e:
err(f"Failed to generate speech: {e}") l.error(f"Failed to generate speech: {e}")
err(f"Traceback: {traceback.format_exc()}") l.error(f"Traceback: {traceback.format_exc()}")
raise HTTPException(status_code=500, detail=f"Failed to generate speech: {e}") raise HTTPException(status_code=500, detail=f"Failed to generate speech: {e}")
async def get_model(voice: str = None, voice_file: UploadFile = None):
    """Pick the TTS model name for the given voice or uploaded voice file.

    Returns ``"xtts"`` when the xtts extension is enabled and either a voice
    file was supplied or ``select_voice(voice)`` yields a result; otherwise
    returns ``"eleven_turbo_v2"`` when the elevenlabs extension is enabled
    and ``determine_voice_id(voice)`` yields a result.

    The extension flags are checked before the voice lookups are awaited,
    so a disabled backend never triggers its lookup. Flags are read with
    ``getattr(..., False)``, matching ``generate_speech``, so an extension
    missing from the configuration counts as disabled.

    Args:
        voice: Optional voice name.
        voice_file: Optional uploaded reference voice file.

    Raises:
        HTTPException: 400 when no enabled backend matches the request.
    """
    xtts_enabled = getattr(Sys.EXTENSIONS, 'xtts', False)
    elevenlabs_enabled = getattr(Sys.EXTENSIONS, 'elevenlabs', False)

    if xtts_enabled and (voice_file or (voice and await select_voice(voice))):
        return "xtts"

    if elevenlabs_enabled and voice and await determine_voice_id(voice):
        return "eleven_turbo_v2"

    l.error("No model or voice specified, or no TTS module loaded")
    raise HTTPException(status_code=400, detail="No model or voice specified, or no TTS module loaded")
@ -296,7 +291,7 @@ async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> s
async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str: async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str:
if voice: if voice:
debug(f"Looking for voice: {voice}") l.debug(f"Looking for voice: {voice}")
selected_voice = await select_voice(voice) selected_voice = await select_voice(voice)
return selected_voice return selected_voice
@ -326,7 +321,7 @@ async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None)
return new_file return new_file
else: else:
debug(f"No voice specified or file provided, using default voice: {Tts.xtts.default}") l.debug(f"No voice specified or file provided, using default voice: {Tts.xtts.default}")
selected_voice = await select_voice(Tts.xtts.default) selected_voice = await select_voice(Tts.xtts.default)
return selected_voice return selected_voice
@ -343,7 +338,7 @@ async def local_tts(
output_path: Optional[Path] = None output_path: Optional[Path] = None
) -> str: ) -> str:
if API.EXTENSIONS.xtts: if Sys.EXTENSIONS.xtts:
from TTS.api import TTS from TTS.api import TTS
if output_path: if output_path:
@ -368,7 +363,7 @@ async def local_tts(
for i, segment in enumerate(segments): for i, segment in enumerate(segments):
segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav" segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav"
debug(f"Segment file path: {segment_file_path}") l.debug(f"Segment file path: {segment_file_path}")
# Run TTS in a separate thread # Run TTS in a separate thread
await asyncio.to_thread( await asyncio.to_thread(
@ -379,7 +374,7 @@ async def local_tts(
speaker_wav=[voice_file_path], speaker_wav=[voice_file_path],
language="en" language="en"
) )
debug(f"Segment file generated: {segment_file_path}") l.debug(f"Segment file generated: {segment_file_path}")
# Load and combine audio in a separate thread # Load and combine audio in a separate thread
segment_audio = await asyncio.to_thread(AudioSegment.from_wav, segment_file_path) segment_audio = await asyncio.to_thread(AudioSegment.from_wav, segment_file_path)
@ -398,7 +393,7 @@ async def local_tts(
return file_path return file_path
else: else:
warn(f"local_tts called but xtts module disabled!") l.warning(f"local_tts called but xtts module disabled!")
return None return None
@ -421,7 +416,7 @@ async def stream_tts(text_content: str, speed: float, voice: str, voice_file) ->
async def generate_tts(text: str, speed: float, voice_file_path: str) -> str: async def generate_tts(text: str, speed: float, voice_file_path: str) -> str:
if API.EXTENSIONS.xtts: if Sys.EXTENSIONS.xtts:
from TTS.api import TTS from TTS.api import TTS
output_dir = tempfile.mktemp(suffix=".wav", dir=tempfile.gettempdir()) output_dir = tempfile.mktemp(suffix=".wav", dir=tempfile.gettempdir())
@ -432,7 +427,7 @@ async def generate_tts(text: str, speed: float, voice_file_path: str) -> str:
return output_dir return output_dir
else: else:
warn(f"generate_tts called but xtts module disabled!") l.warning(f"generate_tts called but xtts module disabled!")
return None return None
@ -466,7 +461,7 @@ def split_text(text, target_length=35, max_length=50):
if segment_length + len(sentence_words) > max_length: if segment_length + len(sentence_words) > max_length:
segments.append(' '.join(current_segment)) segments.append(' '.join(current_segment))
debug(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}") l.debug(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}")
current_segment = [sentence] current_segment = [sentence]
else: else:
@ -474,7 +469,7 @@ def split_text(text, target_length=35, max_length=50):
if current_segment: if current_segment:
segments.append(' '.join(current_segment)) segments.append(' '.join(current_segment))
debug(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}") l.debug(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}")
return segments return segments
@ -486,7 +481,7 @@ def clean_text_for_tts(text: str) -> str:
text = re.sub(r'\s+', ' ', text).strip() text = re.sub(r'\s+', ' ', text).strip()
return text return text
else: else:
debug(f"No text received.") l.debug(f"No text received.")
def copy_to_podcast_dir(file_path): def copy_to_podcast_dir(file_path):

View file

@ -15,17 +15,13 @@ from typing import Dict
from datetime import datetime as dt_datetime, date as dt_date from datetime import datetime as dt_datetime, date as dt_date
from shapely.wkb import loads from shapely.wkb import loads
from binascii import unhexlify from binascii import unhexlify
from sijapi import L, VISUALCROSSING_API_KEY, TZ, API, GEO from sijapi import VISUALCROSSING_API_KEY, TZ, Sys, GEO, Db
from sijapi.utilities import haversine from sijapi.utilities import haversine
from sijapi.routers import gis from sijapi.routers import gis
from sijapi.logs import get_logger
l = get_logger(__name__)
weather = APIRouter() weather = APIRouter()
logger = L.get_module_logger("weather")
def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
@weather.get("/weather/refresh", response_class=JSONResponse) @weather.get("/weather/refresh", response_class=JSONResponse)
@ -48,7 +44,7 @@ async def get_refreshed_weather(
tz = await GEO.tz_at(lat, lon) tz = await GEO.tz_at(lat, lon)
date_time = await gis.dt(date, tz) date_time = await gis.dt(date, tz)
debug(f"Passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather") l.debug(f"Passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
day = await get_weather(date_time, lat, lon, force_refresh=True) day = await get_weather(date_time, lat, lon, force_refresh=True)
if day is None: if day is None:
@ -67,12 +63,12 @@ async def get_refreshed_weather(
return JSONResponse(content={"weather": day_dict}, status_code=200) return JSONResponse(content={"weather": day_dict}, status_code=200)
except HTTPException as e: except HTTPException as e:
err(f"HTTP Exception in get_refreshed_weather: {e.detail}") l.error(f"HTTP Exception in get_refreshed_weather: {e.detail}")
return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code) return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code)
except Exception as e: except Exception as e:
err(f"Unexpected error in get_refreshed_weather: {str(e)}") l.error(f"Unexpected error in get_refreshed_weather: {str(e)}")
err(f"Traceback: {traceback.format_exc()}") l.error(f"Traceback: {traceback.format_exc()}")
return JSONResponse(content={"detail": "An unexpected error occurred"}, status_code=500) return JSONResponse(content={"detail": "An unexpected error occurred"}, status_code=500)
@ -84,7 +80,7 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
try: try:
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude) daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
if daily_weather_data: if daily_weather_data:
debug(f"Daily weather data from db: {daily_weather_data}") l.debug(f"Daily weather data from db: {daily_weather_data}")
last_updated = str(daily_weather_data['DailyWeather'].get('last_updated')) last_updated = str(daily_weather_data['DailyWeather'].get('last_updated'))
last_updated = await gis.dt(last_updated) last_updated = await gis.dt(last_updated)
stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location')) stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location'))
@ -93,19 +89,19 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
hourly_weather = daily_weather_data.get('HourlyWeather') hourly_weather = daily_weather_data.get('HourlyWeather')
request_haversine = haversine(latitude, longitude, stored_lat, stored_lon) request_haversine = haversine(latitude, longitude, stored_lat, stored_lon)
debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}") l.debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")
if last_updated and (date_time <= dt_datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0: if last_updated and (date_time <= dt_datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0:
debug(f"Using existing data") l.debug(f"Using existing data")
fetch_new_data = False fetch_new_data = False
else: else:
fetch_new_data = True fetch_new_data = True
except Exception as e: except Exception as e:
err(f"Error checking existing weather data: {e}") l.error(f"Error checking existing weather data: {e}")
fetch_new_data = True fetch_new_data = True
if fetch_new_data: if fetch_new_data:
debug(f"Fetching new weather data") l.debug(f"Fetching new weather data")
request_date_str = date_time.strftime("%Y-%m-%d") request_date_str = date_time.strftime("%Y-%m-%d")
url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}" url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"
@ -114,9 +110,14 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
response = await client.get(url) response = await client.get(url)
if response.status_code == 200: if response.status_code == 200:
weather_data = response.json() weather_data = response.json()
store_result = await store_weather_to_db(date_time, weather_data)
if store_result != "SUCCESS": try:
raise HTTPException(status_code=500, detail=f"Failed to store weather data: {store_result}") store_result = await store_weather_to_db(date_time, weather_data)
if store_result != "SUCCESS":
raise HTTPException(status_code=500, detail=f"Failed to store weather data: {store_result}")
except Exception as e:
l.error(f"Error storing weather data: {str(e)}")
raise HTTPException(status_code=500, detail=f"Error storing weather data: {str(e)}")
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude) daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
if daily_weather_data is None: if daily_weather_data is None:
@ -126,8 +127,8 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
except HTTPException: except HTTPException:
raise raise
except Exception as e: except Exception as e:
err(f"Exception during API call or data storage: {e}") l.error(f"Exception during API call or data storage: {e}")
err(f"Traceback: {traceback.format_exc()}") l.error(f"Traceback: {traceback.format_exc()}")
raise HTTPException(status_code=500, detail=f"Error fetching or storing weather data: {str(e)}") raise HTTPException(status_code=500, detail=f"Error fetching or storing weather data: {str(e)}")
if daily_weather_data is None: if daily_weather_data is None:
@ -136,7 +137,7 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
return daily_weather_data return daily_weather_data
# weather.py
async def store_weather_to_db(date_time: dt_datetime, weather_data: dict): async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
try: try:
@ -154,46 +155,46 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
elevation = await GEO.elevation(latitude, longitude) elevation = await GEO.elevation(latitude, longitude)
location_point = f"POINTZ({longitude} {latitude} {elevation})" if elevation else None location_point = f"POINTZ({longitude} {latitude} {elevation})" if elevation else None
daily_weather_params = [ daily_weather_params = {
location_point, 'location': location_point,
await gis.dt(day_data.get('sunriseEpoch')), 'sunrise': await gis.dt(day_data.get('sunriseEpoch')),
day_data.get('sunriseEpoch'), 'sunriseepoch': day_data.get('sunriseEpoch'),
await gis.dt(day_data.get('sunsetEpoch')), 'sunset': await gis.dt(day_data.get('sunsetEpoch')),
day_data.get('sunsetEpoch'), 'sunsetepoch': day_data.get('sunsetEpoch'),
day_data.get('description'), 'description': day_data.get('description'),
day_data.get('tempmax'), 'tempmax': day_data.get('tempmax'),
day_data.get('tempmin'), 'tempmin': day_data.get('tempmin'),
day_data.get('uvindex'), 'uvindex': day_data.get('uvindex'),
day_data.get('winddir'), 'winddir': day_data.get('winddir'),
day_data.get('windspeed'), 'windspeed': day_data.get('windspeed'),
day_data.get('icon'), 'icon': day_data.get('icon'),
dt_datetime.now(tz), 'last_updated': dt_datetime.now(tz),
await gis.dt(day_data.get('datetimeEpoch')), 'datetime': await gis.dt(day_data.get('datetimeEpoch')),
day_data.get('datetimeEpoch'), 'datetimeepoch': day_data.get('datetimeEpoch'),
day_data.get('temp'), 'temp': day_data.get('temp'),
day_data.get('feelslikemax'), 'feelslikemax': day_data.get('feelslikemax'),
day_data.get('feelslikemin'), 'feelslikemin': day_data.get('feelslikemin'),
day_data.get('feelslike'), 'feelslike': day_data.get('feelslike'),
day_data.get('dew'), 'dew': day_data.get('dew'),
day_data.get('humidity'), 'humidity': day_data.get('humidity'),
day_data.get('precip'), 'precip': day_data.get('precip'),
day_data.get('precipprob'), 'precipprob': day_data.get('precipprob'),
day_data.get('precipcover'), 'precipcover': day_data.get('precipcover'),
preciptype_array, 'preciptype': preciptype_array,
day_data.get('snow'), 'snow': day_data.get('snow'),
day_data.get('snowdepth'), 'snowdepth': day_data.get('snowdepth'),
day_data.get('windgust'), 'windgust': day_data.get('windgust'),
day_data.get('pressure'), 'pressure': day_data.get('pressure'),
day_data.get('cloudcover'), 'cloudcover': day_data.get('cloudcover'),
day_data.get('visibility'), 'visibility': day_data.get('visibility'),
day_data.get('solarradiation'), 'solarradiation': day_data.get('solarradiation'),
day_data.get('solarenergy'), 'solarenergy': day_data.get('solarenergy'),
day_data.get('severerisk', 0), 'severerisk': day_data.get('severerisk', 0),
day_data.get('moonphase'), 'moonphase': day_data.get('moonphase'),
day_data.get('conditions'), 'conditions': day_data.get('conditions'),
stations_array, 'stations': stations_array,
day_data.get('source') 'source': day_data.get('source')
] }
daily_weather_query = ''' daily_weather_query = '''
INSERT INTO dailyweather ( INSERT INTO dailyweather (
@ -205,54 +206,58 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
solarradiation, solarenergy, severerisk, moonphase, conditions, solarradiation, solarenergy, severerisk, moonphase, conditions,
stations, source stations, source
) VALUES ( ) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, :location, :sunrise, :sunriseepoch, :sunset, :sunsetepoch, :description,
$16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, :tempmax, :tempmin, :uvindex, :winddir, :windspeed, :icon, :last_updated,
$29, $30, $31, $32, $33, $34, $35, $36, $37, $38 :datetime, :datetimeepoch, :temp, :feelslikemax, :feelslikemin, :feelslike,
:dew, :humidity, :precip, :precipprob, :precipcover, :preciptype,
:snow, :snowdepth, :windgust, :pressure, :cloudcover, :visibility,
:solarradiation, :solarenergy, :severerisk, :moonphase, :conditions,
:stations, :source
) RETURNING id ) RETURNING id
''' '''
daily_weather_result = await API.execute_write_query(daily_weather_query, *daily_weather_params, table_name="dailyweather") daily_weather_result = await Db.write(daily_weather_query, **daily_weather_params, table_name="dailyweather")
if not daily_weather_result: if daily_weather_result is None:
raise ValueError("Failed to insert daily weather data: no result returned") raise ValueError("Failed to insert daily weather data: no result returned")
daily_weather_id = daily_weather_result[0]['id'] daily_weather_id = daily_weather_result.fetchone()[0]
debug(f"Inserted daily weather data with id: {daily_weather_id}") l.debug(f"Inserted daily weather data with id: {daily_weather_id}")
# Hourly weather insertion # Hourly weather insertion
if 'hours' in day_data: if 'hours' in day_data:
debug(f"Processing {len(day_data['hours'])} hourly records") l.debug(f"Processing {len(day_data['hours'])} hourly records")
for hour_data in day_data['hours']: for hour_data in day_data['hours']:
hour_preciptype_array = hour_data.get('preciptype', []) or [] hour_preciptype_array = hour_data.get('preciptype', []) or []
hour_stations_array = hour_data.get('stations', []) or [] hour_stations_array = hour_data.get('stations', []) or []
hourly_weather_params = [ hourly_weather_params = {
daily_weather_id, 'daily_weather_id': str(daily_weather_id), # Convert UUID to string
await gis.dt(hour_data.get('datetimeEpoch')), 'datetime': await gis.dt(hour_data.get('datetimeEpoch')),
hour_data.get('datetimeEpoch'), 'datetimeepoch': hour_data.get('datetimeEpoch'),
hour_data.get('temp'), 'temp': hour_data.get('temp'),
hour_data.get('feelslike'), 'feelslike': hour_data.get('feelslike'),
hour_data.get('humidity'), 'humidity': hour_data.get('humidity'),
hour_data.get('dew'), 'dew': hour_data.get('dew'),
hour_data.get('precip'), 'precip': hour_data.get('precip'),
hour_data.get('precipprob'), 'precipprob': hour_data.get('precipprob'),
hour_preciptype_array, 'preciptype': hour_preciptype_array,
hour_data.get('snow'), 'snow': hour_data.get('snow'),
hour_data.get('snowdepth'), 'snowdepth': hour_data.get('snowdepth'),
hour_data.get('windgust'), 'windgust': hour_data.get('windgust'),
hour_data.get('windspeed'), 'windspeed': hour_data.get('windspeed'),
hour_data.get('winddir'), 'winddir': hour_data.get('winddir'),
hour_data.get('pressure'), 'pressure': hour_data.get('pressure'),
hour_data.get('cloudcover'), 'cloudcover': hour_data.get('cloudcover'),
hour_data.get('visibility'), 'visibility': hour_data.get('visibility'),
hour_data.get('solarradiation'), 'solarradiation': hour_data.get('solarradiation'),
hour_data.get('solarenergy'), 'solarenergy': hour_data.get('solarenergy'),
hour_data.get('uvindex'), 'uvindex': hour_data.get('uvindex'),
hour_data.get('severerisk', 0), 'severerisk': hour_data.get('severerisk', 0),
hour_data.get('conditions'), 'conditions': hour_data.get('conditions'),
hour_data.get('icon'), 'icon': hour_data.get('icon'),
hour_stations_array, 'stations': hour_stations_array,
hour_data.get('source', '') 'source': hour_data.get('source', '')
] }
hourly_weather_query = ''' hourly_weather_query = '''
INSERT INTO hourlyweather ( INSERT INTO hourlyweather (
@ -262,61 +267,68 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
solarradiation, solarenergy, uvindex, severerisk, conditions, solarradiation, solarenergy, uvindex, severerisk, conditions,
icon, stations, source icon, stations, source
) VALUES ( ) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, :daily_weather_id, :datetime, :datetimeepoch, :temp, :feelslike,
$16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26 :humidity, :dew, :precip, :precipprob, :preciptype, :snow, :snowdepth,
:windgust, :windspeed, :winddir, :pressure, :cloudcover, :visibility,
:solarradiation, :solarenergy, :uvindex, :severerisk, :conditions,
:icon, :stations, :source
) RETURNING id ) RETURNING id
''' '''
hourly_result = await API.execute_write_query(hourly_weather_query, *hourly_weather_params, table_name="hourlyweather") hourly_result = await Db.write(hourly_weather_query, **hourly_weather_params, table_name="hourlyweather")
if not hourly_result: if hourly_result is None:
warn(f"Failed to insert hourly weather data for {hour_data.get('datetimeEpoch')}") l.warning(f"Failed to insert hourly weather data for {hour_data.get('datetimeEpoch')}")
else: else:
debug(f"Inserted hourly weather data with id: {hourly_result[0]['id']}") hourly_id = hourly_result.fetchone()[0]
l.debug(f"Inserted hourly weather data with id: {hourly_id}")
return "SUCCESS" return "SUCCESS"
except Exception as e: except Exception as e:
err(f"Error in weather storage: {e}") l.error(f"Error in weather storage: {e}")
err(f"Traceback: {traceback.format_exc()}") l.error(f"Traceback: {traceback.format_exc()}")
return "FAILURE" return "FAILURE"
async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude: float):
    """Look up stored weather for a date near the given coordinates.

    Runs two reads through ``Db.read``: first the single ``dailyweather`` row
    whose ``datetime`` falls on ``date_time``'s date and whose location is
    within the search radius of the point, nearest first; then every
    ``hourlyweather`` row linked to that daily row, ordered by time.

    Returns:
        ``{'DailyWeather': <daily row>, 'HourlyWeather': <hourly rows>}``, or
        ``None`` when no daily row matches or any exception occurs (the error
        and traceback are logged, not raised).
    """
    l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in get_weather_from_db.")

    query_date = date_time.date()
    try:
        # Query to get daily weather data.
        # 8046.72 is the ST_DWithin search radius (5 miles expressed in
        # metres, assuming geography distances are metric — TODO confirm).
        daily_query = '''
            SELECT * FROM dailyweather
            WHERE DATE(datetime) = :query_date
            AND ST_DWithin(location::geography, ST_MakePoint(:longitude,:latitude)::geography, 8046.72)
            ORDER BY ST_Distance(location, ST_MakePoint(:longitude2, :latitude2)::geography) ASC
            LIMIT 1
        '''
        daily_weather_records = await Db.read(
            daily_query,
            query_date=query_date,
            longitude=longitude,
            latitude=latitude,
            longitude2=longitude,
            latitude2=latitude,
            table_name='dailyweather'
        )

        if not daily_weather_records:
            l.debug(f"No daily weather data retrieved from database.")
            return None

        daily_weather_data = daily_weather_records[0]

        # Hourly rows are matched on the textual form of the daily row's id,
        # so the id is stringified on both sides of the comparison.
        hourly_query = '''
            SELECT * FROM hourlyweather
            WHERE daily_weather_id::text = :daily_weather_id
            ORDER BY datetime ASC
        '''
        hourly_weather_records = await Db.read(
            hourly_query,
            daily_weather_id=str(daily_weather_data['id']),
            table_name='hourlyweather'
        )

        day = {
            'DailyWeather': daily_weather_data,
            'HourlyWeather': hourly_weather_records,
        }

        l.debug(f"Retrieved weather data for {date_time.date()}")
        return day

    except Exception as e:
        # Best-effort lookup: callers treat None as "nothing cached".
        l.error(f"Unexpected error occurred in get_weather_from_db: {e}")
        l.error(f"Traceback: {traceback.format_exc()}")
        return None

76
sijapi/serialization.py Normal file
View file

@ -0,0 +1,76 @@
# serialization.py
import json
from typing import Any
from uuid import UUID
from decimal import Decimal
import numpy as np
import pandas as pd
from enum import Enum
from pathlib import Path
from datetime import datetime as dt_datetime, date, time
from .logs import get_logger
l = get_logger(__name__)
def serialize(obj: Any) -> Any:
    """Prepare a value for use as a database input.

    A top-level ``datetime``, ``date`` or ``time`` is handed back untouched;
    every other value is delegated to ``json_serial``.
    """
    is_temporal = isinstance(obj, (dt_datetime, date, time))
    return obj if is_temporal else json_serial(obj)
def json_serial(obj: Any) -> Any:
    """Recursively convert ``obj`` into JSON-compatible Python values.

    Conversions applied:
      * ``Enum`` members -> their serialized ``value``
      * NumPy scalars -> the native Python scalar from ``.item()``
      * ``datetime`` / ``date`` / ``time`` -> ``isoformat()`` string
      * ``Decimal`` -> ``float``; ``UUID`` and ``Path`` -> ``str``
      * ``bytes`` -> UTF-8 decoded ``str``
      * ``list`` / ``tuple`` / ``set`` / ``frozenset`` / other iterables -> ``list``
      * ``dict`` -> ``dict`` with both keys and values serialized
      * ``np.ndarray`` -> nested lists; ``DataFrame`` / ``Series`` -> ``to_dict()``
      * ``complex`` -> ``[real, imag]``; ``range`` -> start/stop/step dict
      * objects exposing ``__dict__`` -> dict of their non-underscore attributes

    ``None``, ``str``, ``int``, ``float`` and ``bool`` pass through unchanged.

    Raises:
        TypeError: if no conversion applies to ``type(obj)``.
        UnicodeDecodeError: if a ``bytes`` value is not valid UTF-8.
    """
    # Enum members are checked first: mixin enums such as ``class X(str, Enum)``
    # are also instances of str/int and would otherwise be returned as the
    # member object instead of its plain value.
    if isinstance(obj, Enum):
        return json_serial(obj.value)
    # NumPy scalars (np.int64, np.float32, np.bool_, ...) are unwrapped to the
    # native scalar and re-dispatched; most of them are not int/float/bool
    # subclasses and previously fell through to the final TypeError.
    if isinstance(obj, np.generic):
        native = obj.item()
        if isinstance(native, np.generic):
            # No native equivalent was produced; refuse rather than recurse forever.
            raise TypeError(f"Type {type(obj)} not serializable")
        return json_serial(native)
    if obj is None or isinstance(obj, (str, int, float, bool)):
        return obj
    if isinstance(obj, (dt_datetime, date, time)):
        return obj.isoformat()
    if isinstance(obj, Decimal):
        return float(obj)
    if isinstance(obj, (UUID, Path)):
        return str(obj)
    if isinstance(obj, bytes):
        return obj.decode('utf-8')
    if isinstance(obj, dict):
        return {json_serial(k): json_serial(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple, set, frozenset)):
        return [json_serial(item) for item in obj]
    if isinstance(obj, np.ndarray):
        return json_serial(obj.tolist())
    if isinstance(obj, (pd.DataFrame, pd.Series)):
        return json_serial(obj.to_dict())
    if isinstance(obj, complex):
        return [obj.real, obj.imag]
    if isinstance(obj, range):
        return {'start': obj.start, 'stop': obj.stop, 'step': obj.step}
    # Generic fallbacks: any remaining iterable becomes a list, then plain
    # objects are reduced to their public attributes.
    if hasattr(obj, '__iter__'):
        return [json_serial(item) for item in obj]
    if hasattr(obj, '__dict__'):
        return {k: json_serial(v) for k, v in obj.__dict__.items() if not k.startswith('_')}
    raise TypeError(f"Type {type(obj)} not serializable")
def json_dumps(obj: Any) -> str:
    """Serialize ``obj`` to a JSON formatted ``str`` using ``json_serial``.

    The object is normalised with ``json_serial`` before encoding. The
    ``default`` hook of ``json.dumps`` is only consulted for values the encoder
    cannot handle and never for dictionary keys, so relying on the hook alone
    rejects dicts keyed by UUID, datetime, Path and similar objects even
    though ``json_serial`` knows how to convert them. ``default`` is still
    passed as a safety net for anything that survives normalisation.

    Raises:
        TypeError: if ``obj`` contains a value ``json_serial`` cannot convert.
    """
    return json.dumps(json_serial(obj), default=json_serial)
def json_loads(json_str: str) -> Any:
    """Parse the JSON document in ``json_str`` and return the resulting Python object."""
    parsed = json.loads(json_str)
    return parsed

View file

@ -1,56 +1,47 @@
# utilities.py # utilities.py
import re import re
import os import os
from fastapi import Form import json
import re
import io import io
from io import BytesIO
import base64 import base64
import math import math
import paramiko import paramiko
from dateutil import parser
from pathlib import Path
import filetype import filetype
import shutil import shutil
import uuid import uuid
import hashlib import hashlib
import requests import requests
from requests.adapters import HTTPAdapter import asyncio
from urllib3.util.retry import Retry import aiohttp
import pandas as pd
import ipaddress
from io import BytesIO
from pathlib import Path
from dateutil import parser
from urllib.parse import urlparse from urllib.parse import urlparse
from PyPDF2 import PdfReader from PyPDF2 import PdfReader
from better_profanity import profanity from better_profanity import profanity
from adblockparser import AdblockRules from adblockparser import AdblockRules
from pdfminer.high_level import extract_text as pdfminer_extract_text from pdfminer.high_level import extract_text as pdfminer_extract_text
import pytesseract from readability import Document as ReadabilityDocument
from readability import Document
from pdf2image import convert_from_path from pdf2image import convert_from_path
from datetime import datetime as dt_datetime, date, time from datetime import datetime as dt_datetime, date, time
from typing import Optional, Union, Tuple, List, Any from typing import Optional, Union, Tuple, List, Any
import asyncio
from PIL import Image from PIL import Image
import pandas as pd
import ipaddress
from scipy.spatial import cKDTree from scipy.spatial import cKDTree
from dateutil.parser import parse as dateutil_parse from dateutil.parser import parse as dateutil_parse
from docx import Document from docx import Document
import aiohttp
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from readability import Document as ReadabilityDocument
from markdownify import markdownify as md from markdownify import markdownify as md
from sshtunnel import SSHTunnelForwarder from sshtunnel import SSHTunnelForwarder
from urllib.parse import urlparse from fastapi import Depends, HTTPException, Request, UploadFile, Form
from fastapi import Depends, HTTPException, Request, UploadFile
from fastapi.security.api_key import APIKeyHeader from fastapi.security.api_key import APIKeyHeader
from requests.adapters import HTTPAdapter
from sijapi import L, API, Archivist, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_PATH_LENGTH, ARCHIVE_DIR from urllib3.util.retry import Retry
import pytesseract
logger = L.get_module_logger('utilities') from sijapi import Sys, Dir, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_PATH_LENGTH, ARCHIVE_DIR
def debug(text: str): logger.debug(text) from sijapi.logs import get_logger
def info(text: str): logger.info(text) l = get_logger(__name__)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)
def assemble_archive_path(filename: str, extension: str = None, date_time: dt_datetime = None, subdir: str = None) -> Tuple[Path, Path]: def assemble_archive_path(filename: str, extension: str = None, date_time: dt_datetime = None, subdir: str = None) -> Tuple[Path, Path]:
@ -79,7 +70,7 @@ def assemble_archive_path(filename: str, extension: str = None, date_time: dt_da
filename = f"{day_short} {timestamp} {sanitized_base}{final_extension}" filename = f"{day_short} {timestamp} {sanitized_base}{final_extension}"
relative_path = Path(year) / month / day / filename relative_path = Path(year) / month / day / filename
absolute_path = Archivist.dir / relative_path absolute_path = Dir.ARCHIVE / relative_path
# Ensure the total path length doesn't exceed MAX_PATH_LENGTH # Ensure the total path length doesn't exceed MAX_PATH_LENGTH
while len(str(absolute_path)) > MAX_PATH_LENGTH and len(sanitized_base) > 0: while len(str(absolute_path)) > MAX_PATH_LENGTH and len(sanitized_base) > 0:
@ -138,7 +129,7 @@ def assemble_journal_path(date_time: dt_datetime, subdir: str = None, filename:
relative_path = relative_path / filename relative_path = relative_path / filename
else: else:
debug(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.") l.debug(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.")
return None, None return None, None
absolute_path = OBSIDIAN_VAULT_DIR / relative_path absolute_path = OBSIDIAN_VAULT_DIR / relative_path
@ -194,7 +185,7 @@ def contains_profanity(content: str, threshold: float = 0.01, custom_words: Opti
content_profanity_count = sum(1 for word in word_list if profanity.contains_profanity(word)) content_profanity_count = sum(1 for word in word_list if profanity.contains_profanity(word))
content_profanity_ratio = content_profanity_count / len(word_list) if word_list else 0 content_profanity_ratio = content_profanity_count / len(word_list) if word_list else 0
debug(f"Profanity ratio for content: {content_profanity_ratio}") l.debug(f"Profanity ratio for content: {content_profanity_ratio}")
return content_profanity_ratio >= threshold return content_profanity_ratio >= threshold
@ -204,15 +195,15 @@ def load_filter_lists(blocklists_dir: Path):
try: try:
with open(file_path, 'r', encoding='utf-8') as file: with open(file_path, 'r', encoding='utf-8') as file:
rules.extend(file.read().splitlines()) rules.extend(file.read().splitlines())
info(f"Loaded blocklist: {file_path.name}") l.info(f"Loaded blocklist: {file_path.name}")
except Exception as e: except Exception as e:
err(f"Error loading blocklist {file_path.name}: {str(e)}") l.error(f"Error loading blocklist {file_path.name}: {str(e)}")
return rules return rules
def initialize_adblock_rules(blocklists_dir: Path):
    """Build an ``AdblockRules`` matcher from the blocklists in ``blocklists_dir``.

    The raw rule lines come from ``load_filter_lists``; the number of rules
    loaded is logged at info level before the matcher is constructed.
    """
    rules = load_filter_lists(blocklists_dir)
    l.info(f"Initialized AdblockRules with {len(rules)} rules")
    return AdblockRules(rules)
@ -228,14 +219,14 @@ def get_extension(file):
return file_extension return file_extension
except Exception as e: except Exception as e:
err(f"Unable to get extension of {file}") l.error(f"Unable to get extension of {file}")
raise e raise e
def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LENGTH): def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LENGTH):
"""Sanitize a string to be used as a safe filename while protecting the file extension.""" """Sanitize a string to be used as a safe filename while protecting the file extension."""
debug(f"Filename before sanitization: {text}") l.debug(f"Filename before sanitization: {text}")
# Ensure text is a string # Ensure text is a string
text = str(text) text = str(text)
@ -253,7 +244,7 @@ def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LE
base_name = base_name[:max_base_length - 5].rstrip() base_name = base_name[:max_base_length - 5].rstrip()
final_filename = base_name + extension final_filename = base_name + extension
debug(f"Filename after sanitization: {final_filename}") l.debug(f"Filename after sanitization: {final_filename}")
return final_filename return final_filename
@ -264,16 +255,16 @@ def check_file_name(file_name, max_length=255):
needs_sanitization = False needs_sanitization = False
if len(file_name) > max_length: if len(file_name) > max_length:
debug(f"Filename exceeds maximum length of {max_length}: {file_name}") l.debug(f"Filename exceeds maximum length of {max_length}: {file_name}")
needs_sanitization = True needs_sanitization = True
if re.search(ALLOWED_FILENAME_CHARS, file_name): if re.search(ALLOWED_FILENAME_CHARS, file_name):
debug(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}") l.debug(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}")
needs_sanitization = True needs_sanitization = True
if re.search(r'\s{2,}', file_name): if re.search(r'\s{2,}', file_name):
debug(f"Filename contains multiple consecutive spaces: {file_name}") l.debug(f"Filename contains multiple consecutive spaces: {file_name}")
needs_sanitization = True needs_sanitization = True
if file_name != file_name.strip(): if file_name != file_name.strip():
debug(f"Filename has leading or trailing spaces: {file_name}") l.debug(f"Filename has leading or trailing spaces: {file_name}")
needs_sanitization = True needs_sanitization = True
return needs_sanitization return needs_sanitization
@ -316,13 +307,13 @@ async def ocr_pdf(file_path: str) -> str:
texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, image) for image in images)) texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, image) for image in images))
return ' '.join(texts) return ' '.join(texts)
except Exception as e: except Exception as e:
err(f"Error during OCR: {str(e)}") l.error(f"Error during OCR: {str(e)}")
return "" return ""
async def extract_text_from_pdf(file_path: str) -> str: async def extract_text_from_pdf(file_path: str) -> str:
if not await is_valid_pdf(file_path): if not await is_valid_pdf(file_path):
err(f"Invalid PDF file: {file_path}") l.error(f"Invalid PDF file: {file_path}")
return "" return ""
text = '' text = ''
@ -340,7 +331,7 @@ async def extract_text_from_pdf(file_path: str) -> str:
if text and not should_use_ocr(text, num_pages): if text and not should_use_ocr(text, num_pages):
return clean_text(text) return clean_text(text)
except Exception as e: except Exception as e:
err(f"Error extracting text with PyPDF2: {str(e)}") l.error(f"Error extracting text with PyPDF2: {str(e)}")
# If PyPDF2 extraction fails or is insufficient, fall back to pdfminer.six # If PyPDF2 extraction fails or is insufficient, fall back to pdfminer.six
try: try:
@ -348,10 +339,10 @@ async def extract_text_from_pdf(file_path: str) -> str:
if text_pdfminer and not should_use_ocr(text_pdfminer, num_pages): if text_pdfminer and not should_use_ocr(text_pdfminer, num_pages):
return clean_text(text_pdfminer) return clean_text(text_pdfminer)
except Exception as e: except Exception as e:
err(f"Error extracting text with pdfminer.six: {e}") l.error(f"Error extracting text with pdfminer.six: {e}")
# If both methods fail or are deemed insufficient, use OCR as the last resort # If both methods fail or are deemed insufficient, use OCR as the last resort
debug("Falling back to OCR for text extraction...") l.debug("Falling back to OCR for text extraction...")
return await ocr_pdf(file_path) return await ocr_pdf(file_path)
async def is_valid_pdf(file_path: str) -> bool: async def is_valid_pdf(file_path: str) -> bool:
@ -360,12 +351,12 @@ async def is_valid_pdf(file_path: str) -> bool:
kind = filetype.guess(file_path) kind = filetype.guess(file_path)
return kind.mime == 'application/pdf' return kind.mime == 'application/pdf'
except Exception as e: except Exception as e:
err(f"Error checking file type: {e}") l.error(f"Error checking file type: {e}")
return False return False
async def extract_text_from_pdf(file_path: str) -> str: async def extract_text_from_pdf(file_path: str) -> str:
if not await is_valid_pdf(file_path): if not await is_valid_pdf(file_path):
err(f"Invalid PDF file: {file_path}") l.error(f"Invalid PDF file: {file_path}")
return "" return ""
text = '' text = ''
@ -377,23 +368,23 @@ async def extract_text_from_pdf(file_path: str) -> str:
if text.strip(): # Successfully extracted text if text.strip(): # Successfully extracted text
return clean_text(text) return clean_text(text)
except Exception as e: except Exception as e:
err(f"Error extracting text with PyPDF2: {str(e)}") l.error(f"Error extracting text with PyPDF2: {str(e)}")
try: try:
text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path) text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
if text_pdfminer.strip(): # Successfully extracted text if text_pdfminer.strip(): # Successfully extracted text
return clean_text(text_pdfminer) return clean_text(text_pdfminer)
except Exception as e: except Exception as e:
err(f"Error extracting text with pdfminer.six: {str(e)}") l.error(f"Error extracting text with pdfminer.six: {str(e)}")
# Fall back to OCR # Fall back to OCR
debug("Falling back to OCR for text extraction...") l.debug("Falling back to OCR for text extraction...")
try: try:
images = convert_from_path(file_path) images = convert_from_path(file_path)
ocr_texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, img) for img in images)) ocr_texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, img) for img in images))
return ' '.join(ocr_texts).strip() return ' '.join(ocr_texts).strip()
except Exception as e: except Exception as e:
err(f"OCR failed: {str(e)}") l.error(f"OCR failed: {str(e)}")
return "" return ""
async def extract_text_from_docx(file_path: str) -> str: async def extract_text_from_docx(file_path: str) -> str:
@ -496,7 +487,7 @@ def encode_image_to_base64(image_path):
base64_str = base64.b64encode(byte_data).decode('utf-8') base64_str = base64.b64encode(byte_data).decode('utf-8')
return base64_str return base64_str
else: else:
debug(f"Error: File does not exist at {image_path}") l.debug(f"Error: File does not exist at {image_path}")
def resize_and_convert_image(image_path, max_size=2160, quality=80): def resize_and_convert_image(image_path, max_size=2160, quality=80):
with Image.open(image_path) as img: with Image.open(image_path) as img:
@ -534,13 +525,13 @@ def download_file(url, folder):
with open(filepath, 'wb') as f: with open(filepath, 'wb') as f:
f.write(response.content) f.write(response.content)
else: else:
err(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}") l.error(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}")
return None return None
else: else:
err(f"Failed to download image: {url}, status code: {response.status_code}") l.error(f"Failed to download image: {url}, status code: {response.status_code}")
return None return None
except Exception as e: except Exception as e:
err(f"Failed to download image: {url}, error: {str(e)}") l.error(f"Failed to download image: {url}, error: {str(e)}")
return None return None
return filename return filename
@ -599,7 +590,7 @@ async def run_ssh_command(server, command):
ssh.close() ssh.close()
return output, error return output, error
except Exception as e: except Exception as e:
err(f"SSH command failed for server {server.id}: {str(e)}") l.error(f"SSH command failed for server {server.id}: {str(e)}")
raise raise
@ -611,7 +602,7 @@ async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]
async with session.get(url) as response: async with session.get(url) as response:
html_content = await response.text() html_content = await response.text()
else: else:
err(f"Unable to convert nothing to markdown.") l.error(f"Unable to convert nothing to markdown.")
return None return None
# Use readability to extract the main content # Use readability to extract the main content
@ -630,33 +621,3 @@ async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]
return markdown_content return markdown_content
def json_serial(obj: Any) -> Any:
    """JSON serializer for objects not serializable by default json code.

    Intended to be passed as the ``default=`` hook of ``json.dumps``.

    Args:
        obj: The value ``json`` could not encode on its own.

    Returns:
        A JSON-compatible stand-in for ``obj``:
        - ``datetime`` / ``date`` / ``time`` -> ISO 8601 string
        - ``Decimal`` -> ``float`` (may lose precision)
        - ``UUID`` and ``Path`` -> ``str``
        - ``bytes`` -> text decoded as UTF-8
        - any other object with a ``__dict__`` -> that attribute dict

    Raises:
        TypeError: If ``obj`` matches none of the supported kinds.
        UnicodeDecodeError: If ``obj`` is ``bytes`` that are not valid UTF-8.
    """
    # Ordered (types, converter) pairs; the first matching entry wins, which
    # keeps the same precedence as a sequential chain of isinstance checks.
    converters = (
        ((datetime, date, time), lambda value: value.isoformat()),
        (Decimal, float),
        (UUID, str),
        (bytes, lambda value: value.decode('utf-8')),
        (Path, str),
    )
    for kinds, convert in converters:
        if isinstance(obj, kinds):
            return convert(obj)

    # Generic fallback: expose the instance attributes of arbitrary objects.
    if hasattr(obj, '__dict__'):
        return obj.__dict__

    raise TypeError(f"Type {type(obj)} not serializable")
def json_dumps(obj: Any) -> str:
    """Serialize ``obj`` to a JSON formatted ``str`` using the custom serializer.

    Args:
        obj: The value to encode.

    Returns:
        The JSON text. Values the ``json`` module cannot encode natively are
        first passed through ``json_serial``.

    Raises:
        TypeError: Propagated from ``json_serial`` for unsupported values.
    """
    return json.dumps(obj, default=json_serial)
def json_loads(json_str: str) -> Any:
    """Deserialize ``json_str`` to a Python object.

    Thin counterpart to ``json_dumps``; it applies no custom decoding, so
    values that were stringified on the way out (dates, UUIDs, paths) come
    back as plain strings.

    Args:
        json_str: The JSON document to parse.

    Returns:
        The decoded Python value (dict, list, str, int, float, bool or None).

    Raises:
        json.JSONDecodeError: If ``json_str`` is not valid JSON.
    """
    return json.loads(json_str)