Major update to database and logging methods
This commit is contained in:
parent
ee6ee1ed87
commit
b60e60ec1e
49 changed files with 46694 additions and 2266 deletions
|
@ -2,44 +2,41 @@
|
|||
|
||||
import os
|
||||
from pathlib import Path
|
||||
import ipaddress
|
||||
import multiprocessing
|
||||
from dotenv import load_dotenv
|
||||
from dateutil import tz
|
||||
from pathlib import Path
|
||||
from .classes import Logger, Configuration, APIConfig, Database, DirConfig, Geocoder
|
||||
from .logs import L, get_logger
|
||||
|
||||
|
||||
# INITIALization
|
||||
BASE_DIR = Path(__file__).resolve().parent
|
||||
CONFIG_DIR = BASE_DIR / "config"
|
||||
ENV_PATH = CONFIG_DIR / ".env"
|
||||
load_dotenv(ENV_PATH)
|
||||
LOGS_DIR = BASE_DIR / "logs"
|
||||
os.makedirs(LOGS_DIR, exist_ok=True)
|
||||
L = Logger("Central", LOGS_DIR)
|
||||
L.init('sys', LOGS_DIR)
|
||||
l = get_logger("init")
|
||||
|
||||
import ipaddress
|
||||
import multiprocessing
|
||||
from dateutil import tz
|
||||
from pathlib import Path
|
||||
from .database import Database
|
||||
from .classes import Config, SysConfig, DirConfig, Geocoder
|
||||
|
||||
# API essentials
|
||||
API = APIConfig.load('sys', 'secrets')
|
||||
Dir = DirConfig.load('dirs')
|
||||
Db = Database.load('sys')
|
||||
Sys = SysConfig.init('sys', 'secrets')
|
||||
Dir = DirConfig.init('dirs')
|
||||
l.debug(f"Dir configuration initialized: {Dir}")
|
||||
l.debug(f"ROUTER path: {Dir.ROUTER}")
|
||||
Db = Database.init('db')
|
||||
|
||||
# HOST = f"{API.BIND}:{API.PORT}"
|
||||
# LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost']
|
||||
# SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255')
|
||||
|
||||
MAX_CPU_CORES = min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count()/2))), multiprocessing.cpu_count())
|
||||
|
||||
IMG = Configuration.load('img', 'secrets', Dir)
|
||||
Llm = Configuration.load('llm', 'secrets', Dir)
|
||||
News = Configuration.load('news', 'secrets', Dir)
|
||||
Archivist = Configuration.load('archivist', 'secrets', Dir)
|
||||
Scrape = Configuration.load('scrape', 'secrets', Dir)
|
||||
Serve = Configuration.load('serve', 'secrets', Dir)
|
||||
Tts = Configuration.load('tts', 'secrets', Dir)
|
||||
Img = Config.init('img', 'secrets', Dir)
|
||||
Llm = Config.init('llm', 'secrets', Dir)
|
||||
News = Config.init('news', 'secrets', Dir)
|
||||
Archivist = Config.init('archivist', 'secrets', Dir)
|
||||
Scrape = Config.init('scrape', 'secrets', Dir)
|
||||
Serve = Config.init('serve', 'secrets', Dir)
|
||||
Tts = Config.init('tts', 'secrets', Dir)
|
||||
|
||||
# Directories & general paths
|
||||
ROUTER_DIR = BASE_DIR / "routers"
|
||||
DATA_DIR = BASE_DIR / "data"
|
||||
os.makedirs(DATA_DIR, exist_ok=True)
|
||||
ALERTS_DIR = DATA_DIR / "alerts"
|
||||
|
@ -172,7 +169,7 @@ CF_IP = DATA_DIR / "cf_ip.txt" # to be deprecated soon
|
|||
CF_DOMAINS_PATH = DATA_DIR / "cf_domains.json" # to be deprecated soon
|
||||
|
||||
# Caddy - not fully implemented
|
||||
API.URL = os.getenv("API.URL")
|
||||
Sys.URL = os.getenv("Sys.URL")
|
||||
CADDY_SERVER = os.getenv('CADDY_SERVER', None)
|
||||
CADDYFILE_PATH = os.getenv("CADDYFILE_PATH", "") if CADDY_SERVER is not None else None
|
||||
CADDY_API_KEY = os.getenv("CADDY_API_KEY")
|
||||
|
|
|
@ -1,81 +1,86 @@
|
|||
#!/Users/sij/miniforge3/envs/api/bin/python
|
||||
#__main__.py
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI, Request, HTTPException, Response
|
||||
from fastapi import FastAPI, Request, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
from starlette.requests import ClientDisconnect
|
||||
from hypercorn.asyncio import serve
|
||||
from hypercorn.config import Config as HypercornConfig
|
||||
import sys
|
||||
import os
|
||||
import traceback
|
||||
import asyncio
|
||||
import httpx
|
||||
import argparse
|
||||
import json
|
||||
import ipaddress
|
||||
import importlib
|
||||
from dotenv import load_dotenv
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import argparse
|
||||
from . import L, API, Db, ROUTER_DIR
|
||||
from . import Sys, Db, Dir
|
||||
from .logs import L, get_logger
|
||||
|
||||
parser = argparse.ArgumentParser(description='Personal API.')
|
||||
parser.add_argument('--log', type=str, default='INFO', help='Set overall log level (e.g., DEBUG, INFO, WARNING)')
|
||||
parser.add_argument('--debug', nargs='+', default=[], help='Set DEBUG log level for specific modules')
|
||||
parser.add_argument('--test', type=str, help='Load only the specified module.')
|
||||
args = parser.parse_args()
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description='Personal API.')
|
||||
parser.add_argument('--log', type=str, default='INFO',
|
||||
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
|
||||
help='Set overall log level (e.g., DEBUG, INFO, WARNING)')
|
||||
parser.add_argument('--debug', nargs='+', default=[],
|
||||
help='Set DEBUG log level for specific modules')
|
||||
parser.add_argument('--info', nargs='+', default=[],
|
||||
help='Set INFO log level for specific modules')
|
||||
parser.add_argument('--test', type=str, help='Load only the specified module.')
|
||||
return parser.parse_args()
|
||||
|
||||
args = parse_args()
|
||||
|
||||
# Setup logging
|
||||
L.setup_from_args(args)
|
||||
print(f"Debug modules after setup: {L.debug_modules}")
|
||||
l = get_logger("main")
|
||||
l.info(f"Logging initialized. Debug modules: {L.debug_modules}")
|
||||
l.info(f"Command line arguments: {args}")
|
||||
|
||||
logger = L.get_module_logger("main")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
l.debug(f"Current working directory: {os.getcwd()}")
|
||||
l.debug(f"__file__ path: {__file__}")
|
||||
l.debug(f"Absolute path of __file__: {os.path.abspath(__file__)}")
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
# Startup
|
||||
crit("sijapi launched")
|
||||
info(f"Arguments: {args}")
|
||||
l.critical("sijapi launched")
|
||||
l.info(f"Arguments: {args}")
|
||||
|
||||
# Log the router directory path
|
||||
l.debug(f"Router directory path: {Dir.ROUTER.absolute()}")
|
||||
l.debug(f"Router directory exists: {Dir.ROUTER.exists()}")
|
||||
l.debug(f"Router directory is a directory: {Dir.ROUTER.is_dir()}")
|
||||
l.debug(f"Contents of router directory: {list(Dir.ROUTER.iterdir())}")
|
||||
|
||||
# Load routers
|
||||
if args.test:
|
||||
load_router(args.test)
|
||||
else:
|
||||
for module_name in API.MODULES.__fields__:
|
||||
if getattr(API.MODULES, module_name):
|
||||
for module_name in Sys.MODULES.__fields__:
|
||||
if getattr(Sys.MODULES, module_name):
|
||||
load_router(module_name)
|
||||
|
||||
try:
|
||||
# Initialize sync structures on all databases
|
||||
# await API.initialize_sync()
|
||||
await Db.initialize_engines()
|
||||
|
||||
except Exception as e:
|
||||
crit(f"Error during startup: {str(e)}")
|
||||
crit(f"Traceback: {traceback.format_exc()}")
|
||||
l.critical(f"Error during startup: {str(e)}")
|
||||
l.critical(f"Traceback: {traceback.format_exc()}")
|
||||
|
||||
try:
|
||||
yield # This is where the app runs
|
||||
|
||||
finally:
|
||||
# Shutdown
|
||||
crit("Shutting down...")
|
||||
l.critical("Shutting down...")
|
||||
try:
|
||||
await asyncio.wait_for(API.close_db_pools(), timeout=20)
|
||||
crit("Database pools closed.")
|
||||
await asyncio.wait_for(Db.close(), timeout=20)
|
||||
l.critical("Database pools closed.")
|
||||
except asyncio.TimeoutError:
|
||||
crit("Timeout while closing database pools.")
|
||||
l.critical("Timeout while closing database pools.")
|
||||
except Exception as e:
|
||||
crit(f"Error during shutdown: {str(e)}")
|
||||
crit(f"Traceback: {traceback.format_exc()}")
|
||||
l.critical(f"Error during shutdown: {str(e)}")
|
||||
l.critical(f"Traceback: {traceback.format_exc()}")
|
||||
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
|
@ -87,86 +92,83 @@ app.add_middleware(
|
|||
allow_headers=['*'],
|
||||
)
|
||||
|
||||
|
||||
class SimpleAPIKeyMiddleware(BaseHTTPMiddleware):
|
||||
async def dispatch(self, request: Request, call_next):
|
||||
client_ip = ipaddress.ip_address(request.client.host)
|
||||
if request.method == "OPTIONS":
|
||||
# Allow CORS preflight requests
|
||||
return JSONResponse(status_code=200)
|
||||
if request.url.path not in API.PUBLIC:
|
||||
trusted_subnets = [ipaddress.ip_network(subnet) for subnet in API.TRUSTED_SUBNETS]
|
||||
if request.url.path not in Sys.PUBLIC:
|
||||
trusted_subnets = [ipaddress.ip_network(subnet) for subnet in Sys.TRUSTED_SUBNETS]
|
||||
if not any(client_ip in subnet for subnet in trusted_subnets):
|
||||
api_key_header = request.headers.get("Authorization")
|
||||
api_key_query = request.query_params.get("api_key")
|
||||
|
||||
# Convert API.KEYS to lowercase for case-insensitive comparison
|
||||
api_keys_lower = [key.lower() for key in API.KEYS]
|
||||
debug(f"API.KEYS (lowercase): {api_keys_lower}")
|
||||
# Convert Sys.KEYS to lowercase for case-insensitive comparison
|
||||
api_keys_lower = [key.lower() for key in Sys.KEYS]
|
||||
l.debug(f"Sys.KEYS (lowercase): {api_keys_lower}")
|
||||
|
||||
if api_key_header:
|
||||
api_key_header = api_key_header.lower().split("bearer ")[-1]
|
||||
debug(f"API key provided in header: {api_key_header}")
|
||||
l.debug(f"API key provided in header: {api_key_header}")
|
||||
if api_key_query:
|
||||
api_key_query = api_key_query.lower()
|
||||
debug(f"API key provided in query: {api_key_query}")
|
||||
l.debug(f"API key provided in query: {api_key_query}")
|
||||
|
||||
if api_key_header.lower() not in api_keys_lower and api_key_query.lower() not in api_keys_lower:
|
||||
err(f"Invalid API key provided by a requester.")
|
||||
if (api_key_header is None or api_key_header.lower() not in api_keys_lower) and \
|
||||
(api_key_query is None or api_key_query.lower() not in api_keys_lower):
|
||||
l.error(f"Invalid API key provided by a requester.")
|
||||
if api_key_header:
|
||||
debug(f"Invalid API key in header: {api_key_header}")
|
||||
l.debug(f"Invalid API key in header: {api_key_header}")
|
||||
if api_key_query:
|
||||
debug(f"Invalid API key in query: {api_key_query}")
|
||||
l.debug(f"Invalid API key in query: {api_key_query}")
|
||||
return JSONResponse(
|
||||
status_code=401,
|
||||
content={"detail": "Invalid or missing API key"}
|
||||
)
|
||||
else:
|
||||
if api_key_header.lower() in api_keys_lower:
|
||||
debug(f"Valid API key provided in header: {api_key_header}")
|
||||
if api_key_header and api_key_header.lower() in api_keys_lower:
|
||||
l.debug(f"Valid API key provided in header: {api_key_header}")
|
||||
if api_key_query and api_key_query.lower() in api_keys_lower:
|
||||
debug(f"Valid API key provided in query: {api_key_query}")
|
||||
l.debug(f"Valid API key provided in query: {api_key_query}")
|
||||
|
||||
response = await call_next(request)
|
||||
return response
|
||||
|
||||
|
||||
# Add the middleware to your FastAPI app
|
||||
app.add_middleware(SimpleAPIKeyMiddleware)
|
||||
|
||||
@app.exception_handler(HTTPException)
|
||||
async def http_exception_handler(request: Request, exc: HTTPException):
|
||||
err(f"HTTP Exception: {exc.status_code} - {exc.detail}")
|
||||
err(f"Request: {request.method} {request.url}")
|
||||
l.error(f"HTTP Exception: {exc.status_code} - {exc.detail}")
|
||||
l.error(f"Request: {request.method} {request.url}")
|
||||
return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
|
||||
|
||||
|
||||
@app.middleware("http")
|
||||
async def handle_exception_middleware(request: Request, call_next):
|
||||
try:
|
||||
response = await call_next(request)
|
||||
return response
|
||||
except Exception as exc:
|
||||
err(f"Unhandled exception in request: {request.method} {request.url}")
|
||||
err(f"Exception: {str(exc)}")
|
||||
err(f"Traceback: {traceback.format_exc()}")
|
||||
l.error(f"Unhandled exception in request: {request.method} {request.url}")
|
||||
l.error(f"Exception: {str(exc)}")
|
||||
l.error(f"Traceback: {traceback.format_exc()}")
|
||||
return JSONResponse(
|
||||
status_code=500,
|
||||
content={"detail": "Internal Server Error"}
|
||||
)
|
||||
|
||||
|
||||
@app.post("/sync/pull")
|
||||
async def pull_changes():
|
||||
info(f"Received request to /sync/pull")
|
||||
l.info(f"Received request to /sync/pull")
|
||||
try:
|
||||
await API.add_primary_keys_to_local_tables()
|
||||
await API.add_primary_keys_to_remote_tables()
|
||||
await Sys.add_primary_keys_to_local_tables()
|
||||
await Sys.add_primary_keys_to_remote_tables()
|
||||
try:
|
||||
source = await API.get_most_recent_source()
|
||||
source = await Sys.get_most_recent_source()
|
||||
if source:
|
||||
# Pull changes from the source
|
||||
total_changes = await API.pull_changes(source)
|
||||
total_changes = await Sys.pull_changes(source)
|
||||
|
||||
return JSONResponse(content={
|
||||
"status": "success",
|
||||
|
@ -179,39 +181,48 @@ async def pull_changes():
|
|||
"status": "info",
|
||||
"message": "No instances with more recent data found or all instances are offline."
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error in /sync/pull: {str(e)}")
|
||||
err(f"Traceback: {traceback.format_exc()}")
|
||||
l.error(f"Error in /sync/pull: {str(e)}")
|
||||
l.error(f"Traceback: {traceback.format_exc()}")
|
||||
raise HTTPException(status_code=500, detail=f"Error during pull: {str(e)}")
|
||||
|
||||
finally:
|
||||
info(f"Finished processing /sync/pull request")
|
||||
|
||||
l.info(f"Finished processing /sync/pull request")
|
||||
except Exception as e:
|
||||
err(f"Error while ensuring primary keys to tables: {str(e)}")
|
||||
err(f"Traceback: {traceback.format_exc()}")
|
||||
raise HTTPException(status_code=500, detail=f"Error during primary key insurance: {str(e)}")
|
||||
|
||||
l.error(f"Error while ensuring primary keys to tables: {str(e)}")
|
||||
l.error(f"Traceback: {traceback.format_exc()}")
|
||||
raise HTTPException(status_code=500, detail=f"Error during primary key insurance: {str(e)}")
|
||||
|
||||
def load_router(router_name):
|
||||
router_file = ROUTER_DIR / f'{router_name}.py'
|
||||
module_logger = L.get_module_logger(router_name)
|
||||
module_logger.debug(f"Attempting to load {router_name.capitalize()}...")
|
||||
router_logger = get_logger(f"router.{router_name}")
|
||||
router_logger.debug(f"Attempting to load {router_name.capitalize()}...")
|
||||
|
||||
# Log the full path being checked
|
||||
router_file = Dir.ROUTER / f'{router_name}.py'
|
||||
router_logger.debug(f"Checking for router file at: {router_file.absolute()}")
|
||||
|
||||
if router_file.exists():
|
||||
router_logger.debug(f"Router file found: {router_file}")
|
||||
module_path = f'sijapi.routers.{router_name}'
|
||||
router_logger.debug(f"Attempting to import module: {module_path}")
|
||||
try:
|
||||
module = importlib.import_module(module_path)
|
||||
router_logger.debug(f"Module imported successfully: {module}")
|
||||
router = getattr(module, router_name)
|
||||
router_logger.debug(f"Router object retrieved: {router}")
|
||||
app.include_router(router)
|
||||
router_logger.info(f"Router {router_name} loaded successfully")
|
||||
except (ImportError, AttributeError) as e:
|
||||
module_logger.critical(f"Failed to load router {router_name}: {e}")
|
||||
router_logger.critical(f"Failed to load router {router_name}: {e}")
|
||||
router_logger.debug(f"Current working directory: {os.getcwd()}")
|
||||
router_logger.debug(f"Python path: {sys.path}")
|
||||
else:
|
||||
module_logger.error(f"Router file for {router_name} does not exist.")
|
||||
router_logger.error(f"Router file for {router_name} does not exist at {router_file.absolute()}")
|
||||
router_logger.debug(f"Contents of router directory: {list(Dir.ROUTER.iterdir())}")
|
||||
|
||||
|
||||
def main(argv):
|
||||
config = HypercornConfig()
|
||||
config.bind = [API.BIND]
|
||||
config.bind = [Sys.BIND]
|
||||
config.startup_timeout = 300 # 5 minutes
|
||||
config.shutdown_timeout = 15 # 15 seconds
|
||||
asyncio.run(serve(app, config))
|
||||
|
|
1329
sijapi/classes.py
1329
sijapi/classes.py
File diff suppressed because it is too large
Load diff
|
@ -56,8 +56,8 @@
|
|||
#─── notes: ──────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# HOST_NET† and HOST_PORT comprise HOST and determine the ip and port the server binds to.
|
||||
# API.URL is used to assemble URLs, e.g. in the MS authentication flow and for serving images generated on the img router.
|
||||
# API.URL should match the base URL used to access sijapi sans endpoint, e.g. http://localhost:4444 or https://api.sij.ai
|
||||
# Sys.URL is used to assemble URLs, e.g. in the MS authentication flow and for serving images generated on the img router.
|
||||
# Sys.URL should match the base URL used to access sijapi sans endpoint, e.g. http://localhost:4444 or https://api.sij.ai
|
||||
#
|
||||
# † Take care here! Please ensure you understand the implications of setting HOST_NET to anything besides 127.0.0.1, and configure your firewall and router appropriately if you do. Setting HOST_NET to 0.0.0.0, for instance, opens sijapi to any device the server running it is accessible to — including potentially frightening internet randos (depending how your firewall, router, and NAT are configured).
|
||||
#
|
||||
|
@ -159,7 +159,7 @@ UNLOADED=ig
|
|||
# apps that together make SetApp an incredible value for macOS users!)
|
||||
#
|
||||
# tts: designed for use with coqui — $ pip install coqui — and/or the
|
||||
# ElevenLabs API.
|
||||
# ElevenLabs Sys.
|
||||
#
|
||||
# weather: requires a VisualCrossing API key and is designed for (but doesn't
|
||||
# itself strictly require) Postgresql with the PostGIS extension;
|
||||
|
|
69
sijapi/config/db.yaml-example
Normal file
69
sijapi/config/db.yaml-example
Normal file
|
@ -0,0 +1,69 @@
|
|||
POOL:
|
||||
- ts_id: 'server1'
|
||||
ts_ip: '192.168.0.10'
|
||||
app_port: 4444
|
||||
db_port: 5432
|
||||
db_name: mydb
|
||||
db_user: dbuser
|
||||
db_pass: 'password123'
|
||||
ssh_port: 22
|
||||
ssh_user: sshuser
|
||||
ssh_pass: 'password456'
|
||||
path: '/Users/sij/workshop/sijapi'
|
||||
tmux: '/opt/homebrew/bin/tmux'
|
||||
tailscale: '/usr/local/bin/tailscale'
|
||||
conda: '/Users/sij/miniforge3/bin/mamba'
|
||||
conda_env: '/Users/sij/miniforge3/envs/sijapi'
|
||||
vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
|
||||
vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
|
||||
- ts_id: 'server2'
|
||||
ts_ip: '192.168.0.11'
|
||||
app_port: 4444
|
||||
db_port: 5432
|
||||
db_name: mydb
|
||||
db_user: dbuser
|
||||
db_pass: 'password123'
|
||||
ssh_port: 22
|
||||
ssh_user: sshuser
|
||||
ssh_pass: 'password456'
|
||||
path: '/Users/sij/workshop/sijapi'
|
||||
tmux: '/opt/homebrew/bin/tmux'
|
||||
tailscale: '/usr/local/bin/tailscale'
|
||||
conda: '/Users/sij/miniforge3/bin/mamba'
|
||||
conda_env: '/Users/sij/miniforge3/envs/sijapi'
|
||||
vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
|
||||
vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
|
||||
- ts_id: 'server3'
|
||||
ts_ip: '192.168.0.12'
|
||||
app_port: 4444
|
||||
db_port: 5432
|
||||
db_name: mydb
|
||||
db_user: dbuser
|
||||
db_pass: 'password123'
|
||||
ssh_port: 22
|
||||
ssh_user: sshuser
|
||||
ssh_pass: 'password456'
|
||||
path: '/Users/sij/workshop/sijapi'
|
||||
tmux: '/opt/homebrew/bin/tmux'
|
||||
tailscale: '/usr/local/bin/tailscale'
|
||||
conda: '/Users/sij/miniforge3/bin/mamba'
|
||||
conda_env: '/Users/sij/miniforge3/envs/sijapi'
|
||||
vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
|
||||
vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
|
||||
|
||||
TABLES:
|
||||
locations:
|
||||
primary_key: id
|
||||
use_guid: true
|
||||
dailyweather:
|
||||
primary_key: id
|
||||
use_guid: true
|
||||
hourlyweather:
|
||||
primary_key: id
|
||||
use_guid: true
|
||||
click_logs:
|
||||
primary_key: id
|
||||
use_guid: true
|
||||
short_urls:
|
||||
primary_key: id
|
||||
use_guid: true
|
13
sijapi/config/gis.yaml-example
Normal file
13
sijapi/config/gis.yaml-example
Normal file
|
@ -0,0 +1,13 @@
|
|||
custom_locations:
|
||||
- name: Echo Valley Ranch
|
||||
latitude: 42.8098216
|
||||
longitude: -123.049396
|
||||
radius: 2
|
||||
|
||||
layers:
|
||||
- url: "https://gis.blm.gov/arcgis/rest/services/Cadastral/BLM_Natl_PLSS_CadNSDI/MapServer/1/query"
|
||||
table_name: "public.plss_townships"
|
||||
layer_name: "Townships"
|
||||
- url: "https://gis.blm.gov/arcgis/rest/services/Cadastral/BLM_Natl_PLSS_CadNSDI/MapServer/2/query"
|
||||
table_name: "public.plss_sections"
|
||||
layer_name: "Sections"
|
|
@ -1,4 +0,0 @@
|
|||
- name: Echo Valley Ranch
|
||||
latitude: 42.8098216
|
||||
longitude: -123.049396
|
||||
radius: 1.5
|
|
@ -1,11 +1,12 @@
|
|||
# config/sys.yaml
|
||||
# Primary configuration file
|
||||
|
||||
HOST: '0.0.0.0'
|
||||
HOST: "0.0.0.0"
|
||||
PORT: 4444
|
||||
BIND: '{{ HOST }}:{{ PORT }}'
|
||||
URL: 'https://api.example.com'
|
||||
BIND: "{{ HOST }}:{{ PORT }}"
|
||||
URL: "https://api.sij.ai"
|
||||
MAX_CPU_CORES: 7
|
||||
|
||||
PUBLIC:
|
||||
PUBLIC:
|
||||
- /id
|
||||
- /ip
|
||||
- /health
|
||||
|
@ -15,20 +16,21 @@ PUBLIC:
|
|||
- /cd/alert
|
||||
|
||||
TRUSTED_SUBNETS:
|
||||
- 127.0.0.1/32
|
||||
- 10.13.37.0/24
|
||||
- 100.64.64.0/24
|
||||
- "127.0.0.1/32"
|
||||
- "10.13.37.0/24"
|
||||
|
||||
SUBNET_BROADCAST: "10.255.255.255"
|
||||
|
||||
MODULES:
|
||||
archivist: on
|
||||
archivist: off
|
||||
asr: on
|
||||
cal: on
|
||||
cal: off
|
||||
cf: off
|
||||
dist: off
|
||||
email: on
|
||||
email: off
|
||||
gis: on
|
||||
ig: off
|
||||
img: on
|
||||
img: off
|
||||
llm: on
|
||||
news: on
|
||||
note: on
|
||||
|
@ -36,77 +38,54 @@ MODULES:
|
|||
scrape: on
|
||||
serve: on
|
||||
sys: on
|
||||
timing: on
|
||||
timing: off
|
||||
tts: on
|
||||
weather: on
|
||||
|
||||
POOL:
|
||||
- ts_id: 'server1'
|
||||
ts_ip: '192.168.0.10'
|
||||
app_port: 4444
|
||||
db_port: 5432
|
||||
db_name: mydb
|
||||
db_user: dbuser
|
||||
db_pass: 'password123'
|
||||
ssh_port: 22
|
||||
ssh_user: sshuser
|
||||
ssh_pass: 'password456'
|
||||
path: '/Users/sij/workshop/sijapi'
|
||||
tmux: '/opt/homebrew/bin/tmux'
|
||||
tailscale: '/usr/local/bin/tailscale'
|
||||
conda: '/Users/sij/miniforge3/bin/mamba'
|
||||
conda_env: '/Users/sij/miniforge3/envs/sijapi'
|
||||
vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
|
||||
vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
|
||||
- ts_id: 'server2'
|
||||
ts_ip: '192.168.0.11'
|
||||
app_port: 4444
|
||||
db_port: 5432
|
||||
db_name: mydb
|
||||
db_user: dbuser
|
||||
db_pass: 'password123'
|
||||
ssh_port: 22
|
||||
ssh_user: sshuser
|
||||
ssh_pass: 'password456'
|
||||
path: '/Users/sij/workshop/sijapi'
|
||||
tmux: '/opt/homebrew/bin/tmux'
|
||||
tailscale: '/usr/local/bin/tailscale'
|
||||
conda: '/Users/sij/miniforge3/bin/mamba'
|
||||
conda_env: '/Users/sij/miniforge3/envs/sijapi'
|
||||
vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
|
||||
vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
|
||||
- ts_id: 'server3'
|
||||
ts_ip: '192.168.0.12'
|
||||
app_port: 4444
|
||||
db_port: 5432
|
||||
db_name: mydb
|
||||
db_user: dbuser
|
||||
db_pass: 'password123'
|
||||
ssh_port: 22
|
||||
ssh_user: sshuser
|
||||
ssh_pass: 'password456'
|
||||
path: '/Users/sij/workshop/sijapi'
|
||||
tmux: '/opt/homebrew/bin/tmux'
|
||||
tailscale: '/usr/local/bin/tailscale'
|
||||
conda: '/Users/sij/miniforge3/bin/mamba'
|
||||
conda_env: '/Users/sij/miniforge3/envs/sijapi'
|
||||
vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
|
||||
vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
|
||||
|
||||
LOGS:
|
||||
default: info
|
||||
init: debug
|
||||
classes: debug
|
||||
database: debug
|
||||
serialization: debug
|
||||
utilities: debug
|
||||
logs: debug
|
||||
main: debug
|
||||
archivist: info
|
||||
asr: info
|
||||
cal: info
|
||||
cf: info
|
||||
dist: info
|
||||
email: info
|
||||
gis: debug
|
||||
ig: info
|
||||
img: debug
|
||||
llm: debug
|
||||
news: debug
|
||||
note: debug
|
||||
rag: debug
|
||||
scrape: debug
|
||||
serve: info
|
||||
sys: debug
|
||||
timing: warn
|
||||
tts: info
|
||||
weather: info
|
||||
|
||||
EXTENSIONS:
|
||||
pgp: on
|
||||
archivist: on
|
||||
archivist: off
|
||||
courtlistener: off
|
||||
elevenlabs: on
|
||||
macnotify: on
|
||||
pgp: on
|
||||
shellfish: on
|
||||
xtts: off
|
||||
url_shortener: off
|
||||
|
||||
TZ: 'America/Los_Angeles'
|
||||
|
||||
KEYS:
|
||||
- 'sk-YOUR_FIRST_API_KEY'
|
||||
- 'sk-YOUR_SECOND_API_KEY'
|
||||
- 'sk-YOUR_THIRD_API_KEY'
|
||||
- "sk-NhrtQwCHNdK5sRZC"
|
||||
- "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"
|
||||
|
||||
TZ: "America/Los_Angeles"
|
||||
|
||||
GARBAGE:
|
||||
COLLECTION_INTERVAL: 60 * 60
|
||||
|
|
324
sijapi/database.py
Normal file
324
sijapi/database.py
Normal file
|
@ -0,0 +1,324 @@
|
|||
# database.py
|
||||
import json
|
||||
import yaml
|
||||
import time
|
||||
import aiohttp
|
||||
import asyncio
|
||||
from datetime import datetime as dt_datetime, date
|
||||
from tqdm.asyncio import tqdm
|
||||
import reverse_geocoder as rg
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union, TypeVar, ClassVar
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel, Field, create_model, PrivateAttr
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from zoneinfo import ZoneInfo
|
||||
from srtm import get_data
|
||||
import os
|
||||
import sys
|
||||
from loguru import logger
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
|
||||
from sqlalchemy.orm import sessionmaker, declarative_base
|
||||
from sqlalchemy.exc import OperationalError
|
||||
from sqlalchemy import Column, Integer, String, DateTime, JSON, Text, select, func
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from urllib.parse import urljoin
|
||||
import hashlib
|
||||
import random
|
||||
from .logs import get_logger
|
||||
from .serialization import json_dumps, json_serial, serialize
|
||||
|
||||
l = get_logger(__name__)
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
BASE_DIR = Path(__file__).resolve().parent
|
||||
CONFIG_DIR = BASE_DIR / "config"
|
||||
ENV_PATH = CONFIG_DIR / ".env"
|
||||
load_dotenv(ENV_PATH)
|
||||
TS_ID = os.environ.get('TS_ID')
|
||||
|
||||
|
||||
class QueryTracking(Base):
|
||||
__tablename__ = 'query_tracking'
|
||||
|
||||
id = Column(Integer, primary_key=True)
|
||||
ts_id = Column(String, nullable=False)
|
||||
query = Column(Text, nullable=False)
|
||||
args = Column(JSONB)
|
||||
executed_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||
completed_by = Column(JSONB, default={})
|
||||
result_checksum = Column(String)
|
||||
|
||||
class Database:
|
||||
@classmethod
|
||||
def init(cls, config_name: str):
|
||||
return cls(config_name)
|
||||
|
||||
def __init__(self, config_path: str):
|
||||
self.config = self.load_config(config_path)
|
||||
self.engines: Dict[str, Any] = {}
|
||||
self.sessions: Dict[str, Any] = {}
|
||||
self.online_servers: set = set()
|
||||
self.local_ts_id = self.get_local_ts_id()
|
||||
|
||||
def load_config(self, config_path: str) -> Dict[str, Any]:
|
||||
base_path = Path(__file__).parent.parent
|
||||
full_path = base_path / "sijapi" / "config" / f"{config_path}.yaml"
|
||||
|
||||
with open(full_path, 'r') as file:
|
||||
config = yaml.safe_load(file)
|
||||
|
||||
return config
|
||||
|
||||
def get_local_ts_id(self) -> str:
|
||||
return os.environ.get('TS_ID')
|
||||
|
||||
async def initialize_engines(self):
|
||||
for db_info in self.config['POOL']:
|
||||
url = f"postgresql+asyncpg://{db_info['db_user']}:{db_info['db_pass']}@{db_info['ts_ip']}:{db_info['db_port']}/{db_info['db_name']}"
|
||||
try:
|
||||
engine = create_async_engine(url, pool_pre_ping=True, pool_size=5, max_overflow=10)
|
||||
self.engines[db_info['ts_id']] = engine
|
||||
self.sessions[db_info['ts_id']] = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
|
||||
l.info(f"Initialized engine and session for {db_info['ts_id']}")
|
||||
except Exception as e:
|
||||
l.error(f"Failed to initialize engine for {db_info['ts_id']}: {str(e)}")
|
||||
|
||||
if self.local_ts_id not in self.sessions:
|
||||
l.error(f"Failed to initialize session for local server {self.local_ts_id}")
|
||||
else:
|
||||
try:
|
||||
# Create tables if they don't exist
|
||||
async with self.engines[self.local_ts_id].begin() as conn:
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
l.info(f"Initialized tables for local server {self.local_ts_id}")
|
||||
except Exception as e:
|
||||
l.error(f"Failed to create tables for local server {self.local_ts_id}: {str(e)}")
|
||||
|
||||
async def get_online_servers(self) -> List[str]:
|
||||
online_servers = []
|
||||
for ts_id, engine in self.engines.items():
|
||||
try:
|
||||
async with engine.connect() as conn:
|
||||
await conn.execute(text("SELECT 1"))
|
||||
online_servers.append(ts_id)
|
||||
except OperationalError:
|
||||
pass
|
||||
self.online_servers = set(online_servers)
|
||||
return online_servers
|
||||
|
||||
async def read(self, query: str, **kwargs):
|
||||
if self.local_ts_id not in self.sessions:
|
||||
l.error(f"No session found for local server {self.local_ts_id}. Database may not be properly initialized.")
|
||||
return None
|
||||
|
||||
async with self.sessions[self.local_ts_id]() as session:
|
||||
try:
|
||||
result = await session.execute(text(query), kwargs)
|
||||
# Convert the result to a list of dictionaries
|
||||
rows = result.fetchall()
|
||||
if rows:
|
||||
columns = result.keys()
|
||||
return [dict(zip(columns, row)) for row in rows]
|
||||
else:
|
||||
return []
|
||||
except Exception as e:
|
||||
l.error(f"Failed to execute read query: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
async def write(self, query: str, **kwargs):
|
||||
if self.local_ts_id not in self.sessions:
|
||||
l.error(f"No session found for local server {self.local_ts_id}. Database may not be properly initialized.")
|
||||
return None
|
||||
|
||||
async with self.sessions[self.local_ts_id]() as session:
|
||||
try:
|
||||
# Serialize the kwargs using
|
||||
serialized_kwargs = {key: serialize(value) for key, value in kwargs.items()}
|
||||
|
||||
# Execute the write query
|
||||
result = await session.execute(text(query), serialized_kwargs)
|
||||
|
||||
# Log the query (use json_dumps for logging purposes)
|
||||
new_query = QueryTracking(
|
||||
ts_id=self.local_ts_id,
|
||||
query=query,
|
||||
args=json_dumps(kwargs) # Use original kwargs for logging
|
||||
)
|
||||
session.add(new_query)
|
||||
await session.flush()
|
||||
query_id = new_query.id
|
||||
|
||||
await session.commit()
|
||||
l.info(f"Successfully executed write query: {query[:50]}...")
|
||||
|
||||
checksum = await self._local_compute_checksum(query, serialized_kwargs)
|
||||
|
||||
# Update query_tracking with checksum
|
||||
await self.update_query_checksum(query_id, checksum)
|
||||
|
||||
# Replicate to online servers
|
||||
online_servers = await self.get_online_servers()
|
||||
for ts_id in online_servers:
|
||||
if ts_id != self.local_ts_id:
|
||||
asyncio.create_task(self._replicate_write(ts_id, query_id, query, serialized_kwargs, checksum))
|
||||
|
||||
return result # Return the CursorResult
|
||||
|
||||
except Exception as e:
|
||||
l.error(f"Failed to execute write query: {str(e)}")
|
||||
l.error(f"Query: {query}")
|
||||
l.error(f"Kwargs: {kwargs}")
|
||||
l.error(f"Serialized kwargs: {serialized_kwargs}")
|
||||
l.error(f"Traceback: {traceback.format_exc()}")
|
||||
return None
|
||||
|
||||
async def get_primary_server(self) -> str:
    """Ask the load balancer which server is currently primary.

    Returns:
        The primary's ts_id (whitespace-stripped), or None on any HTTP
        error status or connection failure.
    """
    endpoint = urljoin(self.config['URL'], '/id')

    async with aiohttp.ClientSession() as http:
        try:
            async with http.get(endpoint) as resp:
                if resp.status != 200:
                    l.error(f"Failed to get primary server. Status: {resp.status}")
                    return None
                body = await resp.text()
                return body.strip()
        except aiohttp.ClientError as e:
            l.error(f"Error connecting to load balancer: {str(e)}")
            return None
|
||||
|
||||
|
||||
async def get_checksum_server(self) -> dict:
    """Choose a server to delegate checksum computation to.

    Prefers a random online non-primary server; falls back to the
    primary itself when no other server is online.
    """
    primary_ts_id = await self.get_primary_server()
    online = await self.get_online_servers()

    candidates = [
        srv for srv in self.config['POOL']
        if srv['ts_id'] in online and srv['ts_id'] != primary_ts_id
    ]

    if candidates:
        return random.choice(candidates)

    # No eligible secondary: the primary computes its own checksum.
    return next(srv for srv in self.config['POOL'] if srv['ts_id'] == primary_ts_id)
|
||||
|
||||
|
||||
async def _local_compute_checksum(self, query: str, params: dict):
    """Run the query against the local database and return an MD5 hex
    digest of its result.

    Row-returning queries are hashed over their fetched rows; otherwise
    the digest covers rowcount + query text + params.
    """
    async with self.sessions[self.local_ts_id]() as session:
        outcome = await session.execute(text(query), params)
        if outcome.returns_rows:
            payload = outcome.fetchall()
        else:
            payload = str(outcome.rowcount) + query + str(params)
        return hashlib.md5(str(payload).encode()).hexdigest()
|
||||
|
||||
|
||||
async def _delegate_compute_checksum(self, server: Dict[str, Any], query: str, params: dict):
    """Ask a remote server to compute a query checksum.

    Falls back to computing the checksum locally on any HTTP error
    status or connection failure.
    """
    url = f"http://{server['ts_ip']}:{server['app_port']}/sync/checksum"

    async with aiohttp.ClientSession() as http:
        try:
            async with http.post(url, json={"query": query, "params": params}) as resp:
                if resp.status == 200:
                    payload = await resp.json()
                    return payload['checksum']
                l.error(f"Failed to get checksum from {server['ts_id']}. Status: {resp.status}")
                return await self._local_compute_checksum(query, params)
        except aiohttp.ClientError as e:
            l.error(f"Error connecting to {server['ts_id']} for checksum: {str(e)}")
            return await self._local_compute_checksum(query, params)
|
||||
|
||||
|
||||
async def update_query_checksum(self, query_id: int, checksum: str):
    """Store the computed result checksum on an existing query_tracking row."""
    stmt = text("UPDATE query_tracking SET result_checksum = :checksum WHERE id = :id")
    async with self.sessions[self.local_ts_id]() as session:
        await session.execute(stmt, {"checksum": checksum, "id": query_id})
        await session.commit()
|
||||
|
||||
|
||||
async def _replicate_write(self, ts_id: str, query_id: int, query: str, params: dict, expected_checksum: str):
    """Replay a tracked write on server `ts_id` and verify its checksum.

    Marks the query as completed for that server only when the checksum
    matches the one recorded by the originating write. Errors are logged
    and swallowed: replication is best-effort and retried by sync jobs.
    """
    try:
        async with self.sessions[ts_id]() as session:
            await session.execute(text(query), params)
            # NOTE(review): _local_compute_checksum re-runs the query against
            # the *local* session, not the replica just written to — confirm
            # this is intended; it looks like it should verify the replica.
            actual_checksum = await self._local_compute_checksum(query, params)
            if actual_checksum != expected_checksum:
                raise ValueError(f"Checksum mismatch on {ts_id}")
            await self.mark_query_completed(query_id, ts_id)
            await session.commit()
            l.info(f"Successfully replicated write to {ts_id}")
    except Exception as e:
        l.error(f"Failed to replicate write on {ts_id}: {str(e)}")
|
||||
|
||||
|
||||
async def mark_query_completed(self, query_id: int, ts_id: str):
    """Record in the local tracking table that `ts_id` has applied the query.

    Silently does nothing when the tracking row no longer exists (e.g.
    already purged).
    """
    async with self.sessions[self.local_ts_id]() as session:
        tracked = await session.get(QueryTracking, query_id)
        if not tracked:
            return
        # Reassign the whole dict so the ORM detects the mutation.
        done_map = tracked.completed_by or {}
        done_map[ts_id] = True
        tracked.completed_by = done_map
        await session.commit()
|
||||
|
||||
|
||||
async def sync_local_server(self):
    """Catch the local server up on writes it has not yet applied.

    Finds the highest query_tracking id already marked completed for this
    server, replays everything newer in id order, and verifies each replay
    against the recorded checksum. Per-query failures are logged and
    skipped so one bad query does not stall the whole sync.
    """
    async with self.sessions[self.local_ts_id]() as session:
        # NOTE(review): '?' is the PostgreSQL JSONB key-exists operator here;
        # confirm the driver does not mistake it for a parameter placeholder
        # (some SQLAlchemy/asyncpg setups require escaping it).
        last_synced = await session.execute(
            text("SELECT MAX(id) FROM query_tracking WHERE completed_by ? :ts_id"),
            {"ts_id": self.local_ts_id}
        )
        last_synced_id = last_synced.scalar() or 0

        unexecuted_queries = await session.execute(
            text("SELECT * FROM query_tracking WHERE id > :last_id ORDER BY id"),
            {"last_id": last_synced_id}
        )

        for query in unexecuted_queries:
            try:
                params = json.loads(query.args)
                await session.execute(text(query.query), params)
                actual_checksum = await self._local_compute_checksum(query.query, params)
                if actual_checksum != query.result_checksum:
                    raise ValueError(f"Checksum mismatch for query ID {query.id}")
                await self.mark_query_completed(query.id, self.local_ts_id)
            except Exception as e:
                l.error(f"Failed to execute query ID {query.id} during local sync: {str(e)}")

        await session.commit()
        l.info(f"Local server sync completed. Executed {unexecuted_queries.rowcount} queries.")
|
||||
|
||||
|
||||
async def purge_completed_queries(self):
    """Delete tracking rows that every server in the pool has applied.

    Removes all rows up to (and including) the newest row whose
    completed_by JSONB contains a key for every pool member, keeping the
    tracking table bounded.
    """
    async with self.sessions[self.local_ts_id]() as session:
        all_ts_ids = [db['ts_id'] for db in self.config['POOL']]

        # NOTE(review): '?&' is PostgreSQL's JSONB "contains all keys"
        # operator — confirm the driver passes it through rather than
        # treating '?' as a bind placeholder.
        result = await session.execute(
            text("""
                DELETE FROM query_tracking
                WHERE id <= (
                    SELECT MAX(id)
                    FROM query_tracking
                    WHERE completed_by ?& :ts_ids
                )
            """),
            {"ts_ids": all_ts_ids}
        )
        await session.commit()

        deleted_count = result.rowcount
        l.info(f"Purged {deleted_count} completed queries.")
|
||||
|
||||
|
||||
async def close(self):
    """Dispose every engine in the pool, releasing all pooled connections."""
    for eng in self.engines.values():
        await eng.dispose()
|
||||
|
||||
|
237
sijapi/helpers/CaPLSS.py
Normal file
237
sijapi/helpers/CaPLSS.py
Normal file
|
@ -0,0 +1,237 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
from datetime import datetime
|
||||
|
||||
# Environment variables for database connection
# NOTE(review): the DB_PASSWORD fallback hard-codes a real-looking credential
# in source — prefer failing fast when the env var is unset.
DB_NAME = os.getenv('DB_NAME', 'sij')
DB_USER = os.getenv('DB_USER', 'sij')
DB_PASSWORD = os.getenv('DB_PASSWORD', 'Synchr0!')
DB_HOST = os.getenv('DB_HOST', 'localhost')
DB_PORT = os.getenv('DB_PORT', '5432')
|
||||
|
||||
def get_feature_count(url):
    """Return the total feature count reported by an ArcGIS layer query URL.

    Raises requests.HTTPError on a non-success response after retries.
    """
    count_params = {
        'where': '1=1',
        'returnCountOnly': 'true',
        'f': 'json'
    }
    retry_policy = Retry(total=10, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
    with requests.Session() as session:
        session.mount("https://", HTTPAdapter(max_retries=retry_policy))
        response = session.get(url, params=count_params, timeout=30)
        response.raise_for_status()
        return response.json().get('count', 0)
|
||||
|
||||
def fetch_features(url, offset, num, max_retries=5):
    """Fetch one page of features from an ArcGIS query endpoint.

    Combines a urllib3 per-connection retry policy with an outer retry
    loop (linear backoff: 5s, 10s, ...). Re-raises the last error after
    max_retries failed attempts.
    """
    page_params = {
        'where': '1=1',
        'outFields': '*',
        'geometryPrecision': 6,
        'outSR': 4326,
        'f': 'json',
        'resultOffset': offset,
        'resultRecordCount': num
    }
    for attempt in range(max_retries):
        try:
            retry_policy = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
            with requests.Session() as session:
                session.mount("https://", HTTPAdapter(max_retries=retry_policy))
                response = session.get(url, params=page_params, timeout=30)
                response.raise_for_status()
                return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error fetching features (attempt {attempt + 1}/{max_retries}): {e}")
            if attempt == max_retries - 1:
                raise
            time.sleep(5 * (attempt + 1))  # Exponential backoff
|
||||
|
||||
|
||||
def download_layer(layer_num, layer_name):
    """Download all features of a BLM PLSS layer and write them as GeoJSON.

    Pages through the ArcGIS endpoint in batches of 1000, periodically
    checkpointing progress to a temp JSON file so an interrupted run can
    resume. Returns the final GeoJSON path, or None on failure (with
    partial data left in the temp file).
    """
    base_dir = os.path.expanduser('~/data')
    os.makedirs(base_dir, exist_ok=True)

    file_path = os.path.join(base_dir, f'PLSS_{layer_name}.geojson')
    temp_file_path = os.path.join(base_dir, f'PLSS_{layer_name}_temp.json')

    url = f"https://gis.blm.gov/arcgis/rest/services/Cadastral/BLM_Natl_PLSS_CadNSDI/MapServer/{layer_num}/query"

    total_count = get_feature_count(url)
    print(f"Total {layer_name} features: {total_count}")

    batch_size = 1000
    chunk_size = 10000  # Write to file every 10,000 features
    offset = 0
    all_features = []

    # Check if temporary file exists and load its content (resume support)
    if os.path.exists(temp_file_path):
        try:
            with open(temp_file_path, 'r') as f:
                all_features = json.load(f)
            offset = len(all_features)
            print(f"Resuming download from offset {offset}")
        except json.JSONDecodeError:
            print("Error reading temporary file. Starting download from the beginning.")
            offset = 0
            all_features = []

    try:
        while offset < total_count:
            print(f"Fetching {layer_name} features {offset} to {offset + batch_size}...")
            data = fetch_features(url, offset, batch_size)

            new_features = data.get('features', [])
            if not new_features:
                break

            all_features.extend(new_features)
            offset += len(new_features)

            # Progress indicator
            progress = offset / total_count
            bar_length = 30
            filled_length = int(bar_length * progress)
            bar = '=' * filled_length + '-' * (bar_length - filled_length)
            print(f'\rProgress: [{bar}] {progress:.1%} ({offset}/{total_count} features)', end='', flush=True)

            # Save progress to temporary file every chunk_size features.
            # NOTE(review): this only fires when len(all_features) is an exact
            # multiple of chunk_size — a short final page can skip checkpoints.
            if len(all_features) % chunk_size == 0:
                with open(temp_file_path, 'w') as f:
                    json.dump(all_features, f)

            time.sleep(1)  # be polite to the BLM endpoint

        print(f"\nTotal {layer_name} features fetched: {len(all_features)}")

        # Write final GeoJSON file incrementally (avoids one huge dump call)
        with open(file_path, 'w') as f:
            f.write('{"type": "FeatureCollection", "features": [\n')
            for i, feature in enumerate(all_features):
                geojson_feature = {
                    "type": "Feature",
                    "properties": feature['attributes'],
                    "geometry": feature['geometry']
                }
                json.dump(geojson_feature, f)
                if i < len(all_features) - 1:
                    f.write(',\n')
            f.write('\n]}')

        print(f"GeoJSON file saved as '{file_path}'")

        # Remove temporary file now that the full file is written
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)

        return file_path
    except Exception as e:
        print(f"\nError during download: {e}")
        print(f"Partial data saved in {temp_file_path}")
        return None
|
||||
|
||||
|
||||
def check_postgres_connection():
    """Return True if psql can connect to the configured database.

    Also returns False when the psql binary itself is missing —
    previously that raised an uncaught FileNotFoundError (check_ogr2ogr
    already handles its missing-binary case this way).
    """
    try:
        subprocess.run(['psql', '-h', DB_HOST, '-p', DB_PORT, '-U', DB_USER, '-d', DB_NAME, '-c', 'SELECT 1;'],
                       check=True, capture_output=True, text=True)
        return True
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False
|
||||
|
||||
def check_postgis_extension():
    """Return True if the PostGIS extension is installed in the database.

    Also returns False when the psql binary is missing — previously that
    raised an uncaught FileNotFoundError (matches check_ogr2ogr's
    handling of a missing binary).
    """
    try:
        result = subprocess.run(['psql', '-h', DB_HOST, '-p', DB_PORT, '-U', DB_USER, '-d', DB_NAME,
                                 '-c', "SELECT 1 FROM pg_extension WHERE extname = 'postgis';"],
                                check=True, capture_output=True, text=True)
        return '1' in result.stdout
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False
|
||||
|
||||
def create_postgis_extension():
    """Create the PostGIS extension if absent; exit the process on failure."""
    command = ['psql', '-h', DB_HOST, '-p', DB_PORT, '-U', DB_USER, '-d', DB_NAME,
               '-c', "CREATE EXTENSION IF NOT EXISTS postgis;"]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error creating PostGIS extension: {e}")
        sys.exit(1)
    else:
        print("PostGIS extension created successfully.")
|
||||
|
||||
def import_to_postgis(file_path, table_name):
    """Import a GeoJSON file into a PostGIS table via ogr2ogr.

    Verifies database connectivity (exiting the process if unreachable)
    and ensures the PostGIS extension exists before importing. The
    target table is overwritten if it already exists.
    """
    if not check_postgres_connection():
        print("Error: Unable to connect to PostgreSQL. Please check your connection settings.")
        sys.exit(1)

    if not check_postgis_extension():
        print("PostGIS extension not found. Attempting to create it...")
        create_postgis_extension()

    # NOTE(review): the password is embedded in the ogr2ogr argument list and
    # is visible in the process table — consider the PGPASSWORD env var.
    ogr2ogr_command = [
        'ogr2ogr',
        '-f', 'PostgreSQL',
        f'PG:dbname={DB_NAME} user={DB_USER} password={DB_PASSWORD} host={DB_HOST} port={DB_PORT}',
        file_path,
        '-nln', table_name,
        '-overwrite'
    ]

    try:
        subprocess.run(ogr2ogr_command, check=True, capture_output=True, text=True)
        print(f"Data successfully imported into PostGIS table: {table_name}")
    except subprocess.CalledProcessError as e:
        print(f"Error importing data into PostGIS: {e}")
        print(f"Command that failed: {e.cmd}")
        print(f"Error output: {e.stderr}")
|
||||
|
||||
def check_ogr2ogr():
    """Return True if the ogr2ogr binary is present and runnable."""
    try:
        subprocess.run(['ogr2ogr', '--version'], check=True, capture_output=True, text=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False
    return True
|
||||
|
||||
|
||||
def main():
    """Download and import the PLSS Townships and Sections layers.

    Requires GDAL's ogr2ogr; exits with an install hint when it is
    missing. Each layer is skipped (not re-downloaded) when its GeoJSON
    already exists on disk, and skipped for import when its download
    failed.
    """
    if not check_ogr2ogr():
        print("Error: ogr2ogr not found. Please install GDAL/OGR tools.")
        print("On Debian: sudo apt-get install gdal-bin")
        print("On macOS with Homebrew: brew install gdal")
        sys.exit(1)

    try:
        # Townships: layer 1 of the BLM PLSS map service
        township_file = os.path.expanduser('~/data/PLSS_Townships.geojson')
        if not os.path.exists(township_file):
            township_file = download_layer(1, "Townships")
        if township_file:
            import_to_postgis(township_file, "public.plss_townships")
        else:
            print("Failed to download Townships data. Skipping import.")

        # Sections: layer 2 of the same service
        section_file = os.path.expanduser('~/data/PLSS_Sections.geojson')
        if not os.path.exists(section_file):
            section_file = download_layer(2, "Sections")
        if section_file:
            import_to_postgis(section_file, "public.plss_sections")
        else:
            print("Failed to download Sections data. Skipping import.")

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -1,8 +1,12 @@
|
|||
# CaPLSS_downloader_and_importer.py
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
import os
|
||||
import subprocess
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
def get_feature_count(url):
|
||||
params = {
|
||||
|
@ -10,11 +14,17 @@ def get_feature_count(url):
|
|||
'returnCountOnly': 'true',
|
||||
'f': 'json'
|
||||
}
|
||||
response = requests.get(url, params=params)
|
||||
retries = Retry(total=10, backoff_factor=0.5)
|
||||
adapter = HTTPAdapter(max_retries=retries)
|
||||
session = requests.Session()
|
||||
session.mount("https://", adapter)
|
||||
|
||||
response = session.get(url, params=params, timeout=15) # Add timeout parameter
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return data.get('count', 0)
|
||||
|
||||
|
||||
def fetch_features(url, offset, num):
|
||||
params = {
|
||||
'where': '1=1',
|
||||
|
@ -70,13 +80,19 @@ def download_layer(layer_num, layer_name):
|
|||
"features": geojson_features
|
||||
}
|
||||
|
||||
# Define a base directory that exists on both macOS and Debian
|
||||
base_dir = os.path.expanduser('~/data')
|
||||
os.makedirs(base_dir, exist_ok=True) # Create the directory if it doesn't exist
|
||||
|
||||
# Use os.path.join to construct the file path
|
||||
file_path = os.path.join(base_dir, f'PLSS_{layer_name}.geojson')
|
||||
|
||||
# Save to file
|
||||
file_path = f'/Users/sij/workshop/sijapi/sijapi/data/PLSS_{layer_name}.geojson'
|
||||
with open(file_path, 'w') as f:
|
||||
json.dump(full_geojson, f)
|
||||
|
||||
|
||||
print(f"GeoJSON file saved as '{file_path}'")
|
||||
|
||||
|
||||
return file_path
|
||||
|
||||
def import_to_postgis(file_path, table_name):
|
||||
|
|
1
sijapi/helpers/Townships_progress.json
Normal file
1
sijapi/helpers/Townships_progress.json
Normal file
|
@ -0,0 +1 @@
|
|||
{"offset": 50000}
|
71
sijapi/helpers/db.py
Normal file
71
sijapi/helpers/db.py
Normal file
|
@ -0,0 +1,71 @@
|
|||
import asyncio
|
||||
import asyncpg
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
|
||||
async def load_config():
    """Read and parse config/db.yaml from the package's config directory."""
    cfg_file = Path(__file__).parent.parent / 'config' / 'db.yaml'
    with open(cfg_file, 'r') as fh:
        return yaml.safe_load(fh)
|
||||
|
||||
async def add_foreign_key_constraint(conn):
    """Link click_logs.short_code to short_urls.short_code with a cascade FK.

    Steps, in order: make short_code NOT NULL in both tables, ensure a
    unique constraint on short_urls.short_code (the FK target must be
    unique), then add the FK with ON DELETE CASCADE. Fails loudly if
    existing data violates any of these (NULLs, duplicates, orphans).
    """
    # Ensure short_code is not null in both tables
    await conn.execute("""
        ALTER TABLE short_urls
        ALTER COLUMN short_code SET NOT NULL;
    """)

    await conn.execute("""
        ALTER TABLE click_logs
        ALTER COLUMN short_code SET NOT NULL;
    """)

    # Add unique constraint to short_urls.short_code if it doesn't exist
    await conn.execute("""
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT 1
                FROM pg_constraint
                WHERE conname = 'short_urls_short_code_key'
            ) THEN
                ALTER TABLE short_urls
                ADD CONSTRAINT short_urls_short_code_key UNIQUE (short_code);
            END IF;
        END $$;
    """)

    # Add foreign key constraint
    await conn.execute("""
        ALTER TABLE click_logs
        ADD CONSTRAINT fk_click_logs_short_urls
        FOREIGN KEY (short_code)
        REFERENCES short_urls(short_code)
        ON DELETE CASCADE;
    """)

    print("Foreign key constraint added successfully.")
|
||||
|
||||
async def main():
    """Connect to the first pool server and apply the FK constraint there."""
    config = await load_config()
    source = config['POOL'][0]  # sij-mbp16

    conn = await asyncpg.connect(
        database=source['db_name'],
        user=source['db_user'],
        password=source['db_pass'],
        host=source['ts_ip'],
        port=source['db_port'],
    )

    try:
        await add_foreign_key_constraint(conn)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
    finally:
        await conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
89
sijapi/helpers/db_get_schema.py
Normal file
89
sijapi/helpers/db_get_schema.py
Normal file
|
@ -0,0 +1,89 @@
|
|||
import psycopg2
|
||||
from psycopg2 import sql
|
||||
|
||||
def connect_to_db():
    """Open a psycopg2 connection to the local 'sij' database.

    NOTE(review): credentials are hard-coded in source — consider moving
    them to environment variables or the project config.
    """
    return psycopg2.connect(
        dbname='sij',
        user='sij',
        password='Synchr0!',
        host='localhost'  # Adjust if your database is not on localhost
    )
|
||||
|
||||
def get_table_info(conn):
    """Collect primary- and foreign-key metadata for every public table.

    Returns a dict mapping table name to {'primary_keys': [...],
    'foreign_keys': [...]}.
    """
    with conn.cursor() as cur:
        # Get all tables in the public schema
        cur.execute("""
            SELECT table_name
            FROM information_schema.tables
            WHERE table_schema = 'public'
        """)
        names = [row[0] for row in cur.fetchall()]

        return {
            name: {
                'primary_keys': get_primary_keys(cur, name),
                'foreign_keys': get_foreign_keys(cur, name),
            }
            for name in names
        }
|
||||
|
||||
def get_primary_keys(cur, table_name):
    """Return the primary-key column names of table_name (may be empty)."""
    cur.execute("""
        SELECT a.attname
        FROM pg_index i
        JOIN pg_attribute a ON a.attrelid = i.indrelid
        AND a.attnum = ANY(i.indkey)
        WHERE i.indrelid = %s::regclass
        AND i.indisprimary
    """, (table_name,))
    return [name for (name,) in cur.fetchall()]
|
||||
|
||||
def get_foreign_keys(cur, table_name):
    """Return foreign-key tuples for table_name.

    Each row: (constraint_name, column_name, foreign_table_name,
    foreign_column_name).
    """
    cur.execute("""
        SELECT
            tc.constraint_name,
            kcu.column_name,
            ccu.table_name AS foreign_table_name,
            ccu.column_name AS foreign_column_name
        FROM
            information_schema.table_constraints AS tc
            JOIN information_schema.key_column_usage AS kcu
              ON tc.constraint_name = kcu.constraint_name
              AND tc.table_schema = kcu.table_schema
            JOIN information_schema.constraint_column_usage AS ccu
              ON ccu.constraint_name = tc.constraint_name
              AND ccu.table_schema = tc.table_schema
        WHERE tc.constraint_type = 'FOREIGN KEY' AND tc.table_name=%s
    """, (table_name,))
    return cur.fetchall()
|
||||
|
||||
def main():
    """Print primary- and foreign-key info for every public table.

    Output is markdown-ish: one '## Table:' heading per table followed by
    bulleted key lists. Database errors and unexpected errors are caught
    and printed rather than raised.
    """
    try:
        with connect_to_db() as conn:
            table_info = get_table_info(conn)

            for table_name, info in table_info.items():
                print(f"\n## Table: {table_name}")

                print("\nPrimary Keys:")
                if info['primary_keys']:
                    for pk in info['primary_keys']:
                        print(f"- {pk}")
                else:
                    print("- No primary keys found")

                print("\nForeign Keys:")
                if info['foreign_keys']:
                    for fk in info['foreign_keys']:
                        print(f"- {fk[1]} -> {fk[2]}.{fk[3]} (Constraint: {fk[0]})")
                else:
                    print("- No foreign keys found")

    except psycopg2.Error as e:
        print(f"Database error: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
83
sijapi/helpers/db_repl.py
Normal file
83
sijapi/helpers/db_repl.py
Normal file
|
@ -0,0 +1,83 @@
|
|||
import yaml
|
||||
import subprocess
|
||||
import os
|
||||
import sys
|
||||
|
||||
def load_config():
    """Load and parse config/sys.yaml.

    The path is resolved relative to this script's directory rather than
    the current working directory — the previous relative path
    '../config/sys.yaml' broke whenever the script was launched from
    anywhere but sijapi/helpers/ (db_replicator.py already resolves its
    config this way).
    """
    config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'config', 'sys.yaml')
    with open(config_path, 'r') as file:
        return yaml.safe_load(file)
|
||||
|
||||
def run_command(command):
    """Run a shell command string; return (returncode, stdout, stderr)."""
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    out, err = proc.communicate()
    return proc.returncode, out.decode(), err.decode()
|
||||
|
||||
def pg_dump(host, port, db_name, user, password, tables):
    """Dump the given tables with pg_dump; returns run_command's (rc, out, err).

    NOTE(review): password and identifiers are interpolated into a shell
    string — acceptable for trusted config, but PGPASSWORD-via-env plus an
    argument list would avoid shell-quoting and process-listing exposure.
    """
    dump_command = f"PGPASSWORD={password} pg_dump -h {host} -p {port} -U {user} -d {db_name} -t {' -t '.join(tables)} -c --no-owner"
    return run_command(dump_command)
|
||||
|
||||
def pg_restore(host, port, db_name, user, password, dump_data):
    """Pipe dump_data (SQL text) into psql on the target; return (rc, out, err)."""
    restore_command = f"PGPASSWORD={password} psql -h {host} -p {port} -U {user} -d {db_name}"
    proc = subprocess.Popen(restore_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    out, err = proc.communicate(input=dump_data.encode())
    return proc.returncode, out.decode(), err.decode()
|
||||
|
||||
def check_postgres_version(host, port, user, password):
    """Return the server's 'SELECT version();' output (stripped), or an
    error string when psql exits non-zero."""
    version_command = f"PGPASSWORD={password} psql -h {host} -p {port} -U {user} -c 'SELECT version();'"
    returncode, stdout, stderr = run_command(version_command)
    if returncode == 0:
        return stdout.strip()
    else:
        return f"Error checking version: {stderr}"
|
||||
|
||||
def replicate_databases():
    """One-shot replication of selected tables from the first pool server
    to every other server in the pool.

    Dumps each target table set from POOL[0] (with -c, so existing tables
    are dropped/recreated on the target) and pipes the dump into each
    remaining server. A dump or restore failure for one target logs and
    moves on to the next.
    """
    config = load_config()
    pool = config['POOL']
    tables_to_replicate = ['click_logs', 'dailyweather', 'hourlyweather', 'locations', 'short_urls']

    source_db = pool[0]
    target_dbs = pool[1:]

    # Check source database version (informational; no compatibility gate)
    source_version = check_postgres_version(source_db['ts_ip'], source_db['db_port'], source_db['db_user'], source_db['db_pass'])
    print(f"Source database version: {source_version}")

    for target_db in target_dbs:
        print(f"\nReplicating to {target_db['ts_id']}...")

        # Check target database version
        target_version = check_postgres_version(target_db['ts_ip'], target_db['db_port'], target_db['db_user'], target_db['db_pass'])
        print(f"Target database version: {target_version}")

        # Perform dump
        returncode, dump_data, stderr = pg_dump(
            source_db['ts_ip'],
            source_db['db_port'],
            source_db['db_name'],
            source_db['db_user'],
            source_db['db_pass'],
            tables_to_replicate
        )

        if returncode != 0:
            print(f"Error during dump: {stderr}")
            continue

        # Perform restore
        returncode, stdout, stderr = pg_restore(
            target_db['ts_ip'],
            target_db['db_port'],
            target_db['db_name'],
            target_db['db_user'],
            target_db['db_pass'],
            dump_data
        )

        if returncode != 0:
            print(f"Error during restore: {stderr}")
        else:
            print(f"Replication to {target_db['ts_id']} completed successfully.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
replicate_databases()
|
||||
|
76
sijapi/helpers/db_replicator.py
Executable file
76
sijapi/helpers/db_replicator.py
Executable file
|
@ -0,0 +1,76 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import yaml
|
||||
import subprocess
|
||||
|
||||
def load_config():
    """Load sys.yaml and gis.yaml from the package config directory.

    Paths are resolved relative to this file, so the script works from
    any working directory. Returns (sys_config, gis_config).
    """
    here = os.path.dirname(os.path.abspath(__file__))
    sys_config_path = os.path.join(here, '..', 'config', 'sys.yaml')
    gis_config_path = os.path.join(here, '..', 'config', 'gis.yaml')

    with open(sys_config_path, 'r') as f:
        sys_config = yaml.safe_load(f)

    with open(gis_config_path, 'r') as f:
        gis_config = yaml.safe_load(f)

    return sys_config, gis_config
|
||||
|
||||
def replicate_table(source, targets, table_name):
    """Copy one table from the source server to each target server.

    Dumps the table to a local '<table>.sql' file (no owner/ACL so it
    restores under the target's user), then for each target drops any
    existing table (CASCADE) and replays the dump. The dump file is
    removed afterward. subprocess failures raise CalledProcessError.
    """
    print(f"Replicating {table_name}")

    # Dump the table from the source
    dump_command = [
        'pg_dump',
        '-h', source['ts_ip'],
        '-p', str(source['db_port']),
        '-U', source['db_user'],
        '-d', source['db_name'],
        '-t', table_name,
        '--no-owner',
        '--no-acl'
    ]

    # Password is passed via environment, not the command line
    env = os.environ.copy()
    env['PGPASSWORD'] = source['db_pass']

    with open(f"{table_name}.sql", 'w') as f:
        subprocess.run(dump_command, env=env, stdout=f, check=True)

    # Restore the table to each target
    for target in targets:
        print(f"Replicating to {target['ts_id']}")
        restore_command = [
            'psql',
            '-h', target['ts_ip'],
            '-p', str(target['db_port']),
            '-U', target['db_user'],
            '-d', target['db_name'],
            '-c', f"DROP TABLE IF EXISTS {table_name} CASCADE;",
            '-f', f"{table_name}.sql"
        ]

        env = os.environ.copy()
        env['PGPASSWORD'] = target['db_pass']

        subprocess.run(restore_command, env=env, check=True)

    # Clean up the dump file
    os.remove(f"{table_name}.sql")
|
||||
|
||||
def main():
    """Replicate every GIS layer table from the first pool server to the rest.

    The table list comes from gis.yaml's 'layers'; server pool comes from
    sys.yaml's 'POOL' (first entry is the source).
    """
    sys_config, gis_config = load_config()

    source_server = sys_config['POOL'][0]
    target_servers = sys_config['POOL'][1:]

    tables = [layer['table_name'] for layer in gis_config['layers']]

    for table in tables:
        replicate_table(source_server, target_servers, table)

    print("Replication complete!")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
42510
sijapi/helpers/locations.sql
Normal file
42510
sijapi/helpers/locations.sql
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,8 +1,12 @@
|
|||
import asyncio
|
||||
from pathlib import Path
|
||||
from sijapi import L, EMAIL_CONFIG, EMAIL_LOGS
|
||||
from sijapi.classes import EmailAccount
|
||||
from sijapi import EMAIL_CONFIG, EMAIL_LOGS
|
||||
from sijapi.utilities import EmailAccount
|
||||
from sijapi.routers import email
|
||||
from sijapi.logs import get_logger
|
||||
|
||||
l = get_logger(__name__)
|
||||
|
||||
|
||||
async def initialize_log_files():
|
||||
summarized_log = EMAIL_LOGS / "summarized.txt"
|
||||
|
@ -11,13 +15,13 @@ async def initialize_log_files():
|
|||
for log_file in [summarized_log, autoresponded_log, diagnostic_log]:
|
||||
log_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
log_file.write_text("")
|
||||
L.DEBUG(f"Log files initialized: {summarized_log}, {autoresponded_log}, {diagnostic_log}")
|
||||
l.debug(f"Log files initialized: {summarized_log}, {autoresponded_log}, {diagnostic_log}")
|
||||
return summarized_log, autoresponded_log, diagnostic_log
|
||||
|
||||
async def process_all_emails(account: EmailAccount, summarized_log: Path, autoresponded_log: Path, diagnostic_log: Path):
|
||||
try:
|
||||
with email.get_imap_connection(account) as inbox:
|
||||
L.DEBUG(f"Connected to {account.name}, processing all emails...")
|
||||
l.debug(f"Connected to {account.name}, processing all emails...")
|
||||
all_messages = inbox.messages()
|
||||
unread_messages = set(uid for uid, _ in inbox.messages(unread=True))
|
||||
|
||||
|
@ -41,15 +45,15 @@ async def process_all_emails(account: EmailAccount, summarized_log: Path, autore
|
|||
with open(log_file, 'a') as f:
|
||||
f.write(f"{id_str}\n")
|
||||
|
||||
L.INFO(f"Processed {processed_count} non-unread emails for account {account.name}")
|
||||
l.info(f"Processed {processed_count} non-unread emails for account {account.name}")
|
||||
except Exception as e:
|
||||
L.logger.error(f"An error occurred while processing emails for account {account.name}: {e}")
|
||||
l.logger.error(f"An error occurred while processing emails for account {account.name}: {e}")
|
||||
|
||||
async def main():
|
||||
email_accounts = email.load_email_accounts(EMAIL_CONFIG)
|
||||
summarized_log, autoresponded_log, diagnostic_log = await initialize_log_files()
|
||||
|
||||
L.DEBUG(f"Processing {len(email_accounts)} email accounts")
|
||||
l.debug(f"Processing {len(email_accounts)} email accounts")
|
||||
|
||||
tasks = [process_all_emails(account, summarized_log, autoresponded_log, diagnostic_log) for account in email_accounts]
|
||||
await asyncio.gather(*tasks)
|
||||
|
@ -57,7 +61,7 @@ async def main():
|
|||
# Final verification
|
||||
with open(summarized_log, 'r') as f:
|
||||
final_count = len(f.readlines())
|
||||
L.INFO(f"Final non-unread email count: {final_count}")
|
||||
l.info(f"Final non-unread email count: {final_count}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
191
sijapi/helpers/migrate_db_to_uuid.py
Normal file
191
sijapi/helpers/migrate_db_to_uuid.py
Normal file
|
@ -0,0 +1,191 @@
|
|||
import psycopg2
|
||||
from psycopg2 import sql
|
||||
import sys
|
||||
|
||||
def connect_to_db():
    """Open a psycopg2 connection to the local 'sij' database.

    NOTE(review): credentials are hard-coded in source — consider
    environment variables or the project config instead.
    """
    return psycopg2.connect(
        dbname='sij',
        user='sij',
        password='Synchr0!',
        host='localhost'
    )
||||
|
||||
def get_tables(cur):
    """List public base tables eligible for migration.

    Excludes tables produced by earlier migration runs ('%_uuid',
    '%_orig') and the PostGIS-owned spatial_ref_sys table.
    """
    cur.execute("""
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'public' AND table_type = 'BASE TABLE'
        AND table_name NOT LIKE '%_uuid' AND table_name NOT LIKE '%_orig'
        AND table_name != 'spatial_ref_sys'
    """)
    return [name for (name,) in cur.fetchall()]
|
||||
|
||||
def get_columns(cur, table_name):
    """Return column metadata for table_name in ordinal order.

    Each row: (column_name, udt_name, is_nullable, column_default,
    character_maximum_length, numeric_precision, numeric_scale).
    """
    cur.execute("""
        SELECT column_name, udt_name,
               is_nullable, column_default,
               character_maximum_length, numeric_precision, numeric_scale
        FROM information_schema.columns
        WHERE table_name = %s
        ORDER BY ordinal_position
    """, (table_name,))
    return cur.fetchall()
|
||||
|
||||
def get_constraints(cur, table_name):
    """Return constraint rows for table_name.

    Each row: (conname, contype, definition) where contype is the
    pg_constraint single-letter code ('p' primary, 'f' foreign, ...).
    """
    cur.execute("""
        SELECT conname, contype, pg_get_constraintdef(c.oid)
        FROM pg_constraint c
        JOIN pg_namespace n ON n.oid = c.connamespace
        WHERE conrelid = %s::regclass
        AND n.nspname = 'public'
    """, (table_name,))
    return cur.fetchall()
|
||||
|
||||
def drop_table_if_exists(cur, table_name):
    """Drop table_name (and dependent objects, via CASCADE) if it exists."""
    stmt = sql.SQL("DROP TABLE IF EXISTS {} CASCADE").format(sql.Identifier(table_name))
    cur.execute(stmt)
|
||||
|
||||
def create_uuid_table(cur, old_table, new_table):
    """Create new_table as a UUID-keyed clone of old_table's schema.

    If old_table has an 'id' column it becomes a UUID primary key with a
    gen_random_uuid() default; otherwise a new 'uuid' column is appended.
    Auto-increment (nextval) defaults are dropped; other defaults, NOT NULL
    flags, and non-primary-key constraints are carried over.
    """
    drop_table_if_exists(cur, new_table)
    columns = get_columns(cur, old_table)
    constraints = get_constraints(cur, old_table)

    column_defs = []
    has_id_column = any(col[0] == 'id' for col in columns)

    for col in columns:
        col_name, udt_name, is_nullable, default, max_length, precision, scale = col
        if col_name == 'id' and has_id_column:
            # Replace the old key with a generated UUID primary key.
            column_defs.append(sql.SQL("{} UUID PRIMARY KEY DEFAULT gen_random_uuid()").format(sql.Identifier(col_name)))
        else:
            type_sql = sql.SQL("{}").format(sql.Identifier(udt_name))
            if max_length:
                # e.g. varchar(n)
                type_sql = sql.SQL("{}({})").format(type_sql, sql.Literal(max_length))
            elif precision and scale:
                # e.g. numeric(p,s)
                type_sql = sql.SQL("{}({},{})").format(type_sql, sql.Literal(precision), sql.Literal(scale))

            column_def = sql.SQL("{} {}").format(sql.Identifier(col_name), type_sql)
            if is_nullable == 'NO':
                column_def = sql.SQL("{} NOT NULL").format(column_def)
            if default and 'nextval' not in default:  # Skip auto-increment defaults
                column_def = sql.SQL("{} DEFAULT {}").format(column_def, sql.SQL(default))
            column_defs.append(column_def)

    constraint_defs = []
    for constraint in constraints:
        conname, contype, condef = constraint
        if contype != 'p' or not has_id_column:  # Keep primary key if there's no id column
            constraint_defs.append(sql.SQL(condef))

    if not has_id_column:
        column_defs.append(sql.SQL("uuid UUID DEFAULT gen_random_uuid()"))

    query = sql.SQL("CREATE TABLE {} ({})").format(
        sql.Identifier(new_table),
        sql.SQL(", ").join(column_defs + constraint_defs)
    )
    cur.execute(query)
|
||||
|
||||
def migrate_data(cur, old_table, new_table):
    """Copy all rows from old_table into new_table, minting fresh UUIDs.

    If the source has an 'id' column its values are replaced by
    gen_random_uuid() in the target's 'id'; otherwise the row data is
    copied as-is and the appended 'uuid' column is filled.
    """
    columns = get_columns(cur, old_table)
    column_names = [col[0] for col in columns]
    has_id_column = 'id' in column_names

    if has_id_column:
        # Drop the old integer id from the copy; a UUID is generated per row.
        column_names.remove('id')
        old_cols = sql.SQL(", ").join(map(sql.Identifier, column_names))
        new_cols = sql.SQL(", ").join(map(sql.Identifier, ['id'] + column_names))
        query = sql.SQL("INSERT INTO {} ({}) SELECT gen_random_uuid(), {} FROM {}").format(
            sql.Identifier(new_table),
            new_cols,
            old_cols,
            sql.Identifier(old_table)
        )
    else:
        old_cols = sql.SQL(", ").join(map(sql.Identifier, column_names))
        new_cols = sql.SQL(", ").join(map(sql.Identifier, column_names + ['uuid']))
        query = sql.SQL("INSERT INTO {} ({}) SELECT {}, gen_random_uuid() FROM {}").format(
            sql.Identifier(new_table),
            new_cols,
            old_cols,
            sql.Identifier(old_table)
        )
    cur.execute(query)
|
||||
|
||||
def update_foreign_keys(cur, tables):
    """Rewrite foreign-key values in each *_uuid table to the new UUIDs.

    For every FK constraint, the referenced table's old key values are
    joined to the *_uuid counterpart to map old keys onto the freshly
    generated UUIDs.

    NOTE(review): the local/referenced column names are parsed out of the
    textual constraint definition by splitting on parentheses; this assumes
    the standard single-column "FOREIGN KEY (col) REFERENCES tbl(col)"
    shape -- composite or schema-qualified keys would be mis-parsed.
    """
    for table in tables:
        constraints = get_constraints(cur, table)
        for constraint in constraints:
            conname, contype, condef = constraint
            if contype == 'f':  # Foreign key constraint
                referenced_table = condef.split('REFERENCES ')[1].split('(')[0].strip()
                referenced_column = condef.split('(')[2].split(')')[0].strip()
                local_column = condef.split('(')[1].split(')')[0].strip()

                cur.execute(sql.SQL("""
                    UPDATE {table_uuid}
                    SET {local_column} = subquery.new_id::text::{local_column_type}
                    FROM (
                        SELECT old.{ref_column} AS old_id, new_table.id AS new_id
                        FROM {ref_table} old
                        JOIN public.{ref_table_uuid} new_table ON new_table.{ref_column}::text = old.{ref_column}::text
                    ) AS subquery
                    WHERE {local_column}::text = subquery.old_id::text
                """).format(
                    table_uuid=sql.Identifier(f"{table}_uuid"),
                    local_column=sql.Identifier(local_column),
                    local_column_type=sql.SQL(get_column_type(cur, f"{table}_uuid", local_column)),
                    ref_column=sql.Identifier(referenced_column),
                    ref_table=sql.Identifier(referenced_table),
                    ref_table_uuid=sql.Identifier(f"{referenced_table}_uuid")
                ))
|
||||
|
||||
def get_column_type(cur, table_name, column_name):
    """Return the information_schema data_type of one column."""
    cur.execute("""
        SELECT data_type
        FROM information_schema.columns
        WHERE table_name = %s AND column_name = %s
    """, (table_name, column_name))
    return cur.fetchone()[0]
|
||||
|
||||
def rename_tables(cur, tables):
    """Swap each migrated table into place: <t> -> <t>_orig, <t>_uuid -> <t>."""
    rename = sql.SQL("ALTER TABLE IF EXISTS {} RENAME TO {}")
    for table in tables:
        # Keep the pre-migration table around as <table>_orig (drop any stale copy first).
        drop_table_if_exists(cur, f"{table}_orig")
        cur.execute(rename.format(sql.Identifier(table), sql.Identifier(f"{table}_orig")))
        cur.execute(rename.format(sql.Identifier(f"{table}_uuid"), sql.Identifier(table)))
|
||||
|
||||
def main():
    """Run the UUID migration end to end.

    Steps: discover candidate tables, build *_uuid schema clones, copy the
    data (minting UUID keys), rewrite foreign-key references, then swap the
    new tables into place. Commits only if every step succeeds.
    """
    conn = None  # so the except clause can tell whether a connection was ever opened
    try:
        with connect_to_db() as conn:
            with conn.cursor() as cur:
                tables = get_tables(cur)

                # Create new UUID tables
                for table in tables:
                    print(f"Creating UUID table for {table}...")
                    create_uuid_table(cur, table, f"{table}_uuid")

                # Migrate data
                for table in tables:
                    print(f"Migrating data for {table}...")
                    migrate_data(cur, table, f"{table}_uuid")

                # Update foreign keys
                print("Updating foreign key references...")
                update_foreign_keys(cur, tables)

                # Rename tables
                print("Renaming tables...")
                rename_tables(cur, tables)

            conn.commit()
            print("Migration completed successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")
        # Bug fix: previously conn.rollback() ran unconditionally, raising
        # NameError (and masking the real error) when connect_to_db() failed.
        if conn is not None:
            conn.rollback()

if __name__ == "__main__":
    main()
|
366
sijapi/helpers/plss.py
Normal file
366
sijapi/helpers/plss.py
Normal file
|
@ -0,0 +1,366 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import yaml
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
import argparse
|
||||
import psycopg2
|
||||
from psycopg2.extras import execute_values
|
||||
|
||||
def load_config():
    """Load sys.yaml and gis.yaml from the package config directory.

    Returns (sys_config, gis_config) as parsed dicts.
    """
    config_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'config')
    with open(os.path.join(config_dir, 'sys.yaml'), 'r') as f:
        sys_config = yaml.safe_load(f)
    with open(os.path.join(config_dir, 'gis.yaml'), 'r') as f:
        gis_config = yaml.safe_load(f)
    return sys_config, gis_config
|
||||
|
||||
def get_db_config(sys_config):
    """Translate the first POOL entry of sys.yaml into DB_* connection keys.

    Returns an empty dict when no pool servers are configured. DB_PORT is
    stringified for use with subprocess-based psql calls.
    """
    pool = sys_config.get('POOL', [])
    if not pool:
        return {}
    primary = pool[0]
    return {
        'DB_NAME': primary.get('db_name'),
        'DB_USER': primary.get('db_user'),
        'DB_PASSWORD': primary.get('db_pass'),
        'DB_HOST': primary.get('ts_ip'),
        'DB_PORT': str(primary.get('db_port')),
    }
|
||||
|
||||
def get_feature_count(url):
    """Ask the ArcGIS feature service at url for its total feature count.

    Retries transient 5xx failures; returns 0 if the response has no count.
    """
    params = {
        'where': '1=1',
        'returnCountOnly': 'true',
        'f': 'json'
    }
    retry_policy = Retry(total=10, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
    with requests.Session() as session:
        session.mount("https://", HTTPAdapter(max_retries=retry_policy))
        resp = session.get(url, params=params, timeout=30)
        resp.raise_for_status()
        return resp.json().get('count', 0)
|
||||
|
||||
def fetch_features(url, offset, num, max_retries=5):
    """Fetch one page of features from an ArcGIS layer endpoint.

    Pages deterministically by OBJECTID order using resultOffset /
    resultRecordCount. Retries the whole request up to max_retries times on
    any requests exception (on top of urllib3's per-request 5xx retries),
    re-raising after the final attempt.
    """
    params = {
        'where': '1=1',
        'outFields': '*',
        'geometryPrecision': 6,
        'outSR': 4326,
        'f': 'json',
        'resultOffset': offset,
        'resultRecordCount': num,
        'orderByFields': 'OBJECTID'
    }
    for attempt in range(max_retries):
        try:
            retries = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
            with requests.Session() as session:
                session.mount("https://", HTTPAdapter(max_retries=retries))
                response = session.get(url, params=params, timeout=30)
                response.raise_for_status()
                return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error fetching features (attempt {attempt + 1}/{max_retries}): {e}")
            if attempt == max_retries - 1:
                raise
            time.sleep(5 * (attempt + 1))  # linearly increasing backoff: 5s, 10s, 15s, ...
|
||||
|
||||
|
||||
def create_table(db_config, table_name, gis_config):
    """Create table_name if absent, inferring its schema from the layer.

    Columns are derived from the attributes of the first feature returned
    by the layer's ArcGIS endpoint (INTEGER / DOUBLE PRECISION / TEXT by
    sampled value type), plus a SERIAL id and a PostGIS Polygon geometry
    column, and an index on "plssid".
    """
    conn = psycopg2.connect(
        dbname=db_config['DB_NAME'],
        user=db_config['DB_USER'],
        password=db_config['DB_PASSWORD'],
        host=db_config['DB_HOST'],
        port=db_config['DB_PORT']
    )
    try:
        with conn.cursor() as cur:
            # Check if the table already exists
            cur.execute(f"SELECT to_regclass('{table_name}')")
            if cur.fetchone()[0] is None:
                # If the table doesn't exist, create it based on the first feature
                url = next(layer['url'] for layer in gis_config['layers'] if layer['table_name'] == table_name)
                first_feature = fetch_features(url, 0, 1)['features'][0]
                columns = []
                for attr, value in first_feature['attributes'].items():
                    # Normalize ArcGIS names like "Shape.STArea()" into valid identifiers.
                    column_name = attr.lower().replace('.', '_').replace('()', '')
                    if isinstance(value, int):
                        columns.append(f'"{column_name}" INTEGER')
                    elif isinstance(value, float):
                        columns.append(f'"{column_name}" DOUBLE PRECISION')
                    else:
                        columns.append(f'"{column_name}" TEXT')

                create_sql = f"""
                CREATE TABLE {table_name} (
                    id SERIAL PRIMARY KEY,
                    geom GEOMETRY(Polygon, 4326),
                    {', '.join(columns)}
                )
                """
                cur.execute(create_sql)

                # Create index on plssid
                cur.execute(f'CREATE INDEX idx_{table_name.split(".")[-1]}_plssid ON {table_name}("plssid")')

                print(f"Created table: {table_name}")
            else:
                print(f"Table {table_name} already exists")
        conn.commit()
    except psycopg2.Error as e:
        print(f"Error creating table {table_name}: {e}")
    finally:
        conn.close()
|
||||
|
||||
|
||||
def insert_features_to_db(features, table_name, db_config):
    """Bulk-insert a batch of ArcGIS features into table_name.

    Geometry is converted from the ESRI 'rings' format to POLYGON WKT and
    attribute values are matched to the table's columns by name. Invalid
    features (missing geometry or attributes) are logged and skipped.
    """
    conn = psycopg2.connect(
        dbname=db_config['DB_NAME'],
        user=db_config['DB_USER'],
        password=db_config['DB_PASSWORD'],
        host=db_config['DB_HOST'],
        port=db_config['DB_PORT']
    )
    try:
        with conn.cursor() as cur:
            # Get the column names from the table
            cur.execute(f"SELECT column_name FROM information_schema.columns WHERE table_name = '{table_name.split('.')[-1]}'")
            db_columns = [row[0] for row in cur.fetchall() if row[0] != 'id']

            # Prepare the SQL statement
            sql = f"""
                INSERT INTO {table_name} ({', '.join([f'"{col}"' for col in db_columns])})
                VALUES %s
            """

            # Prepare the template for execute_values
            template = f"({', '.join(['%s' for _ in db_columns])})"

            values = []
            for feature in features:
                geom = feature.get('geometry')
                attrs = feature.get('attributes')
                if geom and attrs:
                    # Only the first (outer) ring is kept -- interior rings (holes) are dropped.
                    rings = geom['rings'][0]
                    wkt = f"POLYGON(({','.join([f'{x} {y}' for x, y in rings])}))"
                    # NOTE(review): WKT is passed as plain text for the geom column;
                    # presumably PostGIS's implicit text->geometry cast applies -- confirm.

                    row = []
                    for col in db_columns:
                        if col == 'geom':
                            row.append(wkt)
                        else:
                            # Map database column names back to original attribute names
                            attr_name = col.upper()
                            if attr_name == 'SHAPE_STAREA':
                                attr_name = 'Shape.STArea()'
                            elif attr_name == 'SHAPE_STLENGTH':
                                attr_name = 'Shape.STLength()'
                            row.append(attrs.get(attr_name))

                    values.append(tuple(row))
                else:
                    print(f"Skipping invalid feature: {feature}")

            if values:
                execute_values(cur, sql, values, template=template, page_size=100)
                print(f"Inserted {len(values)} features")
            else:
                print("No valid features to insert")
        conn.commit()
    except Exception as e:
        print(f"Error inserting features: {e}")
        print(f"First feature for debugging: {features[0] if features else 'No features'}")
        conn.rollback()
    finally:
        conn.close()
|
||||
|
||||
|
||||
|
||||
def download_and_import_layer(layer_config, db_config, gis_config, force_refresh):
    """Download one ArcGIS layer in batches and import it into Postgres.

    Resumes from the table's current row count unless force_refresh is set,
    in which case the table is dropped and rebuilt. A failed batch is logged
    and skipped so one bad page does not abort the whole layer.
    """
    url = layer_config['url']
    layer_name = layer_config['layer_name']
    table_name = layer_config['table_name']
    batch_size = layer_config['batch_size']
    delay = layer_config['delay'] / 1000  # Convert to seconds

    total_count = get_feature_count(url)
    print(f"Total {layer_name} features: {total_count}")

    # Check existing records in the database
    existing_count = get_existing_record_count(db_config, table_name)

    if existing_count == total_count and not force_refresh:
        print(f"Table {table_name} already contains all {total_count} features. Skipping.")
        return

    if force_refresh:
        delete_existing_table(db_config, table_name)
        create_table(db_config, table_name, gis_config)
        existing_count = 0
    elif existing_count == 0:
        create_table(db_config, table_name, gis_config)

    # Resume where the previous run stopped.
    offset = existing_count

    start_time = time.time()
    try:
        while offset < total_count:
            batch_start_time = time.time()
            print(f"Fetching {layer_name} features {offset} to {offset + batch_size}...")
            try:
                data = fetch_features(url, offset, batch_size)
                new_features = data.get('features', [])
                if not new_features:
                    break

                insert_features_to_db(new_features, table_name, db_config)
                offset += len(new_features)

                batch_end_time = time.time()
                batch_duration = batch_end_time - batch_start_time
                print(f"Batch processed in {batch_duration:.2f} seconds")

                # Progress indicator
                progress = offset / total_count
                bar_length = 30
                filled_length = int(bar_length * progress)
                bar = '=' * filled_length + '-' * (bar_length - filled_length)
                print(f'\rProgress: [{bar}] {progress:.1%} ({offset}/{total_count} features)', end='', flush=True)

                time.sleep(delay)
            except Exception as e:
                print(f"\nError processing batch starting at offset {offset}: {e}")
                print("Continuing with next batch...")
                # NOTE(review): skipping ahead leaves a gap of up to batch_size
                # rows for the failed page; a later resume will not re-fetch it.
                offset += batch_size

        end_time = time.time()
        total_duration = end_time - start_time
        print(f"\nTotal {layer_name} features fetched and imported: {offset}")
        print(f"Total time: {total_duration:.2f} seconds")

    except Exception as e:
        print(f"\nError during download and import: {e}")
        print(f"Last successful offset: {offset}")
|
||||
|
||||
def get_existing_record_count(db_config, table_name):
    """Return COUNT(*) for table_name, or 0 if it is missing or unreachable."""
    conn = psycopg2.connect(
        dbname=db_config['DB_NAME'],
        user=db_config['DB_USER'],
        password=db_config['DB_PASSWORD'],
        host=db_config['DB_HOST'],
        port=db_config['DB_PORT']
    )
    try:
        with conn.cursor() as cur:
            cur.execute(f"SELECT COUNT(*) FROM {table_name}")
            return cur.fetchone()[0]
    except psycopg2.Error:
        # A missing table (or any other DB error) counts as "no rows yet".
        return 0
    finally:
        conn.close()
|
||||
|
||||
def delete_existing_table(db_config, table_name):
    """Drop table_name (and its plssid index) if present.

    Errors are reported but not raised so a force-refresh can proceed.
    """
    conn = psycopg2.connect(
        dbname=db_config['DB_NAME'],
        user=db_config['DB_USER'],
        password=db_config['DB_PASSWORD'],
        host=db_config['DB_HOST'],
        port=db_config['DB_PORT']
    )
    try:
        with conn.cursor() as cur:
            # Drop the index if it exists
            cur.execute(f"DROP INDEX IF EXISTS idx_{table_name.split('.')[-1]}_plssid")

            # Then drop the table
            cur.execute(f"DROP TABLE IF EXISTS {table_name} CASCADE")
        conn.commit()
        print(f"Deleted existing table and index: {table_name}")
    except psycopg2.Error as e:
        print(f"Error deleting table {table_name}: {e}")
    finally:
        conn.close()
|
||||
|
||||
|
||||
def check_postgres_connection(db_config):
    """Return True if psql can reach the configured database.

    Bug fix: the configured DB_PASSWORD was never passed to psql, so this
    probe failed (or blocked on a prompt) against password-protected
    servers; pass it via the PGPASSWORD environment variable, matching the
    other helper scripts in this package.
    """
    env = os.environ.copy()
    env['PGPASSWORD'] = db_config['DB_PASSWORD']
    try:
        subprocess.run(['psql',
                        '-h', db_config['DB_HOST'],
                        '-p', db_config['DB_PORT'],
                        '-U', db_config['DB_USER'],
                        '-d', db_config['DB_NAME'],
                        '-c', 'SELECT 1;'],
                       check=True, capture_output=True, text=True, env=env)
        return True
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError: psql binary not on PATH -- treat as "cannot connect".
        return False
|
||||
|
||||
def check_postgis_extension(db_config):
    """Return True if the PostGIS extension is installed in the target DB.

    Bug fix: DB_PASSWORD from the config was never handed to psql, so the
    check failed against password-protected servers; pass it via PGPASSWORD.
    """
    env = os.environ.copy()
    env['PGPASSWORD'] = db_config['DB_PASSWORD']
    try:
        result = subprocess.run(['psql',
                                 '-h', db_config['DB_HOST'],
                                 '-p', db_config['DB_PORT'],
                                 '-U', db_config['DB_USER'],
                                 '-d', db_config['DB_NAME'],
                                 '-c', "SELECT 1 FROM pg_extension WHERE extname = 'postgis';"],
                                check=True, capture_output=True, text=True, env=env)
        return '1' in result.stdout
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError: psql binary not on PATH -- treat as "not installed".
        return False
|
||||
|
||||
def create_postgis_extension(db_config):
    """Create the PostGIS extension in the target database, exiting on failure.

    Bug fix: DB_PASSWORD from the config was never handed to psql; pass it
    via PGPASSWORD so the statement can run on password-protected servers.
    """
    env = os.environ.copy()
    env['PGPASSWORD'] = db_config['DB_PASSWORD']
    try:
        subprocess.run(['psql',
                        '-h', db_config['DB_HOST'],
                        '-p', db_config['DB_PORT'],
                        '-U', db_config['DB_USER'],
                        '-d', db_config['DB_NAME'],
                        '-c', "CREATE EXTENSION IF NOT EXISTS postgis;"],
                       check=True, capture_output=True, text=True, env=env)
        print("PostGIS extension created successfully.")
    except subprocess.CalledProcessError as e:
        # Without PostGIS the import cannot proceed at all.
        print(f"Error creating PostGIS extension: {e}")
        sys.exit(1)
|
||||
|
||||
def main():
    """CLI entry point: download and import every configured PLSS layer.

    --force-refresh with no names refreshes all layers; with names, only
    those layers are force-refreshed.
    """
    parser = argparse.ArgumentParser(description="Download and import PLSS data")
    parser.add_argument("--force-refresh", nargs='*', help="Force refresh of specified layers or all if none specified")
    args = parser.parse_args()

    sys_config, gis_config = load_config()
    db_config = get_db_config(sys_config)

    if not check_postgres_connection(db_config):
        print("Error: Unable to connect to PostgreSQL. Please check your connection settings.")
        sys.exit(1)

    if not check_postgis_extension(db_config):
        print("PostGIS extension not found. Attempting to create it...")
        create_postgis_extension(db_config)

    try:
        for layer in gis_config['layers']:
            # Process the layer when no --force-refresh filter was given, or
            # when this layer was named in it.
            if args.force_refresh is None or not args.force_refresh or layer['layer_name'] in args.force_refresh:
                download_and_import_layer(layer, db_config, gis_config, bool(args.force_refresh))
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

if __name__ == "__main__":
    main()
|
101
sijapi/helpers/repair_weather_db.py
Normal file
101
sijapi/helpers/repair_weather_db.py
Normal file
|
@ -0,0 +1,101 @@
|
|||
import asyncio
|
||||
import asyncpg
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
|
||||
async def load_config():
    """Read and parse config/db.yaml from the package's config directory."""
    cfg_file = Path(__file__).parent.parent / 'config' / 'db.yaml'
    return yaml.safe_load(cfg_file.read_text())
|
||||
|
||||
async def get_table_size(conn, table_name):
    """Return the row count of table_name."""
    query = f"SELECT COUNT(*) FROM {table_name}"
    return await conn.fetchval(query)
|
||||
|
||||
async def check_postgres_version(conn):
    """Return the server's version() string."""
    version = await conn.fetchval("SELECT version()")
    return version
|
||||
|
||||
async def replicate_table(source, target, table_name):
    """Copy table_name from source to target via pg_dump | psql.

    The target table is dropped and recreated from the dump, then row
    counts are compared to verify the copy.

    Bug fix: asyncpg.connect() was called with the raw pool keys
    (db_name=..., ts_ip=..., ...), which are not valid asyncpg keyword
    arguments and raise TypeError. Map them to asyncpg's
    database/user/password/host/port, as replicator.py already does.
    """
    print(f"Replicating {table_name} from {source['ts_id']} to {target['ts_id']}")

    conn_keys = {
        'database': 'db_name',
        'user': 'db_user',
        'password': 'db_pass',
        'host': 'ts_ip',
        'port': 'db_port',
    }
    source_conn = await asyncpg.connect(**{k: source[v] for k, v in conn_keys.items()})
    target_conn = await asyncpg.connect(**{k: target[v] for k, v in conn_keys.items()})

    try:
        source_version = await check_postgres_version(source_conn)
        target_version = await check_postgres_version(target_conn)
        print(f"Source database version: {source_version}")
        print(f"Target database version: {target_version}")

        table_size = await get_table_size(source_conn, table_name)
        print(f"Table size: {table_size} rows")

        # Dump the table
        dump_command = [
            'pg_dump',
            '-h', source['ts_ip'],
            '-p', str(source['db_port']),
            '-U', source['db_user'],
            '-d', source['db_name'],
            '-t', table_name,
            '--no-owner',
            '--no-acl'
        ]
        # NOTE(review): this replaces the whole environment; pg_dump is then
        # looked up without the caller's PATH -- confirm it resolves on all hosts.
        env = {'PGPASSWORD': source['db_pass']}
        dump_result = subprocess.run(dump_command, env=env, capture_output=True, text=True)

        if dump_result.returncode != 0:
            raise Exception(f"Dump failed: {dump_result.stderr}")

        print("Dump completed successfully")

        # Drop and recreate the table on the target
        await target_conn.execute(f"DROP TABLE IF EXISTS {table_name} CASCADE")
        print(f"Dropped table {table_name} on target")

        # Restore the table
        restore_command = [
            'psql',
            '-h', target['ts_ip'],
            '-p', str(target['db_port']),
            '-U', target['db_user'],
            '-d', target['db_name'],
        ]
        env = {'PGPASSWORD': target['db_pass']}
        restore_result = subprocess.run(restore_command, input=dump_result.stdout, env=env, capture_output=True, text=True)

        if restore_result.returncode != 0:
            raise Exception(f"Restore failed: {restore_result.stderr}")

        print(f"Table {table_name} restored successfully")

        # Verify the number of rows in the target table
        target_size = await get_table_size(target_conn, table_name)
        if target_size == table_size:
            print(f"Replication successful. {target_size} rows copied.")
        else:
            print(f"Warning: Source had {table_size} rows, but target has {target_size} rows.")

    except Exception as e:
        print(f"An error occurred while replicating {table_name}: {str(e)}")
    finally:
        await source_conn.close()
        await target_conn.close()
|
||||
|
||||
async def main():
    """Replicate a fixed set of tables from the pool's first server to the rest."""
    config = await load_config()
    source_server = config['POOL'][0]  # sij-mbp16
    target_servers = config['POOL'][1:]  # sij-vm and sij-vps

    tables_to_replicate = [
        'click_logs', 'dailyweather', 'hourlyweather', 'locations', 'short_urls'
    ]

    # Each table is pushed to every target in turn; failures inside
    # replicate_table are reported there and do not stop the loop.
    for table_name in tables_to_replicate:
        for target_server in target_servers:
            await replicate_table(source_server, target_server, table_name)

    print("All replications completed!")

if __name__ == "__main__":
    asyncio.run(main())
|
132
sijapi/helpers/repl.py
Normal file
132
sijapi/helpers/repl.py
Normal file
|
@ -0,0 +1,132 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import yaml
|
||||
import subprocess
|
||||
import time
|
||||
from tqdm import tqdm
|
||||
|
||||
def load_config():
    """Load sys.yaml and gis.yaml from the package config directory.

    Returns (sys_config, gis_config) as parsed dicts.
    """
    config_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'config')

    with open(os.path.join(config_dir, 'sys.yaml'), 'r') as fh:
        sys_config = yaml.safe_load(fh)
    with open(os.path.join(config_dir, 'gis.yaml'), 'r') as fh:
        gis_config = yaml.safe_load(fh)

    return sys_config, gis_config
|
||||
|
||||
def get_table_size(server, table_name):
    """Return the row count of table_name on server, via `psql -t -c COUNT(*)`."""
    env = dict(os.environ, PGPASSWORD=server['db_pass'])
    cmd = [
        'psql',
        '-h', server['ts_ip'],
        '-p', str(server['db_port']),
        '-U', server['db_user'],
        '-d', server['db_name'],
        '-t',
        '-c', f"SELECT COUNT(*) FROM {table_name}"
    ]
    completed = subprocess.run(cmd, env=env, capture_output=True, text=True, check=True)
    return int(completed.stdout.strip())
|
||||
|
||||
def replicate_table(source, targets, table_name):
    """Dump table_name from source and restore it onto every target server.

    Uses pg_dump/psql subprocesses with PGPASSWORD in the environment, and
    drives a tqdm progress bar from the table's source row count.
    """
    print(f"Replicating {table_name}")

    # Get table size for progress bar
    table_size = get_table_size(source, table_name)
    print(f"Table size: {table_size} rows")

    # Dump the table from the source
    dump_command = [
        'pg_dump',
        '-h', source['ts_ip'],
        '-p', str(source['db_port']),
        '-U', source['db_user'],
        '-d', source['db_name'],
        '-t', table_name,
        '--no-owner',
        '--no-acl'
    ]

    env = os.environ.copy()
    env['PGPASSWORD'] = source['db_pass']

    print("Dumping table...")
    with open(f"{table_name}.sql", 'w') as f:
        subprocess.run(dump_command, env=env, stdout=f, check=True)
    print("Dump complete")

    # Restore the table to each target
    for target in targets:
        print(f"Replicating to {target['ts_id']}")

        # Drop table and its sequence
        drop_commands = [
            f"DROP TABLE IF EXISTS {table_name} CASCADE;",
            f"DROP SEQUENCE IF EXISTS {table_name}_id_seq CASCADE;"
        ]

        restore_command = [
            'psql',
            '-h', target['ts_ip'],
            '-p', str(target['db_port']),
            '-U', target['db_user'],
            '-d', target['db_name'],
        ]

        env = os.environ.copy()
        env['PGPASSWORD'] = target['db_pass']

        # Execute drop commands
        for cmd in drop_commands:
            print(f"Executing: {cmd}")
            subprocess.run(restore_command + ['-c', cmd], env=env, check=True)

        # Restore the table
        print("Restoring table...")
        process = subprocess.Popen(restore_command + ['-f', f"{table_name}.sql"], env=env,
                                   stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)

        # NOTE(review): the progress parse assumes psql emits "COPY <n>" on
        # stderr; psql normally prints command tags on stdout -- confirm
        # before relying on the bar advancing.
        pbar = tqdm(total=table_size, desc="Copying rows")
        copied_rows = 0
        for line in process.stderr:
            if line.startswith("COPY"):
                copied_rows = int(line.split()[1])
                pbar.update(copied_rows - pbar.n)
            print(line, end='')  # Print all output for visibility

        pbar.close()
        process.wait()

        if process.returncode != 0:
            print(f"Error occurred during restoration to {target['ts_id']}")
            # NOTE(review): stderr was already consumed by the loop above, so
            # this read likely returns '' -- collect the lines if the text is
            # needed here.
            print(process.stderr.read())
        else:
            print(f"Restoration to {target['ts_id']} completed successfully")

    # Clean up the dump file
    os.remove(f"{table_name}.sql")
    print(f"Replication of {table_name} completed")
|
||||
|
||||
def main():
    """Replicate every configured GIS layer table from the pool's primary
    server to all other pool members."""
    sys_config, gis_config = load_config()

    source_server = sys_config['POOL'][0]
    target_servers = sys_config['POOL'][1:]

    # One table per configured GIS layer.
    tables = [layer['table_name'] for layer in gis_config['layers']]

    for table in tables:
        replicate_table(source_server, target_servers, table)

    print("All replications completed!")

if __name__ == "__main__":
    main()
|
90
sijapi/helpers/repl.sh
Executable file
90
sijapi/helpers/repl.sh
Executable file
|
@ -0,0 +1,90 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Configuration
|
||||
SOURCE_HOST="100.64.64.20"
|
||||
SOURCE_PORT="5432"
|
||||
SOURCE_DB="sij"
|
||||
SOURCE_USER="sij"
|
||||
SOURCE_PASS="Synchr0!"
|
||||
|
||||
# Target servers
|
||||
declare -a TARGETS=(
|
||||
"sij-vm:100.64.64.11:5432:sij:sij:Synchr0!"
|
||||
"sij-vps:100.64.64.15:5432:sij:sij:Synchr0!"
|
||||
)
|
||||
|
||||
# Tables to replicate
|
||||
TABLES=("dailyweather" "hourlyweather" "short_urls" "click_logs" "locations")
|
||||
|
||||
# PostgreSQL binaries
|
||||
PSQL="/Applications/Postgres.app/Contents/Versions/latest/bin/psql"
|
||||
PG_DUMP="/Applications/Postgres.app/Contents/Versions/latest/bin/pg_dump"
|
||||
|
||||
# Function to run SQL and display results
|
||||
# Run one SQL statement against a server and print the result.
# Args: host port db user pass sql
run_sql() {
    local host=$1
    local port=$2
    local db=$3
    local user=$4
    local pass=$5
    local sql=$6

    # PGPASSWORD is scoped to this single invocation only.
    PGPASSWORD=$pass $PSQL -h $host -p $port -U $user -d $db -c "$sql"
}
|
||||
|
||||
# Replicate to a target
|
||||
# Dump every table in $TABLES from the source server and restore it onto one
# target. $1 is a colon-separated record: "name:host:port:db:user:pass".
replicate_to_target() {
    local target_info=$1
    IFS=':' read -r target_name target_host target_port target_db target_user target_pass <<< "$target_info"

    echo "Replicating to $target_name ($target_host)"

    # Check source tables
    echo "Checking source tables:"
    for table in "${TABLES[@]}"; do
        run_sql $SOURCE_HOST $SOURCE_PORT $SOURCE_DB $SOURCE_USER $SOURCE_PASS "SELECT COUNT(*) FROM $table;"
    done

    # Dump and restore each table
    for table in "${TABLES[@]}"; do
        echo "Replicating $table"

        # Dump table
        PGPASSWORD=$SOURCE_PASS $PG_DUMP -h $SOURCE_HOST -p $SOURCE_PORT -U $SOURCE_USER -d $SOURCE_DB -t $table --no-owner --no-acl > ${table}_dump.sql

        if [ $? -ne 0 ]; then
            echo "Error dumping $table"
            continue
        fi

        # Drop and recreate table on target
        run_sql $target_host $target_port $target_db $target_user $target_pass "DROP TABLE IF EXISTS $table CASCADE; "

        # Restore table
        PGPASSWORD=$target_pass $PSQL -h $target_host -p $target_port -U $target_user -d $target_db -f ${table}_dump.sql

        if [ $? -ne 0 ]; then
            echo "Error restoring $table"
        else
            echo "$table replicated successfully"
        fi

        # Clean up dump file
        rm ${table}_dump.sql
    done

    # Verify replication: re-count each table on the target.
    echo "Verifying replication:"
    for table in "${TABLES[@]}"; do
        echo "Checking $table on target:"
        run_sql $target_host $target_port $target_db $target_user $target_pass "SELECT COUNT(*) FROM $table;"
    done
}
|
||||
|
||||
# Main replication process: run the full dump/restore cycle against each target.
for target in "${TARGETS[@]}"; do
    replicate_to_target "$target"
done

echo "Replication completed"
|
||||
|
125
sijapi/helpers/replicator.py
Normal file
125
sijapi/helpers/replicator.py
Normal file
|
@ -0,0 +1,125 @@
|
|||
import asyncio
|
||||
import asyncpg
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
|
||||
async def load_config():
    """Parse config/db.yaml from the package's config directory."""
    path = Path(__file__).parent.parent / 'config' / 'db.yaml'
    with path.open('r') as fh:
        return yaml.safe_load(fh)
|
||||
|
||||
async def check_table_existence(conn, tables):
    """Print, for each table, whether it exists in the public schema."""
    query = """
        SELECT EXISTS (
            SELECT FROM information_schema.tables
            WHERE table_schema = 'public'
            AND table_name = $1
        )
    """
    for table in tables:
        exists = await conn.fetchval(query, table)
        status = 'exists' if exists else 'does not exist'
        print(f"Table {table} {status} in the database.")
|
||||
|
||||
async def check_user_permissions(conn, tables):
    """Print whether the current user may SELECT from each table."""
    query = """
        SELECT has_table_privilege(current_user, $1, 'SELECT')
    """
    for table in tables:
        allowed = await conn.fetchval(query, table)
        verb = 'has' if allowed else 'does not have'
        print(f"User {verb} SELECT permission on table {table}.")
|
||||
|
||||
async def replicate_tables(source, target, tables):
    """Copy *tables* from *source* to *target* via pg_dump / psql.

    *source* and *target* are pool entries from config/db.yaml and must
    provide ts_id, ts_ip, db_port, db_name, db_user and db_pass keys.
    Errors are reported to stdout (not re-raised); the connections and the
    on-disk dump file are always cleaned up.
    """
    print(f"Replicating tables from {source['ts_id']} to {target['ts_id']}")

    # Map asyncpg.connect() keyword -> pool-entry key.
    conn_params = {
        'database': 'db_name',
        'user': 'db_user',
        'password': 'db_pass',
        'host': 'ts_ip',
        'port': 'db_port'
    }

    source_conn = await asyncpg.connect(**{k: source[v] for k, v in conn_params.items()})
    target_conn = await asyncpg.connect(**{k: target[v] for k, v in conn_params.items()})

    dump_file = 'dump.sql'
    # NOTE(review): hard-coded Postgres.app location is macOS-specific —
    # consider resolving pg_dump/psql from PATH instead.
    pg_bin = '/Applications/Postgres.app/Contents/Versions/latest/bin'

    try:
        source_version = await source_conn.fetchval("SELECT version()")
        target_version = await target_conn.fetchval("SELECT version()")
        print(f"Source database version: {source_version}")
        print(f"Target database version: {target_version}")

        print("Checking table existence in source database:")
        await check_table_existence(source_conn, tables)

        print("\nChecking user permissions in source database:")
        await check_user_permissions(source_conn, tables)

        # Dump all tables to a file. Each table needs its OWN -t flag:
        # joining them into one argument would make pg_dump treat the whole
        # string as a single (nonexistent) table pattern.
        dump_command = [
            f'{pg_bin}/pg_dump',
            '-h', source['ts_ip'],
            '-p', str(source['db_port']),
            '-U', source['db_user'],
            '-d', source['db_name'],
        ]
        for table in tables:
            dump_command.extend(['-t', table])
        dump_command.extend(['--no-owner', '--no-acl', '-f', dump_file])

        # Preserve the parent environment (PATH, HOME, locale) and add the
        # password on top; a bare {'PGPASSWORD': ...} would strip everything else.
        env = {**os.environ, 'PGPASSWORD': source['db_pass']}
        print(f"\nExecuting dump command: {' '.join(dump_command)}")
        dump_result = subprocess.run(dump_command, env=env, capture_output=True, text=True)

        if dump_result.returncode != 0:
            print(f"Dump stderr: {dump_result.stderr}")
            raise Exception(f"Dump failed: {dump_result.stderr}")

        print("Dump completed successfully.")

        # Restore from the dump file
        restore_command = [
            f'{pg_bin}/psql',
            '-h', target['ts_ip'],
            '-p', str(target['db_port']),
            '-U', target['db_user'],
            '-d', target['db_name'],
            '-f', dump_file
        ]
        env = {**os.environ, 'PGPASSWORD': target['db_pass']}
        print(f"\nExecuting restore command: {' '.join(restore_command)}")
        restore_result = subprocess.run(restore_command, env=env, capture_output=True, text=True)

        if restore_result.returncode != 0:
            print(f"Restore stderr: {restore_result.stderr}")
            raise Exception(f"Restore failed: {restore_result.stderr}")

        print("Restore completed successfully.")

    except Exception as e:
        print(f"An error occurred during replication: {str(e)}")
        print("Exception details:", sys.exc_info())
    finally:
        # Remove the dump file even when the dump or restore failed.
        if os.path.exists(dump_file):
            os.remove(dump_file)
        await source_conn.close()
        await target_conn.close()
|
||||
|
||||
async def main():
    """Replicate the configured tables from the first pool member to all others."""
    config = await load_config()
    pool = config['POOL']
    source_server = pool[0]       # sij-mbp16
    target_servers = pool[1:]     # sij-vm and sij-vps

    tables_to_replicate = [
        'dailyweather',
        'hourlyweather',
        'short_urls',
        'click_logs',
        'locations',
    ]

    for target_server in target_servers:
        await replicate_tables(source_server, target_server, tables_to_replicate)

    print("All replications completed!")


if __name__ == "__main__":
    asyncio.run(main())
|
1103
sijapi/helpers/schema_info.yaml
Normal file
1103
sijapi/helpers/schema_info.yaml
Normal file
File diff suppressed because it is too large
Load diff
|
@ -12,7 +12,7 @@ import sys
|
|||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
|
||||
def load_config():
|
||||
config_path = Path(__file__).parent.parent / 'config' / 'api.yaml'
|
||||
config_path = Path(__file__).parent.parent / 'config' / 'sys.yaml'
|
||||
with open(config_path, 'r') as file:
|
||||
return yaml.safe_load(file)
|
||||
|
||||
|
|
110
sijapi/logs.py
Normal file
110
sijapi/logs.py
Normal file
|
@ -0,0 +1,110 @@
|
|||
# logs.py
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import yaml
|
||||
from loguru import logger as loguru_logger
|
||||
from typing import Union, Optional
|
||||
|
||||
class LogLevels:
    """Registry of per-module log levels with a global fallback default.

    Levels are loaded from the LOGS section of a YAML config via ``init``;
    ``get_level`` falls back to ``default_level`` for unknown modules.
    """

    def __init__(self):
        self.levels = {}                 # module name -> level name
        self.default_level = "INFO"      # fallback for modules not listed
        self.HOME = Path.home()

    def init(self, yaml_path: Union[str, Path]):
        """Load the LOGS section of *yaml_path* into this registry.

        Raises whatever the read/parse raised after logging the error.
        """
        resolved = self._resolve_path(yaml_path, 'config')

        try:
            with resolved.open('r') as fh:
                data = yaml.safe_load(fh)

            section = data.get('LOGS', {})
            self.default_level = section.get('default', "INFO")
            self.levels = {module: level for module, level in section.items()
                           if module != 'default'}

            loguru_logger.info(f"Loaded log levels configuration from {resolved}")
        except Exception as e:
            loguru_logger.error(f"Error loading log levels configuration: {str(e)}")
            raise

    def _resolve_path(self, path: Union[str, Path], default_dir: str) -> Path:
        """Resolve a bare name, relative path or absolute path to a config file."""
        project_root = Path(__file__).parent.parent
        candidate = Path(path)
        if not candidate.suffix:
            # Bare name: look it up as <root>/sijapi/<default_dir>/<name>.yaml
            candidate = project_root / 'sijapi' / default_dir / f"{candidate.name}.yaml"
        elif not candidate.is_absolute():
            # Relative path with an extension: anchor at the project root.
            candidate = project_root / candidate
        return candidate

    def set_level(self, module, level):
        """Pin *module* to *level*, overriding the default."""
        self.levels[module] = level

    def set_default_level(self, level):
        """Change the fallback level used for unlisted modules."""
        self.default_level = level

    def get_level(self, module):
        """Return the level configured for *module*, or the default."""
        return self.levels.get(module, self.default_level)
|
||||
|
||||
|
||||
class Logger:
    """Wrapper around loguru that routes records through per-module log levels.

    ``init`` must be called once at startup to load the level config and
    attach the file/stdout sinks; ``get_logger`` then hands out loggers
    bound to a module name, which ``_level_filter`` uses to decide whether
    a record reaches stdout.
    """

    def __init__(self, name):
        self.name = name                  # label for this logger hub
        self.logger = loguru_logger       # shared loguru instance
        self.debug_modules = set()        # modules forced to DEBUG via CLI args
        self.log_levels = LogLevels()     # per-module level registry
        self.logs_dir = None              # set by init()

    def init(self, yaml_path: Union[str, Path], logs_dir: Path):
        """Load level config from *yaml_path* and attach sinks under *logs_dir*."""
        self.log_levels.init(yaml_path)
        self.logs_dir = logs_dir
        os.makedirs(self.logs_dir, exist_ok=True)

        # Set up initial logging configuration: the file sink takes everything
        # at DEBUG; the stdout sink is gated per-module by _level_filter.
        self.logger.remove()
        log_format = "{time:YYYY-MM-DD HH:mm:ss} - {name} - <level>{level: <8}</level> - <level>{message}</level>"
        self.logger.add(self.logs_dir / 'app.log', rotation="2 MB", level="DEBUG", format=log_format)
        self.logger.add(sys.stdout, level="DEBUG", format=log_format, colorize=True,
                        filter=self._level_filter)

    def setup_from_args(self, args):
        """Apply --debug/--info/--log command-line arguments to the registry.

        Attributes missing from *args* are treated as empty/unset rather than
        raising AttributeError (previously only ``info`` was guarded).
        """
        if not self.logs_dir:
            raise ValueError("Logger not initialized. Call init() before setup_from_args().")

        # Update log levels based on command line arguments
        debug_modules = getattr(args, 'debug', None) or []
        for module in debug_modules:
            self.log_levels.set_level(module, "DEBUG")
        for module in getattr(args, 'info', None) or []:
            self.log_levels.set_level(module, "INFO")
        log_level = getattr(args, 'log', None)
        if log_level:
            self.log_levels.set_default_level(log_level.upper())

        # Set debug modules
        self.debug_modules = set(debug_modules)

        # Custom color and style mappings
        self.logger.level("CRITICAL", color="<yellow><bold><MAGENTA>")
        self.logger.level("ERROR", color="<red><bold>")
        self.logger.level("WARNING", color="<yellow><bold>")
        self.logger.level("DEBUG", color="<green><bold>")

        self.logger.info(f"Debug modules: {self.debug_modules}")
        self.logger.info(f"Log levels: {self.log_levels.levels}")
        self.logger.info(f"Default log level: {self.log_levels.default_level}")

    def _level_filter(self, record):
        # Drop records below the configured level for the record's bound
        # module name (set by get_logger via .bind(name=...)).
        module_level = self.log_levels.get_level(record["name"])
        return record["level"].no >= self.logger.level(module_level).no

    def get_logger(self, module_name):
        """Return a loguru logger bound to *module_name* for per-module filtering."""
        level = self.log_levels.get_level(module_name)
        self.logger.debug(f"Creating logger for {module_name} with level {level}")
        return self.logger.bind(name=module_name)
|
||||
|
||||
# Global logger instance shared by the whole application.
# L.init(...) must be called once at startup before logging is routed.
L = Logger("Central")


# Function to get module-specific logger
def get_logger(module_name):
    """Return a logger bound to *module_name*, backed by the global ``L``."""
    return L.get_logger(module_name)
|
|
@ -29,18 +29,14 @@ from requests.adapters import HTTPAdapter
|
|||
from urllib3.util.retry import Retry
|
||||
from datetime import datetime as dt_datetime
|
||||
from better_profanity import profanity
|
||||
from sijapi.logs import get_logger
|
||||
from sijapi.utilities import html_to_markdown, sanitize_filename, assemble_journal_path, assemble_archive_path, contains_profanity, is_ad_or_tracker, initialize_adblock_rules, contains_blacklisted_word
|
||||
from sijapi import L, API, Archivist, BLOCKLISTS_DIR, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR
|
||||
from sijapi import Sys, Archivist, BLOCKLISTS_DIR, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
archivist = APIRouter()
|
||||
|
||||
logger = L.get_module_logger("news")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
adblock_rules = initialize_adblock_rules(BLOCKLISTS_DIR)
|
||||
|
||||
@archivist.post("/archive")
|
||||
|
@ -51,11 +47,11 @@ async def archive_post(
|
|||
encoding: str = Form('utf-8')
|
||||
):
|
||||
if not url:
|
||||
warn(f"No URL provided to /archive endpoint.")
|
||||
l.warning(f"No URL provided to /archive endpoint.")
|
||||
raise HTTPException(status_code=400, detail="URL is required")
|
||||
|
||||
if is_ad_or_tracker(url, adblock_rules):
|
||||
debug(f"Skipping likely ad or tracker URL: {url}")
|
||||
l.debug(f"Skipping likely ad or tracker URL: {url}")
|
||||
raise HTTPException(status_code=400, detail="URL is likely an ad or tracker")
|
||||
|
||||
markdown_filename = await process_archive(url, title, encoding, source)
|
||||
|
@ -70,7 +66,7 @@ async def process_archive(
|
|||
|
||||
# Check URL against blacklist
|
||||
if contains_blacklisted_word(url, Archivist.blacklist):
|
||||
info(f"Not archiving {url} due to blacklisted word in URL")
|
||||
l.info(f"Not archiving {url} due to blacklisted word in URL")
|
||||
return None
|
||||
|
||||
timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M')
|
||||
|
@ -82,13 +78,13 @@ async def process_archive(
|
|||
|
||||
# Check content for profanity
|
||||
if contains_profanity(content, threshold=0.01, custom_words=Archivist.blacklist):
|
||||
info(f"Not archiving {url} due to profanity in content")
|
||||
l.info(f"Not archiving {url} due to profanity in content")
|
||||
return None
|
||||
|
||||
try:
|
||||
markdown_path, relative_path = assemble_archive_path(filename=readable_title, extension=".md")
|
||||
except Exception as e:
|
||||
warn(f"Failed to assemble archive path for {url}: {str(e)}")
|
||||
l.warning(f"Failed to assemble archive path for {url}: {str(e)}")
|
||||
return None
|
||||
|
||||
markdown_content = f"---\n"
|
||||
|
@ -105,8 +101,8 @@ async def process_archive(
|
|||
markdown_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(markdown_path, 'w', encoding=encoding) as md_file:
|
||||
md_file.write(markdown_content)
|
||||
debug(f"Successfully saved to {markdown_path}")
|
||||
l.debug(f"Successfully saved to {markdown_path}")
|
||||
return markdown_path
|
||||
except Exception as e:
|
||||
warn(f"Failed to write markdown file: {str(e)}")
|
||||
l.warning(f"Failed to write markdown file: {str(e)}")
|
||||
return None
|
||||
|
|
|
@ -13,15 +13,10 @@ from fastapi import APIRouter, HTTPException, Form, UploadFile, File, Background
|
|||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional
|
||||
from sijapi import L, ASR_DIR, WHISPER_CPP_MODELS, WHISPER_CPP_DIR, MAX_CPU_CORES
|
||||
|
||||
from sijapi import Sys, ASR_DIR, WHISPER_CPP_MODELS, WHISPER_CPP_DIR
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
asr = APIRouter()
|
||||
logger = L.get_module_logger("asr")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
transcription_results = {}
|
||||
class TranscribeParams(BaseModel):
|
||||
|
@ -84,13 +79,13 @@ async def transcribe_endpoint(
|
|||
|
||||
|
||||
async def transcribe_audio(file_path, params: TranscribeParams):
|
||||
debug(f"Transcribing audio file from {file_path}...")
|
||||
l.debug(f"Transcribing audio file from {file_path}...")
|
||||
file_path = await convert_to_wav(file_path)
|
||||
model = params.model if params.model in WHISPER_CPP_MODELS else 'small'
|
||||
model_path = WHISPER_CPP_DIR / 'models' / f'ggml-{model}.bin'
|
||||
command = [str(WHISPER_CPP_DIR / 'build' / 'bin' / 'main')]
|
||||
command.extend(['-m', str(model_path)])
|
||||
command.extend(['-t', str(max(1, min(params.threads or MAX_CPU_CORES, MAX_CPU_CORES)))])
|
||||
command.extend(['-t', str(max(1, min(params.threads or Sys.MAX_CPU_CORES, Sys.MAX_CPU_CORES)))])
|
||||
command.extend(['-np'])
|
||||
|
||||
if params.split_on_word:
|
||||
|
@ -121,11 +116,11 @@ async def transcribe_audio(file_path, params: TranscribeParams):
|
|||
command.extend(['--dtw', params.dtw])
|
||||
|
||||
command.extend(['-f', file_path])
|
||||
debug(f"Command: {command}")
|
||||
l.debug(f"Command: {command}")
|
||||
|
||||
# Create a unique ID for this transcription job
|
||||
job_id = str(uuid.uuid4())
|
||||
debug(f"Created job ID: {job_id}")
|
||||
l.debug(f"Created job ID: {job_id}")
|
||||
|
||||
# Store the job status
|
||||
transcription_results[job_id] = {"status": "processing", "result": None}
|
||||
|
@ -137,20 +132,20 @@ async def transcribe_audio(file_path, params: TranscribeParams):
|
|||
poll_interval = 10 # 10 seconds
|
||||
start_time = asyncio.get_event_loop().time()
|
||||
|
||||
debug(f"Starting to poll for job {job_id}")
|
||||
l.debug(f"Starting to poll for job {job_id}")
|
||||
try:
|
||||
while asyncio.get_event_loop().time() - start_time < max_wait_time:
|
||||
job_status = transcription_results.get(job_id, {})
|
||||
debug(f"Current status for job {job_id}: {job_status['status']}")
|
||||
l.debug(f"Current status for job {job_id}: {job_status['status']}")
|
||||
if job_status["status"] == "completed":
|
||||
info(f"Transcription completed for job {job_id}")
|
||||
l.info(f"Transcription completed for job {job_id}")
|
||||
return job_id # This is the only change
|
||||
elif job_status["status"] == "failed":
|
||||
err(f"Transcription failed for job {job_id}: {job_status.get('error', 'Unknown error')}")
|
||||
l.error(f"Transcription failed for job {job_id}: {job_status.get('error', 'Unknown error')}")
|
||||
raise Exception(f"Transcription failed: {job_status.get('error', 'Unknown error')}")
|
||||
await asyncio.sleep(poll_interval)
|
||||
|
||||
err(f"Transcription timed out for job {job_id}")
|
||||
l.error(f"Transcription timed out for job {job_id}")
|
||||
raise TimeoutError("Transcription timed out")
|
||||
finally:
|
||||
# Ensure the task is cancelled if we exit the loop
|
||||
|
@ -160,20 +155,20 @@ async def transcribe_audio(file_path, params: TranscribeParams):
|
|||
|
||||
async def process_transcription(command, file_path, job_id):
|
||||
try:
|
||||
debug(f"Starting transcription process for job {job_id}")
|
||||
l.debug(f"Starting transcription process for job {job_id}")
|
||||
result = await run_transcription(command, file_path)
|
||||
transcription_results[job_id] = {"status": "completed", "result": result}
|
||||
debug(f"Transcription completed for job {job_id}")
|
||||
l.debug(f"Transcription completed for job {job_id}")
|
||||
except Exception as e:
|
||||
err(f"Transcription failed for job {job_id}: {str(e)}")
|
||||
l.error(f"Transcription failed for job {job_id}: {str(e)}")
|
||||
transcription_results[job_id] = {"status": "failed", "error": str(e)}
|
||||
finally:
|
||||
# Clean up the temporary file
|
||||
os.remove(file_path)
|
||||
debug(f"Cleaned up temporary file for job {job_id}")
|
||||
l.debug(f"Cleaned up temporary file for job {job_id}")
|
||||
|
||||
async def run_transcription(command, file_path):
|
||||
debug(f"Running transcription command: {' '.join(command)}")
|
||||
l.debug(f"Running transcription command: {' '.join(command)}")
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*command,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
|
@ -182,9 +177,9 @@ async def run_transcription(command, file_path):
|
|||
stdout, stderr = await proc.communicate()
|
||||
if proc.returncode != 0:
|
||||
error_message = f"Error running command: {stderr.decode()}"
|
||||
err(error_message)
|
||||
l.error(error_message)
|
||||
raise Exception(error_message)
|
||||
debug("Transcription command completed successfully")
|
||||
l.debug("Transcription command completed successfully")
|
||||
return stdout.decode().strip()
|
||||
|
||||
async def convert_to_wav(file_path: str):
|
||||
|
|
|
@ -17,45 +17,42 @@ import threading
|
|||
from typing import Dict, List, Any
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from sijapi import L, ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
|
||||
from sijapi import ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
|
||||
from sijapi.routers import gis
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
cal = APIRouter()
|
||||
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token")
|
||||
timeout = httpx.Timeout(12)
|
||||
logger = L.get_module_logger("cal")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
|
||||
if MS365_TOGGLE is True:
|
||||
crit(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.")
|
||||
l.critical(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.")
|
||||
|
||||
@cal.get("/o365/login")
|
||||
async def login():
|
||||
debug(f"Received request to /o365/login")
|
||||
debug(f"SCOPE: {MS365_SCOPE}")
|
||||
l.debug(f"Received request to /o365/login")
|
||||
l.debug(f"SCOPE: {MS365_SCOPE}")
|
||||
if not MS365_SCOPE:
|
||||
err("No scopes defined for authorization.")
|
||||
l.error("No scopes defined for authorization.")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="No scopes defined for authorization."
|
||||
)
|
||||
authorization_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/authorize?client_id={MS365_CLIENT_ID}&response_type=code&redirect_uri={MS365_REDIRECT_PATH}&scope={'+'.join(MS365_SCOPE)}"
|
||||
info(f"Redirecting to authorization URL: {authorization_url}")
|
||||
l.info(f"Redirecting to authorization URL: {authorization_url}")
|
||||
return RedirectResponse(authorization_url)
|
||||
|
||||
@cal.get("/o365/oauth_redirect")
|
||||
async def oauth_redirect(code: str = None, error: str = None):
|
||||
debug(f"Received request to /o365/oauth_redirect")
|
||||
l.debug(f"Received request to /o365/oauth_redirect")
|
||||
if error:
|
||||
err(f"OAuth2 Error: {error}")
|
||||
l.error(f"OAuth2 Error: {error}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST, detail="OAuth2 Error"
|
||||
)
|
||||
info(f"Requesting token with authorization code: {code}")
|
||||
l.info(f"Requesting token with authorization code: {code}")
|
||||
token_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token"
|
||||
data = {
|
||||
"client_id": MS365_CLIENT_ID,
|
||||
|
@ -66,15 +63,15 @@ if MS365_TOGGLE is True:
|
|||
}
|
||||
async with httpx.AsyncClient(timeout=timeout) as client:
|
||||
response = await client.post(token_url, data=data)
|
||||
debug(f"Token endpoint response status code: {response.status_code}")
|
||||
info(f"Token endpoint response text: {response.text}")
|
||||
l.debug(f"Token endpoint response status code: {response.status_code}")
|
||||
l.info(f"Token endpoint response text: {response.text}")
|
||||
result = response.json()
|
||||
if 'access_token' in result:
|
||||
await save_token(result)
|
||||
info("Access token obtained successfully")
|
||||
l.info("Access token obtained successfully")
|
||||
return {"message": "Access token stored successfully"}
|
||||
else:
|
||||
crit(f"Failed to obtain access token. Response: {result}")
|
||||
l.critical(f"Failed to obtain access token. Response: {result}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to obtain access token"
|
||||
|
@ -82,7 +79,7 @@ if MS365_TOGGLE is True:
|
|||
|
||||
@cal.get("/o365/me")
|
||||
async def read_items():
|
||||
debug(f"Received request to /o365/me")
|
||||
l.debug(f"Received request to /o365/me")
|
||||
token = await load_token()
|
||||
if not token:
|
||||
raise HTTPException(
|
||||
|
@ -95,10 +92,10 @@ if MS365_TOGGLE is True:
|
|||
response = await client.get(graph_url, headers=headers)
|
||||
if response.status_code == 200:
|
||||
user = response.json()
|
||||
info(f"User retrieved: {user}")
|
||||
l.info(f"User retrieved: {user}")
|
||||
return user
|
||||
else:
|
||||
err("Invalid or expired token")
|
||||
l.error("Invalid or expired token")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid or expired token",
|
||||
|
@ -106,14 +103,14 @@ if MS365_TOGGLE is True:
|
|||
)
|
||||
|
||||
async def save_token(token):
|
||||
debug(f"Saving token: {token}")
|
||||
l.debug(f"Saving token: {token}")
|
||||
try:
|
||||
token["expires_at"] = int(time.time()) + token["expires_in"]
|
||||
with open(MS365_TOKEN_PATH, "w") as file:
|
||||
json.dump(token, file)
|
||||
debug(f"Saved token to {MS365_TOKEN_PATH}")
|
||||
l.debug(f"Saved token to {MS365_TOKEN_PATH}")
|
||||
except Exception as e:
|
||||
err(f"Failed to save token: {e}")
|
||||
l.error(f"Failed to save token: {e}")
|
||||
|
||||
async def load_token():
|
||||
if os.path.exists(MS365_TOKEN_PATH):
|
||||
|
@ -121,21 +118,21 @@ if MS365_TOGGLE is True:
|
|||
with open(MS365_TOKEN_PATH, "r") as file:
|
||||
token = json.load(file)
|
||||
except FileNotFoundError:
|
||||
err("Token file not found.")
|
||||
l.error("Token file not found.")
|
||||
return None
|
||||
except json.JSONDecodeError:
|
||||
err("Failed to decode token JSON")
|
||||
l.error("Failed to decode token JSON")
|
||||
return None
|
||||
|
||||
if token:
|
||||
token["expires_at"] = int(time.time()) + token["expires_in"]
|
||||
debug(f"Loaded token: {token}") # Add this line to log the loaded token
|
||||
l.debug(f"Loaded token: {token}") # Add this line to log the loaded token
|
||||
return token
|
||||
else:
|
||||
debug("No token found.")
|
||||
l.debug("No token found.")
|
||||
return None
|
||||
else:
|
||||
err(f"No file found at {MS365_TOKEN_PATH}")
|
||||
l.error(f"No file found at {MS365_TOKEN_PATH}")
|
||||
return None
|
||||
|
||||
|
||||
|
@ -165,39 +162,39 @@ if MS365_TOGGLE is True:
|
|||
response = await client.post(token_url, data=data)
|
||||
result = response.json()
|
||||
if "access_token" in result:
|
||||
info("Access token refreshed successfully")
|
||||
l.info("Access token refreshed successfully")
|
||||
return result
|
||||
else:
|
||||
err("Failed to refresh access token")
|
||||
l.error("Failed to refresh access token")
|
||||
return None
|
||||
|
||||
|
||||
async def refresh_token():
|
||||
token = await load_token()
|
||||
if not token:
|
||||
err("No token found in storage")
|
||||
l.error("No token found in storage")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="No token found",
|
||||
)
|
||||
|
||||
if 'refresh_token' not in token:
|
||||
err("Refresh token not found in the loaded token")
|
||||
l.error("Refresh token not found in the loaded token")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Refresh token not found",
|
||||
)
|
||||
|
||||
refresh_token = token['refresh_token']
|
||||
debug("Found refresh token, attempting to refresh access token")
|
||||
l.debug("Found refresh token, attempting to refresh access token")
|
||||
|
||||
new_token = await get_new_token_with_refresh_token(refresh_token)
|
||||
|
||||
if new_token:
|
||||
await save_token(new_token)
|
||||
info("Token refreshed and saved successfully")
|
||||
l.info("Token refreshed and saved successfully")
|
||||
else:
|
||||
err("Failed to refresh token")
|
||||
l.error("Failed to refresh token")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to refresh token",
|
||||
|
@ -218,7 +215,7 @@ if ICAL_TOGGLE is True:
|
|||
calendar_identifiers = {
|
||||
calendar.title() : calendar.calendarIdentifier() for calendar in all_calendars
|
||||
}
|
||||
debug(f"{calendar_identifiers}")
|
||||
l.debug(f"{calendar_identifiers}")
|
||||
return calendar_identifiers
|
||||
|
||||
def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]:
|
||||
|
@ -230,7 +227,7 @@ if ICAL_TOGGLE is True:
|
|||
|
||||
def completion_handler(granted, error):
|
||||
if error is not None:
|
||||
err(f"Error: {error}")
|
||||
l.error(f"Error: {error}")
|
||||
access_granted.append(granted)
|
||||
with access_granted_condition:
|
||||
access_granted_condition.notify()
|
||||
|
@ -242,11 +239,11 @@ if ICAL_TOGGLE is True:
|
|||
if access_granted:
|
||||
return access_granted[0]
|
||||
else:
|
||||
err("Request access timed out or failed")
|
||||
l.error("Request access timed out or failed")
|
||||
return False
|
||||
|
||||
if not request_access():
|
||||
err("Access to calendar data was not granted")
|
||||
l.error("Access to calendar data was not granted")
|
||||
return []
|
||||
|
||||
ns_start_date = datetime_to_nsdate(start_date)
|
||||
|
@ -336,7 +333,7 @@ async def get_ms365_events(start_date: datetime, end_date: datetime):
|
|||
response = await client.get(graph_url, headers=headers)
|
||||
|
||||
if response.status_code != 200:
|
||||
err("Failed to retrieve events from Microsoft 365")
|
||||
l.error("Failed to retrieve events from Microsoft 365")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to retrieve events",
|
||||
|
@ -352,33 +349,33 @@ async def parse_calendar_for_day(range_start: datetime, range_end: datetime, eve
|
|||
event_list = []
|
||||
|
||||
for event in events:
|
||||
info(f"Event: {event}")
|
||||
l.info(f"Event: {event}")
|
||||
start_str = event.get('start')
|
||||
end_str = event.get('end')
|
||||
|
||||
if isinstance(start_str, dict):
|
||||
start_str = start_str.get('dateTime')
|
||||
else:
|
||||
info(f"Start date string not a dict")
|
||||
l.info(f"Start date string not a dict")
|
||||
|
||||
if isinstance(end_str, dict):
|
||||
end_str = end_str.get('dateTime')
|
||||
else:
|
||||
info(f"End date string not a dict")
|
||||
l.info(f"End date string not a dict")
|
||||
|
||||
try:
|
||||
start_date = await gis.dt(start_str) if start_str else None
|
||||
except (ValueError, TypeError) as e:
|
||||
err(f"Invalid start date format: {start_str}, error: {e}")
|
||||
l.error(f"Invalid start date format: {start_str}, error: {e}")
|
||||
continue
|
||||
|
||||
try:
|
||||
end_date = await gis.dt(end_str) if end_str else None
|
||||
except (ValueError, TypeError) as e:
|
||||
err(f"Invalid end date format: {end_str}, error: {e}")
|
||||
l.error(f"Invalid end date format: {end_str}, error: {e}")
|
||||
continue
|
||||
|
||||
debug(f"Comparing {start_date} with range {range_start} to {range_end}")
|
||||
l.debug(f"Comparing {start_date} with range {range_start} to {range_end}")
|
||||
|
||||
if start_date:
|
||||
# Ensure start_date is timezone-aware
|
||||
|
@ -410,11 +407,11 @@ async def parse_calendar_for_day(range_start: datetime, range_end: datetime, eve
|
|||
"busy": event.get('showAs', '') in ['busy', 'tentative'],
|
||||
"all_day": event.get('isAllDay', False)
|
||||
}
|
||||
info(f"Event_data: {event_data}")
|
||||
l.info(f"Event_data: {event_data}")
|
||||
event_list.append(event_data)
|
||||
else:
|
||||
debug(f"Event outside of specified range: {start_date} to {end_date}")
|
||||
l.debug(f"Event outside of specified range: {start_date} to {end_date}")
|
||||
else:
|
||||
err(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}")
|
||||
l.error(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}")
|
||||
|
||||
return event_list
|
|
@ -7,19 +7,15 @@ from fastapi import APIRouter, HTTPException
|
|||
from pydantic import BaseModel
|
||||
from fastapi.responses import PlainTextResponse, JSONResponse
|
||||
from typing import Optional
|
||||
from sijapi import L, CF_TOKEN, CADDYFILE_PATH, CF_API_BASE_URL, CF_IP
|
||||
import httpx
|
||||
import asyncio
|
||||
from asyncio import sleep
|
||||
import os
|
||||
from sijapi import CF_TOKEN, CADDYFILE_PATH, CF_API_BASE_URL, CF_IP
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
cf = APIRouter()
|
||||
logger = L.get_module_logger("cal")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
class DNSRecordRequest(BaseModel):
|
||||
full_domain: str
|
||||
|
@ -77,7 +73,7 @@ async def retry_request(url, headers, max_retries=5, backoff_factor=1):
|
|||
response.raise_for_status()
|
||||
return response
|
||||
except (httpx.HTTPError, httpx.ConnectTimeout) as e:
|
||||
err(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
|
||||
l.error(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
|
||||
await sleep(backoff_factor * (2 ** retry))
|
||||
raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request")
|
||||
|
||||
|
|
|
@ -22,19 +22,15 @@ import ssl
|
|||
import yaml
|
||||
from typing import List, Dict, Optional, Set
|
||||
from datetime import datetime as dt_datetime
|
||||
from sijapi import L, Dir, EMAIL_CONFIG, EMAIL_LOGS
|
||||
from sijapi import Dir, Tts, EMAIL_CONFIG, EMAIL_LOGS
|
||||
from sijapi.routers import gis, img, tts, llm
|
||||
from sijapi.utilities import clean_text, assemble_journal_path, extract_text, prefix_lines
|
||||
from sijapi.classes import EmailAccount, IMAPConfig, SMTPConfig, IncomingEmail, EmailContact, AutoResponder
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
email = APIRouter()
|
||||
|
||||
logger = L.get_module_logger("email")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
def load_email_accounts(yaml_path: str) -> List[EmailAccount]:
|
||||
with open(yaml_path, 'r') as file:
|
||||
|
@ -60,36 +56,36 @@ def get_smtp_connection(autoresponder: AutoResponder):
|
|||
|
||||
if smtp_config.encryption == 'SSL':
|
||||
try:
|
||||
debug(f"Attempting SSL connection to {smtp_config.host}:{smtp_config.port}")
|
||||
l.debug(f"Attempting SSL connection to {smtp_config.host}:{smtp_config.port}")
|
||||
return SMTP_SSL(smtp_config.host, smtp_config.port, context=context)
|
||||
except ssl.SSLError as e:
|
||||
err(f"SSL connection failed: {str(e)}")
|
||||
l.error(f"SSL connection failed: {str(e)}")
|
||||
# If SSL fails, try TLS
|
||||
try:
|
||||
debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
|
||||
l.debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
|
||||
smtp = SMTP(smtp_config.host, smtp_config.port)
|
||||
smtp.starttls(context=context)
|
||||
return smtp
|
||||
except Exception as e:
|
||||
err(f"STARTTLS connection failed: {str(e)}")
|
||||
l.error(f"STARTTLS connection failed: {str(e)}")
|
||||
raise
|
||||
|
||||
elif smtp_config.encryption == 'STARTTLS':
|
||||
try:
|
||||
debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
|
||||
l.debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
|
||||
smtp = SMTP(smtp_config.host, smtp_config.port)
|
||||
smtp.starttls(context=context)
|
||||
return smtp
|
||||
except Exception as e:
|
||||
err(f"STARTTLS connection failed: {str(e)}")
|
||||
l.error(f"STARTTLS connection failed: {str(e)}")
|
||||
raise
|
||||
|
||||
else:
|
||||
try:
|
||||
debug(f"Attempting unencrypted connection to {smtp_config.host}:{smtp_config.port}")
|
||||
l.debug(f"Attempting unencrypted connection to {smtp_config.host}:{smtp_config.port}")
|
||||
return SMTP(smtp_config.host, smtp_config.port)
|
||||
except Exception as e:
|
||||
err(f"Unencrypted connection failed: {str(e)}")
|
||||
l.error(f"Unencrypted connection failed: {str(e)}")
|
||||
raise
|
||||
|
||||
async def send_response(to_email: str, subject: str, body: str, profile: AutoResponder, image_attachment: Path = None) -> bool:
|
||||
|
@ -106,20 +102,20 @@ async def send_response(to_email: str, subject: str, body: str, profile: AutoRes
|
|||
img = MIMEImage(img_file.read(), name=os.path.basename(image_attachment))
|
||||
message.attach(img)
|
||||
|
||||
debug(f"Sending auto-response to {to_email} concerning {subject} from account {profile.name}...")
|
||||
l.debug(f"Sending auto-response to {to_email} concerning {subject} from account {profile.name}...")
|
||||
|
||||
server = get_smtp_connection(profile)
|
||||
debug(f"SMTP connection established: {type(server)}")
|
||||
l.debug(f"SMTP connection established: {type(server)}")
|
||||
server.login(profile.smtp.username, profile.smtp.password)
|
||||
server.send_message(message)
|
||||
|
||||
info(f"Auto-response sent to {to_email} concerning {subject} from account {profile.name}!")
|
||||
l.info(f"Auto-response sent to {to_email} concerning {subject} from account {profile.name}!")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error in preparing/sending auto-response from account {profile.name}: {str(e)}")
|
||||
err(f"SMTP details - Host: {profile.smtp.host}, Port: {profile.smtp.port}, Encryption: {profile.smtp.encryption}")
|
||||
err(traceback.format_exc())
|
||||
l.error(f"Error in preparing/sending auto-response from account {profile.name}: {str(e)}")
|
||||
l.error(f"SMTP details - Host: {profile.smtp.host}, Port: {profile.smtp.port}, Encryption: {profile.smtp.encryption}")
|
||||
l.error(traceback.format_exc())
|
||||
return False
|
||||
|
||||
finally:
|
||||
|
@ -127,7 +123,7 @@ async def send_response(to_email: str, subject: str, body: str, profile: AutoRes
|
|||
try:
|
||||
server.quit()
|
||||
except Exception as e:
|
||||
err(f"Error closing SMTP connection: {str(e)}")
|
||||
l.error(f"Error closing SMTP connection: {str(e)}")
|
||||
|
||||
|
||||
def clean_email_content(html_content):
|
||||
|
@ -163,10 +159,10 @@ async def process_account_archival(account: EmailAccount):
|
|||
while True:
|
||||
try:
|
||||
processed_uids = await load_processed_uids(summarized_log)
|
||||
debug(f"{len(processed_uids)} emails marked as already summarized are being ignored.")
|
||||
l.debug(f"{len(processed_uids)} emails marked as already summarized are being ignored.")
|
||||
with get_imap_connection(account) as inbox:
|
||||
unread_messages = inbox.messages(unread=True)
|
||||
debug(f"There are {len(unread_messages)} unread messages.")
|
||||
l.debug(f"There are {len(unread_messages)} unread messages.")
|
||||
for uid, message in unread_messages:
|
||||
uid_str = uid.decode() if isinstance(uid, bytes) else str(uid)
|
||||
if uid_str not in processed_uids:
|
||||
|
@ -186,13 +182,13 @@ async def process_account_archival(account: EmailAccount):
|
|||
save_success = await save_email(md_path, md_content)
|
||||
if save_success:
|
||||
await save_processed_uid(summarized_log, account.name, uid_str)
|
||||
info(f"Summarized email: {uid_str}")
|
||||
l.info(f"Summarized email: {uid_str}")
|
||||
else:
|
||||
warn(f"Failed to summarize {this_email.subject}")
|
||||
l.warning(f"Failed to summarize {this_email.subject}")
|
||||
# else:
|
||||
# debug(f"Skipping {uid_str} because it was already processed.")
|
||||
# l.debug(f"Skipping {uid_str} because it was already processed.")
|
||||
except Exception as e:
|
||||
err(f"An error occurred during summarization for account {account.name}: {e}")
|
||||
l.error(f"An error occurred during summarization for account {account.name}: {e}")
|
||||
|
||||
await asyncio.sleep(account.refresh)
|
||||
|
||||
|
@ -240,7 +236,7 @@ tags:
|
|||
return markdown_content
|
||||
|
||||
except Exception as e:
|
||||
err(f"Exception: {e}")
|
||||
l.error(f"Exception: {e}")
|
||||
return False
|
||||
|
||||
|
||||
|
@ -249,15 +245,15 @@ async def save_email(md_path, md_content):
|
|||
with open(md_path, 'w', encoding='utf-8') as md_file:
|
||||
md_file.write(md_content)
|
||||
|
||||
debug(f"Saved markdown to {md_path}")
|
||||
l.debug(f"Saved markdown to {md_path}")
|
||||
return True
|
||||
except Exception as e:
|
||||
err(f"Failed to save email: {e}")
|
||||
l.error(f"Failed to save email: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount) -> List[AutoResponder]:
|
||||
debug(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"")
|
||||
l.debug(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"")
|
||||
def matches_list(item: str, this_email: IncomingEmail) -> bool:
|
||||
if '@' in item:
|
||||
return item in this_email.sender
|
||||
|
@ -268,12 +264,12 @@ def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount
|
|||
whitelist_match = not profile.whitelist or any(matches_list(item, this_email) for item in profile.whitelist)
|
||||
blacklist_match = any(matches_list(item, this_email) for item in profile.blacklist)
|
||||
if whitelist_match and not blacklist_match:
|
||||
debug(f"We have a match for {whitelist_match} and no blacklist matches.")
|
||||
l.debug(f"We have a match for {whitelist_match} and no blacklist matches.")
|
||||
matching_profiles.append(profile)
|
||||
elif whitelist_match and blacklist_match:
|
||||
debug(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}")
|
||||
l.debug(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}")
|
||||
else:
|
||||
debug(f"No whitelist or blacklist matches.")
|
||||
l.debug(f"No whitelist or blacklist matches.")
|
||||
return matching_profiles
|
||||
|
||||
|
||||
|
@ -284,31 +280,31 @@ async def process_account_autoresponding(account: EmailAccount):
|
|||
while True:
|
||||
try:
|
||||
processed_uids = await load_processed_uids(EMAIL_AUTORESPONSE_LOG)
|
||||
debug(f"{len(processed_uids)} emails marked as already responded to are being ignored.")
|
||||
l.debug(f"{len(processed_uids)} emails marked as already responded to are being ignored.")
|
||||
|
||||
with get_imap_connection(account) as inbox:
|
||||
unread_messages = inbox.messages(unread=True)
|
||||
debug(f"There are {len(unread_messages)} unread messages.")
|
||||
l.debug(f"There are {len(unread_messages)} unread messages.")
|
||||
|
||||
for uid, message in unread_messages:
|
||||
uid_str = uid.decode() if isinstance(uid, bytes) else str(uid)
|
||||
if uid_str not in processed_uids:
|
||||
await autorespond_single_email(message, uid_str, account, EMAIL_AUTORESPONSE_LOG)
|
||||
else:
|
||||
debug(f"Skipping {uid_str} because it was already processed.")
|
||||
l.debug(f"Skipping {uid_str} because it was already processed.")
|
||||
|
||||
except Exception as e:
|
||||
err(f"An error occurred during auto-responding for account {account.name}: {e}")
|
||||
l.error(f"An error occurred during auto-responding for account {account.name}: {e}")
|
||||
|
||||
await asyncio.sleep(account.refresh)
|
||||
|
||||
|
||||
async def autorespond_single_email(message, uid_str: str, account: EmailAccount, log_file: Path):
|
||||
this_email = await create_incoming_email(message)
|
||||
debug(f"Evaluating {this_email.subject} for autoresponse-worthiness...")
|
||||
l.debug(f"Evaluating {this_email.subject} for autoresponse-worthiness...")
|
||||
|
||||
matching_profiles = get_matching_autoresponders(this_email, account)
|
||||
debug(f"Matching profiles: {matching_profiles}")
|
||||
l.debug(f"Matching profiles: {matching_profiles}")
|
||||
|
||||
for profile in matching_profiles:
|
||||
response_body = await generate_response(this_email, profile, account)
|
||||
|
@ -318,16 +314,16 @@ async def autorespond_single_email(message, uid_str: str, account: EmailAccount,
|
|||
jpg_path = await img.workflow(profile.image_prompt, earlyout=False, downscale_to_fit=True) if profile.image_prompt else None
|
||||
success = await send_response(this_email.sender, subject, response_body, profile, jpg_path)
|
||||
if success:
|
||||
warn(f"Auto-responded to email: {this_email.subject}")
|
||||
l.warning(f"Auto-responded to email: {this_email.subject}")
|
||||
await save_processed_uid(log_file, account.name, uid_str)
|
||||
else:
|
||||
warn(f"Failed to send auto-response to {this_email.subject}")
|
||||
l.warning(f"Failed to send auto-response to {this_email.subject}")
|
||||
else:
|
||||
warn(f"Unable to generate auto-response for {this_email.subject}")
|
||||
l.warning(f"Unable to generate auto-response for {this_email.subject}")
|
||||
|
||||
|
||||
async def generate_response(this_email: IncomingEmail, profile: AutoResponder, account: EmailAccount) -> Optional[str]:
|
||||
info(f"Generating auto-response to {this_email.subject} with profile: {profile.name}")
|
||||
l.info(f"Generating auto-response to {this_email.subject} with profile: {profile.name}")
|
||||
|
||||
now = await gis.dt(dt_datetime.now())
|
||||
then = await gis.dt(this_email.datetime_received)
|
||||
|
@ -345,7 +341,7 @@ Respond on behalf of {account.fullname}, who is unable to respond personally bec
|
|||
|
||||
try:
|
||||
response = await llm.query_ollama(usr_prompt, sys_prompt, profile.ollama_model, 400)
|
||||
debug(f"query_ollama response: {response}")
|
||||
l.debug(f"query_ollama response: {response}")
|
||||
|
||||
if isinstance(response, dict) and "message" in response and "content" in response["message"]:
|
||||
response = response["message"]["content"]
|
||||
|
@ -353,7 +349,7 @@ Respond on behalf of {account.fullname}, who is unable to respond personally bec
|
|||
return response + "\n\n"
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error generating auto-response: {str(e)}")
|
||||
l.error(f"Error generating auto-response: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
|
|
|
@ -33,29 +33,25 @@ from selenium.webdriver.common.by import By
|
|||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from sijapi import (
|
||||
L, API, Serve, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY,
|
||||
Sys, Serve, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY,
|
||||
COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR,
|
||||
MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, IMG_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR
|
||||
)
|
||||
from sijapi.classes import WidgetUpdate
|
||||
from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path
|
||||
from sijapi.routers import gis
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
forward = APIRouter()
|
||||
|
||||
logger = L.get_module_logger("email")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
async def forward_traffic(reader: asyncio.StreamReader, writer: asyncio.StreamWriter, destination: str):
|
||||
try:
|
||||
dest_host, dest_port = destination.split(':')
|
||||
dest_port = int(dest_port)
|
||||
except ValueError:
|
||||
warn(f"Invalid destination format: {destination}. Expected 'host:port'.")
|
||||
l.warning(f"Invalid destination format: {destination}. Expected 'host:port'.")
|
||||
writer.close()
|
||||
await writer.wait_closed()
|
||||
return
|
||||
|
@ -63,7 +59,7 @@ async def forward_traffic(reader: asyncio.StreamReader, writer: asyncio.StreamWr
|
|||
try:
|
||||
dest_reader, dest_writer = await asyncio.open_connection(dest_host, dest_port)
|
||||
except Exception as e:
|
||||
warn(f"Failed to connect to destination {destination}: {str(e)}")
|
||||
l.warning(f"Failed to connect to destination {destination}: {str(e)}")
|
||||
writer.close()
|
||||
await writer.wait_closed()
|
||||
return
|
||||
|
@ -77,7 +73,7 @@ async def forward_traffic(reader: asyncio.StreamReader, writer: asyncio.StreamWr
|
|||
dst.write(data)
|
||||
await dst.drain()
|
||||
except Exception as e:
|
||||
warn(f"Error in forwarding: {str(e)}")
|
||||
l.warning(f"Error in forwarding: {str(e)}")
|
||||
finally:
|
||||
dst.close()
|
||||
await dst.wait_closed()
|
||||
|
@ -110,7 +106,7 @@ async def start_port_forwarding():
|
|||
for rule in Serve.forwarding_rules:
|
||||
asyncio.create_task(start_server(rule.source, rule.destination))
|
||||
else:
|
||||
warn("No forwarding rules found in the configuration.")
|
||||
l.warning("No forwarding rules found in the configuration.")
|
||||
|
||||
|
||||
@forward.get("/forward_status")
|
||||
|
|
|
@ -11,6 +11,8 @@ import json
|
|||
import yaml
|
||||
import jwt
|
||||
from sijapi import GHOST_API_KEY, GHOST_API_URL
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
ghost = APIRouter()
|
||||
|
||||
|
|
|
@ -16,17 +16,14 @@ from folium.plugins import Fullscreen, MiniMap, MousePosition, Geocoder, Draw, M
|
|||
from zoneinfo import ZoneInfo
|
||||
from dateutil.parser import parse as dateutil_parse
|
||||
from typing import Optional, List, Union
|
||||
from sijapi import L, API, Db, TZ, GEO
|
||||
from sijapi import Sys, Db, TZ, GEO
|
||||
from sijapi.classes import Location
|
||||
from sijapi.utilities import haversine, assemble_journal_path, json_serial
|
||||
from sijapi.utilities import haversine, assemble_journal_path
|
||||
from sijapi.serialization import json_dumps
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
gis = APIRouter()
|
||||
logger = L.get_module_logger("gis")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
async def dt(
|
||||
date_time: Union[str, int, datetime],
|
||||
|
@ -36,12 +33,12 @@ async def dt(
|
|||
# Convert integer (epoch time) to UTC datetime
|
||||
if isinstance(date_time, int):
|
||||
date_time = datetime.fromtimestamp(date_time, tz=timezone.utc)
|
||||
debug(f"Converted epoch time {date_time} to UTC datetime object.")
|
||||
l.debug(f"Converted epoch time {date_time} to UTC datetime object.")
|
||||
|
||||
# Convert string to datetime if necessary
|
||||
elif isinstance(date_time, str):
|
||||
date_time = dateutil_parse(date_time)
|
||||
debug(f"Converted string '{date_time}' to datetime object.")
|
||||
l.debug(f"Converted string '{date_time}' to datetime object.")
|
||||
|
||||
if not isinstance(date_time, datetime):
|
||||
raise ValueError(f"Input must be a string, integer (epoch time), or datetime object. What we received: {date_time}, type {type(date_time)}")
|
||||
|
@ -49,7 +46,7 @@ async def dt(
|
|||
# Ensure the datetime is timezone-aware (UTC if not specified)
|
||||
if date_time.tzinfo is None:
|
||||
date_time = date_time.replace(tzinfo=timezone.utc)
|
||||
debug("Added UTC timezone to naive datetime.")
|
||||
l.debug("Added UTC timezone to naive datetime.")
|
||||
|
||||
# Handle provided timezone
|
||||
if tz is not None:
|
||||
|
@ -57,12 +54,12 @@ async def dt(
|
|||
if tz == "local":
|
||||
last_loc = await get_timezone_without_timezone(date_time)
|
||||
tz = await GEO.tz_at(last_loc.latitude, last_loc.longitude)
|
||||
debug(f"Using local timezone: {tz}")
|
||||
l.debug(f"Using local timezone: {tz}")
|
||||
else:
|
||||
try:
|
||||
tz = ZoneInfo(tz)
|
||||
except Exception as e:
|
||||
err(f"Invalid timezone string '{tz}'. Error: {e}")
|
||||
l.error(f"Invalid timezone string '{tz}'. Error: {e}")
|
||||
raise ValueError(f"Invalid timezone string: {tz}")
|
||||
elif isinstance(tz, ZoneInfo):
|
||||
pass # tz is already a ZoneInfo object
|
||||
|
@ -71,14 +68,14 @@ async def dt(
|
|||
|
||||
# Convert to the provided or determined timezone
|
||||
date_time = date_time.astimezone(tz)
|
||||
debug(f"Converted datetime to timezone: {tz}")
|
||||
l.debug(f"Converted datetime to timezone: {tz}")
|
||||
|
||||
return date_time
|
||||
except ValueError as e:
|
||||
err(f"Error in dt: {e}")
|
||||
l.error(f"Error in dt: {e}")
|
||||
raise
|
||||
except Exception as e:
|
||||
err(f"Unexpected error in dt: {e}")
|
||||
l.error(f"Unexpected error in dt: {e}")
|
||||
raise ValueError(f"Failed to process datetime: {e}")
|
||||
|
||||
|
||||
|
@ -112,12 +109,12 @@ async def get_timezone_without_timezone(date_time):
|
|||
|
||||
async def get_last_location() -> Optional[Location]:
|
||||
query_datetime = datetime.now(TZ)
|
||||
debug(f"Query_datetime: {query_datetime}")
|
||||
l.debug(f"Query_datetime: {query_datetime}")
|
||||
|
||||
this_location = await fetch_last_location_before(query_datetime)
|
||||
|
||||
if this_location:
|
||||
debug(f"location: {this_location}")
|
||||
l.debug(f"location: {this_location}")
|
||||
return this_location
|
||||
|
||||
return None
|
||||
|
@ -164,15 +161,15 @@ Generate a heatmap for the given date range and save it as a PNG file using Foli
|
|||
|
||||
m.save(str(output_path))
|
||||
|
||||
info(f"Heatmap saved as PNG: {output_path}")
|
||||
l.info(f"Heatmap saved as PNG: {output_path}")
|
||||
return output_path
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error saving heatmap: {str(e)}")
|
||||
l.error(f"Error saving heatmap: {str(e)}")
|
||||
raise
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error generating heatmap: {str(e)}")
|
||||
l.error(f"Error generating heatmap: {str(e)}")
|
||||
raise
|
||||
|
||||
async def generate_map(start_date: datetime, end_date: datetime, max_points: int):
|
||||
|
@ -180,7 +177,7 @@ async def generate_map(start_date: datetime, end_date: datetime, max_points: int
|
|||
if not locations:
|
||||
raise HTTPException(status_code=404, detail="No locations found for the given date range")
|
||||
|
||||
info(f"Found {len(locations)} locations for the given date range")
|
||||
l.info(f"Found {len(locations)} locations for the given date range")
|
||||
|
||||
if len(locations) > max_points:
|
||||
locations = random.sample(locations, max_points)
|
||||
|
@ -291,18 +288,19 @@ map.on(L.Draw.Event.CREATED, function (event) {
|
|||
return m.get_root().render()
|
||||
|
||||
|
||||
|
||||
async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int, datetime, None] = None) -> List[Location]:
|
||||
start_datetime = await dt(start)
|
||||
if end is None:
|
||||
end_datetime = await dt(start_datetime.replace(hour=23, minute=59, second=59))
|
||||
else:
|
||||
end_datetime = await dt(end) if not isinstance(end, datetime) else end
|
||||
|
||||
|
||||
if start_datetime.time() == datetime.min.time() and end_datetime.time() == datetime.min.time():
|
||||
end_datetime = await dt(end_datetime.replace(hour=23, minute=59, second=59))
|
||||
|
||||
debug(f"Fetching locations between {start_datetime} and {end_datetime}")
|
||||
|
||||
|
||||
l.debug(f"Fetching locations between {start_datetime} and {end_datetime}")
|
||||
|
||||
query = '''
|
||||
SELECT id, datetime,
|
||||
ST_X(ST_AsText(location)::geometry) AS longitude,
|
||||
|
@ -315,10 +313,13 @@ async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int,
|
|||
ORDER BY datetime DESC
|
||||
'''
|
||||
|
||||
locations = await Db.execute_read(query, start_datetime=start_datetime.replace(tzinfo=None), end_datetime=end_datetime.replace(tzinfo=None))
|
||||
try:
|
||||
locations = await Db.read(query, start_datetime=start_datetime, end_datetime=end_datetime)
|
||||
l.debug(f"Range locations query returned: {locations}")
|
||||
except Exception as e:
|
||||
l.error(f"Error executing range locations query: {str(e)}")
|
||||
locations = []
|
||||
|
||||
debug(f"Range locations query returned: {locations}")
|
||||
|
||||
if not locations and (end is None or start_datetime.date() == end_datetime.date()):
|
||||
fallback_query = '''
|
||||
SELECT id, datetime,
|
||||
|
@ -332,12 +333,19 @@ async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int,
|
|||
ORDER BY datetime DESC
|
||||
LIMIT 1
|
||||
'''
|
||||
location_data = await Db.execute_read(fallback_query, start_datetime=start_datetime.replace(tzinfo=None))
|
||||
debug(f"Fallback query returned: {location_data}")
|
||||
if location_data:
|
||||
locations = location_data
|
||||
try:
|
||||
location_data = await Db.read(fallback_query, start_datetime=start_datetime)
|
||||
l.debug(f"Fallback query returned: {location_data}")
|
||||
if location_data:
|
||||
locations = location_data
|
||||
except Exception as e:
|
||||
l.error(f"Error executing fallback locations query: {str(e)}")
|
||||
locations = []
|
||||
|
||||
debug(f"Locations found: {locations}")
|
||||
l.debug(f"Locations found: {locations}")
|
||||
|
||||
if not locations:
|
||||
return []
|
||||
|
||||
# Sort location_data based on the datetime field in descending order
|
||||
sorted_locations = sorted(locations, key=lambda x: x['datetime'], reverse=True)
|
||||
|
@ -365,12 +373,14 @@ async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int,
|
|||
|
||||
return location_objects if location_objects else []
|
||||
|
||||
|
||||
|
||||
|
||||
async def fetch_last_location_before(datetime: datetime) -> Optional[Location]:
|
||||
try:
|
||||
datetime = await dt(datetime)
|
||||
|
||||
debug(f"Fetching last location before {datetime}")
|
||||
l.debug(f"Fetching last location before {datetime}")
|
||||
|
||||
query = '''
|
||||
SELECT id, datetime,
|
||||
|
@ -385,16 +395,16 @@ async def fetch_last_location_before(datetime: datetime) -> Optional[Location]:
|
|||
LIMIT 1
|
||||
'''
|
||||
|
||||
location_data = await Db.execute_read(query, datetime=datetime.replace(tzinfo=None))
|
||||
location_data = await Db.read(query, datetime=datetime)
|
||||
|
||||
if location_data:
|
||||
debug(f"Last location found: {location_data[0]}")
|
||||
l.debug(f"Last location found: {location_data[0]}")
|
||||
return Location(**location_data[0])
|
||||
else:
|
||||
debug("No location found before the specified datetime")
|
||||
l.debug("No location found before the specified datetime")
|
||||
return None
|
||||
except Exception as e:
|
||||
error(f"Error fetching last location: {str(e)}")
|
||||
l.error(f"Error fetching last location: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
|
@ -452,9 +462,9 @@ async def post_location(location: Location):
|
|||
'country': location.country
|
||||
}
|
||||
|
||||
await Db.execute_write(query, **params)
|
||||
await Db.write(query, **params)
|
||||
|
||||
info(f"Successfully posted location: {location.latitude}, {location.longitude}, {location.elevation} on {localized_datetime}")
|
||||
l.info(f"Successfully posted location: {location.latitude}, {location.longitude}, {location.elevation} on {localized_datetime}")
|
||||
|
||||
# Create a serializable version of params for the return value
|
||||
serializable_params = {
|
||||
|
@ -463,15 +473,15 @@ async def post_location(location: Location):
|
|||
}
|
||||
return serializable_params
|
||||
except Exception as e:
|
||||
err(f"Error posting location {e}")
|
||||
err(traceback.format_exc())
|
||||
l.error(f"Error posting location {e}")
|
||||
l.error(traceback.format_exc())
|
||||
return None
|
||||
|
||||
|
||||
|
||||
async def get_date_range():
|
||||
query = "SELECT MIN(datetime) as min_date, MAX(datetime) as max_date FROM locations"
|
||||
row = await Db.execute_read(query)
|
||||
row = await Db.read(query)
|
||||
if row and row[0]['min_date'] and row[0]['max_date']:
|
||||
return row[0]['min_date'], row[0]['max_date']
|
||||
else:
|
||||
|
@ -498,26 +508,26 @@ async def post_locate_endpoint(locations: Union[Location, List[Location]]):
|
|||
"device_name": "Unknown",
|
||||
"device_os": "Unknown"
|
||||
}
|
||||
debug(f"Location received for processing: {lcn}")
|
||||
l.debug(f"Location received for processing: {lcn}")
|
||||
|
||||
geocoded_locations = await GEO.code(locations)
|
||||
|
||||
responses = []
|
||||
if isinstance(geocoded_locations, List):
|
||||
for location in geocoded_locations:
|
||||
debug(f"Final location to be submitted to database: {location}")
|
||||
l.debug(f"Final location to be submitted to database: {location}")
|
||||
location_entry = await post_location(location)
|
||||
if location_entry:
|
||||
responses.append({"location_data": location_entry})
|
||||
else:
|
||||
warn(f"Posting location to database appears to have failed.")
|
||||
l.warning(f"Posting location to database appears to have failed.")
|
||||
else:
|
||||
debug(f"Final location to be submitted to database: {geocoded_locations}")
|
||||
l.debug(f"Final location to be submitted to database: {geocoded_locations}")
|
||||
location_entry = await post_location(geocoded_locations)
|
||||
if location_entry:
|
||||
responses.append({"location_data": location_entry})
|
||||
else:
|
||||
warn(f"Posting location to database appears to have failed.")
|
||||
l.warning(f"Posting location to database appears to have failed.")
|
||||
|
||||
return {"message": "Locations and weather updated", "results": responses}
|
||||
|
||||
|
@ -540,7 +550,7 @@ async def get_locate(datetime_str: str, all: bool = False):
|
|||
try:
|
||||
date_time = await dt(datetime_str)
|
||||
except ValueError as e:
|
||||
err(f"Invalid datetime string provided: {datetime_str}")
|
||||
l.error(f"Invalid datetime string provided: {datetime_str}")
|
||||
return ["ERROR: INVALID DATETIME PROVIDED. USE YYYYMMDDHHmmss or YYYYMMDD format."]
|
||||
|
||||
locations = await fetch_locations(date_time)
|
||||
|
@ -565,6 +575,6 @@ async def generate_map_endpoint(
|
|||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail="Invalid date format")
|
||||
|
||||
info(f"Generating map for {start_date} to {end_date}")
|
||||
l.info(f"Generating map for {start_date} to {end_date}")
|
||||
html_content = await generate_map(start_date, end_date, max_points)
|
||||
return HTMLResponse(content=html_content)
|
||||
|
|
|
@ -36,19 +36,16 @@ import json
|
|||
from ollama import Client as oLlama
|
||||
from sijapi.routers.img import img
|
||||
from dotenv import load_dotenv
|
||||
from sijapi import L, COMFYUI_DIR
|
||||
|
||||
import io
|
||||
from io import BytesIO
|
||||
import base64
|
||||
|
||||
from sijapi import COMFYUI_DIR
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
ig = APIRouter()
|
||||
logger = L.get_module_logger("ig")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
|
||||
class IG_Request(BaseModel):
|
||||
file: Optional[UploadFile] = None # upload a particular file to Instagram
|
||||
|
@ -862,16 +859,16 @@ async def ig_flow_endpoint(new_session: bool = False):
|
|||
time_remaining = 30 - (time_since_rollover % 30)
|
||||
|
||||
if time_remaining < 4:
|
||||
logger.debug("Too close to end of TOTP counter. Waiting.")
|
||||
logger.l.debug("Too close to end of TOTP counter. Waiting.")
|
||||
sleepupto(5, 5)
|
||||
|
||||
if not new_session and os.path.exists(IG_SESSION_PATH):
|
||||
cl.load_settings(IG_SESSION_PATH)
|
||||
logger.debug("Loaded past session.")
|
||||
logger.l.debug("Loaded past session.")
|
||||
|
||||
elif new_session and cl.login(IG_USERNAME, IG_PASSWORD, verification_code=TOTP.now()):
|
||||
cl.dump_settings(IG_SESSION_PATH)
|
||||
logger.debug("Logged in and saved new session.")
|
||||
logger.l.debug("Logged in and saved new session.")
|
||||
|
||||
else:
|
||||
raise Exception(f"Failed to login as {IG_USERNAME}.")
|
||||
|
|
|
@ -18,15 +18,12 @@ import random
|
|||
import os
|
||||
import asyncio
|
||||
from sijapi.routers.llm import query_ollama
|
||||
from sijapi import API, L, COMFYUI_URL, COMFYUI_OUTPUT_DIR, IMG_CONFIG_PATH, IMG_DIR, IMG_WORKFLOWS_DIR
|
||||
from sijapi import Sys, COMFYUI_URL, COMFYUI_OUTPUT_DIR, IMG_CONFIG_PATH, IMG_DIR, IMG_WORKFLOWS_DIR
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
img = APIRouter()
|
||||
logger = L.get_module_logger("img")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
|
||||
CLIENT_ID = str(uuid.uuid4())
|
||||
|
||||
|
@ -73,12 +70,12 @@ async def workflow(prompt: str, scene: str = None, size: str = None, earlyout: s
|
|||
|
||||
scene_workflow = random.choice(scene_data['workflows'])
|
||||
if size:
|
||||
debug(f"Specified size: {size}")
|
||||
l.debug(f"Specified size: {size}")
|
||||
|
||||
size = size if size else scene_workflow.get('size', '1024x1024')
|
||||
|
||||
width, height = map(int, size.split('x'))
|
||||
debug(f"Parsed width: {width}; parsed height: {height}")
|
||||
l.debug(f"Parsed width: {width}; parsed height: {height}")
|
||||
|
||||
workflow_path = Path(IMG_WORKFLOWS_DIR) / scene_workflow['workflow']
|
||||
workflow_data = json.loads(workflow_path.read_text())
|
||||
|
@ -92,22 +89,22 @@ async def workflow(prompt: str, scene: str = None, size: str = None, earlyout: s
|
|||
}
|
||||
|
||||
saved_file_key = await update_prompt_and_get_key(workflow=workflow_data, post=post, positive=image_concept)
|
||||
info(f"Saved file key: {saved_file_key}")
|
||||
l.info(f"Saved file key: {saved_file_key}")
|
||||
|
||||
prompt_id = await queue_prompt(workflow_data)
|
||||
info(f"Prompt ID: {prompt_id}")
|
||||
l.info(f"Prompt ID: {prompt_id}")
|
||||
|
||||
max_size = max(width, height) if downscale_to_fit else None
|
||||
destination_path = Path(destination_path).with_suffix(".jpg") if destination_path else IMG_DIR / f"{prompt_id}.jpg"
|
||||
|
||||
if earlyout:
|
||||
asyncio.create_task(generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path))
|
||||
debug(f"Returning {destination_path}")
|
||||
l.debug(f"Returning {destination_path}")
|
||||
return destination_path
|
||||
|
||||
else:
|
||||
await generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path)
|
||||
debug(f"Returning {destination_path}")
|
||||
l.debug(f"Returning {destination_path}")
|
||||
return destination_path
|
||||
|
||||
|
||||
|
@ -118,16 +115,16 @@ async def generate_and_save_image(prompt_id, saved_file_key, max_size, destinati
|
|||
jpg_file_path = await save_as_jpg(image_data, prompt_id, quality=90, max_size=max_size, destination_path=destination_path)
|
||||
|
||||
if Path(jpg_file_path) != Path(destination_path):
|
||||
err(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}")
|
||||
l.error(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}")
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error in generate_and_save_image: {e}")
|
||||
l.error(f"Error in generate_and_save_image: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def get_web_path(file_path: Path) -> str:
|
||||
uri = file_path.relative_to(IMG_DIR)
|
||||
web_path = f"{API.URL}/img/{uri}"
|
||||
web_path = f"{Sys.URL}/img/{uri}"
|
||||
return web_path
|
||||
|
||||
|
||||
|
@ -143,7 +140,7 @@ async def poll_status(prompt_id):
|
|||
status_data = await response.json()
|
||||
job_data = status_data.get(prompt_id, {})
|
||||
if job_data.get("status", {}).get("completed", False):
|
||||
info(f"{prompt_id} completed in {elapsed_time} seconds.")
|
||||
l.info(f"{prompt_id} completed in {elapsed_time} seconds.")
|
||||
return job_data
|
||||
await asyncio.sleep(1)
|
||||
|
||||
|
@ -194,7 +191,7 @@ async def save_as_jpg(image_data, prompt_id, max_size = None, quality = 100, des
|
|||
return str(destination_path_jpg)
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error processing image: {e}")
|
||||
l.error(f"Error processing image: {e}")
|
||||
return None
|
||||
|
||||
|
||||
|
@ -210,11 +207,11 @@ def set_presets(workflow_data, preset_values):
|
|||
if 'inputs' in workflow_data.get(preset_node, {}):
|
||||
workflow_data[preset_node]['inputs'][preset_key] = preset_value
|
||||
else:
|
||||
debug("Node not found in workflow_data")
|
||||
l.debug("Node not found in workflow_data")
|
||||
else:
|
||||
debug("Required data missing in preset_values")
|
||||
l.debug("Required data missing in preset_values")
|
||||
else:
|
||||
debug("No preset_values found")
|
||||
l.debug("No preset_values found")
|
||||
|
||||
|
||||
def get_return_path(destination_path):
|
||||
|
@ -229,7 +226,7 @@ def get_scene(scene):
|
|||
IMG_CONFIG = yaml.safe_load(IMG_CONFIG_file)
|
||||
for scene_data in IMG_CONFIG['scenes']:
|
||||
if scene_data['scene'] == scene:
|
||||
debug(f"Found scene for \"{scene}\".")
|
||||
l.debug(f"Found scene for \"{scene}\".")
|
||||
return scene_data
|
||||
return None
|
||||
|
||||
|
@ -249,11 +246,11 @@ def get_matching_scene(prompt):
|
|||
max_count = count
|
||||
scene_data = sc
|
||||
if scene_data:
|
||||
debug(f"Found better-matching scene: the prompt contains {max_count} words that match triggers for {scene_data.get('name')}!")
|
||||
l.debug(f"Found better-matching scene: the prompt contains {max_count} words that match triggers for {scene_data.get('name')}!")
|
||||
if scene_data:
|
||||
return scene_data
|
||||
else:
|
||||
debug(f"No matching scenes found, falling back to default scene.")
|
||||
l.debug(f"No matching scenes found, falling back to default scene.")
|
||||
return IMG_CONFIG['scenes'][0]
|
||||
|
||||
|
||||
|
@ -272,11 +269,11 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0):
|
|||
for attempt in range(retries):
|
||||
try:
|
||||
with socket.create_connection(("127.0.0.1", 8188), timeout=2):
|
||||
info("ComfyUI is already running.")
|
||||
l.info("ComfyUI is already running.")
|
||||
return
|
||||
except (socket.timeout, ConnectionRefusedError):
|
||||
if attempt == 0: # Only try to start ComfyUI on the first failed attempt
|
||||
warn("ComfyUI is not running. Starting it now...")
|
||||
l.warning("ComfyUI is not running. Starting it now...")
|
||||
try:
|
||||
tmux_command = (
|
||||
"tmux split-window -h "
|
||||
|
@ -285,14 +282,14 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0):
|
|||
"python main.py; exec $SHELL\""
|
||||
)
|
||||
subprocess.Popen(tmux_command, shell=True)
|
||||
info("ComfyUI started in a new tmux session.")
|
||||
l.info("ComfyUI started in a new tmux session.")
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Error starting ComfyUI: {e}")
|
||||
|
||||
warn(f"Attempt {attempt + 1}/{retries} failed. Waiting {timeout} seconds before retrying...")
|
||||
l.warning(f"Attempt {attempt + 1}/{retries} failed. Waiting {timeout} seconds before retrying...")
|
||||
await asyncio.sleep(timeout)
|
||||
|
||||
crit(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
|
||||
l.critical(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
|
||||
raise RuntimeError(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
|
||||
|
||||
|
||||
|
@ -398,13 +395,13 @@ Even more important, it finds and returns the key to the filepath where the file
|
|||
workflow[key] = random.randint(1000000000000, 9999999999999)
|
||||
|
||||
elif key in ["width", "max_width", "scaled_width", "height", "max_height", "scaled_height", "side_length", "size", "value", "dimension", "dimensions", "long", "long_side", "short", "short_side", "length"]:
|
||||
debug(f"Got a hit for a dimension: {key} {value}")
|
||||
l.debug(f"Got a hit for a dimension: {key} {value}")
|
||||
if value == 1023:
|
||||
workflow[key] = post.get("width", 1024)
|
||||
debug(f"Set {key} to {workflow[key]}.")
|
||||
l.debug(f"Set {key} to {workflow[key]}.")
|
||||
elif value == 1025:
|
||||
workflow[key] = post.get("height", 1024)
|
||||
debug(f"Set {key} to {workflow[key]}.")
|
||||
l.debug(f"Set {key} to {workflow[key]}.")
|
||||
|
||||
update_recursive(workflow)
|
||||
return found_key[0]
|
||||
|
|
|
@ -26,18 +26,14 @@ import tempfile
|
|||
import shutil
|
||||
import html2text
|
||||
import markdown
|
||||
from sijapi import L, Llm, LLM_SYS_MSG, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MODEL
|
||||
from sijapi import Llm, LLM_SYS_MSG, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MODEL
|
||||
from sijapi.utilities import convert_to_unix_time, sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension
|
||||
from sijapi.routers import tts
|
||||
from sijapi.routers.asr import transcribe_audio
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
llm = APIRouter()
|
||||
logger = L.get_module_logger("llm")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
|
||||
VISION_MODELS = ["llava-phi3", "moondream", "llava", "llava-llama3", "llava:34b", "llava:13b-v1.5-q8_0"]
|
||||
|
@ -93,13 +89,13 @@ async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, model: str = Llm.chat.m
|
|||
LLM = Ollama()
|
||||
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
|
||||
|
||||
debug(response)
|
||||
l.debug(response)
|
||||
if "message" in response:
|
||||
if "content" in response["message"]:
|
||||
content = response["message"]["content"]
|
||||
return content
|
||||
else:
|
||||
debug("No choices found in response")
|
||||
l.debug("No choices found in response")
|
||||
return None
|
||||
|
||||
async def query_ollama_multishot(
|
||||
|
@ -120,12 +116,12 @@ async def query_ollama_multishot(
|
|||
|
||||
LLM = Ollama()
|
||||
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
|
||||
debug(response)
|
||||
l.debug(response)
|
||||
|
||||
if "message" in response and "content" in response["message"]:
|
||||
return response["message"]["content"]
|
||||
else:
|
||||
debug("No content found in response")
|
||||
l.debug("No content found in response")
|
||||
return None
|
||||
|
||||
|
||||
|
@ -144,21 +140,21 @@ async def chat_completions(request: Request):
|
|||
raise HTTPException(status_code=400, detail="Message data is required in the request body.")
|
||||
|
||||
requested_model = body.get('model', 'default-model')
|
||||
debug(f"Requested model: {requested_model}")
|
||||
l.debug(f"Requested model: {requested_model}")
|
||||
stream = body.get('stream')
|
||||
token_limit = body.get('max_tokens') or body.get('num_predict')
|
||||
|
||||
# Check if the most recent message contains an image_url
|
||||
recent_message = messages[-1]
|
||||
if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')):
|
||||
debug("Processing as a vision request")
|
||||
l.debug("Processing as a vision request")
|
||||
model = "llava"
|
||||
debug(f"Using model: {model}")
|
||||
l.debug(f"Using model: {model}")
|
||||
return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json")
|
||||
else:
|
||||
debug("Processing as a standard request")
|
||||
l.debug("Processing as a standard request")
|
||||
model = requested_model
|
||||
debug(f"Using model: {model}")
|
||||
l.debug(f"Using model: {model}")
|
||||
if stream:
|
||||
return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json")
|
||||
else:
|
||||
|
@ -283,17 +279,17 @@ async def generate_messages(messages: list, model: str = "llama3"):
|
|||
def is_model_available(model_name):
|
||||
model_data = OllamaList()
|
||||
available_models = [model['name'] for model in model_data['models']]
|
||||
debug(f"Available models: {available_models}") # Log using the configured LOGGER
|
||||
l.debug(f"Available models: {available_models}") # Log using the configured LOGGER
|
||||
|
||||
matching_models = [model for model in available_models if model.startswith(model_name + ':') or model == model_name]
|
||||
if len(matching_models) == 1:
|
||||
debug(f"Unique match found: {matching_models[0]}")
|
||||
l.debug(f"Unique match found: {matching_models[0]}")
|
||||
return True
|
||||
elif len(matching_models) > 1:
|
||||
err(f"Ambiguous match found, models: {matching_models}")
|
||||
l.error(f"Ambiguous match found, models: {matching_models}")
|
||||
return True
|
||||
else:
|
||||
err(f"No match found for model: {model_name}")
|
||||
l.error(f"No match found for model: {model_name}")
|
||||
return False
|
||||
|
||||
|
||||
|
@ -416,12 +412,12 @@ def query_gpt4(llmPrompt: List = [], system_msg: str = "", user_msg: str = "", m
|
|||
if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
|
||||
return first_choice.message.content
|
||||
else:
|
||||
debug("No content attribute in the first choice's message")
|
||||
debug(f"No content found in message string: {response.choices}")
|
||||
debug("Trying again!")
|
||||
l.debug("No content attribute in the first choice's message")
|
||||
l.debug(f"No content found in message string: {response.choices}")
|
||||
l.debug("Trying again!")
|
||||
query_gpt4(messages, max_tokens)
|
||||
else:
|
||||
debug(f"No content found in message string: {response}")
|
||||
l.debug(f"No content found in message string: {response}")
|
||||
return ""
|
||||
|
||||
def llava(image_base64, prompt):
|
||||
|
@ -431,7 +427,7 @@ def llava(image_base64, prompt):
|
|||
prompt = f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}",
|
||||
images = [image_base64]
|
||||
)
|
||||
debug(response)
|
||||
l.debug(response)
|
||||
return "" if "pass" in response["response"].lower() else response["response"]
|
||||
|
||||
def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150):
|
||||
|
@ -462,7 +458,7 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
|
|||
comment_content = first_choice.message.content
|
||||
if "PASS" in comment_content:
|
||||
return ""
|
||||
debug(f"Generated comment: {comment_content}")
|
||||
l.debug(f"Generated comment: {comment_content}")
|
||||
|
||||
response_2 = VISION_LLM.chat.completions.create(
|
||||
model="gpt-4-vision-preview",
|
||||
|
@ -500,15 +496,15 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
|
|||
first_choice = response_2.choices[0]
|
||||
if first_choice.message and first_choice.message.content:
|
||||
final_content = first_choice.message.content
|
||||
debug(f"Generated comment: {final_content}")
|
||||
l.debug(f"Generated comment: {final_content}")
|
||||
if "PASS" in final_content:
|
||||
return ""
|
||||
else:
|
||||
return final_content
|
||||
|
||||
|
||||
debug("Vision response did not contain expected data.")
|
||||
debug(f"Vision response: {response_1}")
|
||||
l.debug("Vision response did not contain expected data.")
|
||||
l.debug(f"Vision response: {response_1}")
|
||||
asyncio.sleep(15)
|
||||
|
||||
try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens)
|
||||
|
@ -566,7 +562,7 @@ async def summarize_tts_endpoint(
|
|||
)
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error in summarize_tts_endpoint: {str(e)}")
|
||||
l.error(f"Error in summarize_tts_endpoint: {str(e)}")
|
||||
return JSONResponse(
|
||||
status_code=400,
|
||||
content={"error": str(e)}
|
||||
|
@ -593,7 +589,7 @@ async def summarize_tts(
|
|||
bg_tasks = BackgroundTasks()
|
||||
model = await tts.get_model(voice)
|
||||
final_output_path = await tts.generate_speech(bg_tasks, summarized_text, voice, model=model, speed=speed, podcast=podcast, title=filename)
|
||||
debug(f"summary_tts completed with final_output_path: {final_output_path}")
|
||||
l.debug(f"summary_tts completed with final_output_path: {final_output_path}")
|
||||
return final_output_path
|
||||
|
||||
|
||||
|
@ -609,10 +605,10 @@ def split_text_into_chunks(text: str) -> List[str]:
|
|||
sentences = re.split(r'(?<=[.!?])\s+', text)
|
||||
words = text.split()
|
||||
total_words = len(words)
|
||||
debug(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.")
|
||||
l.debug(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.")
|
||||
|
||||
max_words_per_chunk = int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW)
|
||||
debug(f"Maximum words per chunk: {max_words_per_chunk}")
|
||||
l.debug(f"Maximum words per chunk: {max_words_per_chunk}")
|
||||
|
||||
chunks = []
|
||||
current_chunk = []
|
||||
|
@ -632,7 +628,7 @@ def split_text_into_chunks(text: str) -> List[str]:
|
|||
if current_chunk:
|
||||
chunks.append(' '.join(current_chunk))
|
||||
|
||||
debug(f"Split text into {len(chunks)} chunks.")
|
||||
l.debug(f"Split text into {len(chunks)} chunks.")
|
||||
return chunks
|
||||
|
||||
|
||||
|
@ -644,7 +640,7 @@ def calculate_max_tokens(text: str) -> int:
|
|||
|
||||
|
||||
async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_tasks: BackgroundTasks = None) -> str:
|
||||
info(f"Attempting to extract text from file: {file}")
|
||||
l.info(f"Attempting to extract text from file: {file}")
|
||||
|
||||
try:
|
||||
if isinstance(file, UploadFile):
|
||||
|
@ -667,7 +663,7 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
|
|||
|
||||
_, file_ext = os.path.splitext(file_path)
|
||||
file_ext = file_ext.lower()
|
||||
info(f"File extension: {file_ext}")
|
||||
l.info(f"File extension: {file_ext}")
|
||||
|
||||
if file_ext == '.pdf':
|
||||
text_content = await extract_text_from_pdf(file_path)
|
||||
|
@ -694,7 +690,7 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
|
|||
return text_content
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error extracting text: {str(e)}")
|
||||
l.error(f"Error extracting text: {str(e)}")
|
||||
raise ValueError(f"Error extracting text: {str(e)}")
|
||||
|
||||
|
||||
|
@ -703,17 +699,17 @@ async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_
|
|||
|
||||
chunked_text = split_text_into_chunks(text)
|
||||
total_parts = len(chunked_text)
|
||||
debug(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}")
|
||||
l.debug(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}")
|
||||
|
||||
total_words_count = sum(len(chunk.split()) for chunk in chunked_text)
|
||||
debug(f"Total words count: {total_words_count}")
|
||||
l.debug(f"Total words count: {total_words_count}")
|
||||
total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW))
|
||||
debug(f"Total tokens count: {total_tokens_count}")
|
||||
l.debug(f"Total tokens count: {total_tokens_count}")
|
||||
|
||||
total_summary_length = length_override if length_override else total_tokens_count // length_quotient
|
||||
debug(f"Total summary length: {total_summary_length}")
|
||||
l.debug(f"Total summary length: {total_summary_length}")
|
||||
corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT)
|
||||
debug(f"Corrected total summary length: {corrected_total_summary_length}")
|
||||
l.debug(f"Corrected total summary length: {corrected_total_summary_length}")
|
||||
|
||||
summaries = await asyncio.gather(*[
|
||||
process_chunk(instruction, chunk, i+1, total_parts, LLM=LLM)
|
||||
|
@ -724,21 +720,21 @@ async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_
|
|||
summaries = [f"\n\n\nPART {i+1} of {total_parts}:\n\n{summary}" for i, summary in enumerate(summaries)]
|
||||
|
||||
concatenated_summary = ' '.join(summaries)
|
||||
debug(f"Concatenated summary: {concatenated_summary}")
|
||||
debug(f"Concatenated summary length: {len(concatenated_summary.split())}")
|
||||
l.debug(f"Concatenated summary: {concatenated_summary}")
|
||||
l.debug(f"Concatenated summary length: {len(concatenated_summary.split())}")
|
||||
|
||||
if total_parts > 1:
|
||||
debug(f"Processing the concatenated_summary to smooth the edges...")
|
||||
l.debug(f"Processing the concatenated_summary to smooth the edges...")
|
||||
concatenated_instruct = f"The following text consists of the concatenated {total_parts} summaries of {total_parts} parts of a single document that had to be split for processing. Reword it for clarity and flow as a single cohesive summary, understanding that it all relates to a single document, but that document likely consists of multiple parts potentially from multiple authors. Do not shorten it and do not omit content, simply smooth out the edges between the parts."
|
||||
final_summary = await process_chunk(concatenated_instruct, concatenated_summary, 1, 1, length_ratio=1, LLM=LLM)
|
||||
debug(f"Final summary length: {len(final_summary.split())}")
|
||||
l.debug(f"Final summary length: {len(final_summary.split())}")
|
||||
return final_summary
|
||||
else:
|
||||
return concatenated_summary
|
||||
|
||||
|
||||
async def process_chunk(instruction: str, text: str, part: int, total_parts: int, length_ratio: float = None, LLM: Ollama = None) -> str:
|
||||
# debug(f"Processing chunk: {text}")
|
||||
# l.debug(f"Processing chunk: {text}")
|
||||
LLM = LLM if LLM else Ollama()
|
||||
|
||||
words_count = len(text.split())
|
||||
|
@ -748,14 +744,14 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int
|
|||
max_tokens = min(tokens_count // summary_length_ratio, SUMMARY_CHUNK_SIZE)
|
||||
max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH)
|
||||
|
||||
debug(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}")
|
||||
l.debug(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}")
|
||||
|
||||
if part and total_parts > 1:
|
||||
prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}"
|
||||
else:
|
||||
prompt = f"{instruction}:\n\n{text}"
|
||||
|
||||
info(f"Starting LLM.generate for part {part} of {total_parts}")
|
||||
l.info(f"Starting LLM.generate for part {part} of {total_parts}")
|
||||
response = await LLM.generate(
|
||||
model=SUMMARY_MODEL,
|
||||
prompt=prompt,
|
||||
|
@ -764,8 +760,8 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int
|
|||
)
|
||||
|
||||
text_response = response['response']
|
||||
info(f"Completed LLM.generate for part {part} of {total_parts}")
|
||||
debug(f"Result: {text_response}")
|
||||
l.info(f"Completed LLM.generate for part {part} of {total_parts}")
|
||||
l.debug(f"Result: {text_response}")
|
||||
return text_response
|
||||
|
||||
async def title_and_summary(extracted_text: str):
|
||||
|
|
|
@ -18,17 +18,13 @@ from markdownify import markdownify as md
|
|||
from better_profanity import profanity
|
||||
from fastapi import APIRouter, BackgroundTasks, UploadFile, Form, HTTPException, Query, Path as FastAPIPath
|
||||
from pathlib import Path
|
||||
from sijapi import L, Archivist, News, Tts, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR
|
||||
from sijapi import Archivist, News, Tts, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR
|
||||
from sijapi.utilities import html_to_markdown, download_file, sanitize_filename, assemble_journal_path, assemble_archive_path, contains_profanity, is_ad_or_tracker
|
||||
from sijapi.routers import gis, llm, tts, note
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
news = APIRouter()
|
||||
logger = L.get_module_logger("news")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
|
||||
@news.post("/clip")
|
||||
|
@ -87,7 +83,7 @@ async def handle_tts(bg_tasks: BackgroundTasks, article: Article, title: str, tt
|
|||
return f"![[{Path(audio_path).name}]]"
|
||||
|
||||
except HTTPException as e:
|
||||
err(f"Failed to generate TTS: {str(e)}")
|
||||
l.error(f"Failed to generate TTS: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
|
@ -99,7 +95,7 @@ def get_banner_markdown(image_url: str) -> str:
|
|||
banner_image = download_file(image_url, Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR)
|
||||
return f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" if banner_image else ''
|
||||
except Exception as e:
|
||||
err(f"Failed to download banner image: {str(e)}")
|
||||
l.error(f"Failed to download banner image: {str(e)}")
|
||||
return ''
|
||||
|
||||
|
||||
|
@ -109,7 +105,7 @@ async def save_markdown_file(filename: str, content: str):
|
|||
|
||||
|
||||
async def process_news_site(site, bg_tasks: BackgroundTasks):
|
||||
info(f"Downloading articles from {site.name}...")
|
||||
l.info(f"Downloading articles from {site.name}...")
|
||||
|
||||
earliest_date = dt_datetime.now().date() - timedelta(days=site.days_back)
|
||||
|
||||
|
@ -131,9 +127,9 @@ async def process_news_site(site, bg_tasks: BackgroundTasks):
|
|||
results = await asyncio.gather(*tasks)
|
||||
articles_downloaded = sum(results)
|
||||
|
||||
info(f"Downloaded {articles_downloaded} articles from {site.name}")
|
||||
l.info(f"Downloaded {articles_downloaded} articles from {site.name}")
|
||||
except Exception as e:
|
||||
err(f"Error processing {site.name}: {str(e)}")
|
||||
l.error(f"Error processing {site.name}: {str(e)}")
|
||||
|
||||
|
||||
async def download_and_save_article(article, site_name, earliest_date, bg_tasks: BackgroundTasks, tts_mode: str = "off", voice: str = Tts.elevenlabs.default):
|
||||
|
@ -147,7 +143,7 @@ async def download_and_save_article(article, site_name, earliest_date, bg_tasks:
|
|||
return await process_and_save_article(bg_tasks, url, None, tts_mode, voice, site_name=site_name)
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error processing article from {article.url}: {str(e)}")
|
||||
l.error(f"Error processing article from {article.url}: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
|
@ -186,16 +182,16 @@ async def process_and_save_article(
|
|||
return f"Successfully saved: {relative_path}"
|
||||
|
||||
except Exception as e:
|
||||
err(f"Failed to handle final markdown content preparation and/or saving to daily note; {e}")
|
||||
l.error(f"Failed to handle final markdown content preparation and/or saving to daily note; {e}")
|
||||
|
||||
except Exception as e:
|
||||
err(f"Failed to handle TTS: {e}")
|
||||
l.error(f"Failed to handle TTS: {e}")
|
||||
|
||||
except Exception as e:
|
||||
err(f"Failed to generate title, file paths, and summary: {e}")
|
||||
l.error(f"Failed to generate title, file paths, and summary: {e}")
|
||||
|
||||
except Exception as e:
|
||||
err(f"Failed to fetch and parse article {url}: {str(e)}")
|
||||
l.error(f"Failed to fetch and parse article {url}: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
|
|
|
@ -17,28 +17,25 @@ from dateutil.parser import parse as dateutil_parse
|
|||
from fastapi import HTTPException, status
|
||||
from pathlib import Path
|
||||
from fastapi import APIRouter, Query, HTTPException
|
||||
from sijapi import API, L, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, GEO
|
||||
from sijapi import Sys, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, GEO
|
||||
from sijapi.routers import asr, cal, gis, img, llm, serve, timing, tts, weather
|
||||
from sijapi.utilities import assemble_journal_path, convert_to_12_hour_format, sanitize_filename, convert_degrees_to_cardinal, check_file_name, HOURLY_COLUMNS_MAPPING
|
||||
from sijapi.classes import Location
|
||||
from sijapi.logs import get_logger
|
||||
|
||||
l = get_logger(__name__)
|
||||
|
||||
note = APIRouter()
|
||||
logger = L.get_module_logger("note")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
@note.post("/note/add")
|
||||
async def note_add_endpoint(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), source: Optional[str] = Form(None), bg_tasks: BackgroundTasks = None):
|
||||
debug(f"Received request on /note/add...")
|
||||
l.debug(f"Received request on /note/add...")
|
||||
if not file and not text:
|
||||
warn(f"... without any file or text!")
|
||||
l.warning(f"... without any file or text!")
|
||||
raise HTTPException(status_code=400, detail="Either text or a file must be provided")
|
||||
else:
|
||||
result = await process_for_daily_note(file, text, source, bg_tasks)
|
||||
info(f"Result on /note/add: {result}")
|
||||
l.info(f"Result on /note/add: {result}")
|
||||
return JSONResponse({"message": "Note added successfully", "entry": result}, status_code=201)
|
||||
|
||||
|
||||
|
@ -47,7 +44,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
|
|||
transcription_entry = ""
|
||||
file_entry = ""
|
||||
if file:
|
||||
debug("File received...")
|
||||
l.debug("File received...")
|
||||
file_content = await file.read()
|
||||
audio_io = BytesIO(file_content)
|
||||
|
||||
|
@ -55,18 +52,18 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
|
|||
guessed_type = mimetypes.guess_type(file.filename)
|
||||
file_type = guessed_type[0] if guessed_type[0] else "application/octet-stream"
|
||||
|
||||
debug(f"Processing as {file_type}...")
|
||||
l.debug(f"Processing as {file_type}...")
|
||||
|
||||
# Extract the main type (e.g., 'audio', 'image', 'video')
|
||||
main_type = file_type.split('/')[0]
|
||||
subdir = main_type.title() if main_type else "Documents"
|
||||
|
||||
absolute_path, relative_path = assemble_journal_path(now, subdir=subdir, filename=file.filename)
|
||||
debug(f"Destination path: {absolute_path}")
|
||||
l.debug(f"Destination path: {absolute_path}")
|
||||
|
||||
with open(absolute_path, 'wb') as f:
|
||||
f.write(file_content)
|
||||
debug(f"Processing {f.name}...")
|
||||
l.debug(f"Processing {f.name}...")
|
||||
|
||||
if main_type == 'audio':
|
||||
transcription = await asr.transcribe_audio(file_path=absolute_path, params=asr.TranscribeParams(model="small-en", language="en", threads=6))
|
||||
|
@ -77,7 +74,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
|
|||
file_entry = f"[Source]({relative_path})"
|
||||
|
||||
text_entry = text if text else ""
|
||||
debug(f"transcription: {transcription_entry}\nfile_entry: {file_entry}\ntext_entry: {text_entry}")
|
||||
l.debug(f"transcription: {transcription_entry}\nfile_entry: {file_entry}\ntext_entry: {text_entry}")
|
||||
return await add_to_daily_note(transcription_entry, file_entry, text_entry, now)
|
||||
|
||||
|
||||
|
@ -169,7 +166,7 @@ added: {timestamp}
|
|||
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
|
||||
body += f"{obsidian_link}\n\n"
|
||||
except Exception as e:
|
||||
err(f"Failed in the TTS portion of clipping: {e}")
|
||||
l.error(f"Failed in the TTS portion of clipping: {e}")
|
||||
|
||||
body += f"> [!summary]+\n"
|
||||
body += f"> {summary}\n\n"
|
||||
|
@ -182,12 +179,12 @@ added: {timestamp}
|
|||
with open(markdown_filename, 'w', encoding=encoding) as md_file:
|
||||
md_file.write(markdown_content)
|
||||
|
||||
info(f"Successfully saved to {markdown_filename}")
|
||||
l.info(f"Successfully saved to {markdown_filename}")
|
||||
|
||||
return markdown_filename
|
||||
|
||||
except Exception as e:
|
||||
err(f"Failed to clip: {str(e)}")
|
||||
l.error(f"Failed to clip: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
|
@ -199,7 +196,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False):
|
|||
if check_file_name(filename):
|
||||
file_path = Path(dirpath) / filename
|
||||
impermissible_files.append(file_path)
|
||||
debug(f"Impermissible file found: {file_path}")
|
||||
l.debug(f"Impermissible file found: {file_path}")
|
||||
|
||||
# Sanitize the file name
|
||||
new_filename = sanitize_filename(filename)
|
||||
|
@ -217,7 +214,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False):
|
|||
# Rename the file
|
||||
if rename:
|
||||
os.rename(file_path, new_file_path)
|
||||
debug(f"Renamed: {file_path} -> {new_file_path}")
|
||||
l.debug(f"Renamed: {file_path} -> {new_file_path}")
|
||||
|
||||
return impermissible_files
|
||||
|
||||
|
@ -256,18 +253,18 @@ async def build_daily_note_getpoint():
|
|||
path = await build_daily_note(date_time, loc.latitude, loc.longitude)
|
||||
path_str = str(path)
|
||||
|
||||
info(f"Successfully created daily note at {path_str}")
|
||||
l.info(f"Successfully created daily note at {path_str}")
|
||||
return JSONResponse(content={"path": path_str}, status_code=200)
|
||||
|
||||
except ValueError as ve:
|
||||
error_msg = f"Value Error in build_daily_note_getpoint: {str(ve)}"
|
||||
err(error_msg)
|
||||
l.error(error_msg)
|
||||
raise HTTPException(status_code=400, detail=error_msg)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Unexpected error in build_daily_note_getpoint: {str(e)}"
|
||||
err(error_msg)
|
||||
err(f"Traceback: {traceback.format_exc()}")
|
||||
l.error(error_msg)
|
||||
l.error(f"Traceback: {traceback.format_exc()}")
|
||||
raise HTTPException(status_code=500, detail="An unexpected error occurred")
|
||||
|
||||
|
||||
|
@ -287,7 +284,7 @@ async def build_daily_note_endpoint(
|
|||
else:
|
||||
raise ValueError("Location is not provided or invalid.")
|
||||
except (ValueError, AttributeError, TypeError) as e:
|
||||
warn(f"Falling back to localized datetime due to error: {e}")
|
||||
l.warning(f"Falling back to localized datetime due to error: {e}")
|
||||
try:
|
||||
date_time = await gis.dt(date_str)
|
||||
places = await gis.fetch_locations(date_time)
|
||||
|
@ -307,7 +304,7 @@ async def build_daily_note(date_time: dt_datetime, lat: float = None, lon: float
|
|||
Obsidian helper. Takes a datetime and creates a new daily note. Note: it uses the sijapi configuration file to place the daily note and does NOT presently interface with Obsidian's daily note or periodic notes extensions. It is your responsibility to ensure they match.
|
||||
'''
|
||||
absolute_path, _ = assemble_journal_path(date_time)
|
||||
debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in build_daily_note.")
|
||||
l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in build_daily_note.")
|
||||
formatted_day = date_time.strftime("%A %B %d, %Y") # Monday May 27, 2024 formatting
|
||||
day_before = (date_time - timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-26 Sunday formatting
|
||||
day_after = (date_time + timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-28 Tuesday formatting
|
||||
|
@ -396,7 +393,7 @@ async def update_frontmatter_endpoint(date: str, key: str, value: str):
|
|||
async def update_frontmatter(date_time: dt_datetime, key: str, value: str):
|
||||
file_path, relative_path = assemble_journal_path(date_time)
|
||||
if not file_path.exists():
|
||||
crit(f"Markdown file not found at {file_path}")
|
||||
l.critical(f"Markdown file not found at {file_path}")
|
||||
raise HTTPException(status_code=404, detail="Markdown file not found.")
|
||||
|
||||
with open(file_path, "r", encoding="utf-8") as file:
|
||||
|
@ -430,9 +427,9 @@ async def banner_endpoint(dt: str, location: str = None, forecast: str = None, m
|
|||
'''
|
||||
Endpoint (POST) that generates a new banner image for the Obsidian daily note for a specified date, taking into account optional additional information, then updates the frontmatter if necessary.
|
||||
'''
|
||||
debug(f"banner_endpoint requested with date: {dt} ({type(dt)})")
|
||||
l.debug(f"banner_endpoint requested with date: {dt} ({type(dt)})")
|
||||
date_time = await gis.dt(dt)
|
||||
debug(f"date_time after localization: {date_time} ({type(date_time)})")
|
||||
l.debug(f"date_time after localization: {date_time} ({type(date_time)})")
|
||||
context = await generate_context(dt, location, forecast, mood, other_context)
|
||||
jpg_path = await generate_banner(date_time, location, mood=mood, other_context=other_context)
|
||||
return jpg_path
|
||||
|
@ -449,10 +446,10 @@ async def generate_banner(dt, location: Location = None, forecast: str = None, m
|
|||
forecast = await update_dn_weather(date_time, False, location.latitude, location.longitude)
|
||||
|
||||
prompt = await generate_context(date_time, location, forecast, mood, other_context)
|
||||
debug(f"Prompt: {prompt}")
|
||||
l.debug(f"Prompt: {prompt}")
|
||||
final_path = await img.workflow(prompt, scene=OBSIDIAN_BANNER_SCENE, destination_path=destination_path)
|
||||
if not str(local_path) in str(final_path):
|
||||
info(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}")
|
||||
l.info(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}")
|
||||
jpg_embed = f"\"![[{local_path}]]\""
|
||||
await update_frontmatter(date_time, "banner", jpg_embed)
|
||||
return local_path
|
||||
|
@ -481,7 +478,7 @@ async def generate_context(date_time, location: Location, forecast: str, mood: s
|
|||
if geocoded_location.display_name or geocoded_location.city or geocoded_location.country:
|
||||
return await generate_context(date_time, geocoded_location, forecast, mood, other_context)
|
||||
else:
|
||||
warn(f"Failed to get a useable location for purposes of generating a banner, but we'll generate one anyway.")
|
||||
l.warning(f"Failed to get a useable location for purposes of generating a banner, but we'll generate one anyway.")
|
||||
elif location and isinstance(location, str):
|
||||
display_name = f"Location: {location}\n"
|
||||
else:
|
||||
|
@ -549,8 +546,8 @@ async def note_weather_get(
|
|||
force_refresh_weather = refresh == "True"
|
||||
try:
|
||||
date_time = dt_datetime.now() if date == "0" else await gis.dt(date)
|
||||
debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in note_weather_get.")
|
||||
debug(f"date: {date} .. date_time: {date_time}")
|
||||
l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in note_weather_get.")
|
||||
l.debug(f"date: {date} .. date_time: {date_time}")
|
||||
content = await update_dn_weather(date_time, force_refresh_weather) #, lat, lon)
|
||||
return JSONResponse(content={"forecast": content}, status_code=200)
|
||||
|
||||
|
@ -558,68 +555,68 @@ async def note_weather_get(
|
|||
return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code)
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error in note_weather_get: {str(e)}")
|
||||
l.error(f"Error in note_weather_get: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
|
||||
|
||||
|
||||
@note.post("/update/note/{date}")
|
||||
async def post_update_daily_weather_and_calendar_and_timeslips(date: str, refresh: str="False") -> PlainTextResponse:
|
||||
date_time = await gis.dt(date)
|
||||
debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in post_update_daily_weather_and_calendar_and_timeslips.")
|
||||
l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in post_update_daily_weather_and_calendar_and_timeslips.")
|
||||
force_refresh_weather = refresh == "True"
|
||||
await update_dn_weather(date_time, force_refresh_weather)
|
||||
await update_daily_note_events(date_time)
|
||||
await build_daily_timeslips(date_time)
|
||||
return f"[Refresh]({API.URL}/update/note/{date_time.strftime('%Y-%m-%d')}"
|
||||
return f"[Refresh]({Sys.URL}/update/note/{date_time.strftime('%Y-%m-%d')}"
|
||||
|
||||
|
||||
async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False, lat: float = None, lon: float = None):
|
||||
debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.")
|
||||
l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.")
|
||||
try:
|
||||
if lat and lon:
|
||||
place = await GEO.code((lat, lon))
|
||||
|
||||
else:
|
||||
debug(f"Updating weather for {date_time}")
|
||||
l.debug(f"Updating weather for {date_time}")
|
||||
places = await gis.fetch_locations(date_time)
|
||||
place = places[0]
|
||||
lat = place.latitude
|
||||
lon = place.longitude
|
||||
|
||||
debug(f"lat: {lat}, lon: {lon}, place: {place}")
|
||||
l.debug(f"lat: {lat}, lon: {lon}, place: {place}")
|
||||
city = GEO.find_override_location(lat, lon)
|
||||
if city:
|
||||
info(f"Using override location: {city}")
|
||||
l.info(f"Using override location: {city}")
|
||||
|
||||
else:
|
||||
if place.city and place.city != "":
|
||||
city = place.city
|
||||
info(f"City in data: {city}")
|
||||
l.info(f"City in data: {city}")
|
||||
|
||||
else:
|
||||
location = await GEO.code((lat, lon))
|
||||
debug(f"location: {location}")
|
||||
l.debug(f"location: {location}")
|
||||
city = location.name
|
||||
city = city if city else location.city
|
||||
city = city if city else location.house_number + ' ' + location.road
|
||||
|
||||
debug(f"City geocoded: {city}")
|
||||
l.debug(f"City geocoded: {city}")
|
||||
|
||||
# Assemble journal path
|
||||
absolute_path, relative_path = assemble_journal_path(date_time, filename="Weather", extension=".md", no_timestamp = True)
|
||||
debug(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}")
|
||||
l.debug(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}")
|
||||
|
||||
try:
|
||||
debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
|
||||
l.debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
|
||||
day = await weather.get_weather(date_time, lat, lon, force_refresh)
|
||||
debug(f"day information obtained from get_weather: {day}")
|
||||
l.debug(f"day information obtained from get_weather: {day}")
|
||||
if day:
|
||||
DailyWeather = day.get('DailyWeather')
|
||||
HourlyWeather = day.get('HourlyWeather')
|
||||
if DailyWeather:
|
||||
# debug(f"Day: {DailyWeather}")
|
||||
# l.debug(f"Day: {DailyWeather}")
|
||||
icon = DailyWeather.get('icon')
|
||||
debug(f"Icon: {icon}")
|
||||
l.debug(f"Icon: {icon}")
|
||||
|
||||
weather_icon, admonition = get_icon_and_admonition(icon) if icon else (":LiSunMoon:", "ad-weather")
|
||||
|
||||
|
@ -688,38 +685,38 @@ async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False,
|
|||
detailed_forecast += assemble_hourly_data_table(times, condition_symbols, temps, winds)
|
||||
detailed_forecast += f"```\n\n"
|
||||
|
||||
debug(f"Detailed forecast: {detailed_forecast}.")
|
||||
l.debug(f"Detailed forecast: {detailed_forecast}.")
|
||||
|
||||
with open(absolute_path, 'w', encoding='utf-8') as note_file:
|
||||
note_file.write(detailed_forecast)
|
||||
|
||||
debug(f"Operation complete.")
|
||||
l.debug(f"Operation complete.")
|
||||
|
||||
return narrative
|
||||
else:
|
||||
err(f"Failed to get DailyWeather from day: {day}")
|
||||
l.error(f"Failed to get DailyWeather from day: {day}")
|
||||
else:
|
||||
err(f"Failed to get day")
|
||||
l.error(f"Failed to get day")
|
||||
raise HTTPException(status_code=500, detail="Failed to retrieve weather data")
|
||||
|
||||
except HTTPException as e:
|
||||
err(f"HTTP error: {e}")
|
||||
err(traceback.format_exc())
|
||||
l.error(f"HTTP error: {e}")
|
||||
l.error(traceback.format_exc())
|
||||
raise e
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error: {e}")
|
||||
err(traceback.format_exc())
|
||||
l.error(f"Error: {e}")
|
||||
l.error(traceback.format_exc())
|
||||
raise HTTPException(status_code=999, detail=f"Error: {e}")
|
||||
|
||||
except ValueError as ve:
|
||||
err(f"Value error in update_dn_weather: {str(ve)}")
|
||||
err(traceback.format_exc())
|
||||
l.error(f"Value error in update_dn_weather: {str(ve)}")
|
||||
l.error(traceback.format_exc())
|
||||
raise HTTPException(status_code=400, detail=f"Value error: {str(ve)}")
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error in update_dn_weather: {str(e)}")
|
||||
err(traceback.format_exc())
|
||||
l.error(f"Error in update_dn_weather: {str(e)}")
|
||||
l.error(traceback.format_exc())
|
||||
raise HTTPException(status_code=500, detail=f"Error in update_dn_weather: {str(e)}")
|
||||
|
||||
|
||||
|
@ -728,8 +725,8 @@ def format_hourly_time(hour):
|
|||
hour_12 = convert_to_12_hour_format(hour.get("datetime"))
|
||||
return hour_12
|
||||
except Exception as e:
|
||||
err(f"Error in format_hourly_time: {str(e)}")
|
||||
err(traceback.format_exc())
|
||||
l.error(f"Error in format_hourly_time: {str(e)}")
|
||||
l.error(traceback.format_exc())
|
||||
return ""
|
||||
|
||||
|
||||
|
@ -740,7 +737,7 @@ def format_hourly_icon(hour, sunrise, sunset):
|
|||
|
||||
precip = hour.get('precip', float(0.0))
|
||||
precip_prob = hour.get('precipprob', float(0.0))
|
||||
debug(f"precip: {precip}, prob: {precip_prob}")
|
||||
l.debug(f"precip: {precip}, prob: {precip_prob}")
|
||||
|
||||
sp_str = None
|
||||
|
||||
|
@ -764,8 +761,8 @@ def format_hourly_icon(hour, sunrise, sunset):
|
|||
return formatted
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error in format_hourly_special: {str(e)}")
|
||||
err(traceback.format_exc())
|
||||
l.error(f"Error in format_hourly_special: {str(e)}")
|
||||
l.error(traceback.format_exc())
|
||||
return ""
|
||||
|
||||
|
||||
|
@ -774,8 +771,8 @@ def format_hourly_temperature(hour):
|
|||
temp_str = f"{hour.get('temp', '')}˚ F"
|
||||
return temp_str
|
||||
except Exception as e:
|
||||
err(f"Error in format_hourly_temperature: {str(e)}")
|
||||
err(traceback.format_exc())
|
||||
l.error(f"Error in format_hourly_temperature: {str(e)}")
|
||||
l.error(traceback.format_exc())
|
||||
return ""
|
||||
|
||||
|
||||
|
@ -786,8 +783,8 @@ def format_hourly_wind(hour):
|
|||
wind_str = f"{str(windspeed)}:LiWind: {winddir}"
|
||||
return wind_str
|
||||
except Exception as e:
|
||||
err(f"Error in format_hourly_wind: {str(e)}")
|
||||
err(traceback.format_exc())
|
||||
l.error(f"Error in format_hourly_wind: {str(e)}")
|
||||
l.error(traceback.format_exc())
|
||||
return ""
|
||||
|
||||
def assemble_hourly_data_table(times, condition_symbols, temps, winds):
|
||||
|
@ -800,7 +797,7 @@ def assemble_hourly_data_table(times, condition_symbols, temps, winds):
|
|||
|
||||
|
||||
def get_icon_and_admonition(icon_str) -> Tuple:
|
||||
debug(f"Received request for emoji {icon_str}")
|
||||
l.debug(f"Received request for emoji {icon_str}")
|
||||
if icon_str.startswith(":") and icon_str.endswith(":"):
|
||||
return icon_str
|
||||
|
||||
|
@ -891,7 +888,7 @@ async def format_events_as_markdown(event_data: Dict[str, Union[str, List[Dict[s
|
|||
total_events = len(event_data["events"])
|
||||
event_markdown = f"```ad-events"
|
||||
for event in event_data["events"]:
|
||||
debug(f"event busy status: {event['busy']}; all day status: {event['all_day']}")
|
||||
l.debug(f"event busy status: {event['busy']}; all day status: {event['all_day']}")
|
||||
if not event['name'].startswith('TC '):
|
||||
url = f"hook://ical/eventID={event['uid']}calendarID=17"
|
||||
if event['url']:
|
||||
|
@ -960,18 +957,18 @@ async def note_events_endpoint(date: str = Query(None)):
|
|||
|
||||
|
||||
async def update_daily_note_events(date_time: dt_datetime):
|
||||
debug(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}")
|
||||
l.debug(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}")
|
||||
try:
|
||||
events = await cal.get_events(date_time, date_time)
|
||||
debug(f"Raw events: {events}")
|
||||
l.debug(f"Raw events: {events}")
|
||||
event_data = {
|
||||
"date": date_time.strftime('%Y-%m-%d'),
|
||||
"events": events
|
||||
}
|
||||
events_markdown = await format_events_as_markdown(event_data)
|
||||
debug(f"Markdown events: {events_markdown}")
|
||||
l.debug(f"Markdown events: {events_markdown}")
|
||||
absolute_path, _ = assemble_journal_path(date_time, filename="Events", extension=".md", no_timestamp = True)
|
||||
debug(f"Writing events to file: {absolute_path}")
|
||||
l.debug(f"Writing events to file: {absolute_path}")
|
||||
|
||||
with open(absolute_path, 'w', encoding='utf-8') as note_file:
|
||||
note_file.write(events_markdown)
|
||||
|
@ -979,5 +976,5 @@ async def update_daily_note_events(date_time: dt_datetime):
|
|||
return events_markdown
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error processing events: {e}")
|
||||
l.error(f"Error processing events: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
|
|
@ -5,15 +5,11 @@ NOTES: Haven't yet decided if this should depend on the Obsidian and Chat module
|
|||
#routers/rag.py
|
||||
|
||||
from fastapi import APIRouter
|
||||
from sijapi import L
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
rag = APIRouter()
|
||||
logger = L.get_module_logger("rag")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
|
||||
rag.get("/rag/search")
|
||||
async def rag_search_endpoint(query: str, scope: str):
|
||||
|
|
|
@ -15,14 +15,9 @@ from bs4 import BeautifulSoup
|
|||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from pathlib import Path
|
||||
from sijapi import Scrape, L, Dir
|
||||
|
||||
logger = L.get_module_logger('scrape')
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
from sijapi import Scrape,Dir
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
scrape = APIRouter()
|
||||
|
||||
|
@ -31,24 +26,24 @@ Dir.DATA = Path(Dir.DATA).expanduser()
|
|||
|
||||
def save_to_json(data: List[Dict], output_file: str):
|
||||
output_path = Dir.DATA / output_file
|
||||
info(f"Saving data to {output_path}")
|
||||
l.info(f"Saving data to {output_path}")
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(output_path, 'w') as f:
|
||||
json.dump(data, f, indent=2)
|
||||
info(f"Data saved successfully to {output_path}")
|
||||
l.info(f"Data saved successfully to {output_path}")
|
||||
|
||||
def load_from_json(output_file: str) -> List[Dict]:
|
||||
output_path = Dir.DATA / output_file
|
||||
info(f"Loading data from {output_path}")
|
||||
l.info(f"Loading data from {output_path}")
|
||||
try:
|
||||
with open(output_path, 'r') as f:
|
||||
return json.load(f)
|
||||
except FileNotFoundError:
|
||||
warn(f"File {output_path} not found")
|
||||
l.warning(f"File {output_path} not found")
|
||||
return []
|
||||
|
||||
async def fetch_content(config: Any) -> str:
|
||||
info(f"Fetching content from {config.url}")
|
||||
l.info(f"Fetching content from {config.url}")
|
||||
if config.content.js_render:
|
||||
return await fetch_with_selenium(config.url)
|
||||
|
||||
|
@ -63,7 +58,7 @@ async def fetch_content(config: Any) -> str:
|
|||
elif config.content.type == 'txt':
|
||||
return await response.text()
|
||||
else:
|
||||
warn(f"Unsupported content type: {config.content.type}")
|
||||
l.warning(f"Unsupported content type: {config.content.type}")
|
||||
return await response.text()
|
||||
|
||||
async def fetch_with_selenium(url: str) -> str:
|
||||
|
@ -92,7 +87,7 @@ async def handle_json(response):
|
|||
return await response.json()
|
||||
|
||||
def apply_processing_step(data: Any, step: Any) -> Any:
|
||||
info(f"Applying processing step: {step.type}")
|
||||
l.info(f"Applying processing step: {step.type}")
|
||||
if step.type == 'regex_split':
|
||||
return re.split(step.pattern, data)[1:]
|
||||
elif step.type == 'keyword_filter':
|
||||
|
@ -101,11 +96,11 @@ def apply_processing_step(data: Any, step: Any) -> Any:
|
|||
if isinstance(data, list):
|
||||
return [apply_regex_extract(item, step.extractions) for item in data]
|
||||
return apply_regex_extract(data, step.extractions)
|
||||
debug(f"Unknown processing step type: {step.type}")
|
||||
l.debug(f"Unknown processing step type: {step.type}")
|
||||
return data
|
||||
|
||||
def apply_regex_extract(text: str, extractions: List[Any]) -> Dict:
|
||||
debug(f"Applying regex extraction on text of length {len(text)}")
|
||||
l.debug(f"Applying regex extraction on text of length {len(text)}")
|
||||
result = {}
|
||||
for extraction in extractions:
|
||||
extraction_dict = extraction.dict() if hasattr(extraction, 'dict') else extraction
|
||||
|
@ -122,11 +117,11 @@ def apply_regex_extract(text: str, extractions: List[Any]) -> Dict:
|
|||
else:
|
||||
result[extraction_dict['name']] = matches[-1].strip() # Take the last match
|
||||
|
||||
debug(f"Extracted {len(result)} items")
|
||||
l.debug(f"Extracted {len(result)} items")
|
||||
return result
|
||||
|
||||
def apply_post_processing(data: List[Dict], post_processing: List[Any]) -> List[Dict]:
|
||||
info("Applying post-processing steps")
|
||||
l.info("Applying post-processing steps")
|
||||
for step in post_processing:
|
||||
if step.type == 'custom':
|
||||
data = globals()[step.function](data)
|
||||
|
@ -137,7 +132,7 @@ def data_has_changed(new_data: List[Dict], old_data: List[Dict]) -> bool:
|
|||
|
||||
@scrape.get("/scrape/{config_name}")
|
||||
async def scrape_site(config_name: str):
|
||||
info(f"Starting scrape operation for {config_name}")
|
||||
l.info(f"Starting scrape operation for {config_name}")
|
||||
|
||||
if not hasattr(Scrape, 'configurations'):
|
||||
# If 'configurations' doesn't exist, assume the entire Scrape object is the configuration
|
||||
|
@ -162,14 +157,14 @@ async def scrape_site(config_name: str):
|
|||
|
||||
if data_has_changed(processed_data, previous_data):
|
||||
save_to_json(processed_data, output_file)
|
||||
info("Scrape completed with updates")
|
||||
l.info("Scrape completed with updates")
|
||||
return {"message": "Site updated", "data": processed_data}
|
||||
else:
|
||||
info("Scrape completed with no updates")
|
||||
l.info("Scrape completed with no updates")
|
||||
return {"message": "No updates", "data": processed_data}
|
||||
|
||||
def apply_post_processing(data: List[Dict], post_processing: List[Any]) -> List[Dict]:
|
||||
info("Applying post-processing steps")
|
||||
l.info("Applying post-processing steps")
|
||||
for step in post_processing:
|
||||
if step.type == 'regex_extract':
|
||||
for entry in data:
|
||||
|
|
|
@ -33,20 +33,15 @@ from selenium.webdriver.common.by import By
|
|||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from sijapi import (
|
||||
L, API, Serve, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY,
|
||||
Sys, Serve, Db, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY,
|
||||
COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR,
|
||||
MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, IMG_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR
|
||||
)
|
||||
from sijapi.classes import WidgetUpdate
|
||||
from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path
|
||||
from sijapi.routers import gis
|
||||
|
||||
logger = L.get_module_logger("serve")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.err(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
serve = APIRouter()
|
||||
templates = Jinja2Templates(directory=Path(__file__).parent.parent / "sites")
|
||||
|
@ -85,13 +80,13 @@ async def get_file_endpoint(file_path: str):
|
|||
date_time = await gis.dt(file_path);
|
||||
absolute_path, local_path = assemble_journal_path(date_time, no_timestamp = True)
|
||||
except ValueError as e:
|
||||
debug(f"Unable to parse {file_path} as a date, now trying to use it as a local path")
|
||||
l.debug(f"Unable to parse {file_path} as a date, now trying to use it as a local path")
|
||||
absolute_path = OBSIDIAN_VAULT_DIR / file_path
|
||||
if not absolute_path.suffix:
|
||||
absolute_path = Path(absolute_path.with_suffix(".md"))
|
||||
|
||||
if not absolute_path.is_file():
|
||||
warn(f"{absolute_path} is not a valid file it seems.")
|
||||
l.warning(f"{absolute_path} is not a valid file it seems.")
|
||||
elif absolute_path.suffix == '.md':
|
||||
try:
|
||||
with open(absolute_path, 'r', encoding='utf-8') as file:
|
||||
|
@ -125,11 +120,11 @@ async def hook_alert(request: Request):
|
|||
async def notify(alert: str):
|
||||
fail = True
|
||||
try:
|
||||
if API.EXTENSIONS.shellfish:
|
||||
if Sys.EXTENSIONS.shellfish:
|
||||
await notify_shellfish(alert)
|
||||
fail = False
|
||||
|
||||
if API.EXTENSIONS.macnotify:
|
||||
if Sys.EXTENSIONS.macnotify:
|
||||
if TS_ID == MAC_ID:
|
||||
await notify_local(alert)
|
||||
fail = False
|
||||
|
@ -140,10 +135,10 @@ async def notify(alert: str):
|
|||
fail = True
|
||||
|
||||
if fail == False:
|
||||
info(f"Delivered alert: {alert}")
|
||||
l.info(f"Delivered alert: {alert}")
|
||||
return {"message": alert}
|
||||
else:
|
||||
crit(f"Failed to deliver alert: {alert}")
|
||||
l.critical(f"Failed to deliver alert: {alert}")
|
||||
return {"message": f"Failed to deliver alert: {alert}"}
|
||||
|
||||
async def notify_local(message: str):
|
||||
|
@ -165,7 +160,7 @@ async def notify_remote(host: str, message: str, username: str = None, password:
|
|||
ssh.close()
|
||||
|
||||
|
||||
if API.EXTENSIONS.shellfish:
|
||||
if Sys.EXTENSIONS.shellfish:
|
||||
async def notify_shellfish(alert: str):
|
||||
key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b"
|
||||
user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm"
|
||||
|
@ -250,14 +245,14 @@ if API.EXTENSIONS.shellfish:
|
|||
return result.stdout
|
||||
|
||||
|
||||
if API.EXTENSIONS.courtlistener:
|
||||
if Sys.EXTENSIONS.courtlistener:
|
||||
with open(CASETABLE_PATH, 'r') as file:
|
||||
CASETABLE = json.load(file)
|
||||
|
||||
@serve.post("/cl/search")
|
||||
async def hook_cl_search(request: Request, bg_tasks: BackgroundTasks):
|
||||
client_ip = request.client.host
|
||||
debug(f"Received request from IP: {client_ip}")
|
||||
l.debug(f"Received request from IP: {client_ip}")
|
||||
data = await request.json()
|
||||
payload = data['payload']
|
||||
results = data['payload']['results']
|
||||
|
@ -275,7 +270,7 @@ if API.EXTENSIONS.courtlistener:
|
|||
@serve.post("/cl/docket")
|
||||
async def hook_cl_docket(request: Request):
|
||||
client_ip = request.client.host
|
||||
debug(f"Received request from IP: {client_ip}")
|
||||
l.debug(f"Received request from IP: {client_ip}")
|
||||
data = await request.json()
|
||||
await cl_docket(data, client_ip)
|
||||
|
||||
|
@ -312,14 +307,14 @@ if API.EXTENSIONS.courtlistener:
|
|||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(url, headers=headers) as response:
|
||||
if response.status == 200:
|
||||
debug(f"Fetching CourtListener docket information for {docket}...")
|
||||
l.debug(f"Fetching CourtListener docket information for {docket}...")
|
||||
data = await response.json()
|
||||
court_docket = data['results'][0]['docket_number_core']
|
||||
court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number
|
||||
case_name = data['results'][0]['case_name']
|
||||
debug(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
|
||||
l.debug(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
|
||||
else:
|
||||
debug("Failed to fetch data from CourtListener API.")
|
||||
l.debug("Failed to fetch data from CourtListener API.")
|
||||
court_docket = 'NoCourtDocket'
|
||||
case_name = 'NoCaseName'
|
||||
|
||||
|
@ -329,12 +324,12 @@ if API.EXTENSIONS.courtlistener:
|
|||
|
||||
if filepath_ia:
|
||||
file_url = filepath_ia
|
||||
debug(f"Found IA file at {file_url}.")
|
||||
l.debug(f"Found IA file at {file_url}.")
|
||||
elif filepath_local:
|
||||
file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
|
||||
debug(f"Found local file at {file_url}.")
|
||||
l.debug(f"Found local file at {file_url}.")
|
||||
else:
|
||||
debug(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
|
||||
l.debug(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
|
||||
continue
|
||||
|
||||
document_number = document.get('document_number', 'NoDocumentNumber')
|
||||
|
@ -345,7 +340,7 @@ if API.EXTENSIONS.courtlistener:
|
|||
target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name
|
||||
target_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
await cl_download_file(file_url, target_path, session)
|
||||
debug(f"Downloaded {file_name} to {target_path}")
|
||||
l.debug(f"Downloaded {file_name} to {target_path}")
|
||||
|
||||
|
||||
def cl_case_details(docket):
|
||||
|
@ -360,18 +355,18 @@ if API.EXTENSIONS.courtlistener:
|
|||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
|
||||
}
|
||||
async with aiohttp.ClientSession() as session:
|
||||
debug(f"Attempting to download {url} to {path}.")
|
||||
l.debug(f"Attempting to download {url} to {path}.")
|
||||
try:
|
||||
async with session.get(url, headers=headers, allow_redirects=True) as response:
|
||||
if response.status == 403:
|
||||
err(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
|
||||
l.error(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
|
||||
return
|
||||
response.raise_for_status()
|
||||
|
||||
# Check if the response content type is a PDF
|
||||
content_type = response.headers.get('Content-Type')
|
||||
if content_type != 'application/pdf':
|
||||
err(f"Invalid content type: {content_type}. Skipping download.")
|
||||
l.error(f"Invalid content type: {content_type}. Skipping download.")
|
||||
return
|
||||
|
||||
# Create an in-memory buffer to store the downloaded content
|
||||
|
@ -386,7 +381,7 @@ if API.EXTENSIONS.courtlistener:
|
|||
try:
|
||||
PdfReader(buffer)
|
||||
except Exception as e:
|
||||
err(f"Invalid PDF content: {str(e)}. Skipping download.")
|
||||
l.error(f"Invalid PDF content: {str(e)}. Skipping download.")
|
||||
return
|
||||
|
||||
# If the PDF is valid, write the content to the file on disk
|
||||
|
@ -395,7 +390,7 @@ if API.EXTENSIONS.courtlistener:
|
|||
file.write(buffer.getvalue())
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error downloading file: {str(e)}")
|
||||
l.error(f"Error downloading file: {str(e)}")
|
||||
|
||||
|
||||
async def cl_search_process_result(result):
|
||||
|
@ -404,7 +399,7 @@ if API.EXTENSIONS.courtlistener:
|
|||
court_id = result.get('court_id')
|
||||
case_name_short = result.get('caseNameShort')
|
||||
case_name = result.get('caseName')
|
||||
debug(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")
|
||||
l.debug(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")
|
||||
|
||||
court_folder = court_id
|
||||
|
||||
|
@ -418,9 +413,9 @@ if API.EXTENSIONS.courtlistener:
|
|||
target_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
await cl_download_file(download_url, target_path, session)
|
||||
debug(f"Downloaded {file_name} to {target_path}")
|
||||
l.debug(f"Downloaded {file_name} to {target_path}")
|
||||
|
||||
if API.EXTENSIONS.url_shortener:
|
||||
if Sys.EXTENSIONS.url_shortener:
|
||||
@serve.get("/s", response_class=HTMLResponse)
|
||||
async def shortener_form(request: Request):
|
||||
return templates.TemplateResponse("shortener.html", {"request": request})
|
||||
|
@ -433,7 +428,7 @@ if API.EXTENSIONS.url_shortener:
|
|||
if len(custom_code) != 3 or not custom_code.isalnum():
|
||||
return templates.TemplateResponse("shortener.html", {"request": request, "error": "Custom code must be 3 alphanumeric characters"})
|
||||
|
||||
existing = await API.execute_read_query('SELECT 1 FROM short_urls WHERE short_code = $1', custom_code, table_name="short_urls")
|
||||
existing = await Db.execute_read('SELECT 1 FROM short_urls WHERE short_code = $1', custom_code, table_name="short_urls")
|
||||
if existing:
|
||||
return templates.TemplateResponse("shortener.html", {"request": request, "error": "Custom code already in use"})
|
||||
|
||||
|
@ -441,13 +436,13 @@ if API.EXTENSIONS.url_shortener:
|
|||
else:
|
||||
chars = string.ascii_letters + string.digits
|
||||
while True:
|
||||
debug(f"FOUND THE ISSUE")
|
||||
l.debug(f"FOUND THE ISSUE")
|
||||
short_code = ''.join(random.choice(chars) for _ in range(3))
|
||||
existing = await API.execute_read_query('SELECT 1 FROM short_urls WHERE short_code = $1', short_code, table_name="short_urls")
|
||||
existing = await Db.execute_read('SELECT 1 FROM short_urls WHERE short_code = $1', short_code, table_name="short_urls")
|
||||
if not existing:
|
||||
break
|
||||
|
||||
await API.execute_write_query(
|
||||
await Db.execute_write(
|
||||
'INSERT INTO short_urls (short_code, long_url) VALUES ($1, $2)',
|
||||
short_code, long_url,
|
||||
table_name="short_urls"
|
||||
|
@ -459,7 +454,7 @@ if API.EXTENSIONS.url_shortener:
|
|||
|
||||
@serve.get("/{short_code}")
|
||||
async def redirect_short_url(short_code: str):
|
||||
results = await API.execute_read_query(
|
||||
results = await Db.execute_read(
|
||||
'SELECT long_url FROM short_urls WHERE short_code = $1',
|
||||
short_code,
|
||||
table_name="short_urls"
|
||||
|
@ -474,7 +469,7 @@ if API.EXTENSIONS.url_shortener:
|
|||
raise HTTPException(status_code=404, detail="Long URL not found")
|
||||
|
||||
# Increment click count (you may want to do this asynchronously)
|
||||
await API.execute_write_query(
|
||||
await Db.execute_write(
|
||||
'INSERT INTO click_logs (short_code, clicked_at) VALUES ($1, $2)',
|
||||
short_code, datetime.now(),
|
||||
table_name="click_logs"
|
||||
|
@ -485,7 +480,7 @@ if API.EXTENSIONS.url_shortener:
|
|||
|
||||
@serve.get("/analytics/{short_code}")
|
||||
async def get_analytics(short_code: str):
|
||||
url_info = await API.execute_read_query(
|
||||
url_info = await Db.execute_read(
|
||||
'SELECT long_url, created_at FROM short_urls WHERE short_code = $1',
|
||||
short_code,
|
||||
table_name="short_urls"
|
||||
|
@ -493,13 +488,13 @@ if API.EXTENSIONS.url_shortener:
|
|||
if not url_info:
|
||||
raise HTTPException(status_code=404, detail="Short URL not found")
|
||||
|
||||
click_count = await API.execute_read_query(
|
||||
click_count = await Db.execute_read(
|
||||
'SELECT COUNT(*) FROM click_logs WHERE short_code = $1',
|
||||
short_code,
|
||||
table_name="click_logs"
|
||||
)
|
||||
|
||||
clicks = await API.execute_read_query(
|
||||
clicks = await Db.execute_read(
|
||||
'SELECT clicked_at, ip_address, user_agent FROM click_logs WHERE short_code = $1 ORDER BY clicked_at DESC LIMIT 100',
|
||||
short_code,
|
||||
table_name="click_logs"
|
||||
|
|
|
@ -8,15 +8,12 @@ import httpx
|
|||
import socket
|
||||
from fastapi import APIRouter
|
||||
from tailscale import Tailscale
|
||||
from sijapi import L, API, TS_ID
|
||||
from sijapi import Sys, TS_ID
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
sys = APIRouter()
|
||||
|
||||
sys = APIRouter(tags=["public", "trusted", "private"])
|
||||
logger = L.get_module_logger("health")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
@sys.get("/health")
|
||||
def get_health():
|
||||
|
@ -28,7 +25,7 @@ def get_health() -> str:
|
|||
|
||||
@sys.get("/routers")
|
||||
def get_routers() -> str:
|
||||
active_modules = [module for module, is_active in API.MODULES.__dict__.items() if is_active]
|
||||
active_modules = [module for module, is_active in Sys.MODULES.__dict__.items() if is_active]
|
||||
return active_modules
|
||||
|
||||
@sys.get("/ip")
|
||||
|
@ -36,7 +33,7 @@ def get_local_ip():
|
|||
"""Get the server's local IP address."""
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||
try:
|
||||
s.connect((f'{API.SUBNET_BROADCAST}', 1))
|
||||
s.connect((f'{Sys.SUBNET_BROADCAST}', 1))
|
||||
IP = s.getsockname()[0]
|
||||
except Exception:
|
||||
IP = '127.0.0.1'
|
||||
|
@ -54,7 +51,7 @@ async def get_wan_ip():
|
|||
wan_info = response.json()
|
||||
return wan_info.get('ip', 'Unavailable')
|
||||
except Exception as e:
|
||||
err(f"Error fetching WAN IP: {e}")
|
||||
l.error(f"Error fetching WAN IP: {e}")
|
||||
return "Unavailable"
|
||||
|
||||
@sys.get("/ts_ip")
|
||||
|
|
|
@ -27,17 +27,12 @@ from typing import Optional, List, Dict, Union, Tuple
|
|||
from collections import defaultdict
|
||||
from dotenv import load_dotenv
|
||||
from traceback import format_exc
|
||||
from sijapi import L, TIMING_API_KEY, TIMING_API_URL
|
||||
from sijapi import TIMING_API_KEY, TIMING_API_URL
|
||||
from sijapi.routers import gis
|
||||
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
timing = APIRouter(tags=["private"])
|
||||
logger = L.get_module_logger("timing")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
|
||||
script_directory = os.path.dirname(os.path.abspath(__file__))
|
||||
|
@ -67,17 +62,17 @@ async def post_time_entry_to_timing(entry: Dict):
|
|||
'Accept': 'application/json',
|
||||
'X-Time-Zone': 'America/Los_Angeles'
|
||||
}
|
||||
debug(f"Received entry: {entry}")
|
||||
l.debug(f"Received entry: {entry}")
|
||||
response = None # Initialize response
|
||||
try:
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(url, headers=headers, json=entry)
|
||||
response.raise_for_status() # This will only raise for 4xx and 5xx responses
|
||||
except httpx.HTTPStatusError as exc:
|
||||
debug(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}")
|
||||
l.debug(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}")
|
||||
raise HTTPException(status_code=exc.response.status_code, detail=str(exc.response.text))
|
||||
except Exception as exc:
|
||||
debug(f"General exception caught: {exc}")
|
||||
l.debug(f"General exception caught: {exc}")
|
||||
raise HTTPException(status_code=500, detail="An unexpected error occurred")
|
||||
|
||||
if response:
|
||||
|
|
|
@ -26,17 +26,12 @@ import tempfile
|
|||
import random
|
||||
import re
|
||||
import os
|
||||
from sijapi import L, API, Dir, Tts, TTS_SEGMENTS_DIR, VOICE_DIR, TTS_OUTPUT_DIR
|
||||
from sijapi import Sys, Dir, Tts, TTS_SEGMENTS_DIR, VOICE_DIR, TTS_OUTPUT_DIR
|
||||
from sijapi.utilities import sanitize_filename
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
### INITIALIZATIONS ###
|
||||
tts = APIRouter(tags=["trusted", "private"])
|
||||
logger = L.get_module_logger("tts")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
tts = APIRouter()
|
||||
|
||||
DEVICE = torch.device('cpu')
|
||||
|
||||
|
@ -53,7 +48,7 @@ async def list_11l_voices():
|
|||
async with httpx.AsyncClient() as client:
|
||||
try:
|
||||
response = await client.get(url, headers=headers)
|
||||
debug(f"Response: {response}")
|
||||
l.debug(f"Response: {response}")
|
||||
if response.status_code == 200:
|
||||
voices_data = response.json().get("voices", [])
|
||||
formatted_list = ""
|
||||
|
@ -63,7 +58,7 @@ async def list_11l_voices():
|
|||
formatted_list += f"{name}: `{id}`\n"
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error determining voice ID: {e}")
|
||||
l.error(f"Error determining voice ID: {e}")
|
||||
|
||||
return PlainTextResponse(formatted_list, status_code=200)
|
||||
|
||||
|
@ -73,18 +68,18 @@ async def select_voice(voice_name: str) -> str:
|
|||
try:
|
||||
# Case Insensitive comparison
|
||||
voice_name_lower = voice_name.lower()
|
||||
debug(f"Looking for {voice_name_lower}")
|
||||
l.debug(f"Looking for {voice_name_lower}")
|
||||
for item in VOICE_DIR.iterdir():
|
||||
debug(f"Checking {item.name.lower()}")
|
||||
l.debug(f"Checking {item.name.lower()}")
|
||||
if item.name.lower() == f"{voice_name_lower}.wav":
|
||||
debug(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.")
|
||||
l.debug(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.")
|
||||
return item
|
||||
|
||||
err(f"Voice file not found")
|
||||
l.error(f"Voice file not found")
|
||||
raise HTTPException(status_code=404, detail="Voice file not found")
|
||||
|
||||
except Exception as e:
|
||||
err(f"Voice file not found: {e}")
|
||||
l.error(f"Voice file not found: {e}")
|
||||
return None
|
||||
|
||||
|
||||
|
@ -119,51 +114,51 @@ async def generate_speech_endpoint(
|
|||
else:
|
||||
return await generate_speech(bg_tasks, text_content, voice, voice_file, model, speed, podcast)
|
||||
except Exception as e:
|
||||
err(f"Error in TTS: {e}")
|
||||
err(traceback.format_exc())
|
||||
l.error(f"Error in TTS: {e}")
|
||||
l.error(traceback.format_exc())
|
||||
raise HTTPException(status_code=666, detail="error in TTS")
|
||||
|
||||
|
||||
|
||||
async def determine_voice_id(voice_name: str) -> str:
|
||||
debug(f"Searching for voice id for {voice_name}")
|
||||
debug(f"Tts.elevenlabs.voices: {Tts.elevenlabs.voices}")
|
||||
l.debug(f"Searching for voice id for {voice_name}")
|
||||
l.debug(f"Tts.elevenlabs.voices: {Tts.elevenlabs.voices}")
|
||||
|
||||
# Check if the voice is in the configured voices
|
||||
if voice_name and Tts.has_key(f'elevenlabs.voices.{voice_name}'):
|
||||
voice_id = Tts.get_value(f'elevenlabs.voices.{voice_name}')
|
||||
debug(f"Found voice ID in config - {voice_id}")
|
||||
l.debug(f"Found voice ID in config - {voice_id}")
|
||||
return voice_id
|
||||
|
||||
debug(f"Requested voice not among the voices specified in config/tts.yaml. Checking with ElevenLabs API using api_key: {Tts.elevenlabs.key}.")
|
||||
l.debug(f"Requested voice not among the voices specified in config/tts.yaml. Checking with ElevenLabs API using api_key: {Tts.elevenlabs.key}.")
|
||||
url = "https://api.elevenlabs.io/v1/voices"
|
||||
headers = {"xi-api-key": Tts.elevenlabs.key}
|
||||
async with httpx.AsyncClient() as client:
|
||||
try:
|
||||
response = await client.get(url, headers=headers)
|
||||
debug(f"Response status: {response.status_code}")
|
||||
l.debug(f"Response status: {response.status_code}")
|
||||
if response.status_code == 200:
|
||||
voices_data = response.json().get("voices", [])
|
||||
for voice in voices_data:
|
||||
if voice_name == voice["voice_id"] or (voice_name and voice_name.lower() == voice["name"].lower()):
|
||||
debug(f"Found voice ID from API - {voice['voice_id']}")
|
||||
l.debug(f"Found voice ID from API - {voice['voice_id']}")
|
||||
return voice["voice_id"]
|
||||
else:
|
||||
err(f"Failed to get voices from ElevenLabs API. Status code: {response.status_code}")
|
||||
err(f"Response content: {response.text}")
|
||||
l.error(f"Failed to get voices from ElevenLabs API. Status code: {response.status_code}")
|
||||
l.error(f"Response content: {response.text}")
|
||||
except Exception as e:
|
||||
err(f"Error determining voice ID: {e}")
|
||||
l.error(f"Error determining voice ID: {e}")
|
||||
|
||||
warn(f"Voice '{voice_name}' not found; using the default specified in config/tts.yaml: {Tts.elevenlabs.default}")
|
||||
l.warning(f"Voice '{voice_name}' not found; using the default specified in config/tts.yaml: {Tts.elevenlabs.default}")
|
||||
if Tts.has_key(f'elevenlabs.voices.{Tts.elevenlabs.default}'):
|
||||
return Tts.get_value(f'elevenlabs.voices.{Tts.elevenlabs.default}')
|
||||
else:
|
||||
err(f"Default voice '{Tts.elevenlabs.default}' not found in configuration. Using first available voice.")
|
||||
l.error(f"Default voice '{Tts.elevenlabs.default}' not found in configuration. Using first available voice.")
|
||||
first_voice = next(iter(vars(Tts.elevenlabs.voices)))
|
||||
return Tts.get_value(f'elevenlabs.voices.{first_voice}')
|
||||
|
||||
async def elevenlabs_tts(model: str, input_text: str, voice: Optional[str], title: str = None, output_dir: str = None):
|
||||
if getattr(API.EXTENSIONS, 'elevenlabs', False):
|
||||
if getattr(Sys.EXTENSIONS, 'elevenlabs', False):
|
||||
voice_id = await determine_voice_id(voice)
|
||||
|
||||
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
|
||||
|
@ -187,11 +182,11 @@ async def elevenlabs_tts(model: str, input_text: str, voice: Optional[str], titl
|
|||
raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")
|
||||
|
||||
except Exception as e:
|
||||
err(f"Error from Elevenlabs API: {e}")
|
||||
l.error(f"Error from Elevenlabs API: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Error from ElevenLabs API: {e}")
|
||||
|
||||
else:
|
||||
warn(f"elevenlabs_tts called but ElevenLabs module is not enabled in config.")
|
||||
l.warning(f"elevenlabs_tts called but ElevenLabs module is not enabled in config.")
|
||||
raise HTTPException(status_code=400, detail="ElevenLabs TTS is not enabled")
|
||||
|
||||
async def generate_speech(
|
||||
|
@ -205,13 +200,13 @@ async def generate_speech(
|
|||
title: str = None,
|
||||
output_dir = None,
|
||||
) -> str:
|
||||
debug(f"Entering generate_speech function")
|
||||
debug(f"API.EXTENSIONS: {API.EXTENSIONS}")
|
||||
debug(f"Type of API.EXTENSIONS: {type(API.EXTENSIONS)}")
|
||||
debug(f"Dir of API.EXTENSIONS: {dir(API.EXTENSIONS)}")
|
||||
debug(f"Tts config: {Tts}")
|
||||
debug(f"Type of Tts: {type(Tts)}")
|
||||
debug(f"Dir of Tts: {dir(Tts)}")
|
||||
l.debug(f"Entering generate_speech function")
|
||||
l.debug(f"Sys.EXTENSIONS: {Sys.EXTENSIONS}")
|
||||
l.debug(f"Type of Sys.EXTENSIONS: {type(Sys.EXTENSIONS)}")
|
||||
l.debug(f"Dir of Sys.EXTENSIONS: {dir(Sys.EXTENSIONS)}")
|
||||
l.debug(f"Tts config: {Tts}")
|
||||
l.debug(f"Type of Tts: {type(Tts)}")
|
||||
l.debug(f"Dir of Tts: {dir(Tts)}")
|
||||
|
||||
|
||||
use_output_dir = Path(output_dir) if output_dir else TTS_OUTPUT_DIR
|
||||
|
@ -222,26 +217,26 @@ async def generate_speech(
|
|||
title = title if title else "TTS audio"
|
||||
output_path = use_output_dir / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')} {title}.wav"
|
||||
|
||||
debug(f"Model: {model}")
|
||||
debug(f"Voice: {voice}")
|
||||
debug(f"Tts.elevenlabs: {Tts.elevenlabs}")
|
||||
l.debug(f"Model: {model}")
|
||||
l.debug(f"Voice: {voice}")
|
||||
l.debug(f"Tts.elevenlabs: {Tts.elevenlabs}")
|
||||
|
||||
if model == "eleven_turbo_v2" and getattr(API.EXTENSIONS, 'elevenlabs', False):
|
||||
info("Using ElevenLabs.")
|
||||
if model == "eleven_turbo_v2" and getattr(Sys.EXTENSIONS, 'elevenlabs', False):
|
||||
l.info("Using ElevenLabs.")
|
||||
audio_file_path = await elevenlabs_tts(model, text, voice, title, use_output_dir)
|
||||
elif getattr(API.EXTENSIONS, 'xtts', False):
|
||||
info("Using XTTS2")
|
||||
elif getattr(Sys.EXTENSIONS, 'xtts', False):
|
||||
l.info("Using XTTS2")
|
||||
audio_file_path = await local_tts(text, speed, voice, voice_file, podcast, bg_tasks, title, output_path)
|
||||
else:
|
||||
err(f"No TTS module enabled!")
|
||||
l.error(f"No TTS module enabled!")
|
||||
raise ValueError("No TTS module enabled")
|
||||
|
||||
if not audio_file_path:
|
||||
raise ValueError("TTS generation failed: audio_file_path is empty or None")
|
||||
elif audio_file_path.exists():
|
||||
info(f"Saved to {audio_file_path}")
|
||||
l.info(f"Saved to {audio_file_path}")
|
||||
else:
|
||||
warn(f"No file exists at {audio_file_path}")
|
||||
l.warning(f"No file exists at {audio_file_path}")
|
||||
|
||||
if podcast:
|
||||
podcast_path = Dir.PODCAST / audio_file_path.name
|
||||
|
@ -249,18 +244,18 @@ async def generate_speech(
|
|||
if podcast_path != audio_file_path:
|
||||
shutil.copy(audio_file_path, podcast_path)
|
||||
if podcast_path.exists():
|
||||
info(f"Saved to podcast path: {podcast_path}")
|
||||
l.info(f"Saved to podcast path: {podcast_path}")
|
||||
else:
|
||||
warn(f"Podcast mode enabled, but failed to save to {podcast_path}")
|
||||
l.warning(f"Podcast mode enabled, but failed to save to {podcast_path}")
|
||||
|
||||
if output_dir and Path(output_dir) == use_output_dir:
|
||||
debug(f"Keeping {audio_file_path} because it was specified")
|
||||
l.debug(f"Keeping {audio_file_path} because it was specified")
|
||||
|
||||
else:
|
||||
info(f"Podcast mode enabled and output_dir not specified so we will remove {audio_file_path}")
|
||||
l.info(f"Podcast mode enabled and output_dir not specified so we will remove {audio_file_path}")
|
||||
bg_tasks.add_task(os.remove, audio_file_path)
|
||||
else:
|
||||
warn(f"Podcast path is the same as audio file path. Using existing file.")
|
||||
l.warning(f"Podcast path is the same as audio file path. Using existing file.")
|
||||
|
||||
return podcast_path
|
||||
|
||||
|
@ -268,20 +263,20 @@ async def generate_speech(
|
|||
|
||||
|
||||
except Exception as e:
|
||||
err(f"Failed to generate speech: {e}")
|
||||
err(f"Traceback: {traceback.format_exc()}")
|
||||
l.error(f"Failed to generate speech: {e}")
|
||||
l.error(f"Traceback: {traceback.format_exc()}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to generate speech: {e}")
|
||||
|
||||
|
||||
async def get_model(voice: str = None, voice_file: UploadFile = None):
|
||||
if (voice_file or (voice and await select_voice(voice))) and API.EXTENSIONS.xtts:
|
||||
if (voice_file or (voice and await select_voice(voice))) and Sys.EXTENSIONS.xtts:
|
||||
return "xtts"
|
||||
|
||||
elif voice and await determine_voice_id(voice) and API.EXTENSIONS.elevenlabs:
|
||||
elif voice and await determine_voice_id(voice) and Sys.EXTENSIONS.elevenlabs:
|
||||
return "eleven_turbo_v2"
|
||||
|
||||
else:
|
||||
err(f"No model or voice specified, or no TTS module loaded")
|
||||
l.error(f"No model or voice specified, or no TTS module loaded")
|
||||
raise HTTPException(status_code=400, detail="No model or voice specified, or no TTS module loaded")
|
||||
|
||||
|
||||
|
@ -296,7 +291,7 @@ async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> s
|
|||
|
||||
async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str:
|
||||
if voice:
|
||||
debug(f"Looking for voice: {voice}")
|
||||
l.debug(f"Looking for voice: {voice}")
|
||||
selected_voice = await select_voice(voice)
|
||||
return selected_voice
|
||||
|
||||
|
@ -326,7 +321,7 @@ async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None)
|
|||
return new_file
|
||||
|
||||
else:
|
||||
debug(f"No voice specified or file provided, using default voice: {Tts.xtts.default}")
|
||||
l.debug(f"No voice specified or file provided, using default voice: {Tts.xtts.default}")
|
||||
selected_voice = await select_voice(Tts.xtts.default)
|
||||
return selected_voice
|
||||
|
||||
|
@ -343,7 +338,7 @@ async def local_tts(
|
|||
output_path: Optional[Path] = None
|
||||
) -> str:
|
||||
|
||||
if API.EXTENSIONS.xtts:
|
||||
if Sys.EXTENSIONS.xtts:
|
||||
from TTS.api import TTS
|
||||
|
||||
if output_path:
|
||||
|
@ -368,7 +363,7 @@ async def local_tts(
|
|||
|
||||
for i, segment in enumerate(segments):
|
||||
segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav"
|
||||
debug(f"Segment file path: {segment_file_path}")
|
||||
l.debug(f"Segment file path: {segment_file_path}")
|
||||
|
||||
# Run TTS in a separate thread
|
||||
await asyncio.to_thread(
|
||||
|
@ -379,7 +374,7 @@ async def local_tts(
|
|||
speaker_wav=[voice_file_path],
|
||||
language="en"
|
||||
)
|
||||
debug(f"Segment file generated: {segment_file_path}")
|
||||
l.debug(f"Segment file generated: {segment_file_path}")
|
||||
|
||||
# Load and combine audio in a separate thread
|
||||
segment_audio = await asyncio.to_thread(AudioSegment.from_wav, segment_file_path)
|
||||
|
@ -398,7 +393,7 @@ async def local_tts(
|
|||
return file_path
|
||||
|
||||
else:
|
||||
warn(f"local_tts called but xtts module disabled!")
|
||||
l.warning(f"local_tts called but xtts module disabled!")
|
||||
return None
|
||||
|
||||
|
||||
|
@ -421,7 +416,7 @@ async def stream_tts(text_content: str, speed: float, voice: str, voice_file) ->
|
|||
|
||||
async def generate_tts(text: str, speed: float, voice_file_path: str) -> str:
|
||||
|
||||
if API.EXTENSIONS.xtts:
|
||||
if Sys.EXTENSIONS.xtts:
|
||||
from TTS.api import TTS
|
||||
|
||||
output_dir = tempfile.mktemp(suffix=".wav", dir=tempfile.gettempdir())
|
||||
|
@ -432,7 +427,7 @@ async def generate_tts(text: str, speed: float, voice_file_path: str) -> str:
|
|||
return output_dir
|
||||
|
||||
else:
|
||||
warn(f"generate_tts called but xtts module disabled!")
|
||||
l.warning(f"generate_tts called but xtts module disabled!")
|
||||
return None
|
||||
|
||||
|
||||
|
@ -466,7 +461,7 @@ def split_text(text, target_length=35, max_length=50):
|
|||
|
||||
if segment_length + len(sentence_words) > max_length:
|
||||
segments.append(' '.join(current_segment))
|
||||
debug(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}")
|
||||
l.debug(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}")
|
||||
|
||||
current_segment = [sentence]
|
||||
else:
|
||||
|
@ -474,7 +469,7 @@ def split_text(text, target_length=35, max_length=50):
|
|||
|
||||
if current_segment:
|
||||
segments.append(' '.join(current_segment))
|
||||
debug(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}")
|
||||
l.debug(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}")
|
||||
|
||||
return segments
|
||||
|
||||
|
@ -486,7 +481,7 @@ def clean_text_for_tts(text: str) -> str:
|
|||
text = re.sub(r'\s+', ' ', text).strip()
|
||||
return text
|
||||
else:
|
||||
debug(f"No text received.")
|
||||
l.debug(f"No text received.")
|
||||
|
||||
|
||||
def copy_to_podcast_dir(file_path):
|
||||
|
|
|
@ -15,17 +15,13 @@ from typing import Dict
|
|||
from datetime import datetime as dt_datetime, date as dt_date
|
||||
from shapely.wkb import loads
|
||||
from binascii import unhexlify
|
||||
from sijapi import L, VISUALCROSSING_API_KEY, TZ, API, GEO
|
||||
from sijapi import VISUALCROSSING_API_KEY, TZ, Sys, GEO, Db
|
||||
from sijapi.utilities import haversine
|
||||
from sijapi.routers import gis
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
weather = APIRouter()
|
||||
logger = L.get_module_logger("weather")
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
|
||||
|
||||
@weather.get("/weather/refresh", response_class=JSONResponse)
|
||||
|
@ -48,7 +44,7 @@ async def get_refreshed_weather(
|
|||
tz = await GEO.tz_at(lat, lon)
|
||||
date_time = await gis.dt(date, tz)
|
||||
|
||||
debug(f"Passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
|
||||
l.debug(f"Passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
|
||||
day = await get_weather(date_time, lat, lon, force_refresh=True)
|
||||
|
||||
if day is None:
|
||||
|
@ -67,12 +63,12 @@ async def get_refreshed_weather(
|
|||
return JSONResponse(content={"weather": day_dict}, status_code=200)
|
||||
|
||||
except HTTPException as e:
|
||||
err(f"HTTP Exception in get_refreshed_weather: {e.detail}")
|
||||
l.error(f"HTTP Exception in get_refreshed_weather: {e.detail}")
|
||||
return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code)
|
||||
|
||||
except Exception as e:
|
||||
err(f"Unexpected error in get_refreshed_weather: {str(e)}")
|
||||
err(f"Traceback: {traceback.format_exc()}")
|
||||
l.error(f"Unexpected error in get_refreshed_weather: {str(e)}")
|
||||
l.error(f"Traceback: {traceback.format_exc()}")
|
||||
return JSONResponse(content={"detail": "An unexpected error occurred"}, status_code=500)
|
||||
|
||||
|
||||
|
@ -84,7 +80,7 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
|
|||
try:
|
||||
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
|
||||
if daily_weather_data:
|
||||
debug(f"Daily weather data from db: {daily_weather_data}")
|
||||
l.debug(f"Daily weather data from db: {daily_weather_data}")
|
||||
last_updated = str(daily_weather_data['DailyWeather'].get('last_updated'))
|
||||
last_updated = await gis.dt(last_updated)
|
||||
stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location'))
|
||||
|
@ -93,19 +89,19 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
|
|||
|
||||
hourly_weather = daily_weather_data.get('HourlyWeather')
|
||||
request_haversine = haversine(latitude, longitude, stored_lat, stored_lon)
|
||||
debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")
|
||||
l.debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")
|
||||
|
||||
if last_updated and (date_time <= dt_datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0:
|
||||
debug(f"Using existing data")
|
||||
l.debug(f"Using existing data")
|
||||
fetch_new_data = False
|
||||
else:
|
||||
fetch_new_data = True
|
||||
except Exception as e:
|
||||
err(f"Error checking existing weather data: {e}")
|
||||
l.error(f"Error checking existing weather data: {e}")
|
||||
fetch_new_data = True
|
||||
|
||||
if fetch_new_data:
|
||||
debug(f"Fetching new weather data")
|
||||
l.debug(f"Fetching new weather data")
|
||||
request_date_str = date_time.strftime("%Y-%m-%d")
|
||||
url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"
|
||||
|
||||
|
@ -114,9 +110,14 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
|
|||
response = await client.get(url)
|
||||
if response.status_code == 200:
|
||||
weather_data = response.json()
|
||||
store_result = await store_weather_to_db(date_time, weather_data)
|
||||
if store_result != "SUCCESS":
|
||||
raise HTTPException(status_code=500, detail=f"Failed to store weather data: {store_result}")
|
||||
|
||||
try:
|
||||
store_result = await store_weather_to_db(date_time, weather_data)
|
||||
if store_result != "SUCCESS":
|
||||
raise HTTPException(status_code=500, detail=f"Failed to store weather data: {store_result}")
|
||||
except Exception as e:
|
||||
l.error(f"Error storing weather data: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Error storing weather data: {str(e)}")
|
||||
|
||||
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
|
||||
if daily_weather_data is None:
|
||||
|
@ -126,8 +127,8 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
|
|||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
err(f"Exception during API call or data storage: {e}")
|
||||
err(f"Traceback: {traceback.format_exc()}")
|
||||
l.error(f"Exception during API call or data storage: {e}")
|
||||
l.error(f"Traceback: {traceback.format_exc()}")
|
||||
raise HTTPException(status_code=500, detail=f"Error fetching or storing weather data: {str(e)}")
|
||||
|
||||
if daily_weather_data is None:
|
||||
|
@ -136,7 +137,7 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
|
|||
return daily_weather_data
|
||||
|
||||
|
||||
|
||||
# weather.py
|
||||
|
||||
async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
||||
try:
|
||||
|
@ -154,46 +155,46 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
|||
elevation = await GEO.elevation(latitude, longitude)
|
||||
location_point = f"POINTZ({longitude} {latitude} {elevation})" if elevation else None
|
||||
|
||||
daily_weather_params = [
|
||||
location_point,
|
||||
await gis.dt(day_data.get('sunriseEpoch')),
|
||||
day_data.get('sunriseEpoch'),
|
||||
await gis.dt(day_data.get('sunsetEpoch')),
|
||||
day_data.get('sunsetEpoch'),
|
||||
day_data.get('description'),
|
||||
day_data.get('tempmax'),
|
||||
day_data.get('tempmin'),
|
||||
day_data.get('uvindex'),
|
||||
day_data.get('winddir'),
|
||||
day_data.get('windspeed'),
|
||||
day_data.get('icon'),
|
||||
dt_datetime.now(tz),
|
||||
await gis.dt(day_data.get('datetimeEpoch')),
|
||||
day_data.get('datetimeEpoch'),
|
||||
day_data.get('temp'),
|
||||
day_data.get('feelslikemax'),
|
||||
day_data.get('feelslikemin'),
|
||||
day_data.get('feelslike'),
|
||||
day_data.get('dew'),
|
||||
day_data.get('humidity'),
|
||||
day_data.get('precip'),
|
||||
day_data.get('precipprob'),
|
||||
day_data.get('precipcover'),
|
||||
preciptype_array,
|
||||
day_data.get('snow'),
|
||||
day_data.get('snowdepth'),
|
||||
day_data.get('windgust'),
|
||||
day_data.get('pressure'),
|
||||
day_data.get('cloudcover'),
|
||||
day_data.get('visibility'),
|
||||
day_data.get('solarradiation'),
|
||||
day_data.get('solarenergy'),
|
||||
day_data.get('severerisk', 0),
|
||||
day_data.get('moonphase'),
|
||||
day_data.get('conditions'),
|
||||
stations_array,
|
||||
day_data.get('source')
|
||||
]
|
||||
daily_weather_params = {
|
||||
'location': location_point,
|
||||
'sunrise': await gis.dt(day_data.get('sunriseEpoch')),
|
||||
'sunriseepoch': day_data.get('sunriseEpoch'),
|
||||
'sunset': await gis.dt(day_data.get('sunsetEpoch')),
|
||||
'sunsetepoch': day_data.get('sunsetEpoch'),
|
||||
'description': day_data.get('description'),
|
||||
'tempmax': day_data.get('tempmax'),
|
||||
'tempmin': day_data.get('tempmin'),
|
||||
'uvindex': day_data.get('uvindex'),
|
||||
'winddir': day_data.get('winddir'),
|
||||
'windspeed': day_data.get('windspeed'),
|
||||
'icon': day_data.get('icon'),
|
||||
'last_updated': dt_datetime.now(tz),
|
||||
'datetime': await gis.dt(day_data.get('datetimeEpoch')),
|
||||
'datetimeepoch': day_data.get('datetimeEpoch'),
|
||||
'temp': day_data.get('temp'),
|
||||
'feelslikemax': day_data.get('feelslikemax'),
|
||||
'feelslikemin': day_data.get('feelslikemin'),
|
||||
'feelslike': day_data.get('feelslike'),
|
||||
'dew': day_data.get('dew'),
|
||||
'humidity': day_data.get('humidity'),
|
||||
'precip': day_data.get('precip'),
|
||||
'precipprob': day_data.get('precipprob'),
|
||||
'precipcover': day_data.get('precipcover'),
|
||||
'preciptype': preciptype_array,
|
||||
'snow': day_data.get('snow'),
|
||||
'snowdepth': day_data.get('snowdepth'),
|
||||
'windgust': day_data.get('windgust'),
|
||||
'pressure': day_data.get('pressure'),
|
||||
'cloudcover': day_data.get('cloudcover'),
|
||||
'visibility': day_data.get('visibility'),
|
||||
'solarradiation': day_data.get('solarradiation'),
|
||||
'solarenergy': day_data.get('solarenergy'),
|
||||
'severerisk': day_data.get('severerisk', 0),
|
||||
'moonphase': day_data.get('moonphase'),
|
||||
'conditions': day_data.get('conditions'),
|
||||
'stations': stations_array,
|
||||
'source': day_data.get('source')
|
||||
}
|
||||
|
||||
daily_weather_query = '''
|
||||
INSERT INTO dailyweather (
|
||||
|
@ -205,54 +206,58 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
|||
solarradiation, solarenergy, severerisk, moonphase, conditions,
|
||||
stations, source
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
|
||||
$16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28,
|
||||
$29, $30, $31, $32, $33, $34, $35, $36, $37, $38
|
||||
:location, :sunrise, :sunriseepoch, :sunset, :sunsetepoch, :description,
|
||||
:tempmax, :tempmin, :uvindex, :winddir, :windspeed, :icon, :last_updated,
|
||||
:datetime, :datetimeepoch, :temp, :feelslikemax, :feelslikemin, :feelslike,
|
||||
:dew, :humidity, :precip, :precipprob, :precipcover, :preciptype,
|
||||
:snow, :snowdepth, :windgust, :pressure, :cloudcover, :visibility,
|
||||
:solarradiation, :solarenergy, :severerisk, :moonphase, :conditions,
|
||||
:stations, :source
|
||||
) RETURNING id
|
||||
'''
|
||||
|
||||
daily_weather_result = await API.execute_write_query(daily_weather_query, *daily_weather_params, table_name="dailyweather")
|
||||
daily_weather_result = await Db.write(daily_weather_query, **daily_weather_params, table_name="dailyweather")
|
||||
|
||||
if not daily_weather_result:
|
||||
if daily_weather_result is None:
|
||||
raise ValueError("Failed to insert daily weather data: no result returned")
|
||||
|
||||
daily_weather_id = daily_weather_result[0]['id']
|
||||
debug(f"Inserted daily weather data with id: {daily_weather_id}")
|
||||
daily_weather_id = daily_weather_result.fetchone()[0]
|
||||
l.debug(f"Inserted daily weather data with id: {daily_weather_id}")
|
||||
|
||||
# Hourly weather insertion
|
||||
if 'hours' in day_data:
|
||||
debug(f"Processing {len(day_data['hours'])} hourly records")
|
||||
l.debug(f"Processing {len(day_data['hours'])} hourly records")
|
||||
for hour_data in day_data['hours']:
|
||||
hour_preciptype_array = hour_data.get('preciptype', []) or []
|
||||
hour_stations_array = hour_data.get('stations', []) or []
|
||||
hourly_weather_params = [
|
||||
daily_weather_id,
|
||||
await gis.dt(hour_data.get('datetimeEpoch')),
|
||||
hour_data.get('datetimeEpoch'),
|
||||
hour_data.get('temp'),
|
||||
hour_data.get('feelslike'),
|
||||
hour_data.get('humidity'),
|
||||
hour_data.get('dew'),
|
||||
hour_data.get('precip'),
|
||||
hour_data.get('precipprob'),
|
||||
hour_preciptype_array,
|
||||
hour_data.get('snow'),
|
||||
hour_data.get('snowdepth'),
|
||||
hour_data.get('windgust'),
|
||||
hour_data.get('windspeed'),
|
||||
hour_data.get('winddir'),
|
||||
hour_data.get('pressure'),
|
||||
hour_data.get('cloudcover'),
|
||||
hour_data.get('visibility'),
|
||||
hour_data.get('solarradiation'),
|
||||
hour_data.get('solarenergy'),
|
||||
hour_data.get('uvindex'),
|
||||
hour_data.get('severerisk', 0),
|
||||
hour_data.get('conditions'),
|
||||
hour_data.get('icon'),
|
||||
hour_stations_array,
|
||||
hour_data.get('source', '')
|
||||
]
|
||||
hourly_weather_params = {
|
||||
'daily_weather_id': str(daily_weather_id), # Convert UUID to string
|
||||
'datetime': await gis.dt(hour_data.get('datetimeEpoch')),
|
||||
'datetimeepoch': hour_data.get('datetimeEpoch'),
|
||||
'temp': hour_data.get('temp'),
|
||||
'feelslike': hour_data.get('feelslike'),
|
||||
'humidity': hour_data.get('humidity'),
|
||||
'dew': hour_data.get('dew'),
|
||||
'precip': hour_data.get('precip'),
|
||||
'precipprob': hour_data.get('precipprob'),
|
||||
'preciptype': hour_preciptype_array,
|
||||
'snow': hour_data.get('snow'),
|
||||
'snowdepth': hour_data.get('snowdepth'),
|
||||
'windgust': hour_data.get('windgust'),
|
||||
'windspeed': hour_data.get('windspeed'),
|
||||
'winddir': hour_data.get('winddir'),
|
||||
'pressure': hour_data.get('pressure'),
|
||||
'cloudcover': hour_data.get('cloudcover'),
|
||||
'visibility': hour_data.get('visibility'),
|
||||
'solarradiation': hour_data.get('solarradiation'),
|
||||
'solarenergy': hour_data.get('solarenergy'),
|
||||
'uvindex': hour_data.get('uvindex'),
|
||||
'severerisk': hour_data.get('severerisk', 0),
|
||||
'conditions': hour_data.get('conditions'),
|
||||
'icon': hour_data.get('icon'),
|
||||
'stations': hour_stations_array,
|
||||
'source': hour_data.get('source', '')
|
||||
}
|
||||
|
||||
hourly_weather_query = '''
|
||||
INSERT INTO hourlyweather (
|
||||
|
@ -262,61 +267,68 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
|||
solarradiation, solarenergy, uvindex, severerisk, conditions,
|
||||
icon, stations, source
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
|
||||
$16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26
|
||||
:daily_weather_id, :datetime, :datetimeepoch, :temp, :feelslike,
|
||||
:humidity, :dew, :precip, :precipprob, :preciptype, :snow, :snowdepth,
|
||||
:windgust, :windspeed, :winddir, :pressure, :cloudcover, :visibility,
|
||||
:solarradiation, :solarenergy, :uvindex, :severerisk, :conditions,
|
||||
:icon, :stations, :source
|
||||
) RETURNING id
|
||||
'''
|
||||
hourly_result = await API.execute_write_query(hourly_weather_query, *hourly_weather_params, table_name="hourlyweather")
|
||||
if not hourly_result:
|
||||
warn(f"Failed to insert hourly weather data for {hour_data.get('datetimeEpoch')}")
|
||||
hourly_result = await Db.write(hourly_weather_query, **hourly_weather_params, table_name="hourlyweather")
|
||||
if hourly_result is None:
|
||||
l.warning(f"Failed to insert hourly weather data for {hour_data.get('datetimeEpoch')}")
|
||||
else:
|
||||
debug(f"Inserted hourly weather data with id: {hourly_result[0]['id']}")
|
||||
hourly_id = hourly_result.fetchone()[0]
|
||||
l.debug(f"Inserted hourly weather data with id: {hourly_id}")
|
||||
|
||||
return "SUCCESS"
|
||||
except Exception as e:
|
||||
err(f"Error in weather storage: {e}")
|
||||
err(f"Traceback: {traceback.format_exc()}")
|
||||
l.error(f"Error in weather storage: {e}")
|
||||
l.error(f"Traceback: {traceback.format_exc()}")
|
||||
return "FAILURE"
|
||||
|
||||
|
||||
|
||||
async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude: float):
    """Retrieve stored daily and hourly weather for a date near a coordinate.

    Looks up the ``dailyweather`` row whose date matches *date_time* within
    8046.72 m (5 miles) of (latitude, longitude), preferring the closest
    location, then loads the associated ``hourlyweather`` rows ordered by time.

    Returns:
        dict with keys 'DailyWeather' (single row) and 'HourlyWeather'
        (list of rows), or None when nothing is found or an error occurs.
    """
    l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in get_weather_from_db.")
    query_date = date_time.date()
    try:
        # Closest daily record within 5 miles of the requested point.
        # ST_MakePoint takes (x=longitude, y=latitude).
        daily_query = '''
            SELECT * FROM dailyweather
            WHERE DATE(datetime) = :query_date
            AND ST_DWithin(location::geography, ST_MakePoint(:longitude,:latitude)::geography, 8046.72)
            ORDER BY ST_Distance(location, ST_MakePoint(:longitude2, :latitude2)::geography) ASC
            LIMIT 1
        '''
        daily_weather_records = await Db.read(
            daily_query,
            query_date=query_date,
            longitude=longitude,
            latitude=latitude,
            longitude2=longitude,
            latitude2=latitude,
            table_name='dailyweather'
        )

        if not daily_weather_records:
            # Plain string: no placeholders, so no f-prefix needed.
            l.debug("No daily weather data retrieved from database.")
            return None

        daily_weather_data = daily_weather_records[0]

        # Hourly rows reference the daily row by id; the id column is cast to
        # text because the bound parameter is passed as a string
        # (presumably the id is a UUID — TODO confirm against the schema).
        hourly_query = '''
            SELECT * FROM hourlyweather
            WHERE daily_weather_id::text = :daily_weather_id
            ORDER BY datetime ASC
        '''
        hourly_weather_records = await Db.read(
            hourly_query,
            daily_weather_id=str(daily_weather_data['id']),
            table_name='hourlyweather'
        )

        day = {
            'DailyWeather': daily_weather_data,
            'HourlyWeather': hourly_weather_records,
        }

        l.debug(f"Retrieved weather data for {date_time.date()}")
        return day

    except Exception as e:
        l.error(f"Unexpected error occurred in get_weather_from_db: {e}")
        l.error(f"Traceback: {traceback.format_exc()}")
        return None
|
76
sijapi/serialization.py
Normal file
76
sijapi/serialization.py
Normal file
|
@ -0,0 +1,76 @@
|
|||
# serialization.py
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
from decimal import Decimal
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from datetime import datetime as dt_datetime, date, time
|
||||
from .logs import get_logger
|
||||
|
||||
l = get_logger(__name__)
|
||||
|
||||
def serialize(obj: Any) -> Any:
    """Serializer for database inputs that keeps datetime objects intact.

    datetime/date/time values pass through unchanged (the database driver
    handles them natively); everything else is delegated to json_serial.
    """
    passthrough = (dt_datetime, date, time)
    return obj if isinstance(obj, passthrough) else json_serial(obj)
|
||||
|
||||
|
||||
def json_serial(obj: Any) -> Any:
    """JSON serializer for objects not serializable by default json code.

    Recursively converts *obj* into JSON-compatible primitives: datetime-like
    values become ISO-8601 strings, containers are walked element by element,
    and NumPy / pandas values are unwrapped to native Python types.

    Raises:
        TypeError: when *obj* has no known conversion.
    """
    if isinstance(obj, (dt_datetime, date)):
        return obj.isoformat()
    if isinstance(obj, time):
        return obj.isoformat()
    if isinstance(obj, Decimal):
        # NOTE: float conversion may lose precision for high-precision Decimals.
        return float(obj)
    if isinstance(obj, UUID):
        return str(obj)
    if isinstance(obj, bytes):
        return obj.decode('utf-8')
    if isinstance(obj, Path):
        return str(obj)
    if isinstance(obj, (str, int, float, bool)):
        return obj
    if isinstance(obj, list):
        return [json_serial(item) for item in obj]
    if isinstance(obj, dict):
        # Keys are serialized too; a key that serializes to an unhashable
        # value (e.g. a list) will raise here.
        return {json_serial(k): json_serial(v) for k, v in obj.items()}
    if isinstance(obj, (set, frozenset)):
        return [json_serial(item) for item in obj]
    if isinstance(obj, tuple):
        return list(json_serial(item) for item in obj)
    if isinstance(obj, np.generic):
        # FIX: NumPy scalars (np.int64, np.bool_, ...) previously fell through
        # to TypeError; unwrap them to native Python values.
        return json_serial(obj.item())
    if isinstance(obj, np.ndarray):
        return json_serial(obj.tolist())
    if isinstance(obj, (pd.DataFrame, pd.Series)):
        return json_serial(obj.to_dict())
    if obj is None:
        return None
    if isinstance(obj, complex):
        return [obj.real, obj.imag]
    if isinstance(obj, Enum):
        return obj.value
    if isinstance(obj, range):
        return {'start': obj.start, 'stop': obj.stop, 'step': obj.step}
    if hasattr(obj, '__iter__'):
        return list(json_serial(item) for item in obj)
    if hasattr(obj, '__dict__'):
        # Last resort: serialize the public attributes of arbitrary objects.
        return {k: json_serial(v) for k, v in obj.__dict__.items() if not k.startswith('_')}
    raise TypeError(f"Type {type(obj)} not serializable")
|
||||
|
||||
|
||||
def json_dumps(obj: Any) -> str:
    """Serialize *obj* to a JSON-formatted str using the custom serializer.

    Delegates unknown types to json_serial via json.dumps's ``default`` hook.
    """
    encoded = json.dumps(obj, default=json_serial)
    return encoded
|
||||
|
||||
def json_loads(json_str: str) -> Any:
    """Deserialize *json_str* to a Python object.

    Thin wrapper over json.loads kept for symmetry with json_dumps.
    """
    parsed = json.loads(json_str)
    return parsed
|
|
@ -1,56 +1,47 @@
|
|||
# utilities.py
|
||||
import re
|
||||
import os
|
||||
from fastapi import Form
|
||||
import re
|
||||
import json
|
||||
import io
|
||||
from io import BytesIO
|
||||
import base64
|
||||
import math
|
||||
import paramiko
|
||||
from dateutil import parser
|
||||
from pathlib import Path
|
||||
import filetype
|
||||
import shutil
|
||||
import uuid
|
||||
import hashlib
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import pandas as pd
|
||||
import ipaddress
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from dateutil import parser
|
||||
from urllib.parse import urlparse
|
||||
from PyPDF2 import PdfReader
|
||||
from better_profanity import profanity
|
||||
from adblockparser import AdblockRules
|
||||
from pdfminer.high_level import extract_text as pdfminer_extract_text
|
||||
import pytesseract
|
||||
from readability import Document
|
||||
from readability import Document as ReadabilityDocument
|
||||
from pdf2image import convert_from_path
|
||||
from datetime import datetime as dt_datetime, date, time
|
||||
from typing import Optional, Union, Tuple, List, Any
|
||||
import asyncio
|
||||
from PIL import Image
|
||||
import pandas as pd
|
||||
import ipaddress
|
||||
from scipy.spatial import cKDTree
|
||||
from dateutil.parser import parse as dateutil_parse
|
||||
from docx import Document
|
||||
import aiohttp
|
||||
from bs4 import BeautifulSoup
|
||||
from readability import Document as ReadabilityDocument
|
||||
from markdownify import markdownify as md
|
||||
from sshtunnel import SSHTunnelForwarder
|
||||
from urllib.parse import urlparse
|
||||
from fastapi import Depends, HTTPException, Request, UploadFile
|
||||
from fastapi import Depends, HTTPException, Request, UploadFile, Form
|
||||
from fastapi.security.api_key import APIKeyHeader
|
||||
|
||||
from sijapi import L, API, Archivist, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_PATH_LENGTH, ARCHIVE_DIR
|
||||
|
||||
logger = L.get_module_logger('utilities')
|
||||
def debug(text: str): logger.debug(text)
|
||||
def info(text: str): logger.info(text)
|
||||
def warn(text: str): logger.warning(text)
|
||||
def err(text: str): logger.error(text)
|
||||
def crit(text: str): logger.critical(text)
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
import pytesseract
|
||||
from sijapi import Sys, Dir, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_PATH_LENGTH, ARCHIVE_DIR
|
||||
from sijapi.logs import get_logger
|
||||
l = get_logger(__name__)
|
||||
|
||||
|
||||
def assemble_archive_path(filename: str, extension: str = None, date_time: dt_datetime = None, subdir: str = None) -> Tuple[Path, Path]:
|
||||
|
@ -79,7 +70,7 @@ def assemble_archive_path(filename: str, extension: str = None, date_time: dt_da
|
|||
filename = f"{day_short} {timestamp} {sanitized_base}{final_extension}"
|
||||
|
||||
relative_path = Path(year) / month / day / filename
|
||||
absolute_path = Archivist.dir / relative_path
|
||||
absolute_path = Dir.ARCHIVE / relative_path
|
||||
|
||||
# Ensure the total path length doesn't exceed MAX_PATH_LENGTH
|
||||
while len(str(absolute_path)) > MAX_PATH_LENGTH and len(sanitized_base) > 0:
|
||||
|
@ -138,7 +129,7 @@ def assemble_journal_path(date_time: dt_datetime, subdir: str = None, filename:
|
|||
relative_path = relative_path / filename
|
||||
|
||||
else:
|
||||
debug(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.")
|
||||
l.debug(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.")
|
||||
return None, None
|
||||
|
||||
absolute_path = OBSIDIAN_VAULT_DIR / relative_path
|
||||
|
@ -194,7 +185,7 @@ def contains_profanity(content: str, threshold: float = 0.01, custom_words: Opti
|
|||
content_profanity_count = sum(1 for word in word_list if profanity.contains_profanity(word))
|
||||
content_profanity_ratio = content_profanity_count / len(word_list) if word_list else 0
|
||||
|
||||
debug(f"Profanity ratio for content: {content_profanity_ratio}")
|
||||
l.debug(f"Profanity ratio for content: {content_profanity_ratio}")
|
||||
return content_profanity_ratio >= threshold
|
||||
|
||||
|
||||
|
@ -204,15 +195,15 @@ def load_filter_lists(blocklists_dir: Path):
|
|||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as file:
|
||||
rules.extend(file.read().splitlines())
|
||||
info(f"Loaded blocklist: {file_path.name}")
|
||||
l.info(f"Loaded blocklist: {file_path.name}")
|
||||
except Exception as e:
|
||||
err(f"Error loading blocklist {file_path.name}: {str(e)}")
|
||||
l.error(f"Error loading blocklist {file_path.name}: {str(e)}")
|
||||
return rules
|
||||
|
||||
|
||||
def initialize_adblock_rules(blocklists_dir: Path):
|
||||
rules = load_filter_lists(blocklists_dir)
|
||||
info(f"Initialized AdblockRules with {len(rules)} rules")
|
||||
l.info(f"Initialized AdblockRules with {len(rules)} rules")
|
||||
return AdblockRules(rules)
|
||||
|
||||
|
||||
|
@ -228,14 +219,14 @@ def get_extension(file):
|
|||
return file_extension
|
||||
|
||||
except Exception as e:
|
||||
err(f"Unable to get extension of {file}")
|
||||
l.error(f"Unable to get extension of {file}")
|
||||
raise e
|
||||
|
||||
|
||||
|
||||
def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LENGTH):
|
||||
"""Sanitize a string to be used as a safe filename while protecting the file extension."""
|
||||
debug(f"Filename before sanitization: {text}")
|
||||
l.debug(f"Filename before sanitization: {text}")
|
||||
|
||||
# Ensure text is a string
|
||||
text = str(text)
|
||||
|
@ -253,7 +244,7 @@ def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LE
|
|||
base_name = base_name[:max_base_length - 5].rstrip()
|
||||
final_filename = base_name + extension
|
||||
|
||||
debug(f"Filename after sanitization: {final_filename}")
|
||||
l.debug(f"Filename after sanitization: {final_filename}")
|
||||
return final_filename
|
||||
|
||||
|
||||
|
@ -264,16 +255,16 @@ def check_file_name(file_name, max_length=255):
|
|||
needs_sanitization = False
|
||||
|
||||
if len(file_name) > max_length:
|
||||
debug(f"Filename exceeds maximum length of {max_length}: {file_name}")
|
||||
l.debug(f"Filename exceeds maximum length of {max_length}: {file_name}")
|
||||
needs_sanitization = True
|
||||
if re.search(ALLOWED_FILENAME_CHARS, file_name):
|
||||
debug(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}")
|
||||
l.debug(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}")
|
||||
needs_sanitization = True
|
||||
if re.search(r'\s{2,}', file_name):
|
||||
debug(f"Filename contains multiple consecutive spaces: {file_name}")
|
||||
l.debug(f"Filename contains multiple consecutive spaces: {file_name}")
|
||||
needs_sanitization = True
|
||||
if file_name != file_name.strip():
|
||||
debug(f"Filename has leading or trailing spaces: {file_name}")
|
||||
l.debug(f"Filename has leading or trailing spaces: {file_name}")
|
||||
needs_sanitization = True
|
||||
|
||||
return needs_sanitization
|
||||
|
@ -316,13 +307,13 @@ async def ocr_pdf(file_path: str) -> str:
|
|||
texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, image) for image in images))
|
||||
return ' '.join(texts)
|
||||
except Exception as e:
|
||||
err(f"Error during OCR: {str(e)}")
|
||||
l.error(f"Error during OCR: {str(e)}")
|
||||
return ""
|
||||
|
||||
|
||||
async def extract_text_from_pdf(file_path: str) -> str:
|
||||
if not await is_valid_pdf(file_path):
|
||||
err(f"Invalid PDF file: {file_path}")
|
||||
l.error(f"Invalid PDF file: {file_path}")
|
||||
return ""
|
||||
|
||||
text = ''
|
||||
|
@ -340,7 +331,7 @@ async def extract_text_from_pdf(file_path: str) -> str:
|
|||
if text and not should_use_ocr(text, num_pages):
|
||||
return clean_text(text)
|
||||
except Exception as e:
|
||||
err(f"Error extracting text with PyPDF2: {str(e)}")
|
||||
l.error(f"Error extracting text with PyPDF2: {str(e)}")
|
||||
|
||||
# If PyPDF2 extraction fails or is insufficient, fall back to pdfminer.six
|
||||
try:
|
||||
|
@ -348,10 +339,10 @@ async def extract_text_from_pdf(file_path: str) -> str:
|
|||
if text_pdfminer and not should_use_ocr(text_pdfminer, num_pages):
|
||||
return clean_text(text_pdfminer)
|
||||
except Exception as e:
|
||||
err(f"Error extracting text with pdfminer.six: {e}")
|
||||
l.error(f"Error extracting text with pdfminer.six: {e}")
|
||||
|
||||
# If both methods fail or are deemed insufficient, use OCR as the last resort
|
||||
debug("Falling back to OCR for text extraction...")
|
||||
l.debug("Falling back to OCR for text extraction...")
|
||||
return await ocr_pdf(file_path)
|
||||
|
||||
async def is_valid_pdf(file_path: str) -> bool:
|
||||
|
@ -360,12 +351,12 @@ async def is_valid_pdf(file_path: str) -> bool:
|
|||
kind = filetype.guess(file_path)
|
||||
return kind.mime == 'application/pdf'
|
||||
except Exception as e:
|
||||
err(f"Error checking file type: {e}")
|
||||
l.error(f"Error checking file type: {e}")
|
||||
return False
|
||||
|
||||
async def extract_text_from_pdf(file_path: str) -> str:
|
||||
if not await is_valid_pdf(file_path):
|
||||
err(f"Invalid PDF file: {file_path}")
|
||||
l.error(f"Invalid PDF file: {file_path}")
|
||||
return ""
|
||||
|
||||
text = ''
|
||||
|
@ -377,23 +368,23 @@ async def extract_text_from_pdf(file_path: str) -> str:
|
|||
if text.strip(): # Successfully extracted text
|
||||
return clean_text(text)
|
||||
except Exception as e:
|
||||
err(f"Error extracting text with PyPDF2: {str(e)}")
|
||||
l.error(f"Error extracting text with PyPDF2: {str(e)}")
|
||||
|
||||
try:
|
||||
text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
|
||||
if text_pdfminer.strip(): # Successfully extracted text
|
||||
return clean_text(text_pdfminer)
|
||||
except Exception as e:
|
||||
err(f"Error extracting text with pdfminer.six: {str(e)}")
|
||||
l.error(f"Error extracting text with pdfminer.six: {str(e)}")
|
||||
|
||||
# Fall back to OCR
|
||||
debug("Falling back to OCR for text extraction...")
|
||||
l.debug("Falling back to OCR for text extraction...")
|
||||
try:
|
||||
images = convert_from_path(file_path)
|
||||
ocr_texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, img) for img in images))
|
||||
return ' '.join(ocr_texts).strip()
|
||||
except Exception as e:
|
||||
err(f"OCR failed: {str(e)}")
|
||||
l.error(f"OCR failed: {str(e)}")
|
||||
return ""
|
||||
|
||||
async def extract_text_from_docx(file_path: str) -> str:
|
||||
|
@ -496,7 +487,7 @@ def encode_image_to_base64(image_path):
|
|||
base64_str = base64.b64encode(byte_data).decode('utf-8')
|
||||
return base64_str
|
||||
else:
|
||||
debug(f"Error: File does not exist at {image_path}")
|
||||
l.debug(f"Error: File does not exist at {image_path}")
|
||||
|
||||
def resize_and_convert_image(image_path, max_size=2160, quality=80):
|
||||
with Image.open(image_path) as img:
|
||||
|
@ -534,13 +525,13 @@ def download_file(url, folder):
|
|||
with open(filepath, 'wb') as f:
|
||||
f.write(response.content)
|
||||
else:
|
||||
err(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}")
|
||||
l.error(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}")
|
||||
return None
|
||||
else:
|
||||
err(f"Failed to download image: {url}, status code: {response.status_code}")
|
||||
l.error(f"Failed to download image: {url}, status code: {response.status_code}")
|
||||
return None
|
||||
except Exception as e:
|
||||
err(f"Failed to download image: {url}, error: {str(e)}")
|
||||
l.error(f"Failed to download image: {url}, error: {str(e)}")
|
||||
return None
|
||||
return filename
|
||||
|
||||
|
@ -599,7 +590,7 @@ async def run_ssh_command(server, command):
|
|||
ssh.close()
|
||||
return output, error
|
||||
except Exception as e:
|
||||
err(f"SSH command failed for server {server.id}: {str(e)}")
|
||||
l.error(f"SSH command failed for server {server.id}: {str(e)}")
|
||||
raise
|
||||
|
||||
|
||||
|
@ -611,7 +602,7 @@ async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]
|
|||
async with session.get(url) as response:
|
||||
html_content = await response.text()
|
||||
else:
|
||||
err(f"Unable to convert nothing to markdown.")
|
||||
l.error(f"Unable to convert nothing to markdown.")
|
||||
return None
|
||||
|
||||
# Use readability to extract the main content
|
||||
|
@ -630,33 +621,3 @@ async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]
|
|||
|
||||
return markdown_content
|
||||
|
||||
|
||||
def json_serial(obj: Any) -> Any:
|
||||
"""JSON serializer for objects not serializable by default json code"""
|
||||
if isinstance(obj, (datetime, date)):
|
||||
return obj.isoformat()
|
||||
if isinstance(obj, time):
|
||||
return obj.isoformat()
|
||||
if isinstance(obj, Decimal):
|
||||
return float(obj)
|
||||
if isinstance(obj, UUID):
|
||||
return str(obj)
|
||||
if isinstance(obj, bytes):
|
||||
return obj.decode('utf-8')
|
||||
if isinstance(obj, Path):
|
||||
return str(obj)
|
||||
if hasattr(obj, '__dict__'):
|
||||
return obj.__dict__
|
||||
raise TypeError(f"Type {type(obj)} not serializable")
|
||||
|
||||
def json_dumps(obj: Any) -> str:
|
||||
"""
|
||||
Serialize obj to a JSON formatted str using the custom serializer.
|
||||
"""
|
||||
return json.dumps(obj, default=json_serial)
|
||||
|
||||
def json_loads(json_str: str) -> Any:
|
||||
"""
|
||||
Deserialize json_str to a Python object.
|
||||
"""
|
||||
return json.loads(json_str)
|
Loading…
Reference in a new issue