Major update to database and logging methods

parent ee6ee1ed87 · commit b60e60ec1e

49 changed files with 46694 additions and 2266 deletions
@@ -2,44 +2,41 @@
 import os
 from pathlib import Path
-import ipaddress
-import multiprocessing
 from dotenv import load_dotenv
-from dateutil import tz
-from pathlib import Path
-from .classes import Logger, Configuration, APIConfig, Database, DirConfig, Geocoder
+from .logs import L, get_logger

-# INITIALization
 BASE_DIR = Path(__file__).resolve().parent
 CONFIG_DIR = BASE_DIR / "config"
 ENV_PATH = CONFIG_DIR / ".env"
 load_dotenv(ENV_PATH)
 LOGS_DIR = BASE_DIR / "logs"
 os.makedirs(LOGS_DIR, exist_ok=True)
-L = Logger("Central", LOGS_DIR)
+L.init('sys', LOGS_DIR)
+l = get_logger("init")

+import ipaddress
+import multiprocessing
+from dateutil import tz
+from pathlib import Path
+from .database import Database
+from .classes import Config, SysConfig, DirConfig, Geocoder

 # API essentials
-API = APIConfig.load('sys', 'secrets')
-Dir = DirConfig.load('dirs')
-Db = Database.load('sys')
+Sys = SysConfig.init('sys', 'secrets')
+Dir = DirConfig.init('dirs')
+l.debug(f"Dir configuration initialized: {Dir}")
+l.debug(f"ROUTER path: {Dir.ROUTER}")
+Db = Database.init('db')

-# HOST = f"{API.BIND}:{API.PORT}"
-# LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost']
-# SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255')
-
-MAX_CPU_CORES = min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count()/2))), multiprocessing.cpu_count())
-
-IMG = Configuration.load('img', 'secrets', Dir)
-Llm = Configuration.load('llm', 'secrets', Dir)
-News = Configuration.load('news', 'secrets', Dir)
-Archivist = Configuration.load('archivist', 'secrets', Dir)
-Scrape = Configuration.load('scrape', 'secrets', Dir)
-Serve = Configuration.load('serve', 'secrets', Dir)
-Tts = Configuration.load('tts', 'secrets', Dir)
+Img = Config.init('img', 'secrets', Dir)
+Llm = Config.init('llm', 'secrets', Dir)
+News = Config.init('news', 'secrets', Dir)
+Archivist = Config.init('archivist', 'secrets', Dir)
+Scrape = Config.init('scrape', 'secrets', Dir)
+Serve = Config.init('serve', 'secrets', Dir)
+Tts = Config.init('tts', 'secrets', Dir)

 # Directories & general paths
-ROUTER_DIR = BASE_DIR / "routers"
 DATA_DIR = BASE_DIR / "data"
 os.makedirs(DATA_DIR, exist_ok=True)
 ALERTS_DIR = DATA_DIR / "alerts"

@@ -172,7 +169,7 @@ CF_IP = DATA_DIR / "cf_ip.txt" # to be deprecated soon
 CF_DOMAINS_PATH = DATA_DIR / "cf_domains.json" # to be deprecated soon

 # Caddy - not fully implemented
-API.URL = os.getenv("API.URL")
+Sys.URL = os.getenv("Sys.URL")
 CADDY_SERVER = os.getenv('CADDY_SERVER', None)
 CADDYFILE_PATH = os.getenv("CADDYFILE_PATH", "") if CADDY_SERVER is not None else None
 CADDY_API_KEY = os.getenv("CADDY_API_KEY")
@@ -1,81 +1,86 @@
 #!/Users/sij/miniforge3/envs/api/bin/python
 #__main__.py
 from contextlib import asynccontextmanager
-from fastapi import FastAPI, Request, HTTPException, Response
+from fastapi import FastAPI, Request, HTTPException
 from fastapi.responses import JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 from starlette.middleware.base import BaseHTTPMiddleware
-from starlette.requests import ClientDisconnect
 from hypercorn.asyncio import serve
 from hypercorn.config import Config as HypercornConfig
 import sys
 import os
 import traceback
 import asyncio
-import httpx
-import argparse
-import json
 import ipaddress
 import importlib
-from dotenv import load_dotenv
 from pathlib import Path
-from datetime import datetime
 import argparse
-from . import L, API, Db, ROUTER_DIR
+from . import Sys, Db, Dir
+from .logs import L, get_logger

-parser = argparse.ArgumentParser(description='Personal API.')
-parser.add_argument('--log', type=str, default='INFO', help='Set overall log level (e.g., DEBUG, INFO, WARNING)')
-parser.add_argument('--debug', nargs='+', default=[], help='Set DEBUG log level for specific modules')
-parser.add_argument('--test', type=str, help='Load only the specified module.')
-args = parser.parse_args()
+def parse_args():
+    parser = argparse.ArgumentParser(description='Personal API.')
+    parser.add_argument('--log', type=str, default='INFO',
+                        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
+                        help='Set overall log level (e.g., DEBUG, INFO, WARNING)')
+    parser.add_argument('--debug', nargs='+', default=[],
+                        help='Set DEBUG log level for specific modules')
+    parser.add_argument('--info', nargs='+', default=[],
+                        help='Set INFO log level for specific modules')
+    parser.add_argument('--test', type=str, help='Load only the specified module.')
+    return parser.parse_args()
+
+args = parse_args()

+# Setup logging
 L.setup_from_args(args)
-print(f"Debug modules after setup: {L.debug_modules}")
+l = get_logger("main")
+l.info(f"Logging initialized. Debug modules: {L.debug_modules}")
+l.info(f"Command line arguments: {args}")

-logger = L.get_module_logger("main")
-def debug(text: str): logger.debug(text)
-def info(text: str): logger.info(text)
-def warn(text: str): logger.warning(text)
-def err(text: str): logger.error(text)
-def crit(text: str): logger.critical(text)
+l.debug(f"Current working directory: {os.getcwd()}")
+l.debug(f"__file__ path: {__file__}")
+l.debug(f"Absolute path of __file__: {os.path.abspath(__file__)}")

 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Startup
-    crit("sijapi launched")
-    info(f"Arguments: {args}")
+    l.critical("sijapi launched")
+    l.info(f"Arguments: {args}")
+
+    # Log the router directory path
+    l.debug(f"Router directory path: {Dir.ROUTER.absolute()}")
+    l.debug(f"Router directory exists: {Dir.ROUTER.exists()}")
+    l.debug(f"Router directory is a directory: {Dir.ROUTER.is_dir()}")
+    l.debug(f"Contents of router directory: {list(Dir.ROUTER.iterdir())}")

     # Load routers
     if args.test:
         load_router(args.test)
     else:
-        for module_name in API.MODULES.__fields__:
-            if getattr(API.MODULES, module_name):
+        for module_name in Sys.MODULES.__fields__:
+            if getattr(Sys.MODULES, module_name):
                 load_router(module_name)

     try:
-        # Initialize sync structures on all databases
-        # await API.initialize_sync()
         await Db.initialize_engines()
     except Exception as e:
-        crit(f"Error during startup: {str(e)}")
-        crit(f"Traceback: {traceback.format_exc()}")
+        l.critical(f"Error during startup: {str(e)}")
+        l.critical(f"Traceback: {traceback.format_exc()}")

     try:
         yield  # This is where the app runs
     finally:
         # Shutdown
-        crit("Shutting down...")
+        l.critical("Shutting down...")
         try:
-            await asyncio.wait_for(API.close_db_pools(), timeout=20)
-            crit("Database pools closed.")
+            await asyncio.wait_for(Db.close(), timeout=20)
+            l.critical("Database pools closed.")
         except asyncio.TimeoutError:
-            crit("Timeout while closing database pools.")
+            l.critical("Timeout while closing database pools.")
         except Exception as e:
-            crit(f"Error during shutdown: {str(e)}")
-            crit(f"Traceback: {traceback.format_exc()}")
+            l.critical(f"Error during shutdown: {str(e)}")
+            l.critical(f"Traceback: {traceback.format_exc()}")

 app = FastAPI(lifespan=lifespan)

@@ -87,86 +92,83 @@ app.add_middleware(
     allow_headers=['*'],
 )

 class SimpleAPIKeyMiddleware(BaseHTTPMiddleware):
     async def dispatch(self, request: Request, call_next):
         client_ip = ipaddress.ip_address(request.client.host)
         if request.method == "OPTIONS":
             # Allow CORS preflight requests
             return JSONResponse(status_code=200)
-        if request.url.path not in API.PUBLIC:
-            trusted_subnets = [ipaddress.ip_network(subnet) for subnet in API.TRUSTED_SUBNETS]
+        if request.url.path not in Sys.PUBLIC:
+            trusted_subnets = [ipaddress.ip_network(subnet) for subnet in Sys.TRUSTED_SUBNETS]
             if not any(client_ip in subnet for subnet in trusted_subnets):
                 api_key_header = request.headers.get("Authorization")
                 api_key_query = request.query_params.get("api_key")

-                # Convert API.KEYS to lowercase for case-insensitive comparison
-                api_keys_lower = [key.lower() for key in API.KEYS]
-                debug(f"API.KEYS (lowercase): {api_keys_lower}")
+                # Convert Sys.KEYS to lowercase for case-insensitive comparison
+                api_keys_lower = [key.lower() for key in Sys.KEYS]
+                l.debug(f"Sys.KEYS (lowercase): {api_keys_lower}")

                 if api_key_header:
                     api_key_header = api_key_header.lower().split("bearer ")[-1]
-                    debug(f"API key provided in header: {api_key_header}")
+                    l.debug(f"API key provided in header: {api_key_header}")
                 if api_key_query:
                     api_key_query = api_key_query.lower()
-                    debug(f"API key provided in query: {api_key_query}")
+                    l.debug(f"API key provided in query: {api_key_query}")

-                if api_key_header.lower() not in api_keys_lower and api_key_query.lower() not in api_keys_lower:
-                    err(f"Invalid API key provided by a requester.")
+                if (api_key_header is None or api_key_header.lower() not in api_keys_lower) and \
+                   (api_key_query is None or api_key_query.lower() not in api_keys_lower):
+                    l.error(f"Invalid API key provided by a requester.")
                     if api_key_header:
-                        debug(f"Invalid API key in header: {api_key_header}")
+                        l.debug(f"Invalid API key in header: {api_key_header}")
                     if api_key_query:
-                        debug(f"Invalid API key in query: {api_key_query}")
+                        l.debug(f"Invalid API key in query: {api_key_query}")
                     return JSONResponse(
                         status_code=401,
                         content={"detail": "Invalid or missing API key"}
                     )
                 else:
-                    if api_key_header.lower() in api_keys_lower:
-                        debug(f"Valid API key provided in header: {api_key_header}")
+                    if api_key_header and api_key_header.lower() in api_keys_lower:
+                        l.debug(f"Valid API key provided in header: {api_key_header}")
                     if api_key_query and api_key_query.lower() in api_keys_lower:
-                        debug(f"Valid API key provided in query: {api_key_query}")
+                        l.debug(f"Valid API key provided in query: {api_key_query}")

         response = await call_next(request)
         return response

 # Add the middleware to your FastAPI app
 app.add_middleware(SimpleAPIKeyMiddleware)

 @app.exception_handler(HTTPException)
 async def http_exception_handler(request: Request, exc: HTTPException):
-    err(f"HTTP Exception: {exc.status_code} - {exc.detail}")
-    err(f"Request: {request.method} {request.url}")
+    l.error(f"HTTP Exception: {exc.status_code} - {exc.detail}")
+    l.error(f"Request: {request.method} {request.url}")
     return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})

 @app.middleware("http")
 async def handle_exception_middleware(request: Request, call_next):
     try:
         response = await call_next(request)
         return response
     except Exception as exc:
-        err(f"Unhandled exception in request: {request.method} {request.url}")
-        err(f"Exception: {str(exc)}")
-        err(f"Traceback: {traceback.format_exc()}")
+        l.error(f"Unhandled exception in request: {request.method} {request.url}")
+        l.error(f"Exception: {str(exc)}")
+        l.error(f"Traceback: {traceback.format_exc()}")
         return JSONResponse(
             status_code=500,
             content={"detail": "Internal Server Error"}
         )

 @app.post("/sync/pull")
 async def pull_changes():
-    info(f"Received request to /sync/pull")
+    l.info(f"Received request to /sync/pull")
     try:
-        await API.add_primary_keys_to_local_tables()
-        await API.add_primary_keys_to_remote_tables()
+        await Sys.add_primary_keys_to_local_tables()
+        await Sys.add_primary_keys_to_remote_tables()
         try:
-            source = await API.get_most_recent_source()
+            source = await Sys.get_most_recent_source()
             if source:
                 # Pull changes from the source
-                total_changes = await API.pull_changes(source)
+                total_changes = await Sys.pull_changes(source)

                 return JSONResponse(content={
                     "status": "success",

@@ -179,39 +181,48 @@ async def pull_changes():
                     "status": "info",
                     "message": "No instances with more recent data found or all instances are offline."
                 })

         except Exception as e:
-            err(f"Error in /sync/pull: {str(e)}")
-            err(f"Traceback: {traceback.format_exc()}")
+            l.error(f"Error in /sync/pull: {str(e)}")
+            l.error(f"Traceback: {traceback.format_exc()}")
             raise HTTPException(status_code=500, detail=f"Error during pull: {str(e)}")

         finally:
-            info(f"Finished processing /sync/pull request")
+            l.info(f"Finished processing /sync/pull request")

     except Exception as e:
-        err(f"Error while ensuring primary keys to tables: {str(e)}")
-        err(f"Traceback: {traceback.format_exc()}")
+        l.error(f"Error while ensuring primary keys to tables: {str(e)}")
+        l.error(f"Traceback: {traceback.format_exc()}")
         raise HTTPException(status_code=500, detail=f"Error during primary key insurance: {str(e)}")

 def load_router(router_name):
-    router_file = ROUTER_DIR / f'{router_name}.py'
-    module_logger = L.get_module_logger(router_name)
-    module_logger.debug(f"Attempting to load {router_name.capitalize()}...")
+    router_logger = get_logger(f"router.{router_name}")
+    router_logger.debug(f"Attempting to load {router_name.capitalize()}...")
+
+    # Log the full path being checked
+    router_file = Dir.ROUTER / f'{router_name}.py'
+    router_logger.debug(f"Checking for router file at: {router_file.absolute()}")

     if router_file.exists():
+        router_logger.debug(f"Router file found: {router_file}")
         module_path = f'sijapi.routers.{router_name}'
+        router_logger.debug(f"Attempting to import module: {module_path}")
         try:
             module = importlib.import_module(module_path)
+            router_logger.debug(f"Module imported successfully: {module}")
             router = getattr(module, router_name)
+            router_logger.debug(f"Router object retrieved: {router}")
             app.include_router(router)
+            router_logger.info(f"Router {router_name} loaded successfully")
         except (ImportError, AttributeError) as e:
-            module_logger.critical(f"Failed to load router {router_name}: {e}")
+            router_logger.critical(f"Failed to load router {router_name}: {e}")
+            router_logger.debug(f"Current working directory: {os.getcwd()}")
+            router_logger.debug(f"Python path: {sys.path}")
     else:
-        module_logger.error(f"Router file for {router_name} does not exist.")
+        router_logger.error(f"Router file for {router_name} does not exist at {router_file.absolute()}")
+        router_logger.debug(f"Contents of router directory: {list(Dir.ROUTER.iterdir())}")

 def main(argv):
     config = HypercornConfig()
-    config.bind = [API.BIND]
+    config.bind = [Sys.BIND]
     config.startup_timeout = 300 # 5 minutes
     config.shutdown_timeout = 15 # 15 seconds
     asyncio.run(serve(app, config))
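The reworked SimpleAPIKeyMiddleware above accepts a key either as a Bearer token in the Authorization header or as an api_key query parameter, compared case-insensitively against Sys.KEYS; requests to paths in Sys.PUBLIC or from Sys.TRUSTED_SUBNETS skip the check. A minimal client-side sketch of both paths, for illustration only (the base URL and key below are placeholders, not values from this commit):

# Hypothetical client sketch; BASE and KEY are assumptions for demonstration.
import requests

BASE = "http://localhost:4444"   # assumed local bind; sys.yaml-example uses PORT: 4444
KEY = "sk-YOUR_API_KEY"          # must appear (case-insensitively) in Sys.KEYS

# Key supplied as an Authorization bearer header
r1 = requests.post(f"{BASE}/sync/pull", headers={"Authorization": f"Bearer {KEY}"})

# Same key supplied as a query parameter instead
r2 = requests.post(f"{BASE}/sync/pull", params={"api_key": KEY})

# A wrong or missing key yields 401 with {"detail": "Invalid or missing API key"}
print(r1.status_code, r2.status_code)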
sijapi/classes.py: 1327 changes (file diff suppressed because it is too large)
@@ -56,8 +56,8 @@
 #─── notes: ──────────────────────────────────────────────────────────────────────
 #
 # HOST_NET† and HOST_PORT comprise HOST and determine the ip and port the server binds to.
-# API.URL is used to assemble URLs, e.g. in the MS authentication flow and for serving images generated on the img router.
-# API.URL should match the base URL used to access sijapi sans endpoint, e.g. http://localhost:4444 or https://api.sij.ai
+# Sys.URL is used to assemble URLs, e.g. in the MS authentication flow and for serving images generated on the img router.
+# Sys.URL should match the base URL used to access sijapi sans endpoint, e.g. http://localhost:4444 or https://api.sij.ai
 #
 # † Take care here! Please ensure you understand the implications of setting HOST_NET to anything besides 127.0.0.1, and configure your firewall and router appropriately if you do. Setting HOST_NET to 0.0.0.0, for instance, opens sijapi to any device the server running it is accessible to — including potentially frightening internet randos (depending how your firewall, router, and NAT are configured).
 #

@@ -159,7 +159,7 @@ UNLOADED=ig
 # apps that together make SetApp an incredible value for macOS users!)
 #
 # tts: designed for use with coqui — $ pip install coqui — and/or the
-# ElevenLabs API.
+# ElevenLabs Sys.
 #
 # weather: requires a VisualCrossing API key and is designed for (but doesn't
 # itself strictly require) Postgresql with the PostGIS extension;
sijapi/config/db.yaml-example (new file, 69 lines)

POOL:
  - ts_id: 'server1'
    ts_ip: '192.168.0.10'
    app_port: 4444
    db_port: 5432
    db_name: mydb
    db_user: dbuser
    db_pass: 'password123'
    ssh_port: 22
    ssh_user: sshuser
    ssh_pass: 'password456'
    path: '/Users/sij/workshop/sijapi'
    tmux: '/opt/homebrew/bin/tmux'
    tailscale: '/usr/local/bin/tailscale'
    conda: '/Users/sij/miniforge3/bin/mamba'
    conda_env: '/Users/sij/miniforge3/envs/sijapi'
    vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
    vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
  - ts_id: 'server2'
    ts_ip: '192.168.0.11'
    app_port: 4444
    db_port: 5432
    db_name: mydb
    db_user: dbuser
    db_pass: 'password123'
    ssh_port: 22
    ssh_user: sshuser
    ssh_pass: 'password456'
    path: '/Users/sij/workshop/sijapi'
    tmux: '/opt/homebrew/bin/tmux'
    tailscale: '/usr/local/bin/tailscale'
    conda: '/Users/sij/miniforge3/bin/mamba'
    conda_env: '/Users/sij/miniforge3/envs/sijapi'
    vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
    vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
  - ts_id: 'server3'
    ts_ip: '192.168.0.12'
    app_port: 4444
    db_port: 5432
    db_name: mydb
    db_user: dbuser
    db_pass: 'password123'
    ssh_port: 22
    ssh_user: sshuser
    ssh_pass: 'password456'
    path: '/Users/sij/workshop/sijapi'
    tmux: '/opt/homebrew/bin/tmux'
    tailscale: '/usr/local/bin/tailscale'
    conda: '/Users/sij/miniforge3/bin/mamba'
    conda_env: '/Users/sij/miniforge3/envs/sijapi'
    vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
    vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'

TABLES:
  locations:
    primary_key: id
    use_guid: true
  dailyweather:
    primary_key: id
    use_guid: true
  hourlyweather:
    primary_key: id
    use_guid: true
  click_logs:
    primary_key: id
    use_guid: true
  short_urls:
    primary_key: id
    use_guid: true
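Each POOL entry here feeds the new Database class added later in this commit; a short sketch of how one entry becomes a connection URL, mirroring Database.initialize_engines() in sijapi/database.py (the config path below is an assumption about where the real, non-example file lives):

# Illustrative only: how one POOL entry is turned into an asyncpg engine,
# following the f-string used in Database.initialize_engines().
import yaml
from sqlalchemy.ext.asyncio import create_async_engine

with open("sijapi/config/db.yaml") as f:   # assumed real copy of this example file
    db_info = yaml.safe_load(f)["POOL"][0]

url = (f"postgresql+asyncpg://{db_info['db_user']}:{db_info['db_pass']}"
       f"@{db_info['ts_ip']}:{db_info['db_port']}/{db_info['db_name']}")
engine = create_async_engine(url, pool_pre_ping=True, pool_size=5, max_overflow=10)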
sijapi/config/gis.yaml-example (new file, 13 lines)

custom_locations:
  - name: Echo Valley Ranch
    latitude: 42.8098216
    longitude: -123.049396
    radius: 2

layers:
  - url: "https://gis.blm.gov/arcgis/rest/services/Cadastral/BLM_Natl_PLSS_CadNSDI/MapServer/1/query"
    table_name: "public.plss_townships"
    layer_name: "Townships"
  - url: "https://gis.blm.gov/arcgis/rest/services/Cadastral/BLM_Natl_PLSS_CadNSDI/MapServer/2/query"
    table_name: "public.plss_sections"
    layer_name: "Sections"
@@ -1,4 +0,0 @@
-- name: Echo Valley Ranch
-  latitude: 42.8098216
-  longitude: -123.049396
-  radius: 1.5
@@ -1,9 +1,10 @@
+# config/sys.yaml
 # Primary configuration file
-HOST: '0.0.0.0'
+HOST: "0.0.0.0"
 PORT: 4444
-BIND: '{{ HOST }}:{{ PORT }}'
-URL: 'https://api.example.com'
+BIND: "{{ HOST }}:{{ PORT }}"
+URL: "https://api.sij.ai"
+MAX_CPU_CORES: 7

 PUBLIC:
   - /id

@@ -15,20 +16,21 @@ PUBLIC:
   - /cd/alert

 TRUSTED_SUBNETS:
-  - 127.0.0.1/32
-  - 10.13.37.0/24
-  - 100.64.64.0/24
+  - "127.0.0.1/32"
+  - "10.13.37.0/24"
+
+SUBNET_BROADCAST: "10.255.255.255"

 MODULES:
-  archivist: on
+  archivist: off
   asr: on
-  cal: on
+  cal: off
   cf: off
   dist: off
-  email: on
+  email: off
   gis: on
   ig: off
-  img: on
+  img: off
   llm: on
   news: on
   note: on

@@ -36,77 +38,54 @@ MODULES:
   scrape: on
   serve: on
   sys: on
-  timing: on
+  timing: off
   tts: on
   weather: on

-POOL:
-  - ts_id: 'server1'
-    ts_ip: '192.168.0.10'
-    app_port: 4444
-    db_port: 5432
-    db_name: mydb
-    db_user: dbuser
-    db_pass: 'password123'
-    ssh_port: 22
-    ssh_user: sshuser
-    ssh_pass: 'password456'
-    path: '/Users/sij/workshop/sijapi'
-    tmux: '/opt/homebrew/bin/tmux'
-    tailscale: '/usr/local/bin/tailscale'
-    conda: '/Users/sij/miniforge3/bin/mamba'
-    conda_env: '/Users/sij/miniforge3/envs/sijapi'
-    vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
-    vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
-  - ts_id: 'server2'
-    ts_ip: '192.168.0.11'
-    app_port: 4444
-    db_port: 5432
-    db_name: mydb
-    db_user: dbuser
-    db_pass: 'password123'
-    ssh_port: 22
-    ssh_user: sshuser
-    ssh_pass: 'password456'
-    path: '/Users/sij/workshop/sijapi'
-    tmux: '/opt/homebrew/bin/tmux'
-    tailscale: '/usr/local/bin/tailscale'
-    conda: '/Users/sij/miniforge3/bin/mamba'
-    conda_env: '/Users/sij/miniforge3/envs/sijapi'
-    vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
-    vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
-  - ts_id: 'server3'
-    ts_ip: '192.168.0.12'
-    app_port: 4444
-    db_port: 5432
-    db_name: mydb
-    db_user: dbuser
-    db_pass: 'password123'
-    ssh_port: 22
-    ssh_user: sshuser
-    ssh_pass: 'password456'
-    path: '/Users/sij/workshop/sijapi'
-    tmux: '/opt/homebrew/bin/tmux'
-    tailscale: '/usr/local/bin/tailscale'
-    conda: '/Users/sij/miniforge3/bin/mamba'
-    conda_env: '/Users/sij/miniforge3/envs/sijapi'
-    vitals: '/Users/sij/workshop/scripts/gitea/pathScripts/vitals'
-    vpn: '/Users/sij/workshop/scripts/gitea/pathScripts/vpn'
+LOGS:
+  default: info
+  init: debug
+  classes: debug
+  database: debug
+  serialization: debug
+  utilities: debug
+  logs: debug
+  main: debug
+  archivist: info
+  asr: info
+  cal: info
+  cf: info
+  dist: info
+  email: info
+  gis: debug
+  ig: info
+  img: debug
+  llm: debug
+  news: debug
+  note: debug
+  rag: debug
+  scrape: debug
+  serve: info
+  sys: debug
+  timing: warn
+  tts: info
+  weather: info

 EXTENSIONS:
-  pgp: on
-  archivist: on
+  archivist: off
   courtlistener: off
+  elevenlabs: on
   macnotify: on
+  pgp: on
   shellfish: on
+  xtts: off
   url_shortener: off

-TZ: 'America/Los_Angeles'
-
 KEYS:
-  - 'sk-YOUR_FIRST_API_KEY'
-  - 'sk-YOUR_SECOND_API_KEY'
-  - 'sk-YOUR_THIRD_API_KEY'
+  - "sk-NhrtQwCHNdK5sRZC"
+  - "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"
+
+TZ: "America/Los_Angeles"

 GARBAGE:
   COLLECTION_INTERVAL: 60 * 60
sijapi/database.py (new file, 324 lines)

# database.py
import json
import yaml
import time
import aiohttp
import asyncio
from datetime import datetime as dt_datetime, date
from tqdm.asyncio import tqdm
import reverse_geocoder as rg
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, TypeVar, ClassVar
from dotenv import load_dotenv
from pydantic import BaseModel, Field, create_model, PrivateAttr
from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager
from datetime import datetime, timedelta, timezone
from zoneinfo import ZoneInfo
from srtm import get_data
import os
import sys
from loguru import logger
from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy.orm import sessionmaker, declarative_base
from sqlalchemy.exc import OperationalError
from sqlalchemy import Column, Integer, String, DateTime, JSON, Text, select, func
from sqlalchemy.dialects.postgresql import JSONB
from urllib.parse import urljoin
import hashlib
import random
import traceback  # needed for traceback.format_exc() in write() below
from .logs import get_logger
from .serialization import json_dumps, json_serial, serialize

l = get_logger(__name__)

Base = declarative_base()

BASE_DIR = Path(__file__).resolve().parent
CONFIG_DIR = BASE_DIR / "config"
ENV_PATH = CONFIG_DIR / ".env"
load_dotenv(ENV_PATH)
TS_ID = os.environ.get('TS_ID')


class QueryTracking(Base):
    __tablename__ = 'query_tracking'

    id = Column(Integer, primary_key=True)
    ts_id = Column(String, nullable=False)
    query = Column(Text, nullable=False)
    args = Column(JSONB)
    executed_at = Column(DateTime(timezone=True), server_default=func.now())
    completed_by = Column(JSONB, default={})
    result_checksum = Column(String)


class Database:
    @classmethod
    def init(cls, config_name: str):
        return cls(config_name)

    def __init__(self, config_path: str):
        self.config = self.load_config(config_path)
        self.engines: Dict[str, Any] = {}
        self.sessions: Dict[str, Any] = {}
        self.online_servers: set = set()
        self.local_ts_id = self.get_local_ts_id()

    def load_config(self, config_path: str) -> Dict[str, Any]:
        base_path = Path(__file__).parent.parent
        full_path = base_path / "sijapi" / "config" / f"{config_path}.yaml"

        with open(full_path, 'r') as file:
            config = yaml.safe_load(file)

        return config

    def get_local_ts_id(self) -> str:
        return os.environ.get('TS_ID')

    async def initialize_engines(self):
        for db_info in self.config['POOL']:
            url = f"postgresql+asyncpg://{db_info['db_user']}:{db_info['db_pass']}@{db_info['ts_ip']}:{db_info['db_port']}/{db_info['db_name']}"
            try:
                engine = create_async_engine(url, pool_pre_ping=True, pool_size=5, max_overflow=10)
                self.engines[db_info['ts_id']] = engine
                self.sessions[db_info['ts_id']] = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
                l.info(f"Initialized engine and session for {db_info['ts_id']}")
            except Exception as e:
                l.error(f"Failed to initialize engine for {db_info['ts_id']}: {str(e)}")

        if self.local_ts_id not in self.sessions:
            l.error(f"Failed to initialize session for local server {self.local_ts_id}")
        else:
            try:
                # Create tables if they don't exist
                async with self.engines[self.local_ts_id].begin() as conn:
                    await conn.run_sync(Base.metadata.create_all)
                l.info(f"Initialized tables for local server {self.local_ts_id}")
            except Exception as e:
                l.error(f"Failed to create tables for local server {self.local_ts_id}: {str(e)}")

    async def get_online_servers(self) -> List[str]:
        online_servers = []
        for ts_id, engine in self.engines.items():
            try:
                async with engine.connect() as conn:
                    await conn.execute(text("SELECT 1"))
                online_servers.append(ts_id)
            except OperationalError:
                pass
        self.online_servers = set(online_servers)
        return online_servers

    async def read(self, query: str, **kwargs):
        if self.local_ts_id not in self.sessions:
            l.error(f"No session found for local server {self.local_ts_id}. Database may not be properly initialized.")
            return None

        async with self.sessions[self.local_ts_id]() as session:
            try:
                result = await session.execute(text(query), kwargs)
                # Convert the result to a list of dictionaries
                rows = result.fetchall()
                if rows:
                    columns = result.keys()
                    return [dict(zip(columns, row)) for row in rows]
                else:
                    return []
            except Exception as e:
                l.error(f"Failed to execute read query: {str(e)}")
                return None

    async def write(self, query: str, **kwargs):
        if self.local_ts_id not in self.sessions:
            l.error(f"No session found for local server {self.local_ts_id}. Database may not be properly initialized.")
            return None

        async with self.sessions[self.local_ts_id]() as session:
            try:
                # Serialize the kwargs using serialize()
                serialized_kwargs = {key: serialize(value) for key, value in kwargs.items()}

                # Execute the write query
                result = await session.execute(text(query), serialized_kwargs)

                # Log the query (use json_dumps for logging purposes)
                new_query = QueryTracking(
                    ts_id=self.local_ts_id,
                    query=query,
                    args=json_dumps(kwargs)  # Use original kwargs for logging
                )
                session.add(new_query)
                await session.flush()
                query_id = new_query.id

                await session.commit()
                l.info(f"Successfully executed write query: {query[:50]}...")

                checksum = await self._local_compute_checksum(query, serialized_kwargs)

                # Update query_tracking with checksum
                await self.update_query_checksum(query_id, checksum)

                # Replicate to online servers
                online_servers = await self.get_online_servers()
                for ts_id in online_servers:
                    if ts_id != self.local_ts_id:
                        asyncio.create_task(self._replicate_write(ts_id, query_id, query, serialized_kwargs, checksum))

                return result  # Return the CursorResult

            except Exception as e:
                l.error(f"Failed to execute write query: {str(e)}")
                l.error(f"Query: {query}")
                l.error(f"Kwargs: {kwargs}")
                l.error(f"Serialized kwargs: {serialized_kwargs}")
                l.error(f"Traceback: {traceback.format_exc()}")
                return None

    async def get_primary_server(self) -> str:
        url = urljoin(self.config['URL'], '/id')

        async with aiohttp.ClientSession() as session:
            try:
                async with session.get(url) as response:
                    if response.status == 200:
                        primary_ts_id = await response.text()
                        return primary_ts_id.strip()
                    else:
                        l.error(f"Failed to get primary server. Status: {response.status}")
                        return None
            except aiohttp.ClientError as e:
                l.error(f"Error connecting to load balancer: {str(e)}")
                return None

    async def get_checksum_server(self) -> dict:
        primary_ts_id = await self.get_primary_server()
        online_servers = await self.get_online_servers()

        checksum_servers = [server for server in self.config['POOL'] if server['ts_id'] in online_servers and server['ts_id'] != primary_ts_id]

        if not checksum_servers:
            return next(server for server in self.config['POOL'] if server['ts_id'] == primary_ts_id)

        return random.choice(checksum_servers)

    async def _local_compute_checksum(self, query: str, params: dict):
        async with self.sessions[self.local_ts_id]() as session:
            result = await session.execute(text(query), params)
            if result.returns_rows:
                data = result.fetchall()
            else:
                data = str(result.rowcount) + query + str(params)
            checksum = hashlib.md5(str(data).encode()).hexdigest()
            return checksum

    async def _delegate_compute_checksum(self, server: Dict[str, Any], query: str, params: dict):
        url = f"http://{server['ts_ip']}:{server['app_port']}/sync/checksum"

        async with aiohttp.ClientSession() as session:
            try:
                async with session.post(url, json={"query": query, "params": params}) as response:
                    if response.status == 200:
                        result = await response.json()
                        return result['checksum']
                    else:
                        l.error(f"Failed to get checksum from {server['ts_id']}. Status: {response.status}")
                        return await self._local_compute_checksum(query, params)
            except aiohttp.ClientError as e:
                l.error(f"Error connecting to {server['ts_id']} for checksum: {str(e)}")
                return await self._local_compute_checksum(query, params)

    async def update_query_checksum(self, query_id: int, checksum: str):
        async with self.sessions[self.local_ts_id]() as session:
            await session.execute(
                text("UPDATE query_tracking SET result_checksum = :checksum WHERE id = :id"),
                {"checksum": checksum, "id": query_id}
            )
            await session.commit()

    async def _replicate_write(self, ts_id: str, query_id: int, query: str, params: dict, expected_checksum: str):
        try:
            async with self.sessions[ts_id]() as session:
                await session.execute(text(query), params)
                actual_checksum = await self._local_compute_checksum(query, params)
                if actual_checksum != expected_checksum:
                    raise ValueError(f"Checksum mismatch on {ts_id}")
                await self.mark_query_completed(query_id, ts_id)
                await session.commit()
                l.info(f"Successfully replicated write to {ts_id}")
        except Exception as e:
            l.error(f"Failed to replicate write on {ts_id}: {str(e)}")

    async def mark_query_completed(self, query_id: int, ts_id: str):
        async with self.sessions[self.local_ts_id]() as session:
            query = await session.get(QueryTracking, query_id)
            if query:
                completed_by = query.completed_by or {}
                completed_by[ts_id] = True
                query.completed_by = completed_by
                await session.commit()

    async def sync_local_server(self):
        async with self.sessions[self.local_ts_id]() as session:
            last_synced = await session.execute(
                text("SELECT MAX(id) FROM query_tracking WHERE completed_by ? :ts_id"),
                {"ts_id": self.local_ts_id}
            )
            last_synced_id = last_synced.scalar() or 0

            unexecuted_queries = await session.execute(
                text("SELECT * FROM query_tracking WHERE id > :last_id ORDER BY id"),
                {"last_id": last_synced_id}
            )

            for query in unexecuted_queries:
                try:
                    params = json.loads(query.args)
                    await session.execute(text(query.query), params)
                    actual_checksum = await self._local_compute_checksum(query.query, params)
                    if actual_checksum != query.result_checksum:
                        raise ValueError(f"Checksum mismatch for query ID {query.id}")
                    await self.mark_query_completed(query.id, self.local_ts_id)
                except Exception as e:
                    l.error(f"Failed to execute query ID {query.id} during local sync: {str(e)}")

            await session.commit()
            l.info(f"Local server sync completed. Executed {unexecuted_queries.rowcount} queries.")

    async def purge_completed_queries(self):
        async with self.sessions[self.local_ts_id]() as session:
            all_ts_ids = [db['ts_id'] for db in self.config['POOL']]

            result = await session.execute(
                text("""
                    DELETE FROM query_tracking
                    WHERE id <= (
                        SELECT MAX(id)
                        FROM query_tracking
                        WHERE completed_by ?& :ts_ids
                    )
                """),
                {"ts_ids": all_ts_ids}
            )
            await session.commit()

            deleted_count = result.rowcount
            l.info(f"Purged {deleted_count} completed queries.")

    async def close(self):
        for engine in self.engines.values():
            await engine.dispose()
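A minimal usage sketch of the new Database class, based only on the methods defined above. It assumes TS_ID is set in the environment to one of the ts_id values in db.yaml's POOL; the table, query, and values are hypothetical examples, not part of this commit.

# Hypothetical usage sketch for sijapi.database.Database (table and values assumed).
import asyncio
from sijapi.database import Database

async def demo():
    db = Database.init('db')               # loads sijapi/config/db.yaml
    await db.initialize_engines()          # one async engine per POOL entry
    await db.write(
        "INSERT INTO short_urls (short_code) VALUES (:code)",
        code="abc123",
    )                                      # logged in query_tracking and replicated to online peers
    rows = await db.read(
        "SELECT * FROM short_urls WHERE short_code = :code",
        code="abc123",
    )                                      # returns a list of dicts (or None on error)
    print(rows)
    await db.close()

asyncio.run(demo())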
sijapi/helpers/CaPLSS.py (new file, 237 lines)

#!/usr/bin/env python3

import requests
import json
import time
import os
import subprocess
import sys
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from datetime import datetime

# Environment variables for database connection
DB_NAME = os.getenv('DB_NAME', 'sij')
DB_USER = os.getenv('DB_USER', 'sij')
DB_PASSWORD = os.getenv('DB_PASSWORD', 'Synchr0!')
DB_HOST = os.getenv('DB_HOST', 'localhost')
DB_PORT = os.getenv('DB_PORT', '5432')

def get_feature_count(url):
    params = {
        'where': '1=1',
        'returnCountOnly': 'true',
        'f': 'json'
    }
    retries = Retry(total=10, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
    with requests.Session() as session:
        session.mount("https://", HTTPAdapter(max_retries=retries))
        response = session.get(url, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
        return data.get('count', 0)

def fetch_features(url, offset, num, max_retries=5):
    params = {
        'where': '1=1',
        'outFields': '*',
        'geometryPrecision': 6,
        'outSR': 4326,
        'f': 'json',
        'resultOffset': offset,
        'resultRecordCount': num
    }
    for attempt in range(max_retries):
        try:
            retries = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
            with requests.Session() as session:
                session.mount("https://", HTTPAdapter(max_retries=retries))
                response = session.get(url, params=params, timeout=30)
                response.raise_for_status()
                return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error fetching features (attempt {attempt + 1}/{max_retries}): {e}")
            if attempt == max_retries - 1:
                raise
            time.sleep(5 * (attempt + 1))  # Exponential backoff


def download_layer(layer_num, layer_name):
    base_dir = os.path.expanduser('~/data')
    os.makedirs(base_dir, exist_ok=True)

    file_path = os.path.join(base_dir, f'PLSS_{layer_name}.geojson')
    temp_file_path = os.path.join(base_dir, f'PLSS_{layer_name}_temp.json')

    url = f"https://gis.blm.gov/arcgis/rest/services/Cadastral/BLM_Natl_PLSS_CadNSDI/MapServer/{layer_num}/query"

    total_count = get_feature_count(url)
    print(f"Total {layer_name} features: {total_count}")

    batch_size = 1000
    chunk_size = 10000  # Write to file every 10,000 features
    offset = 0
    all_features = []

    # Check if temporary file exists and load its content
    if os.path.exists(temp_file_path):
        try:
            with open(temp_file_path, 'r') as f:
                all_features = json.load(f)
            offset = len(all_features)
            print(f"Resuming download from offset {offset}")
        except json.JSONDecodeError:
            print("Error reading temporary file. Starting download from the beginning.")
            offset = 0
            all_features = []

    try:
        while offset < total_count:
            print(f"Fetching {layer_name} features {offset} to {offset + batch_size}...")
            data = fetch_features(url, offset, batch_size)

            new_features = data.get('features', [])
            if not new_features:
                break

            all_features.extend(new_features)
            offset += len(new_features)

            # Progress indicator
            progress = offset / total_count
            bar_length = 30
            filled_length = int(bar_length * progress)
            bar = '=' * filled_length + '-' * (bar_length - filled_length)
            print(f'\rProgress: [{bar}] {progress:.1%} ({offset}/{total_count} features)', end='', flush=True)

            # Save progress to temporary file every chunk_size features
            if len(all_features) % chunk_size == 0:
                with open(temp_file_path, 'w') as f:
                    json.dump(all_features, f)

            time.sleep(1)

        print(f"\nTotal {layer_name} features fetched: {len(all_features)}")

        # Write final GeoJSON file
        with open(file_path, 'w') as f:
            f.write('{"type": "FeatureCollection", "features": [\n')
            for i, feature in enumerate(all_features):
                geojson_feature = {
                    "type": "Feature",
                    "properties": feature['attributes'],
                    "geometry": feature['geometry']
                }
                json.dump(geojson_feature, f)
                if i < len(all_features) - 1:
                    f.write(',\n')
            f.write('\n]}')

        print(f"GeoJSON file saved as '{file_path}'")

        # Remove temporary file
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)

        return file_path
    except Exception as e:
        print(f"\nError during download: {e}")
        print(f"Partial data saved in {temp_file_path}")
        return None


def check_postgres_connection():
    try:
        subprocess.run(['psql', '-h', DB_HOST, '-p', DB_PORT, '-U', DB_USER, '-d', DB_NAME, '-c', 'SELECT 1;'],
                       check=True, capture_output=True, text=True)
        return True
    except subprocess.CalledProcessError:
        return False

def check_postgis_extension():
    try:
        result = subprocess.run(['psql', '-h', DB_HOST, '-p', DB_PORT, '-U', DB_USER, '-d', DB_NAME,
                                 '-c', "SELECT 1 FROM pg_extension WHERE extname = 'postgis';"],
                                check=True, capture_output=True, text=True)
        return '1' in result.stdout
    except subprocess.CalledProcessError:
        return False

def create_postgis_extension():
    try:
        subprocess.run(['psql', '-h', DB_HOST, '-p', DB_PORT, '-U', DB_USER, '-d', DB_NAME,
                        '-c', "CREATE EXTENSION IF NOT EXISTS postgis;"],
                       check=True, capture_output=True, text=True)
        print("PostGIS extension created successfully.")
    except subprocess.CalledProcessError as e:
        print(f"Error creating PostGIS extension: {e}")
        sys.exit(1)

def import_to_postgis(file_path, table_name):
    if not check_postgres_connection():
        print("Error: Unable to connect to PostgreSQL. Please check your connection settings.")
        sys.exit(1)

    if not check_postgis_extension():
        print("PostGIS extension not found. Attempting to create it...")
        create_postgis_extension()

    ogr2ogr_command = [
        'ogr2ogr',
        '-f', 'PostgreSQL',
        f'PG:dbname={DB_NAME} user={DB_USER} password={DB_PASSWORD} host={DB_HOST} port={DB_PORT}',
        file_path,
        '-nln', table_name,
        '-overwrite'
    ]

    try:
        subprocess.run(ogr2ogr_command, check=True, capture_output=True, text=True)
        print(f"Data successfully imported into PostGIS table: {table_name}")
    except subprocess.CalledProcessError as e:
        print(f"Error importing data into PostGIS: {e}")
        print(f"Command that failed: {e.cmd}")
        print(f"Error output: {e.stderr}")

def check_ogr2ogr():
    try:
        subprocess.run(['ogr2ogr', '--version'], check=True, capture_output=True, text=True)
        return True
    except subprocess.CalledProcessError:
        return False
    except FileNotFoundError:
        return False


def main():
    if not check_ogr2ogr():
        print("Error: ogr2ogr not found. Please install GDAL/OGR tools.")
        print("On Debian: sudo apt-get install gdal-bin")
        print("On macOS with Homebrew: brew install gdal")
        sys.exit(1)

    try:
        township_file = os.path.expanduser('~/data/PLSS_Townships.geojson')
        if not os.path.exists(township_file):
            township_file = download_layer(1, "Townships")
        if township_file:
            import_to_postgis(township_file, "public.plss_townships")
        else:
            print("Failed to download Townships data. Skipping import.")

        section_file = os.path.expanduser('~/data/PLSS_Sections.geojson')
        if not os.path.exists(section_file):
            section_file = download_layer(2, "Sections")
        if section_file:
            import_to_postgis(section_file, "public.plss_sections")
        else:
            print("Failed to download Sections data. Skipping import.")

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")


if __name__ == "__main__":
    main()
@@ -1,8 +1,12 @@
 # CaPLSS_downloader_and_importer.py
 import requests
 import json
 import time
 import os
 import subprocess
+import requests
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry

 def get_feature_count(url):
     params = {
@@ -10,11 +14,17 @@ def get_feature_count(url):
         'returnCountOnly': 'true',
         'f': 'json'
     }
-    response = requests.get(url, params=params)
+    retries = Retry(total=10, backoff_factor=0.5)
+    adapter = HTTPAdapter(max_retries=retries)
+    session = requests.Session()
+    session.mount("https://", adapter)
+
+    response = session.get(url, params=params, timeout=15)  # Add timeout parameter
     response.raise_for_status()
     data = response.json()
     return data.get('count', 0)


 def fetch_features(url, offset, num):
     params = {
         'where': '1=1',
@@ -70,8 +80,14 @@ def download_layer(layer_num, layer_name):
         "features": geojson_features
     }

+    # Define a base directory that exists on both macOS and Debian
+    base_dir = os.path.expanduser('~/data')
+    os.makedirs(base_dir, exist_ok=True)  # Create the directory if it doesn't exist
+
+    # Use os.path.join to construct the file path
+    file_path = os.path.join(base_dir, f'PLSS_{layer_name}.geojson')
+
     # Save to file
-    file_path = f'/Users/sij/workshop/sijapi/sijapi/data/PLSS_{layer_name}.geojson'
     with open(file_path, 'w') as f:
         json.dump(full_geojson, f)
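The hunk above replaces a bare requests.get() with a Session mounted on an HTTPAdapter that carries a urllib3 Retry policy. A minimal, self-contained sketch of that pattern is below; the retry counts, status list, and example URL are illustrative assumptions, not values taken from this commit.

# retry_session_sketch.py - illustrative only; not part of this commit
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def make_retrying_session(total=5, backoff=0.5):
    # Retry connection errors and the listed 5xx responses, with
    # exponential backoff between attempts.
    retries = Retry(total=total, backoff_factor=backoff,
                    status_forcelist=[500, 502, 503, 504])
    session = requests.Session()
    session.mount("https://", HTTPAdapter(max_retries=retries))
    return session

if __name__ == "__main__":
    session = make_retrying_session()
    # Hypothetical ArcGIS-style endpoint; any HTTPS URL works the same way.
    resp = session.get("https://example.com/arcgis/rest/services/0/query",
                       params={"where": "1=1", "returnCountOnly": "true", "f": "json"},
                       timeout=15)
    resp.raise_for_status()
    print(resp.json())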
1 sijapi/helpers/Townships_progress.json (new file)
@@ -0,0 +1 @@
{"offset": 50000}
71 sijapi/helpers/db.py (new file)
@@ -0,0 +1,71 @@
import asyncio
import asyncpg
import yaml
from pathlib import Path

async def load_config():
    config_path = Path(__file__).parent.parent / 'config' / 'db.yaml'
    with open(config_path, 'r') as file:
        return yaml.safe_load(file)

async def add_foreign_key_constraint(conn):
    # Ensure short_code is not null in both tables
    await conn.execute("""
        ALTER TABLE short_urls
        ALTER COLUMN short_code SET NOT NULL;
    """)

    await conn.execute("""
        ALTER TABLE click_logs
        ALTER COLUMN short_code SET NOT NULL;
    """)

    # Add unique constraint to short_urls.short_code if it doesn't exist
    await conn.execute("""
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT 1
                FROM pg_constraint
                WHERE conname = 'short_urls_short_code_key'
            ) THEN
                ALTER TABLE short_urls
                ADD CONSTRAINT short_urls_short_code_key UNIQUE (short_code);
            END IF;
        END $$;
    """)

    # Add foreign key constraint
    await conn.execute("""
        ALTER TABLE click_logs
        ADD CONSTRAINT fk_click_logs_short_urls
        FOREIGN KEY (short_code)
        REFERENCES short_urls(short_code)
        ON DELETE CASCADE;
    """)

    print("Foreign key constraint added successfully.")

async def main():
    config = await load_config()
    source_server = config['POOL'][0]  # sij-mbp16

    conn_params = {
        'database': source_server['db_name'],
        'user': source_server['db_user'],
        'password': source_server['db_pass'],
        'host': source_server['ts_ip'],
        'port': source_server['db_port']
    }

    conn = await asyncpg.connect(**conn_params)

    try:
        await add_foreign_key_constraint(conn)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
    finally:
        await conn.close()

if __name__ == "__main__":
    asyncio.run(main())
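For context, one way to confirm that the foreign key added by db.py actually landed is to query pg_constraint. The helper below is an illustrative sketch, not part of the commit; the table and constraint names follow the script above, while the connection details are placeholders (in practice, reuse load_config()).

# verify_fk_sketch.py - illustrative check, not part of this commit
import asyncio
import asyncpg

async def fk_exists(conn, table: str, constraint: str) -> bool:
    # pg_constraint holds one row per constraint; contype 'f' marks foreign keys.
    row = await conn.fetchrow(
        """
        SELECT 1
        FROM pg_constraint
        WHERE conrelid = $1::regclass
          AND conname = $2
          AND contype = 'f'
        """,
        table, constraint,
    )
    return row is not None

async def main():
    conn = await asyncpg.connect(database="sij", user="sij", host="localhost")  # placeholder connection details
    try:
        print(await fk_exists(conn, "click_logs", "fk_click_logs_short_urls"))
    finally:
        await conn.close()

if __name__ == "__main__":
    asyncio.run(main())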
89 sijapi/helpers/db_get_schema.py (new file)
|
@ -0,0 +1,89 @@
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2 import sql
|
||||||
|
|
||||||
|
def connect_to_db():
|
||||||
|
return psycopg2.connect(
|
||||||
|
dbname='sij',
|
||||||
|
user='sij',
|
||||||
|
password='Synchr0!',
|
||||||
|
host='localhost' # Adjust if your database is not on localhost
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_table_info(conn):
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
# Get all tables in the public schema
|
||||||
|
cur.execute("""
|
||||||
|
SELECT table_name
|
||||||
|
FROM information_schema.tables
|
||||||
|
WHERE table_schema = 'public'
|
||||||
|
""")
|
||||||
|
tables = cur.fetchall()
|
||||||
|
|
||||||
|
table_info = {}
|
||||||
|
for (table_name,) in tables:
|
||||||
|
table_info[table_name] = {
|
||||||
|
'primary_keys': get_primary_keys(cur, table_name),
|
||||||
|
'foreign_keys': get_foreign_keys(cur, table_name)
|
||||||
|
}
|
||||||
|
|
||||||
|
return table_info
|
||||||
|
|
||||||
|
def get_primary_keys(cur, table_name):
|
||||||
|
cur.execute("""
|
||||||
|
SELECT a.attname
|
||||||
|
FROM pg_index i
|
||||||
|
JOIN pg_attribute a ON a.attrelid = i.indrelid
|
||||||
|
AND a.attnum = ANY(i.indkey)
|
||||||
|
WHERE i.indrelid = %s::regclass
|
||||||
|
AND i.indisprimary
|
||||||
|
""", (table_name,))
|
||||||
|
return [row[0] for row in cur.fetchall()]
|
||||||
|
|
||||||
|
def get_foreign_keys(cur, table_name):
|
||||||
|
cur.execute("""
|
||||||
|
SELECT
|
||||||
|
tc.constraint_name,
|
||||||
|
kcu.column_name,
|
||||||
|
ccu.table_name AS foreign_table_name,
|
||||||
|
ccu.column_name AS foreign_column_name
|
||||||
|
FROM
|
||||||
|
information_schema.table_constraints AS tc
|
||||||
|
JOIN information_schema.key_column_usage AS kcu
|
||||||
|
ON tc.constraint_name = kcu.constraint_name
|
||||||
|
AND tc.table_schema = kcu.table_schema
|
||||||
|
JOIN information_schema.constraint_column_usage AS ccu
|
||||||
|
ON ccu.constraint_name = tc.constraint_name
|
||||||
|
AND ccu.table_schema = tc.table_schema
|
||||||
|
WHERE tc.constraint_type = 'FOREIGN KEY' AND tc.table_name=%s
|
||||||
|
""", (table_name,))
|
||||||
|
return cur.fetchall()
|
||||||
|
|
||||||
|
def main():
|
||||||
|
try:
|
||||||
|
with connect_to_db() as conn:
|
||||||
|
table_info = get_table_info(conn)
|
||||||
|
|
||||||
|
for table_name, info in table_info.items():
|
||||||
|
print(f"\n## Table: {table_name}")
|
||||||
|
|
||||||
|
print("\nPrimary Keys:")
|
||||||
|
if info['primary_keys']:
|
||||||
|
for pk in info['primary_keys']:
|
||||||
|
print(f"- {pk}")
|
||||||
|
else:
|
||||||
|
print("- No primary keys found")
|
||||||
|
|
||||||
|
print("\nForeign Keys:")
|
||||||
|
if info['foreign_keys']:
|
||||||
|
for fk in info['foreign_keys']:
|
||||||
|
print(f"- {fk[1]} -> {fk[2]}.{fk[3]} (Constraint: {fk[0]})")
|
||||||
|
else:
|
||||||
|
print("- No foreign keys found")
|
||||||
|
|
||||||
|
except psycopg2.Error as e:
|
||||||
|
print(f"Database error: {e}")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"An unexpected error occurred: {e}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
83 sijapi/helpers/db_repl.py (new file)
|
@ -0,0 +1,83 @@
|
||||||
|
import yaml
|
||||||
|
import subprocess
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def load_config():
|
||||||
|
with open('../config/sys.yaml', 'r') as file:
|
||||||
|
return yaml.safe_load(file)
|
||||||
|
|
||||||
|
def run_command(command):
|
||||||
|
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
|
||||||
|
stdout, stderr = process.communicate()
|
||||||
|
return process.returncode, stdout.decode(), stderr.decode()
|
||||||
|
|
||||||
|
def pg_dump(host, port, db_name, user, password, tables):
|
||||||
|
dump_command = f"PGPASSWORD={password} pg_dump -h {host} -p {port} -U {user} -d {db_name} -t {' -t '.join(tables)} -c --no-owner"
|
||||||
|
return run_command(dump_command)
|
||||||
|
|
||||||
|
def pg_restore(host, port, db_name, user, password, dump_data):
|
||||||
|
restore_command = f"PGPASSWORD={password} psql -h {host} -p {port} -U {user} -d {db_name}"
|
||||||
|
process = subprocess.Popen(restore_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
|
||||||
|
stdout, stderr = process.communicate(input=dump_data.encode())
|
||||||
|
return process.returncode, stdout.decode(), stderr.decode()
|
||||||
|
|
||||||
|
def check_postgres_version(host, port, user, password):
|
||||||
|
version_command = f"PGPASSWORD={password} psql -h {host} -p {port} -U {user} -c 'SELECT version();'"
|
||||||
|
returncode, stdout, stderr = run_command(version_command)
|
||||||
|
if returncode == 0:
|
||||||
|
return stdout.strip()
|
||||||
|
else:
|
||||||
|
return f"Error checking version: {stderr}"
|
||||||
|
|
||||||
|
def replicate_databases():
|
||||||
|
config = load_config()
|
||||||
|
pool = config['POOL']
|
||||||
|
tables_to_replicate = ['click_logs', 'dailyweather', 'hourlyweather', 'locations', 'short_urls']
|
||||||
|
|
||||||
|
source_db = pool[0]
|
||||||
|
target_dbs = pool[1:]
|
||||||
|
|
||||||
|
# Check source database version
|
||||||
|
source_version = check_postgres_version(source_db['ts_ip'], source_db['db_port'], source_db['db_user'], source_db['db_pass'])
|
||||||
|
print(f"Source database version: {source_version}")
|
||||||
|
|
||||||
|
for target_db in target_dbs:
|
||||||
|
print(f"\nReplicating to {target_db['ts_id']}...")
|
||||||
|
|
||||||
|
# Check target database version
|
||||||
|
target_version = check_postgres_version(target_db['ts_ip'], target_db['db_port'], target_db['db_user'], target_db['db_pass'])
|
||||||
|
print(f"Target database version: {target_version}")
|
||||||
|
|
||||||
|
# Perform dump
|
||||||
|
returncode, dump_data, stderr = pg_dump(
|
||||||
|
source_db['ts_ip'],
|
||||||
|
source_db['db_port'],
|
||||||
|
source_db['db_name'],
|
||||||
|
source_db['db_user'],
|
||||||
|
source_db['db_pass'],
|
||||||
|
tables_to_replicate
|
||||||
|
)
|
||||||
|
|
||||||
|
if returncode != 0:
|
||||||
|
print(f"Error during dump: {stderr}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Perform restore
|
||||||
|
returncode, stdout, stderr = pg_restore(
|
||||||
|
target_db['ts_ip'],
|
||||||
|
target_db['db_port'],
|
||||||
|
target_db['db_name'],
|
||||||
|
target_db['db_user'],
|
||||||
|
target_db['db_pass'],
|
||||||
|
dump_data
|
||||||
|
)
|
||||||
|
|
||||||
|
if returncode != 0:
|
||||||
|
print(f"Error during restore: {stderr}")
|
||||||
|
else:
|
||||||
|
print(f"Replication to {target_db['ts_id']} completed successfully.")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
replicate_databases()
|
||||||
|
|
76 sijapi/helpers/db_replicator.py (new executable file)
|
@ -0,0 +1,76 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import os
|
||||||
|
import yaml
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
def load_config():
|
||||||
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
sys_config_path = os.path.join(script_dir, '..', 'config', 'sys.yaml')
|
||||||
|
gis_config_path = os.path.join(script_dir, '..', 'config', 'gis.yaml')
|
||||||
|
|
||||||
|
with open(sys_config_path, 'r') as f:
|
||||||
|
sys_config = yaml.safe_load(f)
|
||||||
|
|
||||||
|
with open(gis_config_path, 'r') as f:
|
||||||
|
gis_config = yaml.safe_load(f)
|
||||||
|
|
||||||
|
return sys_config, gis_config
|
||||||
|
|
||||||
|
def replicate_table(source, targets, table_name):
|
||||||
|
print(f"Replicating {table_name}")
|
||||||
|
|
||||||
|
# Dump the table from the source
|
||||||
|
dump_command = [
|
||||||
|
'pg_dump',
|
||||||
|
'-h', source['ts_ip'],
|
||||||
|
'-p', str(source['db_port']),
|
||||||
|
'-U', source['db_user'],
|
||||||
|
'-d', source['db_name'],
|
||||||
|
'-t', table_name,
|
||||||
|
'--no-owner',
|
||||||
|
'--no-acl'
|
||||||
|
]
|
||||||
|
|
||||||
|
env = os.environ.copy()
|
||||||
|
env['PGPASSWORD'] = source['db_pass']
|
||||||
|
|
||||||
|
with open(f"{table_name}.sql", 'w') as f:
|
||||||
|
subprocess.run(dump_command, env=env, stdout=f, check=True)
|
||||||
|
|
||||||
|
# Restore the table to each target
|
||||||
|
for target in targets:
|
||||||
|
print(f"Replicating to {target['ts_id']}")
|
||||||
|
restore_command = [
|
||||||
|
'psql',
|
||||||
|
'-h', target['ts_ip'],
|
||||||
|
'-p', str(target['db_port']),
|
||||||
|
'-U', target['db_user'],
|
||||||
|
'-d', target['db_name'],
|
||||||
|
'-c', f"DROP TABLE IF EXISTS {table_name} CASCADE;",
|
||||||
|
'-f', f"{table_name}.sql"
|
||||||
|
]
|
||||||
|
|
||||||
|
env = os.environ.copy()
|
||||||
|
env['PGPASSWORD'] = target['db_pass']
|
||||||
|
|
||||||
|
subprocess.run(restore_command, env=env, check=True)
|
||||||
|
|
||||||
|
# Clean up the dump file
|
||||||
|
os.remove(f"{table_name}.sql")
|
||||||
|
|
||||||
|
def main():
|
||||||
|
sys_config, gis_config = load_config()
|
||||||
|
|
||||||
|
source_server = sys_config['POOL'][0]
|
||||||
|
target_servers = sys_config['POOL'][1:]
|
||||||
|
|
||||||
|
tables = [layer['table_name'] for layer in gis_config['layers']]
|
||||||
|
|
||||||
|
for table in tables:
|
||||||
|
replicate_table(source_server, target_servers, table)
|
||||||
|
|
||||||
|
print("Replication complete!")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
42510 sijapi/helpers/locations.sql (new file, diff suppressed because it is too large)
@@ -1,8 +1,12 @@
 import asyncio
 from pathlib import Path
-from sijapi import L, EMAIL_CONFIG, EMAIL_LOGS
-from sijapi.classes import EmailAccount
+from sijapi import EMAIL_CONFIG, EMAIL_LOGS
+from sijapi.utilities import EmailAccount
 from sijapi.routers import email
+from sijapi.logs import get_logger
+
+l = get_logger(__name__)


 async def initialize_log_files():
     summarized_log = EMAIL_LOGS / "summarized.txt"
@@ -11,13 +15,13 @@ async def initialize_log_files():
     for log_file in [summarized_log, autoresponded_log, diagnostic_log]:
         log_file.parent.mkdir(parents=True, exist_ok=True)
         log_file.write_text("")
-    L.DEBUG(f"Log files initialized: {summarized_log}, {autoresponded_log}, {diagnostic_log}")
+    l.debug(f"Log files initialized: {summarized_log}, {autoresponded_log}, {diagnostic_log}")
     return summarized_log, autoresponded_log, diagnostic_log


 async def process_all_emails(account: EmailAccount, summarized_log: Path, autoresponded_log: Path, diagnostic_log: Path):
     try:
         with email.get_imap_connection(account) as inbox:
-            L.DEBUG(f"Connected to {account.name}, processing all emails...")
+            l.debug(f"Connected to {account.name}, processing all emails...")
             all_messages = inbox.messages()
             unread_messages = set(uid for uid, _ in inbox.messages(unread=True))

@@ -41,15 +45,15 @@ async def process_all_emails(account: EmailAccount, summarized_log: Path, autore
             with open(log_file, 'a') as f:
                 f.write(f"{id_str}\n")

-        L.INFO(f"Processed {processed_count} non-unread emails for account {account.name}")
+        l.info(f"Processed {processed_count} non-unread emails for account {account.name}")
     except Exception as e:
-        L.logger.error(f"An error occurred while processing emails for account {account.name}: {e}")
+        l.logger.error(f"An error occurred while processing emails for account {account.name}: {e}")


 async def main():
     email_accounts = email.load_email_accounts(EMAIL_CONFIG)
     summarized_log, autoresponded_log, diagnostic_log = await initialize_log_files()

-    L.DEBUG(f"Processing {len(email_accounts)} email accounts")
+    l.debug(f"Processing {len(email_accounts)} email accounts")

     tasks = [process_all_emails(account, summarized_log, autoresponded_log, diagnostic_log) for account in email_accounts]
     await asyncio.gather(*tasks)
@@ -57,7 +61,7 @@ async def main():
     # Final verification
     with open(summarized_log, 'r') as f:
         final_count = len(f.readlines())
-    L.INFO(f"Final non-unread email count: {final_count}")
+    l.info(f"Final non-unread email count: {final_count}")

 if __name__ == "__main__":
     asyncio.run(main())
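The diff above is representative of the logging migration in this commit: module-level L.DEBUG/L.INFO calls give way to a per-module logger obtained from sijapi.logs.get_logger. Below is a minimal sketch of the new pattern as it is used here; the function body is illustrative, and only the import and the debug/info calls mirror what the diff itself shows.

# logging_pattern_sketch.py - illustrative; mirrors the calls visible in the diff above
from sijapi.logs import get_logger

l = get_logger(__name__)

async def do_work(items):
    l.debug(f"Processing {len(items)} items")   # replaces the old L.DEBUG(...)
    processed = 0
    for item in items:
        processed += 1
    l.info(f"Processed {processed} items")      # replaces the old L.INFO(...)
    return processed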
191 sijapi/helpers/migrate_db_to_uuid.py (new file)
|
@ -0,0 +1,191 @@
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2 import sql
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def connect_to_db():
|
||||||
|
return psycopg2.connect(
|
||||||
|
dbname='sij',
|
||||||
|
user='sij',
|
||||||
|
password='Synchr0!',
|
||||||
|
host='localhost'
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_tables(cur):
|
||||||
|
cur.execute("""
|
||||||
|
SELECT table_name
|
||||||
|
FROM information_schema.tables
|
||||||
|
WHERE table_schema = 'public' AND table_type = 'BASE TABLE'
|
||||||
|
AND table_name NOT LIKE '%_uuid' AND table_name NOT LIKE '%_orig'
|
||||||
|
AND table_name != 'spatial_ref_sys'
|
||||||
|
""")
|
||||||
|
return [row[0] for row in cur.fetchall()]
|
||||||
|
|
||||||
|
def get_columns(cur, table_name):
|
||||||
|
cur.execute("""
|
||||||
|
SELECT column_name, udt_name,
|
||||||
|
is_nullable, column_default,
|
||||||
|
character_maximum_length, numeric_precision, numeric_scale
|
||||||
|
FROM information_schema.columns
|
||||||
|
WHERE table_name = %s
|
||||||
|
ORDER BY ordinal_position
|
||||||
|
""", (table_name,))
|
||||||
|
return cur.fetchall()
|
||||||
|
|
||||||
|
def get_constraints(cur, table_name):
|
||||||
|
cur.execute("""
|
||||||
|
SELECT conname, contype, pg_get_constraintdef(c.oid)
|
||||||
|
FROM pg_constraint c
|
||||||
|
JOIN pg_namespace n ON n.oid = c.connamespace
|
||||||
|
WHERE conrelid = %s::regclass
|
||||||
|
AND n.nspname = 'public'
|
||||||
|
""", (table_name,))
|
||||||
|
return cur.fetchall()
|
||||||
|
|
||||||
|
def drop_table_if_exists(cur, table_name):
|
||||||
|
cur.execute(sql.SQL("DROP TABLE IF EXISTS {} CASCADE").format(sql.Identifier(table_name)))
|
||||||
|
|
||||||
|
def create_uuid_table(cur, old_table, new_table):
|
||||||
|
drop_table_if_exists(cur, new_table)
|
||||||
|
columns = get_columns(cur, old_table)
|
||||||
|
constraints = get_constraints(cur, old_table)
|
||||||
|
|
||||||
|
column_defs = []
|
||||||
|
has_id_column = any(col[0] == 'id' for col in columns)
|
||||||
|
|
||||||
|
for col in columns:
|
||||||
|
col_name, udt_name, is_nullable, default, max_length, precision, scale = col
|
||||||
|
if col_name == 'id' and has_id_column:
|
||||||
|
column_defs.append(sql.SQL("{} UUID PRIMARY KEY DEFAULT gen_random_uuid()").format(sql.Identifier(col_name)))
|
||||||
|
else:
|
||||||
|
type_sql = sql.SQL("{}").format(sql.Identifier(udt_name))
|
||||||
|
if max_length:
|
||||||
|
type_sql = sql.SQL("{}({})").format(type_sql, sql.Literal(max_length))
|
||||||
|
elif precision and scale:
|
||||||
|
type_sql = sql.SQL("{}({},{})").format(type_sql, sql.Literal(precision), sql.Literal(scale))
|
||||||
|
|
||||||
|
column_def = sql.SQL("{} {}").format(sql.Identifier(col_name), type_sql)
|
||||||
|
if is_nullable == 'NO':
|
||||||
|
column_def = sql.SQL("{} NOT NULL").format(column_def)
|
||||||
|
if default and 'nextval' not in default: # Skip auto-increment defaults
|
||||||
|
column_def = sql.SQL("{} DEFAULT {}").format(column_def, sql.SQL(default))
|
||||||
|
column_defs.append(column_def)
|
||||||
|
|
||||||
|
constraint_defs = []
|
||||||
|
for constraint in constraints:
|
||||||
|
conname, contype, condef = constraint
|
||||||
|
if contype != 'p' or not has_id_column: # Keep primary key if there's no id column
|
||||||
|
constraint_defs.append(sql.SQL(condef))
|
||||||
|
|
||||||
|
if not has_id_column:
|
||||||
|
column_defs.append(sql.SQL("uuid UUID DEFAULT gen_random_uuid()"))
|
||||||
|
|
||||||
|
query = sql.SQL("CREATE TABLE {} ({})").format(
|
||||||
|
sql.Identifier(new_table),
|
||||||
|
sql.SQL(", ").join(column_defs + constraint_defs)
|
||||||
|
)
|
||||||
|
cur.execute(query)
|
||||||
|
|
||||||
|
def migrate_data(cur, old_table, new_table):
|
||||||
|
columns = get_columns(cur, old_table)
|
||||||
|
column_names = [col[0] for col in columns]
|
||||||
|
has_id_column = 'id' in column_names
|
||||||
|
|
||||||
|
if has_id_column:
|
||||||
|
column_names.remove('id')
|
||||||
|
old_cols = sql.SQL(", ").join(map(sql.Identifier, column_names))
|
||||||
|
new_cols = sql.SQL(", ").join(map(sql.Identifier, ['id'] + column_names))
|
||||||
|
query = sql.SQL("INSERT INTO {} ({}) SELECT gen_random_uuid(), {} FROM {}").format(
|
||||||
|
sql.Identifier(new_table),
|
||||||
|
new_cols,
|
||||||
|
old_cols,
|
||||||
|
sql.Identifier(old_table)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
old_cols = sql.SQL(", ").join(map(sql.Identifier, column_names))
|
||||||
|
new_cols = sql.SQL(", ").join(map(sql.Identifier, column_names + ['uuid']))
|
||||||
|
query = sql.SQL("INSERT INTO {} ({}) SELECT {}, gen_random_uuid() FROM {}").format(
|
||||||
|
sql.Identifier(new_table),
|
||||||
|
new_cols,
|
||||||
|
old_cols,
|
||||||
|
sql.Identifier(old_table)
|
||||||
|
)
|
||||||
|
cur.execute(query)
|
||||||
|
|
||||||
|
def update_foreign_keys(cur, tables):
|
||||||
|
for table in tables:
|
||||||
|
constraints = get_constraints(cur, table)
|
||||||
|
for constraint in constraints:
|
||||||
|
conname, contype, condef = constraint
|
||||||
|
if contype == 'f': # Foreign key constraint
|
||||||
|
referenced_table = condef.split('REFERENCES ')[1].split('(')[0].strip()
|
||||||
|
referenced_column = condef.split('(')[2].split(')')[0].strip()
|
||||||
|
local_column = condef.split('(')[1].split(')')[0].strip()
|
||||||
|
|
||||||
|
cur.execute(sql.SQL("""
|
||||||
|
UPDATE {table_uuid}
|
||||||
|
SET {local_column} = subquery.new_id::text::{local_column_type}
|
||||||
|
FROM (
|
||||||
|
SELECT old.{ref_column} AS old_id, new_table.id AS new_id
|
||||||
|
FROM {ref_table} old
|
||||||
|
JOIN public.{ref_table_uuid} new_table ON new_table.{ref_column}::text = old.{ref_column}::text
|
||||||
|
) AS subquery
|
||||||
|
WHERE {local_column}::text = subquery.old_id::text
|
||||||
|
""").format(
|
||||||
|
table_uuid=sql.Identifier(f"{table}_uuid"),
|
||||||
|
local_column=sql.Identifier(local_column),
|
||||||
|
local_column_type=sql.SQL(get_column_type(cur, f"{table}_uuid", local_column)),
|
||||||
|
ref_column=sql.Identifier(referenced_column),
|
||||||
|
ref_table=sql.Identifier(referenced_table),
|
||||||
|
ref_table_uuid=sql.Identifier(f"{referenced_table}_uuid")
|
||||||
|
))
|
||||||
|
|
||||||
|
def get_column_type(cur, table_name, column_name):
|
||||||
|
cur.execute("""
|
||||||
|
SELECT data_type
|
||||||
|
FROM information_schema.columns
|
||||||
|
WHERE table_name = %s AND column_name = %s
|
||||||
|
""", (table_name, column_name))
|
||||||
|
return cur.fetchone()[0]
|
||||||
|
|
||||||
|
def rename_tables(cur, tables):
|
||||||
|
for table in tables:
|
||||||
|
drop_table_if_exists(cur, f"{table}_orig")
|
||||||
|
cur.execute(sql.SQL("ALTER TABLE IF EXISTS {} RENAME TO {}").format(
|
||||||
|
sql.Identifier(table), sql.Identifier(f"{table}_orig")
|
||||||
|
))
|
||||||
|
cur.execute(sql.SQL("ALTER TABLE IF EXISTS {} RENAME TO {}").format(
|
||||||
|
sql.Identifier(f"{table}_uuid"), sql.Identifier(table)
|
||||||
|
))
|
||||||
|
|
||||||
|
def main():
|
||||||
|
try:
|
||||||
|
with connect_to_db() as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
tables = get_tables(cur)
|
||||||
|
|
||||||
|
# Create new UUID tables
|
||||||
|
for table in tables:
|
||||||
|
print(f"Creating UUID table for {table}...")
|
||||||
|
create_uuid_table(cur, table, f"{table}_uuid")
|
||||||
|
|
||||||
|
# Migrate data
|
||||||
|
for table in tables:
|
||||||
|
print(f"Migrating data for {table}...")
|
||||||
|
migrate_data(cur, table, f"{table}_uuid")
|
||||||
|
|
||||||
|
# Update foreign keys
|
||||||
|
print("Updating foreign key references...")
|
||||||
|
update_foreign_keys(cur, tables)
|
||||||
|
|
||||||
|
# Rename tables
|
||||||
|
print("Renaming tables...")
|
||||||
|
rename_tables(cur, tables)
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
print("Migration completed successfully.")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"An error occurred: {e}")
|
||||||
|
conn.rollback()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
366 sijapi/helpers/plss.py (new file)
|
@ -0,0 +1,366 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import yaml
|
||||||
|
from requests.adapters import HTTPAdapter
|
||||||
|
from urllib3.util.retry import Retry
|
||||||
|
import argparse
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extras import execute_values
|
||||||
|
|
||||||
|
def load_config():
|
||||||
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
sys_config_path = os.path.join(script_dir, '..', 'config', 'sys.yaml')
|
||||||
|
gis_config_path = os.path.join(script_dir, '..', 'config', 'gis.yaml')
|
||||||
|
|
||||||
|
with open(sys_config_path, 'r') as f:
|
||||||
|
sys_config = yaml.safe_load(f)
|
||||||
|
|
||||||
|
with open(gis_config_path, 'r') as f:
|
||||||
|
gis_config = yaml.safe_load(f)
|
||||||
|
|
||||||
|
return sys_config, gis_config
|
||||||
|
|
||||||
|
def get_db_config(sys_config):
|
||||||
|
pool = sys_config.get('POOL', [])
|
||||||
|
if pool:
|
||||||
|
db_config = pool[0]
|
||||||
|
return {
|
||||||
|
'DB_NAME': db_config.get('db_name'),
|
||||||
|
'DB_USER': db_config.get('db_user'),
|
||||||
|
'DB_PASSWORD': db_config.get('db_pass'),
|
||||||
|
'DB_HOST': db_config.get('ts_ip'),
|
||||||
|
'DB_PORT': str(db_config.get('db_port'))
|
||||||
|
}
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def get_feature_count(url):
|
||||||
|
params = {
|
||||||
|
'where': '1=1',
|
||||||
|
'returnCountOnly': 'true',
|
||||||
|
'f': 'json'
|
||||||
|
}
|
||||||
|
retries = Retry(total=10, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
|
||||||
|
with requests.Session() as session:
|
||||||
|
session.mount("https://", HTTPAdapter(max_retries=retries))
|
||||||
|
response = session.get(url, params=params, timeout=30)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
return data.get('count', 0)
|
||||||
|
|
||||||
|
def fetch_features(url, offset, num, max_retries=5):
|
||||||
|
params = {
|
||||||
|
'where': '1=1',
|
||||||
|
'outFields': '*',
|
||||||
|
'geometryPrecision': 6,
|
||||||
|
'outSR': 4326,
|
||||||
|
'f': 'json',
|
||||||
|
'resultOffset': offset,
|
||||||
|
'resultRecordCount': num,
|
||||||
|
'orderByFields': 'OBJECTID'
|
||||||
|
}
|
||||||
|
for attempt in range(max_retries):
|
||||||
|
try:
|
||||||
|
retries = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
|
||||||
|
with requests.Session() as session:
|
||||||
|
session.mount("https://", HTTPAdapter(max_retries=retries))
|
||||||
|
response = session.get(url, params=params, timeout=30)
|
||||||
|
response.raise_for_status()
|
||||||
|
return response.json()
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
print(f"Error fetching features (attempt {attempt + 1}/{max_retries}): {e}")
|
||||||
|
if attempt == max_retries - 1:
|
||||||
|
raise
|
||||||
|
time.sleep(5 * (attempt + 1)) # Exponential backoff
|
||||||
|
|
||||||
|
|
||||||
|
def create_table(db_config, table_name, gis_config):
|
||||||
|
conn = psycopg2.connect(
|
||||||
|
dbname=db_config['DB_NAME'],
|
||||||
|
user=db_config['DB_USER'],
|
||||||
|
password=db_config['DB_PASSWORD'],
|
||||||
|
host=db_config['DB_HOST'],
|
||||||
|
port=db_config['DB_PORT']
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
# Check if the table already exists
|
||||||
|
cur.execute(f"SELECT to_regclass('{table_name}')")
|
||||||
|
if cur.fetchone()[0] is None:
|
||||||
|
# If the table doesn't exist, create it based on the first feature
|
||||||
|
url = next(layer['url'] for layer in gis_config['layers'] if layer['table_name'] == table_name)
|
||||||
|
first_feature = fetch_features(url, 0, 1)['features'][0]
|
||||||
|
columns = []
|
||||||
|
for attr, value in first_feature['attributes'].items():
|
||||||
|
column_name = attr.lower().replace('.', '_').replace('()', '')
|
||||||
|
if isinstance(value, int):
|
||||||
|
columns.append(f'"{column_name}" INTEGER')
|
||||||
|
elif isinstance(value, float):
|
||||||
|
columns.append(f'"{column_name}" DOUBLE PRECISION')
|
||||||
|
else:
|
||||||
|
columns.append(f'"{column_name}" TEXT')
|
||||||
|
|
||||||
|
create_sql = f"""
|
||||||
|
CREATE TABLE {table_name} (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
geom GEOMETRY(Polygon, 4326),
|
||||||
|
{', '.join(columns)}
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
cur.execute(create_sql)
|
||||||
|
|
||||||
|
# Create index on plssid
|
||||||
|
cur.execute(f'CREATE INDEX idx_{table_name.split(".")[-1]}_plssid ON {table_name}("plssid")')
|
||||||
|
|
||||||
|
print(f"Created table: {table_name}")
|
||||||
|
else:
|
||||||
|
print(f"Table {table_name} already exists")
|
||||||
|
conn.commit()
|
||||||
|
except psycopg2.Error as e:
|
||||||
|
print(f"Error creating table {table_name}: {e}")
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def insert_features_to_db(features, table_name, db_config):
|
||||||
|
conn = psycopg2.connect(
|
||||||
|
dbname=db_config['DB_NAME'],
|
||||||
|
user=db_config['DB_USER'],
|
||||||
|
password=db_config['DB_PASSWORD'],
|
||||||
|
host=db_config['DB_HOST'],
|
||||||
|
port=db_config['DB_PORT']
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
# Get the column names from the table
|
||||||
|
cur.execute(f"SELECT column_name FROM information_schema.columns WHERE table_name = '{table_name.split('.')[-1]}'")
|
||||||
|
db_columns = [row[0] for row in cur.fetchall() if row[0] != 'id']
|
||||||
|
|
||||||
|
# Prepare the SQL statement
|
||||||
|
sql = f"""
|
||||||
|
INSERT INTO {table_name} ({', '.join([f'"{col}"' for col in db_columns])})
|
||||||
|
VALUES %s
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Prepare the template for execute_values
|
||||||
|
template = f"({', '.join(['%s' for _ in db_columns])})"
|
||||||
|
|
||||||
|
values = []
|
||||||
|
for feature in features:
|
||||||
|
geom = feature.get('geometry')
|
||||||
|
attrs = feature.get('attributes')
|
||||||
|
if geom and attrs:
|
||||||
|
rings = geom['rings'][0]
|
||||||
|
wkt = f"POLYGON(({','.join([f'{x} {y}' for x, y in rings])}))"
|
||||||
|
|
||||||
|
row = []
|
||||||
|
for col in db_columns:
|
||||||
|
if col == 'geom':
|
||||||
|
row.append(wkt)
|
||||||
|
else:
|
||||||
|
# Map database column names back to original attribute names
|
||||||
|
attr_name = col.upper()
|
||||||
|
if attr_name == 'SHAPE_STAREA':
|
||||||
|
attr_name = 'Shape.STArea()'
|
||||||
|
elif attr_name == 'SHAPE_STLENGTH':
|
||||||
|
attr_name = 'Shape.STLength()'
|
||||||
|
row.append(attrs.get(attr_name))
|
||||||
|
|
||||||
|
values.append(tuple(row))
|
||||||
|
else:
|
||||||
|
print(f"Skipping invalid feature: {feature}")
|
||||||
|
|
||||||
|
if values:
|
||||||
|
execute_values(cur, sql, values, template=template, page_size=100)
|
||||||
|
print(f"Inserted {len(values)} features")
|
||||||
|
else:
|
||||||
|
print("No valid features to insert")
|
||||||
|
conn.commit()
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error inserting features: {e}")
|
||||||
|
print(f"First feature for debugging: {features[0] if features else 'No features'}")
|
||||||
|
conn.rollback()
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def download_and_import_layer(layer_config, db_config, gis_config, force_refresh):
|
||||||
|
url = layer_config['url']
|
||||||
|
layer_name = layer_config['layer_name']
|
||||||
|
table_name = layer_config['table_name']
|
||||||
|
batch_size = layer_config['batch_size']
|
||||||
|
delay = layer_config['delay'] / 1000 # Convert to seconds
|
||||||
|
|
||||||
|
total_count = get_feature_count(url)
|
||||||
|
print(f"Total {layer_name} features: {total_count}")
|
||||||
|
|
||||||
|
# Check existing records in the database
|
||||||
|
existing_count = get_existing_record_count(db_config, table_name)
|
||||||
|
|
||||||
|
if existing_count == total_count and not force_refresh:
|
||||||
|
print(f"Table {table_name} already contains all {total_count} features. Skipping.")
|
||||||
|
return
|
||||||
|
|
||||||
|
if force_refresh:
|
||||||
|
delete_existing_table(db_config, table_name)
|
||||||
|
create_table(db_config, table_name, gis_config)
|
||||||
|
existing_count = 0
|
||||||
|
elif existing_count == 0:
|
||||||
|
create_table(db_config, table_name, gis_config)
|
||||||
|
|
||||||
|
offset = existing_count
|
||||||
|
|
||||||
|
start_time = time.time()
|
||||||
|
try:
|
||||||
|
while offset < total_count:
|
||||||
|
batch_start_time = time.time()
|
||||||
|
print(f"Fetching {layer_name} features {offset} to {offset + batch_size}...")
|
||||||
|
try:
|
||||||
|
data = fetch_features(url, offset, batch_size)
|
||||||
|
new_features = data.get('features', [])
|
||||||
|
if not new_features:
|
||||||
|
break
|
||||||
|
|
||||||
|
insert_features_to_db(new_features, table_name, db_config)
|
||||||
|
offset += len(new_features)
|
||||||
|
|
||||||
|
batch_end_time = time.time()
|
||||||
|
batch_duration = batch_end_time - batch_start_time
|
||||||
|
print(f"Batch processed in {batch_duration:.2f} seconds")
|
||||||
|
|
||||||
|
# Progress indicator
|
||||||
|
progress = offset / total_count
|
||||||
|
bar_length = 30
|
||||||
|
filled_length = int(bar_length * progress)
|
||||||
|
bar = '=' * filled_length + '-' * (bar_length - filled_length)
|
||||||
|
print(f'\rProgress: [{bar}] {progress:.1%} ({offset}/{total_count} features)', end='', flush=True)
|
||||||
|
|
||||||
|
time.sleep(delay)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"\nError processing batch starting at offset {offset}: {e}")
|
||||||
|
print("Continuing with next batch...")
|
||||||
|
offset += batch_size
|
||||||
|
|
||||||
|
end_time = time.time()
|
||||||
|
total_duration = end_time - start_time
|
||||||
|
print(f"\nTotal {layer_name} features fetched and imported: {offset}")
|
||||||
|
print(f"Total time: {total_duration:.2f} seconds")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"\nError during download and import: {e}")
|
||||||
|
print(f"Last successful offset: {offset}")
|
||||||
|
|
||||||
|
def get_existing_record_count(db_config, table_name):
|
||||||
|
conn = psycopg2.connect(
|
||||||
|
dbname=db_config['DB_NAME'],
|
||||||
|
user=db_config['DB_USER'],
|
||||||
|
password=db_config['DB_PASSWORD'],
|
||||||
|
host=db_config['DB_HOST'],
|
||||||
|
port=db_config['DB_PORT']
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute(f"SELECT COUNT(*) FROM {table_name}")
|
||||||
|
count = cur.fetchone()[0]
|
||||||
|
return count
|
||||||
|
except psycopg2.Error:
|
||||||
|
return 0
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
def delete_existing_table(db_config, table_name):
|
||||||
|
conn = psycopg2.connect(
|
||||||
|
dbname=db_config['DB_NAME'],
|
||||||
|
user=db_config['DB_USER'],
|
||||||
|
password=db_config['DB_PASSWORD'],
|
||||||
|
host=db_config['DB_HOST'],
|
||||||
|
port=db_config['DB_PORT']
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
# Drop the index if it exists
|
||||||
|
cur.execute(f"DROP INDEX IF EXISTS idx_{table_name.split('.')[-1]}_plssid")
|
||||||
|
|
||||||
|
# Then drop the table
|
||||||
|
cur.execute(f"DROP TABLE IF EXISTS {table_name} CASCADE")
|
||||||
|
conn.commit()
|
||||||
|
print(f"Deleted existing table and index: {table_name}")
|
||||||
|
except psycopg2.Error as e:
|
||||||
|
print(f"Error deleting table {table_name}: {e}")
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def check_postgres_connection(db_config):
|
||||||
|
try:
|
||||||
|
subprocess.run(['psql',
|
||||||
|
'-h', db_config['DB_HOST'],
|
||||||
|
'-p', db_config['DB_PORT'],
|
||||||
|
'-U', db_config['DB_USER'],
|
||||||
|
'-d', db_config['DB_NAME'],
|
||||||
|
'-c', 'SELECT 1;'],
|
||||||
|
check=True, capture_output=True, text=True)
|
||||||
|
return True
|
||||||
|
except subprocess.CalledProcessError:
|
||||||
|
return False
|
||||||
|
|
||||||
|
def check_postgis_extension(db_config):
|
||||||
|
try:
|
||||||
|
result = subprocess.run(['psql',
|
||||||
|
'-h', db_config['DB_HOST'],
|
||||||
|
'-p', db_config['DB_PORT'],
|
||||||
|
'-U', db_config['DB_USER'],
|
||||||
|
'-d', db_config['DB_NAME'],
|
||||||
|
'-c', "SELECT 1 FROM pg_extension WHERE extname = 'postgis';"],
|
||||||
|
check=True, capture_output=True, text=True)
|
||||||
|
return '1' in result.stdout
|
||||||
|
except subprocess.CalledProcessError:
|
||||||
|
return False
|
||||||
|
|
||||||
|
def create_postgis_extension(db_config):
|
||||||
|
try:
|
||||||
|
subprocess.run(['psql',
|
||||||
|
'-h', db_config['DB_HOST'],
|
||||||
|
'-p', db_config['DB_PORT'],
|
||||||
|
'-U', db_config['DB_USER'],
|
||||||
|
'-d', db_config['DB_NAME'],
|
||||||
|
'-c', "CREATE EXTENSION IF NOT EXISTS postgis;"],
|
||||||
|
check=True, capture_output=True, text=True)
|
||||||
|
print("PostGIS extension created successfully.")
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
print(f"Error creating PostGIS extension: {e}")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(description="Download and import PLSS data")
|
||||||
|
parser.add_argument("--force-refresh", nargs='*', help="Force refresh of specified layers or all if none specified")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
sys_config, gis_config = load_config()
|
||||||
|
db_config = get_db_config(sys_config)
|
||||||
|
|
||||||
|
if not check_postgres_connection(db_config):
|
||||||
|
print("Error: Unable to connect to PostgreSQL. Please check your connection settings.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
if not check_postgis_extension(db_config):
|
||||||
|
print("PostGIS extension not found. Attempting to create it...")
|
||||||
|
create_postgis_extension(db_config)
|
||||||
|
|
||||||
|
try:
|
||||||
|
for layer in gis_config['layers']:
|
||||||
|
if args.force_refresh is None or not args.force_refresh or layer['layer_name'] in args.force_refresh:
|
||||||
|
download_and_import_layer(layer, db_config, gis_config, bool(args.force_refresh))
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
print(f"Error fetching data: {e}")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"An unexpected error occurred: {e}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
101 sijapi/helpers/repair_weather_db.py (new file)
|
@ -0,0 +1,101 @@
|
||||||
|
import asyncio
|
||||||
|
import asyncpg
|
||||||
|
import yaml
|
||||||
|
from pathlib import Path
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
async def load_config():
|
||||||
|
config_path = Path(__file__).parent.parent / 'config' / 'db.yaml'
|
||||||
|
with open(config_path, 'r') as file:
|
||||||
|
return yaml.safe_load(file)
|
||||||
|
|
||||||
|
async def get_table_size(conn, table_name):
|
||||||
|
return await conn.fetchval(f"SELECT COUNT(*) FROM {table_name}")
|
||||||
|
|
||||||
|
async def check_postgres_version(conn):
|
||||||
|
return await conn.fetchval("SELECT version()")
|
||||||
|
|
||||||
|
async def replicate_table(source, target, table_name):
|
||||||
|
print(f"Replicating {table_name} from {source['ts_id']} to {target['ts_id']}")
|
||||||
|
|
||||||
|
source_conn = await asyncpg.connect(**{k: source[k] for k in ['db_name', 'db_user', 'db_pass', 'ts_ip', 'db_port']})
|
||||||
|
target_conn = await asyncpg.connect(**{k: target[k] for k in ['db_name', 'db_user', 'db_pass', 'ts_ip', 'db_port']})
|
||||||
|
|
||||||
|
try:
|
||||||
|
source_version = await check_postgres_version(source_conn)
|
||||||
|
target_version = await check_postgres_version(target_conn)
|
||||||
|
print(f"Source database version: {source_version}")
|
||||||
|
print(f"Target database version: {target_version}")
|
||||||
|
|
||||||
|
table_size = await get_table_size(source_conn, table_name)
|
||||||
|
print(f"Table size: {table_size} rows")
|
||||||
|
|
||||||
|
# Dump the table
|
||||||
|
dump_command = [
|
||||||
|
'pg_dump',
|
||||||
|
'-h', source['ts_ip'],
|
||||||
|
'-p', str(source['db_port']),
|
||||||
|
'-U', source['db_user'],
|
||||||
|
'-d', source['db_name'],
|
||||||
|
'-t', table_name,
|
||||||
|
'--no-owner',
|
||||||
|
'--no-acl'
|
||||||
|
]
|
||||||
|
env = {'PGPASSWORD': source['db_pass']}
|
||||||
|
dump_result = subprocess.run(dump_command, env=env, capture_output=True, text=True)
|
||||||
|
|
||||||
|
if dump_result.returncode != 0:
|
||||||
|
raise Exception(f"Dump failed: {dump_result.stderr}")
|
||||||
|
|
||||||
|
print("Dump completed successfully")
|
||||||
|
|
||||||
|
# Drop and recreate the table on the target
|
||||||
|
await target_conn.execute(f"DROP TABLE IF EXISTS {table_name} CASCADE")
|
||||||
|
print(f"Dropped table {table_name} on target")
|
||||||
|
|
||||||
|
# Restore the table
|
||||||
|
restore_command = [
|
||||||
|
'psql',
|
||||||
|
'-h', target['ts_ip'],
|
||||||
|
'-p', str(target['db_port']),
|
||||||
|
'-U', target['db_user'],
|
||||||
|
'-d', target['db_name'],
|
||||||
|
]
|
||||||
|
env = {'PGPASSWORD': target['db_pass']}
|
||||||
|
restore_result = subprocess.run(restore_command, input=dump_result.stdout, env=env, capture_output=True, text=True)
|
||||||
|
|
||||||
|
if restore_result.returncode != 0:
|
||||||
|
raise Exception(f"Restore failed: {restore_result.stderr}")
|
||||||
|
|
||||||
|
print(f"Table {table_name} restored successfully")
|
||||||
|
|
||||||
|
# Verify the number of rows in the target table
|
||||||
|
target_size = await get_table_size(target_conn, table_name)
|
||||||
|
if target_size == table_size:
|
||||||
|
print(f"Replication successful. {target_size} rows copied.")
|
||||||
|
else:
|
||||||
|
print(f"Warning: Source had {table_size} rows, but target has {target_size} rows.")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"An error occurred while replicating {table_name}: {str(e)}")
|
||||||
|
finally:
|
||||||
|
await source_conn.close()
|
||||||
|
await target_conn.close()
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
config = await load_config()
|
||||||
|
source_server = config['POOL'][0] # sij-mbp16
|
||||||
|
target_servers = config['POOL'][1:] # sij-vm and sij-vps
|
||||||
|
|
||||||
|
tables_to_replicate = [
|
||||||
|
'click_logs', 'dailyweather', 'hourlyweather', 'locations', 'short_urls'
|
||||||
|
]
|
||||||
|
|
||||||
|
for table_name in tables_to_replicate:
|
||||||
|
for target_server in target_servers:
|
||||||
|
await replicate_table(source_server, target_server, table_name)
|
||||||
|
|
||||||
|
print("All replications completed!")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
132 sijapi/helpers/repl.py (new file)
|
@ -0,0 +1,132 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import os
|
||||||
|
import yaml
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
def load_config():
|
||||||
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
sys_config_path = os.path.join(script_dir, '..', 'config', 'sys.yaml')
|
||||||
|
gis_config_path = os.path.join(script_dir, '..', 'config', 'gis.yaml')
|
||||||
|
|
||||||
|
with open(sys_config_path, 'r') as f:
|
||||||
|
sys_config = yaml.safe_load(f)
|
||||||
|
|
||||||
|
with open(gis_config_path, 'r') as f:
|
||||||
|
gis_config = yaml.safe_load(f)
|
||||||
|
|
||||||
|
return sys_config, gis_config
|
||||||
|
|
||||||
|
def get_table_size(server, table_name):
|
||||||
|
env = os.environ.copy()
|
||||||
|
env['PGPASSWORD'] = server['db_pass']
|
||||||
|
|
||||||
|
command = [
|
||||||
|
'psql',
|
||||||
|
'-h', server['ts_ip'],
|
||||||
|
'-p', str(server['db_port']),
|
||||||
|
'-U', server['db_user'],
|
||||||
|
'-d', server['db_name'],
|
||||||
|
'-t',
|
||||||
|
'-c', f"SELECT COUNT(*) FROM {table_name}"
|
||||||
|
]
|
||||||
|
|
||||||
|
result = subprocess.run(command, env=env, capture_output=True, text=True, check=True)
|
||||||
|
return int(result.stdout.strip())
|
||||||
|
|
||||||
|
def replicate_table(source, targets, table_name):
|
||||||
|
print(f"Replicating {table_name}")
|
||||||
|
|
||||||
|
# Get table size for progress bar
|
||||||
|
table_size = get_table_size(source, table_name)
|
||||||
|
print(f"Table size: {table_size} rows")
|
||||||
|
|
||||||
|
# Dump the table from the source
|
||||||
|
dump_command = [
|
||||||
|
'pg_dump',
|
||||||
|
'-h', source['ts_ip'],
|
||||||
|
'-p', str(source['db_port']),
|
||||||
|
'-U', source['db_user'],
|
||||||
|
'-d', source['db_name'],
|
||||||
|
'-t', table_name,
|
||||||
|
'--no-owner',
|
||||||
|
'--no-acl'
|
||||||
|
]
|
||||||
|
|
||||||
|
env = os.environ.copy()
|
||||||
|
env['PGPASSWORD'] = source['db_pass']
|
||||||
|
|
||||||
|
print("Dumping table...")
|
||||||
|
with open(f"{table_name}.sql", 'w') as f:
|
||||||
|
subprocess.run(dump_command, env=env, stdout=f, check=True)
|
||||||
|
print("Dump complete")
|
||||||
|
|
||||||
|
# Restore the table to each target
|
||||||
|
for target in targets:
|
||||||
|
print(f"Replicating to {target['ts_id']}")
|
||||||
|
|
||||||
|
# Drop table and its sequence
|
||||||
|
drop_commands = [
|
||||||
|
f"DROP TABLE IF EXISTS {table_name} CASCADE;",
|
||||||
|
f"DROP SEQUENCE IF EXISTS {table_name}_id_seq CASCADE;"
|
||||||
|
]
|
||||||
|
|
||||||
|
restore_command = [
|
||||||
|
'psql',
|
||||||
|
'-h', target['ts_ip'],
|
||||||
|
'-p', str(target['db_port']),
|
||||||
|
'-U', target['db_user'],
|
||||||
|
'-d', target['db_name'],
|
||||||
|
]
|
||||||
|
|
||||||
|
env = os.environ.copy()
|
||||||
|
env['PGPASSWORD'] = target['db_pass']
|
||||||
|
|
||||||
|
# Execute drop commands
|
||||||
|
for cmd in drop_commands:
|
||||||
|
print(f"Executing: {cmd}")
|
||||||
|
subprocess.run(restore_command + ['-c', cmd], env=env, check=True)
|
||||||
|
|
||||||
|
# Restore the table
|
||||||
|
print("Restoring table...")
|
||||||
|
process = subprocess.Popen(restore_command + ['-f', f"{table_name}.sql"], env=env,
|
||||||
|
stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
|
||||||
|
|
||||||
|
pbar = tqdm(total=table_size, desc="Copying rows")
|
||||||
|
copied_rows = 0
|
||||||
|
for line in process.stderr:
|
||||||
|
if line.startswith("COPY"):
|
||||||
|
copied_rows = int(line.split()[1])
|
||||||
|
pbar.update(copied_rows - pbar.n)
|
||||||
|
print(line, end='') # Print all output for visibility
|
||||||
|
|
||||||
|
pbar.close()
|
||||||
|
process.wait()
|
||||||
|
|
||||||
|
if process.returncode != 0:
|
||||||
|
print(f"Error occurred during restoration to {target['ts_id']}")
|
||||||
|
print(process.stderr.read())
|
||||||
|
else:
|
||||||
|
print(f"Restoration to {target['ts_id']} completed successfully")
|
||||||
|
|
||||||
|
# Clean up the dump file
|
||||||
|
os.remove(f"{table_name}.sql")
|
||||||
|
print(f"Replication of {table_name} completed")
|
||||||
|
|
||||||
|
def main():
|
||||||
|
sys_config, gis_config = load_config()
|
||||||
|
|
||||||
|
source_server = sys_config['POOL'][0]
|
||||||
|
target_servers = sys_config['POOL'][1:]
|
||||||
|
|
||||||
|
tables = [layer['table_name'] for layer in gis_config['layers']]
|
||||||
|
|
||||||
|
for table in tables:
|
||||||
|
replicate_table(source_server, target_servers, table)
|
||||||
|
|
||||||
|
print("All replications completed!")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
90 sijapi/helpers/repl.sh (new executable file)
|
@ -0,0 +1,90 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
SOURCE_HOST="100.64.64.20"
|
||||||
|
SOURCE_PORT="5432"
|
||||||
|
SOURCE_DB="sij"
|
||||||
|
SOURCE_USER="sij"
|
||||||
|
SOURCE_PASS="Synchr0!"
|
||||||
|
|
||||||
|
# Target servers
|
||||||
|
declare -a TARGETS=(
|
||||||
|
"sij-vm:100.64.64.11:5432:sij:sij:Synchr0!"
|
||||||
|
"sij-vps:100.64.64.15:5432:sij:sij:Synchr0!"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Tables to replicate
|
||||||
|
TABLES=("dailyweather" "hourlyweather" "short_urls" "click_logs" "locations")
|
||||||
|
|
||||||
|
# PostgreSQL binaries
|
||||||
|
PSQL="/Applications/Postgres.app/Contents/Versions/latest/bin/psql"
|
||||||
|
PG_DUMP="/Applications/Postgres.app/Contents/Versions/latest/bin/pg_dump"
|
||||||
|
|
||||||
|
# Function to run SQL and display results
|
||||||
|
run_sql() {
|
||||||
|
local host=$1
|
||||||
|
local port=$2
|
||||||
|
local db=$3
|
||||||
|
local user=$4
|
||||||
|
local pass=$5
|
||||||
|
local sql=$6
|
||||||
|
|
||||||
|
PGPASSWORD=$pass $PSQL -h $host -p $port -U $user -d $db -c "$sql"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Replicate to a target
|
||||||
|
replicate_to_target() {
|
||||||
|
local target_info=$1
|
||||||
|
IFS=':' read -r target_name target_host target_port target_db target_user target_pass <<< "$target_info"
|
||||||
|
|
||||||
|
echo "Replicating to $target_name ($target_host)"
|
||||||
|
|
||||||
|
# Check source tables
|
||||||
|
echo "Checking source tables:"
|
||||||
|
for table in "${TABLES[@]}"; do
|
||||||
|
run_sql $SOURCE_HOST $SOURCE_PORT $SOURCE_DB $SOURCE_USER $SOURCE_PASS "SELECT COUNT(*) FROM $table;"
|
||||||
|
done
|
||||||
|
|
||||||
|
# Dump and restore each table
|
||||||
|
for table in "${TABLES[@]}"; do
|
||||||
|
echo "Replicating $table"
|
||||||
|
|
||||||
|
# Dump table
|
||||||
|
PGPASSWORD=$SOURCE_PASS $PG_DUMP -h $SOURCE_HOST -p $SOURCE_PORT -U $SOURCE_USER -d $SOURCE_DB -t $table --no-owner --no-acl > ${table}_dump.sql
|
||||||
|
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
echo "Error dumping $table"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Drop and recreate table on target
|
||||||
|
run_sql $target_host $target_port $target_db $target_user $target_pass "DROP TABLE IF EXISTS $table CASCADE; "
|
||||||
|
|
||||||
|
# Restore table
|
||||||
|
PGPASSWORD=$target_pass $PSQL -h $target_host -p $target_port -U $target_user -d $target_db -f ${table}_dump.sql
|
||||||
|
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
echo "Error restoring $table"
|
||||||
|
else
|
||||||
|
echo "$table replicated successfully"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Clean up dump file
|
||||||
|
rm ${table}_dump.sql
|
||||||
|
done
|
||||||
|
|
||||||
|
# Verify replication
|
||||||
|
echo "Verifying replication:"
|
||||||
|
for table in "${TABLES[@]}"; do
|
||||||
|
echo "Checking $table on target:"
|
||||||
|
run_sql $target_host $target_port $target_db $target_user $target_pass "SELECT COUNT(*) FROM $table;"
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# Main replication process
|
||||||
|
for target in "${TARGETS[@]}"; do
|
||||||
|
replicate_to_target "$target"
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "Replication completed"
|
||||||
|
|
125 sijapi/helpers/replicator.py (new file)
|
@ -0,0 +1,125 @@
|
||||||
|
import asyncio
|
||||||
|
import asyncpg
|
||||||
|
import yaml
|
||||||
|
from pathlib import Path
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
async def load_config():
|
||||||
|
config_path = Path(__file__).parent.parent / 'config' / 'db.yaml'
|
||||||
|
with open(config_path, 'r') as file:
|
||||||
|
return yaml.safe_load(file)
|
||||||
|
|
||||||
|
async def check_table_existence(conn, tables):
|
||||||
|
for table in tables:
|
||||||
|
exists = await conn.fetchval(f"""
|
||||||
|
SELECT EXISTS (
|
||||||
|
SELECT FROM information_schema.tables
|
||||||
|
WHERE table_schema = 'public'
|
||||||
|
AND table_name = $1
|
||||||
|
)
|
||||||
|
""", table)
|
||||||
|
print(f"Table {table} {'exists' if exists else 'does not exist'} in the database.")
|
||||||
|
|
||||||
|
async def check_user_permissions(conn, tables):
|
||||||
|
for table in tables:
|
||||||
|
has_permission = await conn.fetchval(f"""
|
||||||
|
SELECT has_table_privilege(current_user, $1, 'SELECT')
|
||||||
|
""", table)
|
||||||
|
print(f"User {'has' if has_permission else 'does not have'} SELECT permission on table {table}.")
|
||||||
|
|
||||||
|
async def replicate_tables(source, target, tables):
    print(f"Replicating tables from {source['ts_id']} to {target['ts_id']}")

    conn_params = {
        'database': 'db_name',
        'user': 'db_user',
        'password': 'db_pass',
        'host': 'ts_ip',
        'port': 'db_port'
    }

    source_conn = await asyncpg.connect(**{k: source[v] for k, v in conn_params.items()})
    target_conn = await asyncpg.connect(**{k: target[v] for k, v in conn_params.items()})

    try:
        source_version = await source_conn.fetchval("SELECT version()")
        target_version = await target_conn.fetchval("SELECT version()")
        print(f"Source database version: {source_version}")
        print(f"Target database version: {target_version}")

        print("Checking table existence in source database:")
        await check_table_existence(source_conn, tables)

        print("\nChecking user permissions in source database:")
        await check_user_permissions(source_conn, tables)

        # Dump all tables to a file
        dump_file = 'dump.sql'
        dump_command = [
            '/Applications/Postgres.app/Contents/Versions/latest/bin/pg_dump',
            '-h', source['ts_ip'],
            '-p', str(source['db_port']),
            '-U', source['db_user'],
            '-d', source['db_name'],
            '--no-owner',
            '--no-acl',
            '-f', dump_file
        ]
        # pg_dump needs a separate -t flag per table; joining the names into one
        # string would be passed as a single (unmatchable) table pattern.
        for table in tables:
            dump_command.extend(['-t', table])
        env = {'PGPASSWORD': source['db_pass']}
        print(f"\nExecuting dump command: {' '.join(dump_command)}")
        dump_result = subprocess.run(dump_command, env=env, capture_output=True, text=True)

        if dump_result.returncode != 0:
            print(f"Dump stderr: {dump_result.stderr}")
            raise Exception(f"Dump failed: {dump_result.stderr}")

        print("Dump completed successfully.")

        # Restore from the dump file
        restore_command = [
            '/Applications/Postgres.app/Contents/Versions/latest/bin/psql',
            '-h', target['ts_ip'],
            '-p', str(target['db_port']),
            '-U', target['db_user'],
            '-d', target['db_name'],
            '-f', dump_file
        ]
        env = {'PGPASSWORD': target['db_pass']}
        print(f"\nExecuting restore command: {' '.join(restore_command)}")
        restore_result = subprocess.run(restore_command, env=env, capture_output=True, text=True)

        if restore_result.returncode != 0:
            print(f"Restore stderr: {restore_result.stderr}")
            raise Exception(f"Restore failed: {restore_result.stderr}")

        print("Restore completed successfully.")

        # Clean up the dump file
        os.remove(dump_file)

    except Exception as e:
        print(f"An error occurred during replication: {str(e)}")
        print("Exception details:", sys.exc_info())
    finally:
        await source_conn.close()
        await target_conn.close()


async def main():
    config = await load_config()
    source_server = config['POOL'][0]    # sij-mbp16
    target_servers = config['POOL'][1:]  # sij-vm and sij-vps

    tables_to_replicate = [
        'dailyweather', 'hourlyweather', 'short_urls', 'click_logs', 'locations'
    ]

    for target_server in target_servers:
        await replicate_tables(source_server, target_server, tables_to_replicate)

    print("All replications completed!")


if __name__ == "__main__":
    asyncio.run(main())
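For reference, the only keys this helper reads from each POOL entry are visible in the code above: ts_id, ts_ip, db_port, db_user, db_pass and db_name. A minimal sketch of the structure load_config() is assumed to return; the addresses and credentials below are placeholders, not real values.

# Hypothetical shape of the loaded config; only the key names come from the script above.
example_config = {
    'POOL': [
        {'ts_id': 'sij-mbp16', 'ts_ip': '10.0.0.2', 'db_port': 5432,
         'db_user': 'user', 'db_pass': 'changeme', 'db_name': 'db'},
        {'ts_id': 'sij-vm', 'ts_ip': '10.0.0.3', 'db_port': 5432,
         'db_user': 'user', 'db_pass': 'changeme', 'db_name': 'db'},
    ]
}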
1103  sijapi/helpers/schema_info.yaml  Normal file
File diff suppressed because it is too large
@@ -12,7 +12,7 @@ import sys
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 def load_config():
-    config_path = Path(__file__).parent.parent / 'config' / 'api.yaml'
+    config_path = Path(__file__).parent.parent / 'config' / 'sys.yaml'
     with open(config_path, 'r') as file:
         return yaml.safe_load(file)
110  sijapi/logs.py  Normal file
@@ -0,0 +1,110 @@
# logs.py
import os
import sys
from pathlib import Path
import yaml
from loguru import logger as loguru_logger
from typing import Union, Optional

class LogLevels:
    def __init__(self):
        self.levels = {}
        self.default_level = "INFO"
        self.HOME = Path.home()

    def init(self, yaml_path: Union[str, Path]):
        yaml_path = self._resolve_path(yaml_path, 'config')

        try:
            with yaml_path.open('r') as file:
                config_data = yaml.safe_load(file)

            logs_config = config_data.get('LOGS', {})
            self.default_level = logs_config.get('default', "INFO")
            self.levels = {k: v for k, v in logs_config.items() if k != 'default'}

            loguru_logger.info(f"Loaded log levels configuration from {yaml_path}")
        except Exception as e:
            loguru_logger.error(f"Error loading log levels configuration: {str(e)}")
            raise

    def _resolve_path(self, path: Union[str, Path], default_dir: str) -> Path:
        base_path = Path(__file__).parent.parent
        path = Path(path)
        if not path.suffix:
            path = base_path / 'sijapi' / default_dir / f"{path.name}.yaml"
        elif not path.is_absolute():
            path = base_path / path
        return path

    def set_level(self, module, level):
        self.levels[module] = level

    def set_default_level(self, level):
        self.default_level = level

    def get_level(self, module):
        return self.levels.get(module, self.default_level)


class Logger:
    def __init__(self, name):
        self.name = name
        self.logger = loguru_logger
        self.debug_modules = set()
        self.log_levels = LogLevels()
        self.logs_dir = None

    def init(self, yaml_path: Union[str, Path], logs_dir: Path):
        self.log_levels.init(yaml_path)
        self.logs_dir = logs_dir
        os.makedirs(self.logs_dir, exist_ok=True)

        # Set up initial logging configuration
        self.logger.remove()
        log_format = "{time:YYYY-MM-DD HH:mm:ss} - {name} - <level>{level: <8}</level> - <level>{message}</level>"
        self.logger.add(self.logs_dir / 'app.log', rotation="2 MB", level="DEBUG", format=log_format)
        self.logger.add(sys.stdout, level="DEBUG", format=log_format, colorize=True,
                        filter=self._level_filter)

    def setup_from_args(self, args):
        if not self.logs_dir:
            raise ValueError("Logger not initialized. Call init() before setup_from_args().")

        # Update log levels based on command line arguments
        for module in args.debug:
            self.log_levels.set_level(module, "DEBUG")
        if hasattr(args, 'info'):
            for module in args.info:
                self.log_levels.set_level(module, "INFO")
        if args.log:
            self.log_levels.set_default_level(args.log.upper())

        # Set debug modules
        self.debug_modules = set(args.debug)

        # Custom color and style mappings
        self.logger.level("CRITICAL", color="<yellow><bold><MAGENTA>")
        self.logger.level("ERROR", color="<red><bold>")
        self.logger.level("WARNING", color="<yellow><bold>")
        self.logger.level("DEBUG", color="<green><bold>")

        self.logger.info(f"Debug modules: {self.debug_modules}")
        self.logger.info(f"Log levels: {self.log_levels.levels}")
        self.logger.info(f"Default log level: {self.log_levels.default_level}")

    def _level_filter(self, record):
        module_level = self.log_levels.get_level(record["name"])
        return record["level"].no >= self.logger.level(module_level).no

    def get_logger(self, module_name):
        level = self.log_levels.get_level(module_name)
        self.logger.debug(f"Creating logger for {module_name} with level {level}")
        return self.logger.bind(name=module_name)


# Global logger instance
L = Logger("Central")

# Function to get module-specific logger
def get_logger(module_name):
    return L.get_logger(module_name)
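A note on how this module is driven: LogLevels.init() above pulls a LOGS mapping out of the named YAML config, treating the 'default' key as the fallback level and every other key as a per-module override, and modules then obtain a bound logger via get_logger(__name__). A rough sketch of the parsed structure it expects follows; the module names and levels shown are placeholders, not taken from the real config file.

# What yaml.safe_load() is assumed to return for the log-level config.
config_data = {
    'LOGS': {
        'default': 'INFO',   # fallback for any module not listed below
        'news': 'DEBUG',     # per-module overrides
        'asr': 'WARNING',
    }
}

# Typical module-level usage, mirroring the router changes later in this commit:
# from sijapi.logs import get_logger
# l = get_logger(__name__)
# l.debug("message")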
@@ -29,18 +29,14 @@ from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
 from datetime import datetime as dt_datetime
 from better_profanity import profanity
+from sijapi.logs import get_logger
 from sijapi.utilities import html_to_markdown, sanitize_filename, assemble_journal_path, assemble_archive_path, contains_profanity, is_ad_or_tracker, initialize_adblock_rules, contains_blacklisted_word
-from sijapi import L, API, Archivist, BLOCKLISTS_DIR, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR
+from sijapi import Sys, Archivist, BLOCKLISTS_DIR, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR
+from sijapi.logs import get_logger
+l = get_logger(__name__)
 
 archivist = APIRouter()
 
-logger = L.get_module_logger("news")
-def debug(text: str): logger.debug(text)
-def info(text: str): logger.info(text)
-def warn(text: str): logger.warning(text)
-def err(text: str): logger.error(text)
-def crit(text: str): logger.critical(text)
-
 adblock_rules = initialize_adblock_rules(BLOCKLISTS_DIR)
 
 @archivist.post("/archive")
@@ -51,11 +47,11 @@ async def archive_post(
     encoding: str = Form('utf-8')
 ):
     if not url:
-        warn(f"No URL provided to /archive endpoint.")
+        l.warning(f"No URL provided to /archive endpoint.")
         raise HTTPException(status_code=400, detail="URL is required")
 
     if is_ad_or_tracker(url, adblock_rules):
-        debug(f"Skipping likely ad or tracker URL: {url}")
+        l.debug(f"Skipping likely ad or tracker URL: {url}")
         raise HTTPException(status_code=400, detail="URL is likely an ad or tracker")
 
     markdown_filename = await process_archive(url, title, encoding, source)
@@ -70,7 +66,7 @@ async def process_archive(
 
     # Check URL against blacklist
     if contains_blacklisted_word(url, Archivist.blacklist):
-        info(f"Not archiving {url} due to blacklisted word in URL")
+        l.info(f"Not archiving {url} due to blacklisted word in URL")
         return None
 
     timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M')
@@ -82,13 +78,13 @@ async def process_archive(
 
     # Check content for profanity
    if contains_profanity(content, threshold=0.01, custom_words=Archivist.blacklist):
-        info(f"Not archiving {url} due to profanity in content")
+        l.info(f"Not archiving {url} due to profanity in content")
        return None
 
    try:
        markdown_path, relative_path = assemble_archive_path(filename=readable_title, extension=".md")
    except Exception as e:
-        warn(f"Failed to assemble archive path for {url}: {str(e)}")
+        l.warning(f"Failed to assemble archive path for {url}: {str(e)}")
        return None
 
    markdown_content = f"---\n"
@@ -105,8 +101,8 @@ async def process_archive(
        markdown_path.parent.mkdir(parents=True, exist_ok=True)
        with open(markdown_path, 'w', encoding=encoding) as md_file:
            md_file.write(markdown_content)
-        debug(f"Successfully saved to {markdown_path}")
+        l.debug(f"Successfully saved to {markdown_path}")
        return markdown_path
    except Exception as e:
-        warn(f"Failed to write markdown file: {str(e)}")
+        l.warning(f"Failed to write markdown file: {str(e)}")
        return None
@@ -13,15 +13,10 @@ from fastapi import APIRouter, HTTPException, Form, UploadFile, File, Background
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel, Field
 from typing import Optional
-from sijapi import L, ASR_DIR, WHISPER_CPP_MODELS, WHISPER_CPP_DIR, MAX_CPU_CORES
+from sijapi import Sys, ASR_DIR, WHISPER_CPP_MODELS, WHISPER_CPP_DIR
+from sijapi.logs import get_logger
+l = get_logger(__name__)
 
 asr = APIRouter()
-logger = L.get_module_logger("asr")
-def debug(text: str): logger.debug(text)
-def info(text: str): logger.info(text)
-def warn(text: str): logger.warning(text)
-def err(text: str): logger.error(text)
-def crit(text: str): logger.critical(text)
-
 transcription_results = {}
 class TranscribeParams(BaseModel):
@@ -84,13 +79,13 @@ async def transcribe_endpoint(
 
 
 async def transcribe_audio(file_path, params: TranscribeParams):
-    debug(f"Transcribing audio file from {file_path}...")
+    l.debug(f"Transcribing audio file from {file_path}...")
     file_path = await convert_to_wav(file_path)
     model = params.model if params.model in WHISPER_CPP_MODELS else 'small'
     model_path = WHISPER_CPP_DIR / 'models' / f'ggml-{model}.bin'
     command = [str(WHISPER_CPP_DIR / 'build' / 'bin' / 'main')]
     command.extend(['-m', str(model_path)])
-    command.extend(['-t', str(max(1, min(params.threads or MAX_CPU_CORES, MAX_CPU_CORES)))])
+    command.extend(['-t', str(max(1, min(params.threads or Sys.MAX_CPU_CORES, Sys.MAX_CPU_CORES)))])
     command.extend(['-np'])
 
     if params.split_on_word:
@@ -121,11 +116,11 @@ async def transcribe_audio(file_path, params: TranscribeParams):
         command.extend(['--dtw', params.dtw])
 
     command.extend(['-f', file_path])
-    debug(f"Command: {command}")
+    l.debug(f"Command: {command}")
 
     # Create a unique ID for this transcription job
     job_id = str(uuid.uuid4())
-    debug(f"Created job ID: {job_id}")
+    l.debug(f"Created job ID: {job_id}")
 
     # Store the job status
     transcription_results[job_id] = {"status": "processing", "result": None}
@@ -137,20 +132,20 @@ async def transcribe_audio(file_path, params: TranscribeParams):
     poll_interval = 10  # 10 seconds
     start_time = asyncio.get_event_loop().time()
 
-    debug(f"Starting to poll for job {job_id}")
+    l.debug(f"Starting to poll for job {job_id}")
     try:
         while asyncio.get_event_loop().time() - start_time < max_wait_time:
             job_status = transcription_results.get(job_id, {})
-            debug(f"Current status for job {job_id}: {job_status['status']}")
+            l.debug(f"Current status for job {job_id}: {job_status['status']}")
             if job_status["status"] == "completed":
-                info(f"Transcription completed for job {job_id}")
+                l.info(f"Transcription completed for job {job_id}")
                 return job_id  # This is the only change
             elif job_status["status"] == "failed":
-                err(f"Transcription failed for job {job_id}: {job_status.get('error', 'Unknown error')}")
+                l.error(f"Transcription failed for job {job_id}: {job_status.get('error', 'Unknown error')}")
                 raise Exception(f"Transcription failed: {job_status.get('error', 'Unknown error')}")
             await asyncio.sleep(poll_interval)
 
-        err(f"Transcription timed out for job {job_id}")
+        l.error(f"Transcription timed out for job {job_id}")
         raise TimeoutError("Transcription timed out")
     finally:
         # Ensure the task is cancelled if we exit the loop
@@ -160,20 +155,20 @@ async def transcribe_audio(file_path, params: TranscribeParams):
 
 
 async def process_transcription(command, file_path, job_id):
     try:
-        debug(f"Starting transcription process for job {job_id}")
+        l.debug(f"Starting transcription process for job {job_id}")
         result = await run_transcription(command, file_path)
         transcription_results[job_id] = {"status": "completed", "result": result}
-        debug(f"Transcription completed for job {job_id}")
+        l.debug(f"Transcription completed for job {job_id}")
     except Exception as e:
-        err(f"Transcription failed for job {job_id}: {str(e)}")
+        l.error(f"Transcription failed for job {job_id}: {str(e)}")
         transcription_results[job_id] = {"status": "failed", "error": str(e)}
     finally:
         # Clean up the temporary file
         os.remove(file_path)
-        debug(f"Cleaned up temporary file for job {job_id}")
+        l.debug(f"Cleaned up temporary file for job {job_id}")
 
 async def run_transcription(command, file_path):
-    debug(f"Running transcription command: {' '.join(command)}")
+    l.debug(f"Running transcription command: {' '.join(command)}")
     proc = await asyncio.create_subprocess_exec(
         *command,
         stdout=asyncio.subprocess.PIPE,
@@ -182,9 +177,9 @@ async def run_transcription(command, file_path):
     stdout, stderr = await proc.communicate()
     if proc.returncode != 0:
         error_message = f"Error running command: {stderr.decode()}"
-        err(error_message)
+        l.error(error_message)
         raise Exception(error_message)
-    debug("Transcription command completed successfully")
+    l.debug("Transcription command completed successfully")
     return stdout.decode().strip()
 
 async def convert_to_wav(file_path: str):
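For orientation, with the defaults above (model 'small', none of the optional flags set) the argv handed to whisper.cpp ends up looking roughly like the following; the paths and the thread count are placeholders, since the real values come from WHISPER_CPP_DIR, Sys.MAX_CPU_CORES and the uploaded file.

# Illustrative only -- flags taken verbatim from transcribe_audio() above.
command = [
    '/path/to/whisper.cpp/build/bin/main',
    '-m', '/path/to/whisper.cpp/models/ggml-small.bin',
    '-t', '4',
    '-np',
    '-f', '/tmp/upload.wav',
]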
@@ -17,45 +17,42 @@ import threading
 from typing import Dict, List, Any
 from datetime import datetime, timedelta
 
-from sijapi import L, ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
+from sijapi import ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
 from sijapi.routers import gis
+from sijapi.logs import get_logger
+l = get_logger(__name__)
 
 cal = APIRouter()
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token")
 timeout = httpx.Timeout(12)
-logger = L.get_module_logger("cal")
-def debug(text: str): logger.debug(text)
-def info(text: str): logger.info(text)
-def warn(text: str): logger.warning(text)
-def err(text: str): logger.error(text)
-def crit(text: str): logger.critical(text)
 
 if MS365_TOGGLE is True:
-    crit(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.")
+    l.critical(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.")
 
     @cal.get("/o365/login")
     async def login():
-        debug(f"Received request to /o365/login")
-        debug(f"SCOPE: {MS365_SCOPE}")
+        l.debug(f"Received request to /o365/login")
+        l.debug(f"SCOPE: {MS365_SCOPE}")
         if not MS365_SCOPE:
-            err("No scopes defined for authorization.")
+            l.error("No scopes defined for authorization.")
             raise HTTPException(
                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                 detail="No scopes defined for authorization."
             )
         authorization_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/authorize?client_id={MS365_CLIENT_ID}&response_type=code&redirect_uri={MS365_REDIRECT_PATH}&scope={'+'.join(MS365_SCOPE)}"
-        info(f"Redirecting to authorization URL: {authorization_url}")
+        l.info(f"Redirecting to authorization URL: {authorization_url}")
         return RedirectResponse(authorization_url)
 
     @cal.get("/o365/oauth_redirect")
     async def oauth_redirect(code: str = None, error: str = None):
-        debug(f"Received request to /o365/oauth_redirect")
+        l.debug(f"Received request to /o365/oauth_redirect")
         if error:
-            err(f"OAuth2 Error: {error}")
+            l.error(f"OAuth2 Error: {error}")
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST, detail="OAuth2 Error"
             )
-        info(f"Requesting token with authorization code: {code}")
+        l.info(f"Requesting token with authorization code: {code}")
         token_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token"
         data = {
             "client_id": MS365_CLIENT_ID,
@@ -66,15 +63,15 @@ if MS365_TOGGLE is True:
         }
         async with httpx.AsyncClient(timeout=timeout) as client:
             response = await client.post(token_url, data=data)
-        debug(f"Token endpoint response status code: {response.status_code}")
-        info(f"Token endpoint response text: {response.text}")
+        l.debug(f"Token endpoint response status code: {response.status_code}")
+        l.info(f"Token endpoint response text: {response.text}")
         result = response.json()
         if 'access_token' in result:
             await save_token(result)
-            info("Access token obtained successfully")
+            l.info("Access token obtained successfully")
             return {"message": "Access token stored successfully"}
         else:
-            crit(f"Failed to obtain access token. Response: {result}")
+            l.critical(f"Failed to obtain access token. Response: {result}")
             raise HTTPException(
                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                 detail="Failed to obtain access token"
@@ -82,7 +79,7 @@ if MS365_TOGGLE is True:
 
     @cal.get("/o365/me")
     async def read_items():
-        debug(f"Received request to /o365/me")
+        l.debug(f"Received request to /o365/me")
         token = await load_token()
         if not token:
             raise HTTPException(
@@ -95,10 +92,10 @@ if MS365_TOGGLE is True:
             response = await client.get(graph_url, headers=headers)
         if response.status_code == 200:
             user = response.json()
-            info(f"User retrieved: {user}")
+            l.info(f"User retrieved: {user}")
             return user
         else:
-            err("Invalid or expired token")
+            l.error("Invalid or expired token")
             raise HTTPException(
                 status_code=status.HTTP_401_UNAUTHORIZED,
                 detail="Invalid or expired token",
@@ -106,14 +103,14 @@ if MS365_TOGGLE is True:
             )
 
     async def save_token(token):
-        debug(f"Saving token: {token}")
+        l.debug(f"Saving token: {token}")
         try:
             token["expires_at"] = int(time.time()) + token["expires_in"]
             with open(MS365_TOKEN_PATH, "w") as file:
                 json.dump(token, file)
-            debug(f"Saved token to {MS365_TOKEN_PATH}")
+            l.debug(f"Saved token to {MS365_TOKEN_PATH}")
         except Exception as e:
-            err(f"Failed to save token: {e}")
+            l.error(f"Failed to save token: {e}")
 
     async def load_token():
         if os.path.exists(MS365_TOKEN_PATH):
@@ -121,21 +118,21 @@ if MS365_TOGGLE is True:
                 with open(MS365_TOKEN_PATH, "r") as file:
                     token = json.load(file)
             except FileNotFoundError:
-                err("Token file not found.")
+                l.error("Token file not found.")
                 return None
             except json.JSONDecodeError:
-                err("Failed to decode token JSON")
+                l.error("Failed to decode token JSON")
                 return None
 
             if token:
                 token["expires_at"] = int(time.time()) + token["expires_in"]
-                debug(f"Loaded token: {token}")  # Add this line to log the loaded token
+                l.debug(f"Loaded token: {token}")  # Add this line to log the loaded token
                 return token
             else:
-                debug("No token found.")
+                l.debug("No token found.")
                 return None
         else:
-            err(f"No file found at {MS365_TOKEN_PATH}")
+            l.error(f"No file found at {MS365_TOKEN_PATH}")
             return None
 
 
@@ -165,39 +162,39 @@ if MS365_TOGGLE is True:
             response = await client.post(token_url, data=data)
             result = response.json()
             if "access_token" in result:
-                info("Access token refreshed successfully")
+                l.info("Access token refreshed successfully")
                 return result
             else:
-                err("Failed to refresh access token")
+                l.error("Failed to refresh access token")
                 return None
 
 
     async def refresh_token():
         token = await load_token()
         if not token:
-            err("No token found in storage")
+            l.error("No token found in storage")
            raise HTTPException(
                 status_code=status.HTTP_401_UNAUTHORIZED,
                 detail="No token found",
             )
 
         if 'refresh_token' not in token:
-            err("Refresh token not found in the loaded token")
+            l.error("Refresh token not found in the loaded token")
            raise HTTPException(
                 status_code=status.HTTP_401_UNAUTHORIZED,
                 detail="Refresh token not found",
             )
 
         refresh_token = token['refresh_token']
-        debug("Found refresh token, attempting to refresh access token")
+        l.debug("Found refresh token, attempting to refresh access token")
 
         new_token = await get_new_token_with_refresh_token(refresh_token)
 
         if new_token:
             await save_token(new_token)
-            info("Token refreshed and saved successfully")
+            l.info("Token refreshed and saved successfully")
         else:
-            err("Failed to refresh token")
+            l.error("Failed to refresh token")
             raise HTTPException(
                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                 detail="Failed to refresh token",
@@ -218,7 +215,7 @@ if ICAL_TOGGLE is True:
         calendar_identifiers = {
             calendar.title() : calendar.calendarIdentifier() for calendar in all_calendars
         }
-        debug(f"{calendar_identifiers}")
+        l.debug(f"{calendar_identifiers}")
         return calendar_identifiers
 
     def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]:
@@ -230,7 +227,7 @@ if ICAL_TOGGLE is True:
 
         def completion_handler(granted, error):
             if error is not None:
-                err(f"Error: {error}")
+                l.error(f"Error: {error}")
             access_granted.append(granted)
             with access_granted_condition:
                 access_granted_condition.notify()
@@ -242,11 +239,11 @@ if ICAL_TOGGLE is True:
             if access_granted:
                 return access_granted[0]
             else:
-                err("Request access timed out or failed")
+                l.error("Request access timed out or failed")
                 return False
 
         if not request_access():
-            err("Access to calendar data was not granted")
+            l.error("Access to calendar data was not granted")
             return []
 
         ns_start_date = datetime_to_nsdate(start_date)
@@ -336,7 +333,7 @@ async def get_ms365_events(start_date: datetime, end_date: datetime):
         response = await client.get(graph_url, headers=headers)
 
     if response.status_code != 200:
-        err("Failed to retrieve events from Microsoft 365")
+        l.error("Failed to retrieve events from Microsoft 365")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             detail="Failed to retrieve events",
@@ -352,33 +349,33 @@ async def parse_calendar_for_day(range_start: datetime, range_end: datetime, eve
     event_list = []
 
     for event in events:
-        info(f"Event: {event}")
+        l.info(f"Event: {event}")
         start_str = event.get('start')
         end_str = event.get('end')
 
         if isinstance(start_str, dict):
             start_str = start_str.get('dateTime')
         else:
-            info(f"Start date string not a dict")
+            l.info(f"Start date string not a dict")
 
         if isinstance(end_str, dict):
             end_str = end_str.get('dateTime')
         else:
-            info(f"End date string not a dict")
+            l.info(f"End date string not a dict")
 
         try:
             start_date = await gis.dt(start_str) if start_str else None
         except (ValueError, TypeError) as e:
-            err(f"Invalid start date format: {start_str}, error: {e}")
+            l.error(f"Invalid start date format: {start_str}, error: {e}")
             continue
 
         try:
             end_date = await gis.dt(end_str) if end_str else None
         except (ValueError, TypeError) as e:
-            err(f"Invalid end date format: {end_str}, error: {e}")
+            l.error(f"Invalid end date format: {end_str}, error: {e}")
             continue
 
-        debug(f"Comparing {start_date} with range {range_start} to {range_end}")
+        l.debug(f"Comparing {start_date} with range {range_start} to {range_end}")
 
         if start_date:
             # Ensure start_date is timezone-aware
@@ -410,11 +407,11 @@ async def parse_calendar_for_day(range_start: datetime, range_end: datetime, eve
                     "busy": event.get('showAs', '') in ['busy', 'tentative'],
                     "all_day": event.get('isAllDay', False)
                 }
-                info(f"Event_data: {event_data}")
+                l.info(f"Event_data: {event_data}")
                 event_list.append(event_data)
             else:
-                debug(f"Event outside of specified range: {start_date} to {end_date}")
+                l.debug(f"Event outside of specified range: {start_date} to {end_date}")
         else:
-            err(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}")
+            l.error(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}")
 
     return event_list
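As a reading aid for save_token() and load_token() above: the file at MS365_TOKEN_PATH is just the OAuth token response plus a computed expiry. A rough sketch of its contents follows; only the keys actually referenced above (access_token, refresh_token, expires_in, expires_at) are assumed, and the values are placeholders.

# Hypothetical token file contents.
token = {
    "access_token": "<opaque access token>",
    "refresh_token": "<opaque refresh token>",
    "expires_in": 3600,
    "expires_at": 1720000000,  # int(time.time()) + expires_in, added by save_token()
}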
@@ -7,19 +7,15 @@ from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel
 from fastapi.responses import PlainTextResponse, JSONResponse
 from typing import Optional
-from sijapi import L, CF_TOKEN, CADDYFILE_PATH, CF_API_BASE_URL, CF_IP
 import httpx
 import asyncio
 from asyncio import sleep
 import os
+from sijapi import CF_TOKEN, CADDYFILE_PATH, CF_API_BASE_URL, CF_IP
+from sijapi.logs import get_logger
+l = get_logger(__name__)
 
 cf = APIRouter()
-logger = L.get_module_logger("cal")
-def debug(text: str): logger.debug(text)
-def info(text: str): logger.info(text)
-def warn(text: str): logger.warning(text)
-def err(text: str): logger.error(text)
-def crit(text: str): logger.critical(text)
 
 class DNSRecordRequest(BaseModel):
     full_domain: str
@@ -77,7 +73,7 @@ async def retry_request(url, headers, max_retries=5, backoff_factor=1):
             response.raise_for_status()
             return response
         except (httpx.HTTPError, httpx.ConnectTimeout) as e:
-            err(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
+            l.error(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
             await sleep(backoff_factor * (2 ** retry))
     raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request")
 
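A quick sanity check on the backoff in retry_request() above: with the defaults max_retries=5 and backoff_factor=1, the sleep between attempts doubles each time.

# Delays produced by backoff_factor * (2 ** retry) for retry = 0..4:
delays = [1 * (2 ** retry) for retry in range(5)]
print(delays)  # [1, 2, 4, 8, 16] seconds, roughly 31 s of waiting in the worst case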
@@ -22,19 +22,15 @@ import ssl
 import yaml
 from typing import List, Dict, Optional, Set
 from datetime import datetime as dt_datetime
-from sijapi import L, Dir, EMAIL_CONFIG, EMAIL_LOGS
+from sijapi import Dir, Tts, EMAIL_CONFIG, EMAIL_LOGS
 from sijapi.routers import gis, img, tts, llm
 from sijapi.utilities import clean_text, assemble_journal_path, extract_text, prefix_lines
 from sijapi.classes import EmailAccount, IMAPConfig, SMTPConfig, IncomingEmail, EmailContact, AutoResponder
+from sijapi.logs import get_logger
+l = get_logger(__name__)
 
 email = APIRouter()
 
-logger = L.get_module_logger("email")
-def debug(text: str): logger.debug(text)
-def info(text: str): logger.info(text)
-def warn(text: str): logger.warning(text)
-def err(text: str): logger.error(text)
-def crit(text: str): logger.critical(text)
 
 def load_email_accounts(yaml_path: str) -> List[EmailAccount]:
     with open(yaml_path, 'r') as file:
@@ -60,36 +56,36 @@ def get_smtp_connection(autoresponder: AutoResponder):
 
     if smtp_config.encryption == 'SSL':
         try:
-            debug(f"Attempting SSL connection to {smtp_config.host}:{smtp_config.port}")
+            l.debug(f"Attempting SSL connection to {smtp_config.host}:{smtp_config.port}")
             return SMTP_SSL(smtp_config.host, smtp_config.port, context=context)
         except ssl.SSLError as e:
-            err(f"SSL connection failed: {str(e)}")
+            l.error(f"SSL connection failed: {str(e)}")
             # If SSL fails, try TLS
             try:
-                debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
+                l.debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
                 smtp = SMTP(smtp_config.host, smtp_config.port)
                 smtp.starttls(context=context)
                 return smtp
             except Exception as e:
-                err(f"STARTTLS connection failed: {str(e)}")
+                l.error(f"STARTTLS connection failed: {str(e)}")
                 raise
 
     elif smtp_config.encryption == 'STARTTLS':
         try:
-            debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
+            l.debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
             smtp = SMTP(smtp_config.host, smtp_config.port)
             smtp.starttls(context=context)
             return smtp
         except Exception as e:
-            err(f"STARTTLS connection failed: {str(e)}")
+            l.error(f"STARTTLS connection failed: {str(e)}")
             raise
 
     else:
         try:
-            debug(f"Attempting unencrypted connection to {smtp_config.host}:{smtp_config.port}")
+            l.debug(f"Attempting unencrypted connection to {smtp_config.host}:{smtp_config.port}")
             return SMTP(smtp_config.host, smtp_config.port)
         except Exception as e:
-            err(f"Unencrypted connection failed: {str(e)}")
+            l.error(f"Unencrypted connection failed: {str(e)}")
             raise
 
 async def send_response(to_email: str, subject: str, body: str, profile: AutoResponder, image_attachment: Path = None) -> bool:
@@ -106,20 +102,20 @@ async def send_response(to_email: str, subject: str, body: str, profile: AutoRes
                 img = MIMEImage(img_file.read(), name=os.path.basename(image_attachment))
                 message.attach(img)
 
-        debug(f"Sending auto-response to {to_email} concerning {subject} from account {profile.name}...")
+        l.debug(f"Sending auto-response to {to_email} concerning {subject} from account {profile.name}...")
 
         server = get_smtp_connection(profile)
-        debug(f"SMTP connection established: {type(server)}")
+        l.debug(f"SMTP connection established: {type(server)}")
         server.login(profile.smtp.username, profile.smtp.password)
         server.send_message(message)
 
-        info(f"Auto-response sent to {to_email} concerning {subject} from account {profile.name}!")
+        l.info(f"Auto-response sent to {to_email} concerning {subject} from account {profile.name}!")
         return True
 
     except Exception as e:
-        err(f"Error in preparing/sending auto-response from account {profile.name}: {str(e)}")
-        err(f"SMTP details - Host: {profile.smtp.host}, Port: {profile.smtp.port}, Encryption: {profile.smtp.encryption}")
-        err(traceback.format_exc())
+        l.error(f"Error in preparing/sending auto-response from account {profile.name}: {str(e)}")
+        l.error(f"SMTP details - Host: {profile.smtp.host}, Port: {profile.smtp.port}, Encryption: {profile.smtp.encryption}")
+        l.error(traceback.format_exc())
         return False
 
     finally:
@@ -127,7 +123,7 @@ async def send_response(to_email: str, subject: str, body: str, profile: AutoRes
             try:
                 server.quit()
             except Exception as e:
-                err(f"Error closing SMTP connection: {str(e)}")
+                l.error(f"Error closing SMTP connection: {str(e)}")
 
 
 def clean_email_content(html_content):
@@ -163,10 +159,10 @@ async def process_account_archival(account: EmailAccount):
     while True:
         try:
             processed_uids = await load_processed_uids(summarized_log)
-            debug(f"{len(processed_uids)} emails marked as already summarized are being ignored.")
+            l.debug(f"{len(processed_uids)} emails marked as already summarized are being ignored.")
             with get_imap_connection(account) as inbox:
                 unread_messages = inbox.messages(unread=True)
-                debug(f"There are {len(unread_messages)} unread messages.")
+                l.debug(f"There are {len(unread_messages)} unread messages.")
                 for uid, message in unread_messages:
                     uid_str = uid.decode() if isinstance(uid, bytes) else str(uid)
                     if uid_str not in processed_uids:
@@ -186,13 +182,13 @@ async def process_account_archival(account: EmailAccount):
                         save_success = await save_email(md_path, md_content)
                         if save_success:
                             await save_processed_uid(summarized_log, account.name, uid_str)
-                            info(f"Summarized email: {uid_str}")
+                            l.info(f"Summarized email: {uid_str}")
                         else:
-                            warn(f"Failed to summarize {this_email.subject}")
+                            l.warning(f"Failed to summarize {this_email.subject}")
                     # else:
-                    #     debug(f"Skipping {uid_str} because it was already processed.")
+                    #     l.debug(f"Skipping {uid_str} because it was already processed.")
         except Exception as e:
-            err(f"An error occurred during summarization for account {account.name}: {e}")
+            l.error(f"An error occurred during summarization for account {account.name}: {e}")
 
         await asyncio.sleep(account.refresh)
 
@@ -240,7 +236,7 @@ tags:
         return markdown_content
 
     except Exception as e:
-        err(f"Exception: {e}")
+        l.error(f"Exception: {e}")
         return False
 
 
@@ -249,15 +245,15 @@ async def save_email(md_path, md_content):
         with open(md_path, 'w', encoding='utf-8') as md_file:
             md_file.write(md_content)
 
-        debug(f"Saved markdown to {md_path}")
+        l.debug(f"Saved markdown to {md_path}")
         return True
     except Exception as e:
-        err(f"Failed to save email: {e}")
+        l.error(f"Failed to save email: {e}")
        return False
 
 
 def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount) -> List[AutoResponder]:
-    debug(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"")
+    l.debug(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"")
     def matches_list(item: str, this_email: IncomingEmail) -> bool:
         if '@' in item:
             return item in this_email.sender
@@ -268,12 +264,12 @@ def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount
         whitelist_match = not profile.whitelist or any(matches_list(item, this_email) for item in profile.whitelist)
         blacklist_match = any(matches_list(item, this_email) for item in profile.blacklist)
         if whitelist_match and not blacklist_match:
-            debug(f"We have a match for {whitelist_match} and no blacklist matches.")
+            l.debug(f"We have a match for {whitelist_match} and no blacklist matches.")
             matching_profiles.append(profile)
         elif whitelist_match and blacklist_match:
-            debug(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}")
+            l.debug(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}")
         else:
-            debug(f"No whitelist or blacklist matches.")
+            l.debug(f"No whitelist or blacklist matches.")
     return matching_profiles
 
 
@@ -284,31 +280,31 @@ async def process_account_autoresponding(account: EmailAccount):
     while True:
         try:
             processed_uids = await load_processed_uids(EMAIL_AUTORESPONSE_LOG)
-            debug(f"{len(processed_uids)} emails marked as already responded to are being ignored.")
+            l.debug(f"{len(processed_uids)} emails marked as already responded to are being ignored.")
 
             with get_imap_connection(account) as inbox:
                 unread_messages = inbox.messages(unread=True)
-                debug(f"There are {len(unread_messages)} unread messages.")
+                l.debug(f"There are {len(unread_messages)} unread messages.")
 
                 for uid, message in unread_messages:
                     uid_str = uid.decode() if isinstance(uid, bytes) else str(uid)
                     if uid_str not in processed_uids:
                         await autorespond_single_email(message, uid_str, account, EMAIL_AUTORESPONSE_LOG)
                     else:
-                        debug(f"Skipping {uid_str} because it was already processed.")
+                        l.debug(f"Skipping {uid_str} because it was already processed.")
 
         except Exception as e:
-            err(f"An error occurred during auto-responding for account {account.name}: {e}")
+            l.error(f"An error occurred during auto-responding for account {account.name}: {e}")
 
         await asyncio.sleep(account.refresh)
 
 
 async def autorespond_single_email(message, uid_str: str, account: EmailAccount, log_file: Path):
     this_email = await create_incoming_email(message)
-    debug(f"Evaluating {this_email.subject} for autoresponse-worthiness...")
+    l.debug(f"Evaluating {this_email.subject} for autoresponse-worthiness...")
 
     matching_profiles = get_matching_autoresponders(this_email, account)
-    debug(f"Matching profiles: {matching_profiles}")
+    l.debug(f"Matching profiles: {matching_profiles}")
 
     for profile in matching_profiles:
         response_body = await generate_response(this_email, profile, account)
@@ -318,16 +314,16 @@ async def autorespond_single_email(message, uid_str: str, account: EmailAccount,
             jpg_path = await img.workflow(profile.image_prompt, earlyout=False, downscale_to_fit=True) if profile.image_prompt else None
             success = await send_response(this_email.sender, subject, response_body, profile, jpg_path)
             if success:
-                warn(f"Auto-responded to email: {this_email.subject}")
+                l.warning(f"Auto-responded to email: {this_email.subject}")
                 await save_processed_uid(log_file, account.name, uid_str)
             else:
-                warn(f"Failed to send auto-response to {this_email.subject}")
+                l.warning(f"Failed to send auto-response to {this_email.subject}")
         else:
-            warn(f"Unable to generate auto-response for {this_email.subject}")
+            l.warning(f"Unable to generate auto-response for {this_email.subject}")
 
 
 async def generate_response(this_email: IncomingEmail, profile: AutoResponder, account: EmailAccount) -> Optional[str]:
-    info(f"Generating auto-response to {this_email.subject} with profile: {profile.name}")
+    l.info(f"Generating auto-response to {this_email.subject} with profile: {profile.name}")
 
     now = await gis.dt(dt_datetime.now())
     then = await gis.dt(this_email.datetime_received)
@@ -345,7 +341,7 @@ Respond on behalf of {account.fullname}, who is unable to respond personally bec
 
     try:
         response = await llm.query_ollama(usr_prompt, sys_prompt, profile.ollama_model, 400)
-        debug(f"query_ollama response: {response}")
+        l.debug(f"query_ollama response: {response}")
 
         if isinstance(response, dict) and "message" in response and "content" in response["message"]:
             response = response["message"]["content"]
@@ -353,7 +349,7 @@ Respond on behalf of {account.fullname}, who is unable to respond personally bec
         return response + "\n\n"
 
     except Exception as e:
-        err(f"Error generating auto-response: {str(e)}")
+        l.error(f"Error generating auto-response: {str(e)}")
         return None
 
 
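For context on the autoresponder matching above, a hypothetical sketch of one profile follows. Only the attributes the code actually reads are assumed (name, whitelist, blacklist, image_prompt, ollama_model and the smtp fields); the values and the exact config layout behind the AutoResponder class are placeholders.

# Placeholder illustration of an autoresponder profile as the code above consumes it.
profile = {
    "name": "vacation",
    "whitelist": ["@example.com"],          # entries containing '@' match the sender address
    "blacklist": ["newsletter", "noreply"],
    "image_prompt": None,                    # optional; triggers img.workflow() when set
    "ollama_model": "llama3",
    "smtp": {"host": "smtp.example.com", "port": 465, "encryption": "SSL",
             "username": "me@example.com", "password": "changeme"},
}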
@@ -33,29 +33,25 @@ from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from sijapi import (
-    L, API, Serve, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY,
+    Sys, Serve, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY,
     COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR,
     MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, IMG_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR
 )
 from sijapi.classes import WidgetUpdate
 from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path
 from sijapi.routers import gis
+from sijapi.logs import get_logger
+l = get_logger(__name__)
 
 forward = APIRouter()
 
-logger = L.get_module_logger("email")
-def debug(text: str): logger.debug(text)
-def info(text: str): logger.info(text)
-def warn(text: str): logger.warning(text)
-def err(text: str): logger.error(text)
-def crit(text: str): logger.critical(text)
-
 async def forward_traffic(reader: asyncio.StreamReader, writer: asyncio.StreamWriter, destination: str):
     try:
         dest_host, dest_port = destination.split(':')
         dest_port = int(dest_port)
     except ValueError:
-        warn(f"Invalid destination format: {destination}. Expected 'host:port'.")
+        l.warning(f"Invalid destination format: {destination}. Expected 'host:port'.")
         writer.close()
         await writer.wait_closed()
         return
@@ -63,7 +59,7 @@ async def forward_traffic(reader: asyncio.StreamReader, writer: asyncio.StreamWr
     try:
         dest_reader, dest_writer = await asyncio.open_connection(dest_host, dest_port)
     except Exception as e:
-        warn(f"Failed to connect to destination {destination}: {str(e)}")
+        l.warning(f"Failed to connect to destination {destination}: {str(e)}")
         writer.close()
         await writer.wait_closed()
         return
@@ -77,7 +73,7 @@ async def forward_traffic(reader: asyncio.StreamReader, writer: asyncio.StreamWr
                 dst.write(data)
                 await dst.drain()
         except Exception as e:
-            warn(f"Error in forwarding: {str(e)}")
+            l.warning(f"Error in forwarding: {str(e)}")
         finally:
             dst.close()
             await dst.wait_closed()
@@ -110,7 +106,7 @@ async def start_port_forwarding():
         for rule in Serve.forwarding_rules:
             asyncio.create_task(start_server(rule.source, rule.destination))
     else:
-        warn("No forwarding rules found in the configuration.")
+        l.warning("No forwarding rules found in the configuration.")
 
 
 @forward.get("/forward_status")
@ -11,6 +11,8 @@ import json
|
||||||
import yaml
|
import yaml
|
||||||
import jwt
|
import jwt
|
||||||
from sijapi import GHOST_API_KEY, GHOST_API_URL
|
from sijapi import GHOST_API_KEY, GHOST_API_URL
|
||||||
|
from sijapi.logs import get_logger
|
||||||
|
l = get_logger(__name__)
|
||||||
|
|
||||||
ghost = APIRouter()
|
ghost = APIRouter()
|
||||||
|
|
||||||
|
|
|
@ -16,17 +16,14 @@ from folium.plugins import Fullscreen, MiniMap, MousePosition, Geocoder, Draw, M
|
||||||
from zoneinfo import ZoneInfo
|
from zoneinfo import ZoneInfo
|
||||||
from dateutil.parser import parse as dateutil_parse
|
from dateutil.parser import parse as dateutil_parse
|
||||||
from typing import Optional, List, Union
|
from typing import Optional, List, Union
|
||||||
from sijapi import L, API, Db, TZ, GEO
|
from sijapi import Sys, Db, TZ, GEO
|
||||||
from sijapi.classes import Location
|
from sijapi.classes import Location
|
||||||
from sijapi.utilities import haversine, assemble_journal_path, json_serial
|
from sijapi.utilities import haversine, assemble_journal_path
|
||||||
|
from sijapi.serialization import json_dumps
|
||||||
|
from sijapi.logs import get_logger
|
||||||
|
l = get_logger(__name__)
|
||||||
|
|
||||||
gis = APIRouter()
|
gis = APIRouter()
|
||||||
logger = L.get_module_logger("gis")
|
|
||||||
def debug(text: str): logger.debug(text)
|
|
||||||
def info(text: str): logger.info(text)
|
|
||||||
def warn(text: str): logger.warning(text)
|
|
||||||
def err(text: str): logger.error(text)
|
|
||||||
def crit(text: str): logger.critical(text)
|
|
||||||
|
|
||||||
async def dt(
|
async def dt(
|
||||||
date_time: Union[str, int, datetime],
|
date_time: Union[str, int, datetime],
|
||||||
|
@ -36,12 +33,12 @@ async def dt(
|
||||||
# Convert integer (epoch time) to UTC datetime
|
# Convert integer (epoch time) to UTC datetime
|
||||||
if isinstance(date_time, int):
|
if isinstance(date_time, int):
|
||||||
date_time = datetime.fromtimestamp(date_time, tz=timezone.utc)
|
date_time = datetime.fromtimestamp(date_time, tz=timezone.utc)
|
||||||
debug(f"Converted epoch time {date_time} to UTC datetime object.")
|
l.debug(f"Converted epoch time {date_time} to UTC datetime object.")
|
||||||
|
|
||||||
# Convert string to datetime if necessary
|
# Convert string to datetime if necessary
|
||||||
elif isinstance(date_time, str):
|
elif isinstance(date_time, str):
|
||||||
date_time = dateutil_parse(date_time)
|
date_time = dateutil_parse(date_time)
|
||||||
debug(f"Converted string '{date_time}' to datetime object.")
|
l.debug(f"Converted string '{date_time}' to datetime object.")
|
||||||
|
|
||||||
if not isinstance(date_time, datetime):
|
if not isinstance(date_time, datetime):
|
||||||
raise ValueError(f"Input must be a string, integer (epoch time), or datetime object. What we received: {date_time}, type {type(date_time)}")
|
raise ValueError(f"Input must be a string, integer (epoch time), or datetime object. What we received: {date_time}, type {type(date_time)}")
|
||||||
|
@ -49,7 +46,7 @@ async def dt(
|
||||||
# Ensure the datetime is timezone-aware (UTC if not specified)
|
# Ensure the datetime is timezone-aware (UTC if not specified)
|
||||||
if date_time.tzinfo is None:
|
if date_time.tzinfo is None:
|
||||||
date_time = date_time.replace(tzinfo=timezone.utc)
|
date_time = date_time.replace(tzinfo=timezone.utc)
|
||||||
debug("Added UTC timezone to naive datetime.")
|
l.debug("Added UTC timezone to naive datetime.")
|
||||||
|
|
||||||
# Handle provided timezone
|
# Handle provided timezone
|
||||||
if tz is not None:
|
if tz is not None:
|
||||||
|
@ -57,12 +54,12 @@ async def dt(
|
||||||
if tz == "local":
|
if tz == "local":
|
||||||
last_loc = await get_timezone_without_timezone(date_time)
|
last_loc = await get_timezone_without_timezone(date_time)
|
||||||
tz = await GEO.tz_at(last_loc.latitude, last_loc.longitude)
|
tz = await GEO.tz_at(last_loc.latitude, last_loc.longitude)
|
||||||
debug(f"Using local timezone: {tz}")
|
l.debug(f"Using local timezone: {tz}")
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
tz = ZoneInfo(tz)
|
tz = ZoneInfo(tz)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Invalid timezone string '{tz}'. Error: {e}")
|
l.error(f"Invalid timezone string '{tz}'. Error: {e}")
|
||||||
raise ValueError(f"Invalid timezone string: {tz}")
|
raise ValueError(f"Invalid timezone string: {tz}")
|
||||||
elif isinstance(tz, ZoneInfo):
|
elif isinstance(tz, ZoneInfo):
|
||||||
pass # tz is already a ZoneInfo object
|
pass # tz is already a ZoneInfo object
|
||||||
|
@ -71,14 +68,14 @@ async def dt(
|
||||||
|
|
||||||
# Convert to the provided or determined timezone
|
# Convert to the provided or determined timezone
|
||||||
date_time = date_time.astimezone(tz)
|
date_time = date_time.astimezone(tz)
|
||||||
debug(f"Converted datetime to timezone: {tz}")
|
l.debug(f"Converted datetime to timezone: {tz}")
|
||||||
|
|
||||||
return date_time
|
return date_time
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
err(f"Error in dt: {e}")
|
l.error(f"Error in dt: {e}")
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Unexpected error in dt: {e}")
|
l.error(f"Unexpected error in dt: {e}")
|
||||||
raise ValueError(f"Failed to process datetime: {e}")
|
raise ValueError(f"Failed to process datetime: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
@ -112,12 +109,12 @@ async def get_timezone_without_timezone(date_time):
|
||||||
|
|
||||||
async def get_last_location() -> Optional[Location]:
|
async def get_last_location() -> Optional[Location]:
|
||||||
query_datetime = datetime.now(TZ)
|
query_datetime = datetime.now(TZ)
|
||||||
debug(f"Query_datetime: {query_datetime}")
|
l.debug(f"Query_datetime: {query_datetime}")
|
||||||
|
|
||||||
this_location = await fetch_last_location_before(query_datetime)
|
this_location = await fetch_last_location_before(query_datetime)
|
||||||
|
|
||||||
if this_location:
|
if this_location:
|
||||||
debug(f"location: {this_location}")
|
l.debug(f"location: {this_location}")
|
||||||
return this_location
|
return this_location
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
@ -164,15 +161,15 @@ Generate a heatmap for the given date range and save it as a PNG file using Foli
|
||||||
|
|
||||||
m.save(str(output_path))
|
m.save(str(output_path))
|
||||||
|
|
||||||
info(f"Heatmap saved as PNG: {output_path}")
|
l.info(f"Heatmap saved as PNG: {output_path}")
|
||||||
return output_path
|
return output_path
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error saving heatmap: {str(e)}")
|
l.error(f"Error saving heatmap: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error generating heatmap: {str(e)}")
|
l.error(f"Error generating heatmap: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def generate_map(start_date: datetime, end_date: datetime, max_points: int):
|
async def generate_map(start_date: datetime, end_date: datetime, max_points: int):
|
||||||
|
@ -180,7 +177,7 @@ async def generate_map(start_date: datetime, end_date: datetime, max_points: int
|
||||||
if not locations:
|
if not locations:
|
||||||
raise HTTPException(status_code=404, detail="No locations found for the given date range")
|
raise HTTPException(status_code=404, detail="No locations found for the given date range")
|
||||||
|
|
||||||
info(f"Found {len(locations)} locations for the given date range")
|
l.info(f"Found {len(locations)} locations for the given date range")
|
||||||
|
|
||||||
if len(locations) > max_points:
|
if len(locations) > max_points:
|
||||||
locations = random.sample(locations, max_points)
|
locations = random.sample(locations, max_points)
|
||||||
|
@ -291,6 +288,7 @@ map.on(L.Draw.Event.CREATED, function (event) {
|
||||||
return m.get_root().render()
|
return m.get_root().render()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int, datetime, None] = None) -> List[Location]:
|
async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int, datetime, None] = None) -> List[Location]:
|
||||||
start_datetime = await dt(start)
|
start_datetime = await dt(start)
|
||||||
if end is None:
|
if end is None:
|
||||||
|
@ -301,7 +299,7 @@ async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int,
|
||||||
if start_datetime.time() == datetime.min.time() and end_datetime.time() == datetime.min.time():
|
if start_datetime.time() == datetime.min.time() and end_datetime.time() == datetime.min.time():
|
||||||
end_datetime = await dt(end_datetime.replace(hour=23, minute=59, second=59))
|
end_datetime = await dt(end_datetime.replace(hour=23, minute=59, second=59))
|
||||||
|
|
||||||
debug(f"Fetching locations between {start_datetime} and {end_datetime}")
|
l.debug(f"Fetching locations between {start_datetime} and {end_datetime}")
|
||||||
|
|
||||||
query = '''
|
query = '''
|
||||||
SELECT id, datetime,
|
SELECT id, datetime,
|
||||||
|
@ -315,9 +313,12 @@ async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int,
|
||||||
ORDER BY datetime DESC
|
ORDER BY datetime DESC
|
||||||
'''
|
'''
|
||||||
|
|
||||||
locations = await Db.execute_read(query, start_datetime=start_datetime.replace(tzinfo=None), end_datetime=end_datetime.replace(tzinfo=None))
|
try:
|
||||||
|
locations = await Db.read(query, start_datetime=start_datetime, end_datetime=end_datetime)
|
||||||
debug(f"Range locations query returned: {locations}")
|
l.debug(f"Range locations query returned: {locations}")
|
||||||
|
except Exception as e:
|
||||||
|
l.error(f"Error executing range locations query: {str(e)}")
|
||||||
|
locations = []
|
||||||
|
|
||||||
if not locations and (end is None or start_datetime.date() == end_datetime.date()):
|
if not locations and (end is None or start_datetime.date() == end_datetime.date()):
|
||||||
fallback_query = '''
|
fallback_query = '''
|
||||||
|
@ -332,12 +333,19 @@ async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int,
|
||||||
ORDER BY datetime DESC
|
ORDER BY datetime DESC
|
||||||
LIMIT 1
|
LIMIT 1
|
||||||
'''
|
'''
|
||||||
location_data = await Db.execute_read(fallback_query, start_datetime=start_datetime.replace(tzinfo=None))
|
try:
|
||||||
debug(f"Fallback query returned: {location_data}")
|
location_data = await Db.read(fallback_query, start_datetime=start_datetime)
|
||||||
if location_data:
|
l.debug(f"Fallback query returned: {location_data}")
|
||||||
locations = location_data
|
if location_data:
|
||||||
|
locations = location_data
|
||||||
|
except Exception as e:
|
||||||
|
l.error(f"Error executing fallback locations query: {str(e)}")
|
||||||
|
locations = []
|
||||||
|
|
||||||
debug(f"Locations found: {locations}")
|
l.debug(f"Locations found: {locations}")
|
||||||
|
|
||||||
|
if not locations:
|
||||||
|
return []
|
||||||
|
|
||||||
# Sort location_data based on the datetime field in descending order
|
# Sort location_data based on the datetime field in descending order
|
||||||
sorted_locations = sorted(locations, key=lambda x: x['datetime'], reverse=True)
|
sorted_locations = sorted(locations, key=lambda x: x['datetime'], reverse=True)
|
||||||
|
@ -366,11 +374,13 @@ async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int,
|
||||||
return location_objects if location_objects else []
|
return location_objects if location_objects else []
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_last_location_before(datetime: datetime) -> Optional[Location]:
|
async def fetch_last_location_before(datetime: datetime) -> Optional[Location]:
|
||||||
try:
|
try:
|
||||||
datetime = await dt(datetime)
|
datetime = await dt(datetime)
|
||||||
|
|
||||||
debug(f"Fetching last location before {datetime}")
|
l.debug(f"Fetching last location before {datetime}")
|
||||||
|
|
||||||
query = '''
|
query = '''
|
||||||
SELECT id, datetime,
|
SELECT id, datetime,
|
||||||
|
@ -385,16 +395,16 @@ async def fetch_last_location_before(datetime: datetime) -> Optional[Location]:
|
||||||
LIMIT 1
|
LIMIT 1
|
||||||
'''
|
'''
|
||||||
|
|
||||||
location_data = await Db.execute_read(query, datetime=datetime.replace(tzinfo=None))
|
location_data = await Db.read(query, datetime=datetime)
|
||||||
|
|
||||||
if location_data:
|
if location_data:
|
||||||
debug(f"Last location found: {location_data[0]}")
|
l.debug(f"Last location found: {location_data[0]}")
|
||||||
return Location(**location_data[0])
|
return Location(**location_data[0])
|
||||||
else:
|
else:
|
||||||
debug("No location found before the specified datetime")
|
l.debug("No location found before the specified datetime")
|
||||||
return None
|
return None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error(f"Error fetching last location: {str(e)}")
|
l.error(f"Error fetching last location: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@ -452,9 +462,9 @@ async def post_location(location: Location):
|
||||||
'country': location.country
|
'country': location.country
|
||||||
}
|
}
|
||||||
|
|
||||||
await Db.execute_write(query, **params)
|
await Db.write(query, **params)
|
||||||
|
|
||||||
info(f"Successfully posted location: {location.latitude}, {location.longitude}, {location.elevation} on {localized_datetime}")
|
l.info(f"Successfully posted location: {location.latitude}, {location.longitude}, {location.elevation} on {localized_datetime}")
|
||||||
|
|
||||||
# Create a serializable version of params for the return value
|
# Create a serializable version of params for the return value
|
||||||
serializable_params = {
|
serializable_params = {
|
||||||
|
@ -463,15 +473,15 @@ async def post_location(location: Location):
|
||||||
}
|
}
|
||||||
return serializable_params
|
return serializable_params
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error posting location {e}")
|
l.error(f"Error posting location {e}")
|
||||||
err(traceback.format_exc())
|
l.error(traceback.format_exc())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def get_date_range():
|
async def get_date_range():
|
||||||
query = "SELECT MIN(datetime) as min_date, MAX(datetime) as max_date FROM locations"
|
query = "SELECT MIN(datetime) as min_date, MAX(datetime) as max_date FROM locations"
|
||||||
row = await Db.execute_read(query)
|
row = await Db.read(query)
|
||||||
if row and row[0]['min_date'] and row[0]['max_date']:
|
if row and row[0]['min_date'] and row[0]['max_date']:
|
||||||
return row[0]['min_date'], row[0]['max_date']
|
return row[0]['min_date'], row[0]['max_date']
|
||||||
else:
|
else:
|
||||||
|
@ -498,26 +508,26 @@ async def post_locate_endpoint(locations: Union[Location, List[Location]]):
|
||||||
"device_name": "Unknown",
|
"device_name": "Unknown",
|
||||||
"device_os": "Unknown"
|
"device_os": "Unknown"
|
||||||
}
|
}
|
||||||
debug(f"Location received for processing: {lcn}")
|
l.debug(f"Location received for processing: {lcn}")
|
||||||
|
|
||||||
geocoded_locations = await GEO.code(locations)
|
geocoded_locations = await GEO.code(locations)
|
||||||
|
|
||||||
responses = []
|
responses = []
|
||||||
if isinstance(geocoded_locations, List):
|
if isinstance(geocoded_locations, List):
|
||||||
for location in geocoded_locations:
|
for location in geocoded_locations:
|
||||||
debug(f"Final location to be submitted to database: {location}")
|
l.debug(f"Final location to be submitted to database: {location}")
|
||||||
location_entry = await post_location(location)
|
location_entry = await post_location(location)
|
||||||
if location_entry:
|
if location_entry:
|
||||||
responses.append({"location_data": location_entry})
|
responses.append({"location_data": location_entry})
|
||||||
else:
|
else:
|
||||||
warn(f"Posting location to database appears to have failed.")
|
l.warning(f"Posting location to database appears to have failed.")
|
||||||
else:
|
else:
|
||||||
debug(f"Final location to be submitted to database: {geocoded_locations}")
|
l.debug(f"Final location to be submitted to database: {geocoded_locations}")
|
||||||
location_entry = await post_location(geocoded_locations)
|
location_entry = await post_location(geocoded_locations)
|
||||||
if location_entry:
|
if location_entry:
|
||||||
responses.append({"location_data": location_entry})
|
responses.append({"location_data": location_entry})
|
||||||
else:
|
else:
|
||||||
warn(f"Posting location to database appears to have failed.")
|
l.warning(f"Posting location to database appears to have failed.")
|
||||||
|
|
||||||
return {"message": "Locations and weather updated", "results": responses}
|
return {"message": "Locations and weather updated", "results": responses}
|
||||||
|
|
||||||
|
@ -540,7 +550,7 @@ async def get_locate(datetime_str: str, all: bool = False):
|
||||||
try:
|
try:
|
||||||
date_time = await dt(datetime_str)
|
date_time = await dt(datetime_str)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
err(f"Invalid datetime string provided: {datetime_str}")
|
l.error(f"Invalid datetime string provided: {datetime_str}")
|
||||||
return ["ERROR: INVALID DATETIME PROVIDED. USE YYYYMMDDHHmmss or YYYYMMDD format."]
|
return ["ERROR: INVALID DATETIME PROVIDED. USE YYYYMMDDHHmmss or YYYYMMDD format."]
|
||||||
|
|
||||||
locations = await fetch_locations(date_time)
|
locations = await fetch_locations(date_time)
|
||||||
|
@ -565,6 +575,6 @@ async def generate_map_endpoint(
|
||||||
except ValueError:
|
except ValueError:
|
||||||
raise HTTPException(status_code=400, detail="Invalid date format")
|
raise HTTPException(status_code=400, detail="Invalid date format")
|
||||||
|
|
||||||
info(f"Generating map for {start_date} to {end_date}")
|
l.info(f"Generating map for {start_date} to {end_date}")
|
||||||
html_content = await generate_map(start_date, end_date, max_points)
|
html_content = await generate_map(start_date, end_date, max_points)
|
||||||
return HTMLResponse(content=html_content)
|
return HTMLResponse(content=html_content)
|
||||||
|
|
|
@ -36,19 +36,16 @@ import json
|
||||||
from ollama import Client as oLlama
|
from ollama import Client as oLlama
|
||||||
from sijapi.routers.img import img
|
from sijapi.routers.img import img
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from sijapi import L, COMFYUI_DIR
|
|
||||||
|
|
||||||
import io
|
import io
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
|
from sijapi import COMFYUI_DIR
|
||||||
|
from sijapi.logs import get_logger
|
||||||
|
l = get_logger(__name__)
|
||||||
|
|
||||||
ig = APIRouter()
|
ig = APIRouter()
|
||||||
logger = L.get_module_logger("ig")
|
|
||||||
def debug(text: str): logger.debug(text)
|
|
||||||
def info(text: str): logger.info(text)
|
|
||||||
def warn(text: str): logger.warning(text)
|
|
||||||
def err(text: str): logger.error(text)
|
|
||||||
def crit(text: str): logger.critical(text)
|
|
||||||
|
|
||||||
class IG_Request(BaseModel):
|
class IG_Request(BaseModel):
|
||||||
file: Optional[UploadFile] = None # upload a particular file to Instagram
|
file: Optional[UploadFile] = None # upload a particular file to Instagram
|
||||||
|
@ -862,16 +859,16 @@ async def ig_flow_endpoint(new_session: bool = False):
|
||||||
time_remaining = 30 - (time_since_rollover % 30)
|
time_remaining = 30 - (time_since_rollover % 30)
|
||||||
|
|
||||||
if time_remaining < 4:
|
if time_remaining < 4:
|
||||||
logger.debug("Too close to end of TOTP counter. Waiting.")
|
logger.l.debug("Too close to end of TOTP counter. Waiting.")
|
||||||
sleepupto(5, 5)
|
sleepupto(5, 5)
|
||||||
|
|
||||||
if not new_session and os.path.exists(IG_SESSION_PATH):
|
if not new_session and os.path.exists(IG_SESSION_PATH):
|
||||||
cl.load_settings(IG_SESSION_PATH)
|
cl.load_settings(IG_SESSION_PATH)
|
||||||
logger.debug("Loaded past session.")
|
logger.l.debug("Loaded past session.")
|
||||||
|
|
||||||
elif new_session and cl.login(IG_USERNAME, IG_PASSWORD, verification_code=TOTP.now()):
|
elif new_session and cl.login(IG_USERNAME, IG_PASSWORD, verification_code=TOTP.now()):
|
||||||
cl.dump_settings(IG_SESSION_PATH)
|
cl.dump_settings(IG_SESSION_PATH)
|
||||||
logger.debug("Logged in and saved new session.")
|
logger.l.debug("Logged in and saved new session.")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise Exception(f"Failed to login as {IG_USERNAME}.")
|
raise Exception(f"Failed to login as {IG_USERNAME}.")
|
||||||
|
|
|
@ -18,15 +18,12 @@ import random
|
||||||
import os
|
import os
|
||||||
import asyncio
|
import asyncio
|
||||||
from sijapi.routers.llm import query_ollama
|
from sijapi.routers.llm import query_ollama
|
||||||
from sijapi import API, L, COMFYUI_URL, COMFYUI_OUTPUT_DIR, IMG_CONFIG_PATH, IMG_DIR, IMG_WORKFLOWS_DIR
|
from sijapi import Sys, COMFYUI_URL, COMFYUI_OUTPUT_DIR, IMG_CONFIG_PATH, IMG_DIR, IMG_WORKFLOWS_DIR
|
||||||
|
from sijapi.logs import get_logger
|
||||||
|
l = get_logger(__name__)
|
||||||
|
|
||||||
img = APIRouter()
|
img = APIRouter()
|
||||||
logger = L.get_module_logger("img")
|
|
||||||
def debug(text: str): logger.debug(text)
|
|
||||||
def info(text: str): logger.info(text)
|
|
||||||
def warn(text: str): logger.warning(text)
|
|
||||||
def err(text: str): logger.error(text)
|
|
||||||
def crit(text: str): logger.critical(text)
|
|
||||||
|
|
||||||
CLIENT_ID = str(uuid.uuid4())
|
CLIENT_ID = str(uuid.uuid4())
|
||||||
|
|
||||||
|
@ -73,12 +70,12 @@ async def workflow(prompt: str, scene: str = None, size: str = None, earlyout: s
|
||||||
|
|
||||||
scene_workflow = random.choice(scene_data['workflows'])
|
scene_workflow = random.choice(scene_data['workflows'])
|
||||||
if size:
|
if size:
|
||||||
debug(f"Specified size: {size}")
|
l.debug(f"Specified size: {size}")
|
||||||
|
|
||||||
size = size if size else scene_workflow.get('size', '1024x1024')
|
size = size if size else scene_workflow.get('size', '1024x1024')
|
||||||
|
|
||||||
width, height = map(int, size.split('x'))
|
width, height = map(int, size.split('x'))
|
||||||
debug(f"Parsed width: {width}; parsed height: {height}")
|
l.debug(f"Parsed width: {width}; parsed height: {height}")
|
||||||
|
|
||||||
workflow_path = Path(IMG_WORKFLOWS_DIR) / scene_workflow['workflow']
|
workflow_path = Path(IMG_WORKFLOWS_DIR) / scene_workflow['workflow']
|
||||||
workflow_data = json.loads(workflow_path.read_text())
|
workflow_data = json.loads(workflow_path.read_text())
|
||||||
|
@ -92,22 +89,22 @@ async def workflow(prompt: str, scene: str = None, size: str = None, earlyout: s
|
||||||
}
|
}
|
||||||
|
|
||||||
saved_file_key = await update_prompt_and_get_key(workflow=workflow_data, post=post, positive=image_concept)
|
saved_file_key = await update_prompt_and_get_key(workflow=workflow_data, post=post, positive=image_concept)
|
||||||
info(f"Saved file key: {saved_file_key}")
|
l.info(f"Saved file key: {saved_file_key}")
|
||||||
|
|
||||||
prompt_id = await queue_prompt(workflow_data)
|
prompt_id = await queue_prompt(workflow_data)
|
||||||
info(f"Prompt ID: {prompt_id}")
|
l.info(f"Prompt ID: {prompt_id}")
|
||||||
|
|
||||||
max_size = max(width, height) if downscale_to_fit else None
|
max_size = max(width, height) if downscale_to_fit else None
|
||||||
destination_path = Path(destination_path).with_suffix(".jpg") if destination_path else IMG_DIR / f"{prompt_id}.jpg"
|
destination_path = Path(destination_path).with_suffix(".jpg") if destination_path else IMG_DIR / f"{prompt_id}.jpg"
|
||||||
|
|
||||||
if earlyout:
|
if earlyout:
|
||||||
asyncio.create_task(generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path))
|
asyncio.create_task(generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path))
|
||||||
debug(f"Returning {destination_path}")
|
l.debug(f"Returning {destination_path}")
|
||||||
return destination_path
|
return destination_path
|
||||||
|
|
||||||
else:
|
else:
|
||||||
await generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path)
|
await generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path)
|
||||||
debug(f"Returning {destination_path}")
|
l.debug(f"Returning {destination_path}")
|
||||||
return destination_path
|
return destination_path
|
||||||
|
|
||||||
|
|
||||||
|
@ -118,16 +115,16 @@ async def generate_and_save_image(prompt_id, saved_file_key, max_size, destinati
|
||||||
jpg_file_path = await save_as_jpg(image_data, prompt_id, quality=90, max_size=max_size, destination_path=destination_path)
|
jpg_file_path = await save_as_jpg(image_data, prompt_id, quality=90, max_size=max_size, destination_path=destination_path)
|
||||||
|
|
||||||
if Path(jpg_file_path) != Path(destination_path):
|
if Path(jpg_file_path) != Path(destination_path):
|
||||||
err(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}")
|
l.error(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error in generate_and_save_image: {e}")
|
l.error(f"Error in generate_and_save_image: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def get_web_path(file_path: Path) -> str:
|
def get_web_path(file_path: Path) -> str:
|
||||||
uri = file_path.relative_to(IMG_DIR)
|
uri = file_path.relative_to(IMG_DIR)
|
||||||
web_path = f"{API.URL}/img/{uri}"
|
web_path = f"{Sys.URL}/img/{uri}"
|
||||||
return web_path
|
return web_path
|
||||||
|
|
||||||
|
|
||||||
|
@ -143,7 +140,7 @@ async def poll_status(prompt_id):
|
||||||
status_data = await response.json()
|
status_data = await response.json()
|
||||||
job_data = status_data.get(prompt_id, {})
|
job_data = status_data.get(prompt_id, {})
|
||||||
if job_data.get("status", {}).get("completed", False):
|
if job_data.get("status", {}).get("completed", False):
|
||||||
info(f"{prompt_id} completed in {elapsed_time} seconds.")
|
l.info(f"{prompt_id} completed in {elapsed_time} seconds.")
|
||||||
return job_data
|
return job_data
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
|
|
||||||
|
@ -194,7 +191,7 @@ async def save_as_jpg(image_data, prompt_id, max_size = None, quality = 100, des
|
||||||
return str(destination_path_jpg)
|
return str(destination_path_jpg)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error processing image: {e}")
|
l.error(f"Error processing image: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@ -210,11 +207,11 @@ def set_presets(workflow_data, preset_values):
|
||||||
if 'inputs' in workflow_data.get(preset_node, {}):
|
if 'inputs' in workflow_data.get(preset_node, {}):
|
||||||
workflow_data[preset_node]['inputs'][preset_key] = preset_value
|
workflow_data[preset_node]['inputs'][preset_key] = preset_value
|
||||||
else:
|
else:
|
||||||
debug("Node not found in workflow_data")
|
l.debug("Node not found in workflow_data")
|
||||||
else:
|
else:
|
||||||
debug("Required data missing in preset_values")
|
l.debug("Required data missing in preset_values")
|
||||||
else:
|
else:
|
||||||
debug("No preset_values found")
|
l.debug("No preset_values found")
|
||||||
|
|
||||||
|
|
||||||
def get_return_path(destination_path):
|
def get_return_path(destination_path):
|
||||||
|
@ -229,7 +226,7 @@ def get_scene(scene):
|
||||||
IMG_CONFIG = yaml.safe_load(IMG_CONFIG_file)
|
IMG_CONFIG = yaml.safe_load(IMG_CONFIG_file)
|
||||||
for scene_data in IMG_CONFIG['scenes']:
|
for scene_data in IMG_CONFIG['scenes']:
|
||||||
if scene_data['scene'] == scene:
|
if scene_data['scene'] == scene:
|
||||||
debug(f"Found scene for \"{scene}\".")
|
l.debug(f"Found scene for \"{scene}\".")
|
||||||
return scene_data
|
return scene_data
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -249,11 +246,11 @@ def get_matching_scene(prompt):
|
||||||
max_count = count
|
max_count = count
|
||||||
scene_data = sc
|
scene_data = sc
|
||||||
if scene_data:
|
if scene_data:
|
||||||
debug(f"Found better-matching scene: the prompt contains {max_count} words that match triggers for {scene_data.get('name')}!")
|
l.debug(f"Found better-matching scene: the prompt contains {max_count} words that match triggers for {scene_data.get('name')}!")
|
||||||
if scene_data:
|
if scene_data:
|
||||||
return scene_data
|
return scene_data
|
||||||
else:
|
else:
|
||||||
debug(f"No matching scenes found, falling back to default scene.")
|
l.debug(f"No matching scenes found, falling back to default scene.")
|
||||||
return IMG_CONFIG['scenes'][0]
|
return IMG_CONFIG['scenes'][0]
|
||||||
|
|
||||||
|
|
||||||
|
@ -272,11 +269,11 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0):
|
||||||
for attempt in range(retries):
|
for attempt in range(retries):
|
||||||
try:
|
try:
|
||||||
with socket.create_connection(("127.0.0.1", 8188), timeout=2):
|
with socket.create_connection(("127.0.0.1", 8188), timeout=2):
|
||||||
info("ComfyUI is already running.")
|
l.info("ComfyUI is already running.")
|
||||||
return
|
return
|
||||||
except (socket.timeout, ConnectionRefusedError):
|
except (socket.timeout, ConnectionRefusedError):
|
||||||
if attempt == 0: # Only try to start ComfyUI on the first failed attempt
|
if attempt == 0: # Only try to start ComfyUI on the first failed attempt
|
||||||
warn("ComfyUI is not running. Starting it now...")
|
l.warning("ComfyUI is not running. Starting it now...")
|
||||||
try:
|
try:
|
||||||
tmux_command = (
|
tmux_command = (
|
||||||
"tmux split-window -h "
|
"tmux split-window -h "
|
||||||
|
@ -285,14 +282,14 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0):
|
||||||
"python main.py; exec $SHELL\""
|
"python main.py; exec $SHELL\""
|
||||||
)
|
)
|
||||||
subprocess.Popen(tmux_command, shell=True)
|
subprocess.Popen(tmux_command, shell=True)
|
||||||
info("ComfyUI started in a new tmux session.")
|
l.info("ComfyUI started in a new tmux session.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise RuntimeError(f"Error starting ComfyUI: {e}")
|
raise RuntimeError(f"Error starting ComfyUI: {e}")
|
||||||
|
|
||||||
warn(f"Attempt {attempt + 1}/{retries} failed. Waiting {timeout} seconds before retrying...")
|
l.warning(f"Attempt {attempt + 1}/{retries} failed. Waiting {timeout} seconds before retrying...")
|
||||||
await asyncio.sleep(timeout)
|
await asyncio.sleep(timeout)
|
||||||
|
|
||||||
crit(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
|
l.critical(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
|
||||||
raise RuntimeError(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
|
raise RuntimeError(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
|
||||||
|
|
||||||
|
|
||||||
|
@ -398,13 +395,13 @@ Even more important, it finds and returns the key to the filepath where the file
|
||||||
workflow[key] = random.randint(1000000000000, 9999999999999)
|
workflow[key] = random.randint(1000000000000, 9999999999999)
|
||||||
|
|
||||||
elif key in ["width", "max_width", "scaled_width", "height", "max_height", "scaled_height", "side_length", "size", "value", "dimension", "dimensions", "long", "long_side", "short", "short_side", "length"]:
|
elif key in ["width", "max_width", "scaled_width", "height", "max_height", "scaled_height", "side_length", "size", "value", "dimension", "dimensions", "long", "long_side", "short", "short_side", "length"]:
|
||||||
debug(f"Got a hit for a dimension: {key} {value}")
|
l.debug(f"Got a hit for a dimension: {key} {value}")
|
||||||
if value == 1023:
|
if value == 1023:
|
||||||
workflow[key] = post.get("width", 1024)
|
workflow[key] = post.get("width", 1024)
|
||||||
debug(f"Set {key} to {workflow[key]}.")
|
l.debug(f"Set {key} to {workflow[key]}.")
|
||||||
elif value == 1025:
|
elif value == 1025:
|
||||||
workflow[key] = post.get("height", 1024)
|
workflow[key] = post.get("height", 1024)
|
||||||
debug(f"Set {key} to {workflow[key]}.")
|
l.debug(f"Set {key} to {workflow[key]}.")
|
||||||
|
|
||||||
update_recursive(workflow)
|
update_recursive(workflow)
|
||||||
return found_key[0]
|
return found_key[0]
|
||||||
|
|
|
@ -26,18 +26,14 @@ import tempfile
|
||||||
import shutil
|
import shutil
|
||||||
import html2text
|
import html2text
|
||||||
import markdown
|
import markdown
|
||||||
from sijapi import L, Llm, LLM_SYS_MSG, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MODEL
|
from sijapi import Llm, LLM_SYS_MSG, REQUESTS_DIR, OBSIDIAN_CHROMADB_COLLECTION, OBSIDIAN_VAULT_DIR, DOC_DIR, SUMMARY_INSTRUCT, SUMMARY_CHUNK_SIZE, SUMMARY_TPW, SUMMARY_CHUNK_OVERLAP, SUMMARY_LENGTH_RATIO, SUMMARY_TOKEN_LIMIT, SUMMARY_MIN_LENGTH, SUMMARY_MODEL
|
||||||
from sijapi.utilities import convert_to_unix_time, sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension
|
from sijapi.utilities import convert_to_unix_time, sanitize_filename, ocr_pdf, clean_text, should_use_ocr, extract_text_from_pdf, extract_text_from_docx, read_text_file, str_to_bool, get_extension
|
||||||
from sijapi.routers import tts
|
from sijapi.routers import tts
|
||||||
from sijapi.routers.asr import transcribe_audio
|
from sijapi.routers.asr import transcribe_audio
|
||||||
|
from sijapi.logs import get_logger
|
||||||
|
l = get_logger(__name__)
|
||||||
|
|
||||||
llm = APIRouter()
|
llm = APIRouter()
|
||||||
logger = L.get_module_logger("llm")
|
|
||||||
def debug(text: str): logger.debug(text)
|
|
||||||
def info(text: str): logger.info(text)
|
|
||||||
def warn(text: str): logger.warning(text)
|
|
||||||
def err(text: str): logger.error(text)
|
|
||||||
def crit(text: str): logger.critical(text)
|
|
||||||
|
|
||||||
|
|
||||||
VISION_MODELS = ["llava-phi3", "moondream", "llava", "llava-llama3", "llava:34b", "llava:13b-v1.5-q8_0"]
|
VISION_MODELS = ["llava-phi3", "moondream", "llava", "llava-llama3", "llava:34b", "llava:13b-v1.5-q8_0"]
|
||||||
|
@ -93,13 +89,13 @@ async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, model: str = Llm.chat.m
|
||||||
LLM = Ollama()
|
LLM = Ollama()
|
||||||
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
|
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
|
||||||
|
|
||||||
debug(response)
|
l.debug(response)
|
||||||
if "message" in response:
|
if "message" in response:
|
||||||
if "content" in response["message"]:
|
if "content" in response["message"]:
|
||||||
content = response["message"]["content"]
|
content = response["message"]["content"]
|
||||||
return content
|
return content
|
||||||
else:
|
else:
|
||||||
debug("No choices found in response")
|
l.debug("No choices found in response")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def query_ollama_multishot(
|
async def query_ollama_multishot(
|
||||||
|
@ -120,12 +116,12 @@ async def query_ollama_multishot(
|
||||||
|
|
||||||
LLM = Ollama()
|
LLM = Ollama()
|
||||||
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
|
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
|
||||||
debug(response)
|
l.debug(response)
|
||||||
|
|
||||||
if "message" in response and "content" in response["message"]:
|
if "message" in response and "content" in response["message"]:
|
||||||
return response["message"]["content"]
|
return response["message"]["content"]
|
||||||
else:
|
else:
|
||||||
debug("No content found in response")
|
l.debug("No content found in response")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@ -144,21 +140,21 @@ async def chat_completions(request: Request):
|
||||||
raise HTTPException(status_code=400, detail="Message data is required in the request body.")
|
raise HTTPException(status_code=400, detail="Message data is required in the request body.")
|
||||||
|
|
||||||
requested_model = body.get('model', 'default-model')
|
requested_model = body.get('model', 'default-model')
|
||||||
debug(f"Requested model: {requested_model}")
|
l.debug(f"Requested model: {requested_model}")
|
||||||
stream = body.get('stream')
|
stream = body.get('stream')
|
||||||
token_limit = body.get('max_tokens') or body.get('num_predict')
|
token_limit = body.get('max_tokens') or body.get('num_predict')
|
||||||
|
|
||||||
# Check if the most recent message contains an image_url
|
# Check if the most recent message contains an image_url
|
||||||
recent_message = messages[-1]
|
recent_message = messages[-1]
|
||||||
if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')):
|
if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')):
|
||||||
debug("Processing as a vision request")
|
l.debug("Processing as a vision request")
|
||||||
model = "llava"
|
model = "llava"
|
||||||
debug(f"Using model: {model}")
|
l.debug(f"Using model: {model}")
|
||||||
return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json")
|
return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json")
|
||||||
else:
|
else:
|
||||||
debug("Processing as a standard request")
|
l.debug("Processing as a standard request")
|
||||||
model = requested_model
|
model = requested_model
|
||||||
debug(f"Using model: {model}")
|
l.debug(f"Using model: {model}")
|
||||||
if stream:
|
if stream:
|
||||||
return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json")
|
return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json")
|
||||||
else:
|
else:
|
||||||
|
@ -283,17 +279,17 @@ async def generate_messages(messages: list, model: str = "llama3"):
|
||||||
def is_model_available(model_name):
|
def is_model_available(model_name):
|
||||||
model_data = OllamaList()
|
model_data = OllamaList()
|
||||||
available_models = [model['name'] for model in model_data['models']]
|
available_models = [model['name'] for model in model_data['models']]
|
||||||
debug(f"Available models: {available_models}") # Log using the configured LOGGER
|
l.debug(f"Available models: {available_models}") # Log using the configured LOGGER
|
||||||
|
|
||||||
matching_models = [model for model in available_models if model.startswith(model_name + ':') or model == model_name]
|
matching_models = [model for model in available_models if model.startswith(model_name + ':') or model == model_name]
|
||||||
if len(matching_models) == 1:
|
if len(matching_models) == 1:
|
||||||
debug(f"Unique match found: {matching_models[0]}")
|
l.debug(f"Unique match found: {matching_models[0]}")
|
||||||
return True
|
return True
|
||||||
elif len(matching_models) > 1:
|
elif len(matching_models) > 1:
|
||||||
err(f"Ambiguous match found, models: {matching_models}")
|
l.error(f"Ambiguous match found, models: {matching_models}")
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
err(f"No match found for model: {model_name}")
|
l.error(f"No match found for model: {model_name}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
@ -416,12 +412,12 @@ def query_gpt4(llmPrompt: List = [], system_msg: str = "", user_msg: str = "", m
|
||||||
if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
|
if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
|
||||||
return first_choice.message.content
|
return first_choice.message.content
|
||||||
else:
|
else:
|
||||||
debug("No content attribute in the first choice's message")
|
l.debug("No content attribute in the first choice's message")
|
||||||
debug(f"No content found in message string: {response.choices}")
|
l.debug(f"No content found in message string: {response.choices}")
|
||||||
debug("Trying again!")
|
l.debug("Trying again!")
|
||||||
query_gpt4(messages, max_tokens)
|
query_gpt4(messages, max_tokens)
|
||||||
else:
|
else:
|
||||||
debug(f"No content found in message string: {response}")
|
l.debug(f"No content found in message string: {response}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def llava(image_base64, prompt):
|
def llava(image_base64, prompt):
|
||||||
|
@ -431,7 +427,7 @@ def llava(image_base64, prompt):
|
||||||
prompt = f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}",
|
prompt = f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}",
|
||||||
images = [image_base64]
|
images = [image_base64]
|
||||||
)
|
)
|
||||||
debug(response)
|
l.debug(response)
|
||||||
return "" if "pass" in response["response"].lower() else response["response"]
|
return "" if "pass" in response["response"].lower() else response["response"]
|
||||||
|
|
||||||
def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150):
|
def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150):
|
||||||
|
@ -462,7 +458,7 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
|
||||||
comment_content = first_choice.message.content
|
comment_content = first_choice.message.content
|
||||||
if "PASS" in comment_content:
|
if "PASS" in comment_content:
|
||||||
return ""
|
return ""
|
||||||
debug(f"Generated comment: {comment_content}")
|
l.debug(f"Generated comment: {comment_content}")
|
||||||
|
|
||||||
response_2 = VISION_LLM.chat.completions.create(
|
response_2 = VISION_LLM.chat.completions.create(
|
||||||
model="gpt-4-vision-preview",
|
model="gpt-4-vision-preview",
|
||||||
|
@ -500,15 +496,15 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
|
||||||
first_choice = response_2.choices[0]
|
first_choice = response_2.choices[0]
|
||||||
if first_choice.message and first_choice.message.content:
|
if first_choice.message and first_choice.message.content:
|
||||||
final_content = first_choice.message.content
|
final_content = first_choice.message.content
|
||||||
debug(f"Generated comment: {final_content}")
|
l.debug(f"Generated comment: {final_content}")
|
||||||
if "PASS" in final_content:
|
if "PASS" in final_content:
|
||||||
return ""
|
return ""
|
||||||
else:
|
else:
|
||||||
return final_content
|
return final_content
|
||||||
|
|
||||||
|
|
||||||
debug("Vision response did not contain expected data.")
|
l.debug("Vision response did not contain expected data.")
|
||||||
debug(f"Vision response: {response_1}")
|
l.debug(f"Vision response: {response_1}")
|
||||||
asyncio.sleep(15)
|
asyncio.sleep(15)
|
||||||
|
|
||||||
try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens)
|
try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens)
|
||||||
|
@ -566,7 +562,7 @@ async def summarize_tts_endpoint(
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error in summarize_tts_endpoint: {str(e)}")
|
l.error(f"Error in summarize_tts_endpoint: {str(e)}")
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
status_code=400,
|
status_code=400,
|
||||||
content={"error": str(e)}
|
content={"error": str(e)}
|
||||||
|
@ -593,7 +589,7 @@ async def summarize_tts(
|
||||||
bg_tasks = BackgroundTasks()
|
bg_tasks = BackgroundTasks()
|
||||||
model = await tts.get_model(voice)
|
model = await tts.get_model(voice)
|
||||||
final_output_path = await tts.generate_speech(bg_tasks, summarized_text, voice, model=model, speed=speed, podcast=podcast, title=filename)
|
final_output_path = await tts.generate_speech(bg_tasks, summarized_text, voice, model=model, speed=speed, podcast=podcast, title=filename)
|
||||||
debug(f"summary_tts completed with final_output_path: {final_output_path}")
|
l.debug(f"summary_tts completed with final_output_path: {final_output_path}")
|
||||||
return final_output_path
|
return final_output_path
|
||||||
|
|
||||||
|
|
||||||
|
@ -609,10 +605,10 @@ def split_text_into_chunks(text: str) -> List[str]:
|
||||||
sentences = re.split(r'(?<=[.!?])\s+', text)
|
sentences = re.split(r'(?<=[.!?])\s+', text)
|
||||||
words = text.split()
|
words = text.split()
|
||||||
total_words = len(words)
|
total_words = len(words)
|
||||||
debug(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.")
|
l.debug(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.")
|
||||||
|
|
||||||
max_words_per_chunk = int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW)
|
max_words_per_chunk = int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW)
|
||||||
debug(f"Maximum words per chunk: {max_words_per_chunk}")
|
l.debug(f"Maximum words per chunk: {max_words_per_chunk}")
|
||||||
|
|
||||||
chunks = []
|
chunks = []
|
||||||
current_chunk = []
|
current_chunk = []
|
||||||
|
@ -632,7 +628,7 @@ def split_text_into_chunks(text: str) -> List[str]:
|
||||||
if current_chunk:
|
if current_chunk:
|
||||||
chunks.append(' '.join(current_chunk))
|
chunks.append(' '.join(current_chunk))
|
||||||
|
|
||||||
debug(f"Split text into {len(chunks)} chunks.")
|
l.debug(f"Split text into {len(chunks)} chunks.")
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
|
|
||||||
|
@ -644,7 +640,7 @@ def calculate_max_tokens(text: str) -> int:
|
||||||
|
|
||||||
|
|
||||||
async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_tasks: BackgroundTasks = None) -> str:
|
async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_tasks: BackgroundTasks = None) -> str:
|
||||||
info(f"Attempting to extract text from file: {file}")
|
l.info(f"Attempting to extract text from file: {file}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if isinstance(file, UploadFile):
|
if isinstance(file, UploadFile):
|
||||||
|
@ -667,7 +663,7 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
|
||||||
|
|
||||||
_, file_ext = os.path.splitext(file_path)
|
_, file_ext = os.path.splitext(file_path)
|
||||||
file_ext = file_ext.lower()
|
file_ext = file_ext.lower()
|
||||||
info(f"File extension: {file_ext}")
|
l.info(f"File extension: {file_ext}")
|
||||||
|
|
||||||
if file_ext == '.pdf':
|
if file_ext == '.pdf':
|
||||||
text_content = await extract_text_from_pdf(file_path)
|
text_content = await extract_text_from_pdf(file_path)
|
||||||
|
@ -694,7 +690,7 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
|
||||||
return text_content
|
return text_content
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error extracting text: {str(e)}")
|
l.error(f"Error extracting text: {str(e)}")
|
||||||
raise ValueError(f"Error extracting text: {str(e)}")
|
raise ValueError(f"Error extracting text: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
@ -703,17 +699,17 @@ async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_
|
||||||
|
|
||||||
chunked_text = split_text_into_chunks(text)
|
chunked_text = split_text_into_chunks(text)
|
||||||
total_parts = len(chunked_text)
|
total_parts = len(chunked_text)
|
||||||
debug(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}")
|
l.debug(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}")
|
||||||
|
|
||||||
total_words_count = sum(len(chunk.split()) for chunk in chunked_text)
|
total_words_count = sum(len(chunk.split()) for chunk in chunked_text)
|
||||||
debug(f"Total words count: {total_words_count}")
|
l.debug(f"Total words count: {total_words_count}")
|
||||||
total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW))
|
total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW))
|
||||||
debug(f"Total tokens count: {total_tokens_count}")
|
l.debug(f"Total tokens count: {total_tokens_count}")
|
||||||
|
|
||||||
total_summary_length = length_override if length_override else total_tokens_count // length_quotient
|
total_summary_length = length_override if length_override else total_tokens_count // length_quotient
|
||||||
debug(f"Total summary length: {total_summary_length}")
|
l.debug(f"Total summary length: {total_summary_length}")
|
||||||
corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT)
|
corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT)
|
||||||
debug(f"Corrected total summary length: {corrected_total_summary_length}")
|
l.debug(f"Corrected total summary length: {corrected_total_summary_length}")
|
||||||
|
|
||||||
summaries = await asyncio.gather(*[
|
summaries = await asyncio.gather(*[
|
||||||
process_chunk(instruction, chunk, i+1, total_parts, LLM=LLM)
|
process_chunk(instruction, chunk, i+1, total_parts, LLM=LLM)
|
||||||
|
@ -724,21 +720,21 @@ async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_
|
||||||
summaries = [f"\n\n\nPART {i+1} of {total_parts}:\n\n{summary}" for i, summary in enumerate(summaries)]
|
summaries = [f"\n\n\nPART {i+1} of {total_parts}:\n\n{summary}" for i, summary in enumerate(summaries)]
|
||||||
|
|
||||||
concatenated_summary = ' '.join(summaries)
|
concatenated_summary = ' '.join(summaries)
|
||||||
debug(f"Concatenated summary: {concatenated_summary}")
|
l.debug(f"Concatenated summary: {concatenated_summary}")
|
||||||
debug(f"Concatenated summary length: {len(concatenated_summary.split())}")
|
l.debug(f"Concatenated summary length: {len(concatenated_summary.split())}")
|
||||||
|
|
||||||
if total_parts > 1:
|
if total_parts > 1:
|
||||||
debug(f"Processing the concatenated_summary to smooth the edges...")
|
l.debug(f"Processing the concatenated_summary to smooth the edges...")
|
||||||
concatenated_instruct = f"The following text consists of the concatenated {total_parts} summaries of {total_parts} parts of a single document that had to be split for processing. Reword it for clarity and flow as a single cohesive summary, understanding that it all relates to a single document, but that document likely consists of multiple parts potentially from multiple authors. Do not shorten it and do not omit content, simply smooth out the edges between the parts."
|
concatenated_instruct = f"The following text consists of the concatenated {total_parts} summaries of {total_parts} parts of a single document that had to be split for processing. Reword it for clarity and flow as a single cohesive summary, understanding that it all relates to a single document, but that document likely consists of multiple parts potentially from multiple authors. Do not shorten it and do not omit content, simply smooth out the edges between the parts."
|
||||||
final_summary = await process_chunk(concatenated_instruct, concatenated_summary, 1, 1, length_ratio=1, LLM=LLM)
|
final_summary = await process_chunk(concatenated_instruct, concatenated_summary, 1, 1, length_ratio=1, LLM=LLM)
|
||||||
debug(f"Final summary length: {len(final_summary.split())}")
|
l.debug(f"Final summary length: {len(final_summary.split())}")
|
||||||
return final_summary
|
return final_summary
|
||||||
else:
|
else:
|
||||||
return concatenated_summary
|
return concatenated_summary
|
||||||
|
|
||||||
|
|
||||||
async def process_chunk(instruction: str, text: str, part: int, total_parts: int, length_ratio: float = None, LLM: Ollama = None) -> str:
|
async def process_chunk(instruction: str, text: str, part: int, total_parts: int, length_ratio: float = None, LLM: Ollama = None) -> str:
|
||||||
# debug(f"Processing chunk: {text}")
|
# l.debug(f"Processing chunk: {text}")
|
||||||
LLM = LLM if LLM else Ollama()
|
LLM = LLM if LLM else Ollama()
|
||||||
|
|
||||||
words_count = len(text.split())
|
words_count = len(text.split())
|
||||||
|
@ -748,14 +744,14 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int
|
||||||
max_tokens = min(tokens_count // summary_length_ratio, SUMMARY_CHUNK_SIZE)
|
max_tokens = min(tokens_count // summary_length_ratio, SUMMARY_CHUNK_SIZE)
|
||||||
max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH)
|
max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH)
|
||||||
|
|
||||||
debug(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}")
|
l.debug(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}")
|
||||||
|
|
||||||
if part and total_parts > 1:
|
if part and total_parts > 1:
|
||||||
prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}"
|
prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}"
|
||||||
else:
|
else:
|
||||||
prompt = f"{instruction}:\n\n{text}"
|
prompt = f"{instruction}:\n\n{text}"
|
||||||
|
|
||||||
info(f"Starting LLM.generate for part {part} of {total_parts}")
|
l.info(f"Starting LLM.generate for part {part} of {total_parts}")
|
||||||
response = await LLM.generate(
|
response = await LLM.generate(
|
||||||
model=SUMMARY_MODEL,
|
model=SUMMARY_MODEL,
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
|
@ -764,8 +760,8 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int
|
||||||
)
|
)
|
||||||
|
|
||||||
text_response = response['response']
|
text_response = response['response']
|
||||||
info(f"Completed LLM.generate for part {part} of {total_parts}")
|
l.info(f"Completed LLM.generate for part {part} of {total_parts}")
|
||||||
debug(f"Result: {text_response}")
|
l.debug(f"Result: {text_response}")
|
||||||
return text_response
|
return text_response
|
||||||
|
|
||||||
async def title_and_summary(extracted_text: str):
|
async def title_and_summary(extracted_text: str):
|
||||||
|
|
|
@@ -18,17 +18,13 @@ from markdownify import markdownify as md
from better_profanity import profanity
from fastapi import APIRouter, BackgroundTasks, UploadFile, Form, HTTPException, Query, Path as FastAPIPath
from pathlib import Path
-from sijapi import L, Archivist, News, Tts, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR
+from sijapi import Archivist, News, Tts, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR
from sijapi.utilities import html_to_markdown, download_file, sanitize_filename, assemble_journal_path, assemble_archive_path, contains_profanity, is_ad_or_tracker
from sijapi.routers import gis, llm, tts, note
+from sijapi.logs import get_logger
+l = get_logger(__name__)

news = APIRouter()
-logger = L.get_module_logger("news")
-def debug(text: str): logger.debug(text)
-def info(text: str): logger.info(text)
-def warn(text: str): logger.warning(text)
-def err(text: str): logger.error(text)
-def crit(text: str): logger.critical(text)


@news.post("/clip")

@@ -87,7 +83,7 @@ async def handle_tts(bg_tasks: BackgroundTasks, article: Article, title: str, tt
return f"![[{Path(audio_path).name}]]"

except HTTPException as e:
-err(f"Failed to generate TTS: {str(e)}")
+l.error(f"Failed to generate TTS: {str(e)}")
return None


@@ -99,7 +95,7 @@ def get_banner_markdown(image_url: str) -> str:
banner_image = download_file(image_url, Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR)
return f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" if banner_image else ''
except Exception as e:
-err(f"Failed to download banner image: {str(e)}")
+l.error(f"Failed to download banner image: {str(e)}")
return ''


@@ -109,7 +105,7 @@ async def save_markdown_file(filename: str, content: str):


async def process_news_site(site, bg_tasks: BackgroundTasks):
-info(f"Downloading articles from {site.name}...")
+l.info(f"Downloading articles from {site.name}...")

earliest_date = dt_datetime.now().date() - timedelta(days=site.days_back)


@@ -131,9 +127,9 @@ async def process_news_site(site, bg_tasks: BackgroundTasks):
results = await asyncio.gather(*tasks)
articles_downloaded = sum(results)

-info(f"Downloaded {articles_downloaded} articles from {site.name}")
+l.info(f"Downloaded {articles_downloaded} articles from {site.name}")
except Exception as e:
-err(f"Error processing {site.name}: {str(e)}")
+l.error(f"Error processing {site.name}: {str(e)}")


async def download_and_save_article(article, site_name, earliest_date, bg_tasks: BackgroundTasks, tts_mode: str = "off", voice: str = Tts.elevenlabs.default):

@@ -147,7 +143,7 @@ async def download_and_save_article(article, site_name, earliest_date, bg_tasks:
return await process_and_save_article(bg_tasks, url, None, tts_mode, voice, site_name=site_name)

except Exception as e:
-err(f"Error processing article from {article.url}: {str(e)}")
+l.error(f"Error processing article from {article.url}: {str(e)}")
return False


@@ -186,16 +182,16 @@ async def process_and_save_article(
return f"Successfully saved: {relative_path}"

except Exception as e:
-err(f"Failed to handle final markdown content preparation and/or saving to daily note; {e}")
+l.error(f"Failed to handle final markdown content preparation and/or saving to daily note; {e}")

except Exception as e:
-err(f"Failed to handle TTS: {e}")
+l.error(f"Failed to handle TTS: {e}")

except Exception as e:
-err(f"Failed to generate title, file paths, and summary: {e}")
+l.error(f"Failed to generate title, file paths, and summary: {e}")

except Exception as e:
-err(f"Failed to fetch and parse article {url}: {str(e)}")
+l.error(f"Failed to fetch and parse article {url}: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))

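Every router touched by this commit switches from the per-module logger shim (logger = L.get_module_logger(...) plus debug/info/warn/err/crit wrappers) to a single module-level logger obtained with from sijapi.logs import get_logger and l = get_logger(__name__). The sijapi.logs module itself is not shown in these hunks; a minimal stdlib-based stand-in that would satisfy the calls used here (l.debug, l.info, l.warning, l.error, l.critical) might look like the following sketch.

# Hypothetical minimal sijapi/logs.py; the real module may differ in backend and format.
import logging
import sys

_configured = False

def _configure_root() -> None:
    global _configured
    if _configured:
        return
    handler = logging.StreamHandler(sys.stderr)
    handler.setFormatter(logging.Formatter("%(asctime)s %(name)s %(levelname)s %(message)s"))
    root = logging.getLogger("sijapi")
    root.addHandler(handler)
    root.setLevel(logging.DEBUG)
    _configured = True

def get_logger(name: str) -> logging.Logger:
    # __name__ at the call site yields per-module logger names such as
    # "sijapi.routers.news", so per-module filtering stays possible.
    _configure_root()
    return logging.getLogger(name if name.startswith("sijapi") else f"sijapi.{name}")

Because the returned object uses the standard logging method names, the old warn(), err(), and crit() wrappers map onto l.warning(), l.error(), and l.critical() in the hunks that follow.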
@@ -17,28 +17,25 @@ from dateutil.parser import parse as dateutil_parse
from fastapi import HTTPException, status
from pathlib import Path
from fastapi import APIRouter, Query, HTTPException
-from sijapi import API, L, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, GEO
+from sijapi import Sys, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, GEO
from sijapi.routers import asr, cal, gis, img, llm, serve, timing, tts, weather
from sijapi.utilities import assemble_journal_path, convert_to_12_hour_format, sanitize_filename, convert_degrees_to_cardinal, check_file_name, HOURLY_COLUMNS_MAPPING
from sijapi.classes import Location
+from sijapi.logs import get_logger

+l = get_logger(__name__)

note = APIRouter()
-logger = L.get_module_logger("note")
-def debug(text: str): logger.debug(text)
-def info(text: str): logger.info(text)
-def warn(text: str): logger.warning(text)
-def err(text: str): logger.error(text)
-def crit(text: str): logger.critical(text)

@note.post("/note/add")
async def note_add_endpoint(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), source: Optional[str] = Form(None), bg_tasks: BackgroundTasks = None):
-debug(f"Received request on /note/add...")
+l.debug(f"Received request on /note/add...")
if not file and not text:
-warn(f"... without any file or text!")
+l.warning(f"... without any file or text!")
raise HTTPException(status_code=400, detail="Either text or a file must be provided")
else:
result = await process_for_daily_note(file, text, source, bg_tasks)
-info(f"Result on /note/add: {result}")
+l.info(f"Result on /note/add: {result}")
return JSONResponse({"message": "Note added successfully", "entry": result}, status_code=201)


@@ -47,7 +44,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
transcription_entry = ""
file_entry = ""
if file:
-debug("File received...")
+l.debug("File received...")
file_content = await file.read()
audio_io = BytesIO(file_content)

@@ -55,18 +52,18 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
guessed_type = mimetypes.guess_type(file.filename)
file_type = guessed_type[0] if guessed_type[0] else "application/octet-stream"

-debug(f"Processing as {file_type}...")
+l.debug(f"Processing as {file_type}...")

# Extract the main type (e.g., 'audio', 'image', 'video')
main_type = file_type.split('/')[0]
subdir = main_type.title() if main_type else "Documents"

absolute_path, relative_path = assemble_journal_path(now, subdir=subdir, filename=file.filename)
-debug(f"Destination path: {absolute_path}")
+l.debug(f"Destination path: {absolute_path}")

with open(absolute_path, 'wb') as f:
f.write(file_content)
-debug(f"Processing {f.name}...")
+l.debug(f"Processing {f.name}...")

if main_type == 'audio':
transcription = await asr.transcribe_audio(file_path=absolute_path, params=asr.TranscribeParams(model="small-en", language="en", threads=6))

@@ -77,7 +74,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
file_entry = f"[Source]({relative_path})"

text_entry = text if text else ""
-debug(f"transcription: {transcription_entry}\nfile_entry: {file_entry}\ntext_entry: {text_entry}")
+l.debug(f"transcription: {transcription_entry}\nfile_entry: {file_entry}\ntext_entry: {text_entry}")
return await add_to_daily_note(transcription_entry, file_entry, text_entry, now)


@@ -169,7 +166,7 @@ added: {timestamp}
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
body += f"{obsidian_link}\n\n"
except Exception as e:
-err(f"Failed in the TTS portion of clipping: {e}")
+l.error(f"Failed in the TTS portion of clipping: {e}")

body += f"> [!summary]+\n"
body += f"> {summary}\n\n"

@@ -182,12 +179,12 @@ added: {timestamp}
with open(markdown_filename, 'w', encoding=encoding) as md_file:
md_file.write(markdown_content)

-info(f"Successfully saved to {markdown_filename}")
+l.info(f"Successfully saved to {markdown_filename}")

return markdown_filename

except Exception as e:
-err(f"Failed to clip: {str(e)}")
+l.error(f"Failed to clip: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))


@@ -199,7 +196,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False):
if check_file_name(filename):
file_path = Path(dirpath) / filename
impermissible_files.append(file_path)
-debug(f"Impermissible file found: {file_path}")
+l.debug(f"Impermissible file found: {file_path}")

# Sanitize the file name
new_filename = sanitize_filename(filename)

@@ -217,7 +214,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False):
# Rename the file
if rename:
os.rename(file_path, new_file_path)
-debug(f"Renamed: {file_path} -> {new_file_path}")
+l.debug(f"Renamed: {file_path} -> {new_file_path}")

return impermissible_files


@@ -256,18 +253,18 @@ async def build_daily_note_getpoint():
path = await build_daily_note(date_time, loc.latitude, loc.longitude)
path_str = str(path)

-info(f"Successfully created daily note at {path_str}")
+l.info(f"Successfully created daily note at {path_str}")
return JSONResponse(content={"path": path_str}, status_code=200)

except ValueError as ve:
error_msg = f"Value Error in build_daily_note_getpoint: {str(ve)}"
-err(error_msg)
+l.error(error_msg)
raise HTTPException(status_code=400, detail=error_msg)

except Exception as e:
error_msg = f"Unexpected error in build_daily_note_getpoint: {str(e)}"
-err(error_msg)
+l.error(error_msg)
-err(f"Traceback: {traceback.format_exc()}")
+l.error(f"Traceback: {traceback.format_exc()}")
raise HTTPException(status_code=500, detail="An unexpected error occurred")

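For reference, /note/add accepts a text form field, an uploaded file, or both, and returns a 201 JSON response with the resulting entry. A hedged client-side example follows; the base URL is a placeholder and is not taken from this commit.

# Illustrative client for POST /note/add; adjust BASE_URL for your deployment.
import requests

BASE_URL = "http://localhost:4444"  # placeholder host/port

# Text-only note
r = requests.post(f"{BASE_URL}/note/add", data={"text": "Picked up the package."})
print(r.status_code, r.json())

# Audio note: audio uploads are transcribed via asr.transcribe_audio before filing
with open("memo.wav", "rb") as f:
    r = requests.post(
        f"{BASE_URL}/note/add",
        data={"source": "phone"},
        files={"file": ("memo.wav", f, "audio/wav")},
    )
print(r.status_code, r.json())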
@@ -287,7 +284,7 @@ async def build_daily_note_endpoint(
else:
raise ValueError("Location is not provided or invalid.")
except (ValueError, AttributeError, TypeError) as e:
-warn(f"Falling back to localized datetime due to error: {e}")
+l.warning(f"Falling back to localized datetime due to error: {e}")
try:
date_time = await gis.dt(date_str)
places = await gis.fetch_locations(date_time)

@@ -307,7 +304,7 @@ async def build_daily_note(date_time: dt_datetime, lat: float = None, lon: float
Obsidian helper. Takes a datetime and creates a new daily note. Note: it uses the sijapi configuration file to place the daily note and does NOT presently interface with Obsidian's daily note or periodic notes extensions. It is your responsibility to ensure they match.
'''
absolute_path, _ = assemble_journal_path(date_time)
-debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in build_daily_note.")
+l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in build_daily_note.")
formatted_day = date_time.strftime("%A %B %d, %Y") # Monday May 27, 2024 formatting
day_before = (date_time - timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-26 Sunday formatting
day_after = (date_time + timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-28 Tuesday formatting

@@ -396,7 +393,7 @@ async def update_frontmatter_endpoint(date: str, key: str, value: str):
async def update_frontmatter(date_time: dt_datetime, key: str, value: str):
file_path, relative_path = assemble_journal_path(date_time)
if not file_path.exists():
-crit(f"Markdown file not found at {file_path}")
+l.critical(f"Markdown file not found at {file_path}")
raise HTTPException(status_code=404, detail="Markdown file not found.")

with open(file_path, "r", encoding="utf-8") as file:

@@ -430,9 +427,9 @@ async def banner_endpoint(dt: str, location: str = None, forecast: str = None, m
'''
Endpoint (POST) that generates a new banner image for the Obsidian daily note for a specified date, taking into account optional additional information, then updates the frontmatter if necessary.
'''
-debug(f"banner_endpoint requested with date: {dt} ({type(dt)})")
+l.debug(f"banner_endpoint requested with date: {dt} ({type(dt)})")
date_time = await gis.dt(dt)
-debug(f"date_time after localization: {date_time} ({type(date_time)})")
+l.debug(f"date_time after localization: {date_time} ({type(date_time)})")
context = await generate_context(dt, location, forecast, mood, other_context)
jpg_path = await generate_banner(date_time, location, mood=mood, other_context=other_context)
return jpg_path

@@ -449,10 +446,10 @@ async def generate_banner(dt, location: Location = None, forecast: str = None, m
forecast = await update_dn_weather(date_time, False, location.latitude, location.longitude)

prompt = await generate_context(date_time, location, forecast, mood, other_context)
-debug(f"Prompt: {prompt}")
+l.debug(f"Prompt: {prompt}")
final_path = await img.workflow(prompt, scene=OBSIDIAN_BANNER_SCENE, destination_path=destination_path)
if not str(local_path) in str(final_path):
-info(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}")
+l.info(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}")
jpg_embed = f"\"![[{local_path}]]\""
await update_frontmatter(date_time, "banner", jpg_embed)
return local_path

@@ -481,7 +478,7 @@ async def generate_context(date_time, location: Location, forecast: str, mood: s
if geocoded_location.display_name or geocoded_location.city or geocoded_location.country:
return await generate_context(date_time, geocoded_location, forecast, mood, other_context)
else:
-warn(f"Failed to get a useable location for purposes of generating a banner, but we'll generate one anyway.")
+l.warning(f"Failed to get a useable location for purposes of generating a banner, but we'll generate one anyway.")
elif location and isinstance(location, str):
display_name = f"Location: {location}\n"
else:

@@ -549,8 +546,8 @@ async def note_weather_get(
force_refresh_weather = refresh == "True"
try:
date_time = dt_datetime.now() if date == "0" else await gis.dt(date)
-debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in note_weather_get.")
+l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in note_weather_get.")
-debug(f"date: {date} .. date_time: {date_time}")
+l.debug(f"date: {date} .. date_time: {date_time}")
content = await update_dn_weather(date_time, force_refresh_weather) #, lat, lon)
return JSONResponse(content={"forecast": content}, status_code=200)

@@ -558,68 +555,68 @@ async def note_weather_get(
return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code)

except Exception as e:
-err(f"Error in note_weather_get: {str(e)}")
+l.error(f"Error in note_weather_get: {str(e)}")
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")


@note.post("/update/note/{date}")
async def post_update_daily_weather_and_calendar_and_timeslips(date: str, refresh: str="False") -> PlainTextResponse:
date_time = await gis.dt(date)
-debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in post_update_daily_weather_and_calendar_and_timeslips.")
+l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in post_update_daily_weather_and_calendar_and_timeslips.")
force_refresh_weather = refresh == "True"
await update_dn_weather(date_time, force_refresh_weather)
await update_daily_note_events(date_time)
await build_daily_timeslips(date_time)
-return f"[Refresh]({API.URL}/update/note/{date_time.strftime('%Y-%m-%d')}"
+return f"[Refresh]({Sys.URL}/update/note/{date_time.strftime('%Y-%m-%d')}"


async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False, lat: float = None, lon: float = None):
-debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.")
+l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.")
try:
if lat and lon:
place = await GEO.code((lat, lon))

else:
-debug(f"Updating weather for {date_time}")
+l.debug(f"Updating weather for {date_time}")
places = await gis.fetch_locations(date_time)
place = places[0]
lat = place.latitude
lon = place.longitude

-debug(f"lat: {lat}, lon: {lon}, place: {place}")
+l.debug(f"lat: {lat}, lon: {lon}, place: {place}")
city = GEO.find_override_location(lat, lon)
if city:
-info(f"Using override location: {city}")
+l.info(f"Using override location: {city}")

else:
if place.city and place.city != "":
city = place.city
-info(f"City in data: {city}")
+l.info(f"City in data: {city}")

else:
location = await GEO.code((lat, lon))
-debug(f"location: {location}")
+l.debug(f"location: {location}")
city = location.name
city = city if city else location.city
city = city if city else location.house_number + ' ' + location.road

-debug(f"City geocoded: {city}")
+l.debug(f"City geocoded: {city}")

# Assemble journal path
absolute_path, relative_path = assemble_journal_path(date_time, filename="Weather", extension=".md", no_timestamp = True)
-debug(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}")
+l.debug(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}")

try:
-debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
+l.debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
day = await weather.get_weather(date_time, lat, lon, force_refresh)
-debug(f"day information obtained from get_weather: {day}")
+l.debug(f"day information obtained from get_weather: {day}")
if day:
DailyWeather = day.get('DailyWeather')
HourlyWeather = day.get('HourlyWeather')
if DailyWeather:
-# debug(f"Day: {DailyWeather}")
+# l.debug(f"Day: {DailyWeather}")
icon = DailyWeather.get('icon')
-debug(f"Icon: {icon}")
+l.debug(f"Icon: {icon}")

weather_icon, admonition = get_icon_and_admonition(icon) if icon else (":LiSunMoon:", "ad-weather")

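The city-resolution logic in update_dn_weather above falls back through several sources: an explicit override from GEO.find_override_location, then the fetched place's own city, then a reverse geocode of the coordinates (name, then city, then house number plus road). A condensed, dependency-free sketch of that fallback chain, with stand-in lookup functions in place of GEO, is:

# Stand-in sketch of the override -> place.city -> reverse-geocode fallback above.
from dataclasses import dataclass
from typing import Optional

@dataclass
class Geocoded:
    name: Optional[str] = None
    city: Optional[str] = None
    house_number: str = ""
    road: str = ""

def find_override_location(lat: float, lon: float) -> Optional[str]:
    return None  # placeholder for GEO.find_override_location

def reverse_geocode(lat: float, lon: float) -> Geocoded:
    return Geocoded(name=None, city="Portland", house_number="123", road="Main St")

def resolve_city(lat: float, lon: float, place_city: Optional[str]) -> str:
    city = find_override_location(lat, lon)
    if city:
        return city
    if place_city:
        return place_city
    location = reverse_geocode(lat, lon)
    city = location.name or location.city
    return city if city else f"{location.house_number} {location.road}"

print(resolve_city(45.5, -122.6, place_city=None))  # -> "Portland"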
@@ -688,38 +685,38 @@ async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False,
detailed_forecast += assemble_hourly_data_table(times, condition_symbols, temps, winds)
detailed_forecast += f"```\n\n"

-debug(f"Detailed forecast: {detailed_forecast}.")
+l.debug(f"Detailed forecast: {detailed_forecast}.")

with open(absolute_path, 'w', encoding='utf-8') as note_file:
note_file.write(detailed_forecast)

-debug(f"Operation complete.")
+l.debug(f"Operation complete.")

return narrative
else:
-err(f"Failed to get DailyWeather from day: {day}")
+l.error(f"Failed to get DailyWeather from day: {day}")
else:
-err(f"Failed to get day")
+l.error(f"Failed to get day")
raise HTTPException(status_code=500, detail="Failed to retrieve weather data")

except HTTPException as e:
-err(f"HTTP error: {e}")
+l.error(f"HTTP error: {e}")
-err(traceback.format_exc())
+l.error(traceback.format_exc())
raise e

except Exception as e:
-err(f"Error: {e}")
+l.error(f"Error: {e}")
-err(traceback.format_exc())
+l.error(traceback.format_exc())
raise HTTPException(status_code=999, detail=f"Error: {e}")

except ValueError as ve:
-err(f"Value error in update_dn_weather: {str(ve)}")
+l.error(f"Value error in update_dn_weather: {str(ve)}")
-err(traceback.format_exc())
+l.error(traceback.format_exc())
raise HTTPException(status_code=400, detail=f"Value error: {str(ve)}")

except Exception as e:
-err(f"Error in update_dn_weather: {str(e)}")
+l.error(f"Error in update_dn_weather: {str(e)}")
-err(traceback.format_exc())
+l.error(traceback.format_exc())
raise HTTPException(status_code=500, detail=f"Error in update_dn_weather: {str(e)}")


@@ -728,8 +725,8 @@ def format_hourly_time(hour):
hour_12 = convert_to_12_hour_format(hour.get("datetime"))
return hour_12
except Exception as e:
-err(f"Error in format_hourly_time: {str(e)}")
+l.error(f"Error in format_hourly_time: {str(e)}")
-err(traceback.format_exc())
+l.error(traceback.format_exc())
return ""


@@ -740,7 +737,7 @@ def format_hourly_icon(hour, sunrise, sunset):

precip = hour.get('precip', float(0.0))
precip_prob = hour.get('precipprob', float(0.0))
-debug(f"precip: {precip}, prob: {precip_prob}")
+l.debug(f"precip: {precip}, prob: {precip_prob}")

sp_str = None

@@ -764,8 +761,8 @@ def format_hourly_icon(hour, sunrise, sunset):
return formatted

except Exception as e:
-err(f"Error in format_hourly_special: {str(e)}")
+l.error(f"Error in format_hourly_special: {str(e)}")
-err(traceback.format_exc())
+l.error(traceback.format_exc())
return ""


@@ -774,8 +771,8 @@ def format_hourly_temperature(hour):
temp_str = f"{hour.get('temp', '')}˚ F"
return temp_str
except Exception as e:
-err(f"Error in format_hourly_temperature: {str(e)}")
+l.error(f"Error in format_hourly_temperature: {str(e)}")
-err(traceback.format_exc())
+l.error(traceback.format_exc())
return ""


@@ -786,8 +783,8 @@ def format_hourly_wind(hour):
wind_str = f"{str(windspeed)}:LiWind: {winddir}"
return wind_str
except Exception as e:
-err(f"Error in format_hourly_wind: {str(e)}")
+l.error(f"Error in format_hourly_wind: {str(e)}")
-err(traceback.format_exc())
+l.error(traceback.format_exc())
return ""

def assemble_hourly_data_table(times, condition_symbols, temps, winds):

@@ -800,7 +797,7 @@ def assemble_hourly_data_table(times, condition_symbols, temps, winds):


def get_icon_and_admonition(icon_str) -> Tuple:
-debug(f"Received request for emoji {icon_str}")
+l.debug(f"Received request for emoji {icon_str}")
if icon_str.startswith(":") and icon_str.endswith(":"):
return icon_str

@@ -891,7 +888,7 @@ async def format_events_as_markdown(event_data: Dict[str, Union[str, List[Dict[s
total_events = len(event_data["events"])
event_markdown = f"```ad-events"
for event in event_data["events"]:
-debug(f"event busy status: {event['busy']}; all day status: {event['all_day']}")
+l.debug(f"event busy status: {event['busy']}; all day status: {event['all_day']}")
if not event['name'].startswith('TC '):
url = f"hook://ical/eventID={event['uid']}calendarID=17"
if event['url']:

@@ -960,18 +957,18 @@ async def note_events_endpoint(date: str = Query(None)):


async def update_daily_note_events(date_time: dt_datetime):
-debug(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}")
+l.debug(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}")
try:
events = await cal.get_events(date_time, date_time)
-debug(f"Raw events: {events}")
+l.debug(f"Raw events: {events}")
event_data = {
"date": date_time.strftime('%Y-%m-%d'),
"events": events
}
events_markdown = await format_events_as_markdown(event_data)
-debug(f"Markdown events: {events_markdown}")
+l.debug(f"Markdown events: {events_markdown}")
absolute_path, _ = assemble_journal_path(date_time, filename="Events", extension=".md", no_timestamp = True)
-debug(f"Writing events to file: {absolute_path}")
+l.debug(f"Writing events to file: {absolute_path}")

with open(absolute_path, 'w', encoding='utf-8') as note_file:
note_file.write(events_markdown)

@@ -979,5 +976,5 @@ async def update_daily_note_events(date_time: dt_datetime):
return events_markdown

except Exception as e:
-err(f"Error processing events: {e}")
+l.error(f"Error processing events: {e}")
raise HTTPException(status_code=500, detail=str(e))

@@ -5,15 +5,11 @@ NOTES: Haven't yet decided if this should depend on the Obsidian and Chat module
#routers/rag.py

from fastapi import APIRouter
-from sijapi import L
+from sijapi.logs import get_logger
+l = get_logger(__name__)

rag = APIRouter()
-logger = L.get_module_logger("rag")
-def debug(text: str): logger.debug(text)
-def info(text: str): logger.info(text)
-def warn(text: str): logger.warning(text)
-def err(text: str): logger.error(text)
-def crit(text: str): logger.critical(text)

rag.get("/rag/search")
async def rag_search_endpoint(query: str, scope: str):

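This rag.py hunk shows the whole shape of the migration in one place: the L.get_module_logger shim and its five wrapper functions are replaced by a single shared logger. The wrapper names do not map one-to-one onto logger methods, so call sites are renamed as illustrated below; the example simply exercises each level on the new logger.

# How call sites change under the new logger (old wrapper -> new call).
from sijapi.logs import get_logger

l = get_logger(__name__)

l.debug("debug(...) call sites become l.debug(...)")
l.info("info(...) call sites become l.info(...)")
l.warning("warn(...) call sites become l.warning(...)")    # warn -> warning
l.error("err(...) call sites become l.error(...)")          # err -> error
l.critical("crit(...) call sites become l.critical(...)")   # crit -> critical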
@@ -15,14 +15,9 @@ from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from pathlib import Path
-from sijapi import Scrape, L, Dir
+from sijapi import Scrape,Dir
+from sijapi.logs import get_logger
-logger = L.get_module_logger('scrape')
+l = get_logger(__name__)
-def debug(text: str): logger.debug(text)
-def info(text: str): logger.info(text)
-def warn(text: str): logger.warning(text)
-def err(text: str): logger.error(text)
-def crit(text: str): logger.critical(text)

scrape = APIRouter()


@@ -31,24 +26,24 @@ Dir.DATA = Path(Dir.DATA).expanduser()

def save_to_json(data: List[Dict], output_file: str):
output_path = Dir.DATA / output_file
-info(f"Saving data to {output_path}")
+l.info(f"Saving data to {output_path}")
output_path.parent.mkdir(parents=True, exist_ok=True)
with open(output_path, 'w') as f:
json.dump(data, f, indent=2)
-info(f"Data saved successfully to {output_path}")
+l.info(f"Data saved successfully to {output_path}")

def load_from_json(output_file: str) -> List[Dict]:
output_path = Dir.DATA / output_file
-info(f"Loading data from {output_path}")
+l.info(f"Loading data from {output_path}")
try:
with open(output_path, 'r') as f:
return json.load(f)
except FileNotFoundError:
-warn(f"File {output_path} not found")
+l.warning(f"File {output_path} not found")
return []

async def fetch_content(config: Any) -> str:
-info(f"Fetching content from {config.url}")
+l.info(f"Fetching content from {config.url}")
if config.content.js_render:
return await fetch_with_selenium(config.url)


@@ -63,7 +58,7 @@ async def fetch_content(config: Any) -> str:
elif config.content.type == 'txt':
return await response.text()
else:
-warn(f"Unsupported content type: {config.content.type}")
+l.warning(f"Unsupported content type: {config.content.type}")
return await response.text()

async def fetch_with_selenium(url: str) -> str:

@@ -92,7 +87,7 @@ async def handle_json(response):
return await response.json()

def apply_processing_step(data: Any, step: Any) -> Any:
-info(f"Applying processing step: {step.type}")
+l.info(f"Applying processing step: {step.type}")
if step.type == 'regex_split':
return re.split(step.pattern, data)[1:]
elif step.type == 'keyword_filter':

@@ -101,11 +96,11 @@ def apply_processing_step(data: Any, step: Any) -> Any:
if isinstance(data, list):
return [apply_regex_extract(item, step.extractions) for item in data]
return apply_regex_extract(data, step.extractions)
-debug(f"Unknown processing step type: {step.type}")
+l.debug(f"Unknown processing step type: {step.type}")
return data

def apply_regex_extract(text: str, extractions: List[Any]) -> Dict:
-debug(f"Applying regex extraction on text of length {len(text)}")
+l.debug(f"Applying regex extraction on text of length {len(text)}")
result = {}
for extraction in extractions:
extraction_dict = extraction.dict() if hasattr(extraction, 'dict') else extraction

@@ -122,11 +117,11 @@ def apply_regex_extract(text: str, extractions: List[Any]) -> Dict:
else:
result[extraction_dict['name']] = matches[-1].strip() # Take the last match

-debug(f"Extracted {len(result)} items")
+l.debug(f"Extracted {len(result)} items")
return result

def apply_post_processing(data: List[Dict], post_processing: List[Any]) -> List[Dict]:
-info("Applying post-processing steps")
+l.info("Applying post-processing steps")
for step in post_processing:
if step.type == 'custom':
data = globals()[step.function](data)

@@ -137,7 +132,7 @@ def data_has_changed(new_data: List[Dict], old_data: List[Dict]) -> bool:

@scrape.get("/scrape/{config_name}")
async def scrape_site(config_name: str):
-info(f"Starting scrape operation for {config_name}")
+l.info(f"Starting scrape operation for {config_name}")

if not hasattr(Scrape, 'configurations'):
# If 'configurations' doesn't exist, assume the entire Scrape object is the configuration

@@ -162,14 +157,14 @@ async def scrape_site(config_name: str):

if data_has_changed(processed_data, previous_data):
save_to_json(processed_data, output_file)
-info("Scrape completed with updates")
+l.info("Scrape completed with updates")
return {"message": "Site updated", "data": processed_data}
else:
-info("Scrape completed with no updates")
+l.info("Scrape completed with no updates")
return {"message": "No updates", "data": processed_data}

def apply_post_processing(data: List[Dict], post_processing: List[Any]) -> List[Dict]:
-info("Applying post-processing steps")
+l.info("Applying post-processing steps")
for step in post_processing:
if step.type == 'regex_extract':
for entry in data:

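The scrape helpers above drive everything from configuration: fetch_content honors config.content.js_render and config.content.type, and apply_processing_step dispatches on step.type (regex_split, keyword_filter, regex_extract with named extractions). The real step and extraction objects come from the Scrape configuration, which is not shown in this commit; a self-contained toy run of the regex_split plus regex-extraction pipeline, using only the standard re module and made-up patterns, behaves like this:

# Toy illustration of regex_split followed by per-item regex extraction,
# mirroring apply_processing_step / apply_regex_extract above.
import re

raw = "Case: A-1 Status: open Case: B-2 Status: closed"

# regex_split: split the document into records (drop the leading empty piece, [1:])
records = re.split(r"(?=Case:)", raw)[1:]

# regex_extract: named extractions applied to each record; patterns are made up here
extractions = [
    {"name": "case", "pattern": r"Case:\s*(\S+)"},
    {"name": "status", "pattern": r"Status:\s*(\S+)"},
]

results = []
for record in records:
    extracted = {}
    for extraction in extractions:
        matches = re.findall(extraction["pattern"], record)
        if matches:
            extracted[extraction["name"]] = matches[-1].strip()  # last match wins
    results.append(extracted)

print(results)
# [{'case': 'A-1', 'status': 'open'}, {'case': 'B-2', 'status': 'closed'}]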
@@ -33,20 +33,15 @@ from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from sijapi import (
-L, API, Serve, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY,
+Sys, Serve, Db, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY,
COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR,
MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, IMG_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR
)
from sijapi.classes import WidgetUpdate
from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path
from sijapi.routers import gis
+from sijapi.logs import get_logger
-logger = L.get_module_logger("serve")
+l = get_logger(__name__)
-def debug(text: str): logger.debug(text)
-def info(text: str): logger.info(text)
-def warn(text: str): logger.warning(text)
-def err(text: str): logger.err(text)
-def crit(text: str): logger.critical(text)

serve = APIRouter()
templates = Jinja2Templates(directory=Path(__file__).parent.parent / "sites")

@@ -85,13 +80,13 @@ async def get_file_endpoint(file_path: str):
date_time = await gis.dt(file_path);
absolute_path, local_path = assemble_journal_path(date_time, no_timestamp = True)
except ValueError as e:
-debug(f"Unable to parse {file_path} as a date, now trying to use it as a local path")
+l.debug(f"Unable to parse {file_path} as a date, now trying to use it as a local path")
absolute_path = OBSIDIAN_VAULT_DIR / file_path
if not absolute_path.suffix:
absolute_path = Path(absolute_path.with_suffix(".md"))

if not absolute_path.is_file():
-warn(f"{absolute_path} is not a valid file it seems.")
+l.warning(f"{absolute_path} is not a valid file it seems.")
elif absolute_path.suffix == '.md':
try:
with open(absolute_path, 'r', encoding='utf-8') as file:

@@ -125,11 +120,11 @@ async def hook_alert(request: Request):
async def notify(alert: str):
fail = True
try:
-if API.EXTENSIONS.shellfish:
+if Sys.EXTENSIONS.shellfish:
await notify_shellfish(alert)
fail = False

-if API.EXTENSIONS.macnotify:
+if Sys.EXTENSIONS.macnotify:
if TS_ID == MAC_ID:
await notify_local(alert)
fail = False

@@ -140,10 +135,10 @@ async def notify(alert: str):
fail = True

if fail == False:
-info(f"Delivered alert: {alert}")
+l.info(f"Delivered alert: {alert}")
return {"message": alert}
else:
-crit(f"Failed to deliver alert: {alert}")
+l.critical(f"Failed to deliver alert: {alert}")
return {"message": f"Failed to deliver alert: {alert}"}

async def notify_local(message: str):

@@ -165,7 +160,7 @@ async def notify_remote(host: str, message: str, username: str = None, password:
ssh.close()


-if API.EXTENSIONS.shellfish:
+if Sys.EXTENSIONS.shellfish:
async def notify_shellfish(alert: str):
key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b"
user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm"

@@ -250,14 +245,14 @@ if API.EXTENSIONS.shellfish:
return result.stdout


-if API.EXTENSIONS.courtlistener:
+if Sys.EXTENSIONS.courtlistener:
with open(CASETABLE_PATH, 'r') as file:
CASETABLE = json.load(file)

@serve.post("/cl/search")
async def hook_cl_search(request: Request, bg_tasks: BackgroundTasks):
client_ip = request.client.host
-debug(f"Received request from IP: {client_ip}")
+l.debug(f"Received request from IP: {client_ip}")
data = await request.json()
payload = data['payload']
results = data['payload']['results']

@@ -275,7 +270,7 @@ if API.EXTENSIONS.courtlistener:
@serve.post("/cl/docket")
async def hook_cl_docket(request: Request):
client_ip = request.client.host
-debug(f"Received request from IP: {client_ip}")
+l.debug(f"Received request from IP: {client_ip}")
data = await request.json()
await cl_docket(data, client_ip)

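notify() above fans an alert out to whichever delivery channels are enabled on Sys.EXTENSIONS, falling back to a critical log when none succeeds. A small stand-alone sketch of that feature-gating pattern follows; the real Sys object is built from the 'sys' configuration, not constructed by hand like this, and the flag values are assumptions.

# Sketch of extension-gated delivery; flags and SimpleNamespace layout are assumptions.
from types import SimpleNamespace

Sys = SimpleNamespace(EXTENSIONS=SimpleNamespace(
    shellfish=False, macnotify=True, courtlistener=False, url_shortener=True))

def notify(alert: str) -> bool:
    delivered = False
    if Sys.EXTENSIONS.shellfish:
        delivered = True   # the router would await notify_shellfish(alert) here
    if Sys.EXTENSIONS.macnotify:
        delivered = True   # the router would await notify_local()/notify_remote() here
    return delivered

print(notify("backup finished"))  # True with the flags above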
@@ -312,14 +307,14 @@ if API.EXTENSIONS.courtlistener:
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response:
if response.status == 200:
-debug(f"Fetching CourtListener docket information for {docket}...")
+l.debug(f"Fetching CourtListener docket information for {docket}...")
data = await response.json()
court_docket = data['results'][0]['docket_number_core']
court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}" # Formatting the docket number
case_name = data['results'][0]['case_name']
-debug(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
+l.debug(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
else:
-debug("Failed to fetch data from CourtListener API.")
+l.debug("Failed to fetch data from CourtListener API.")
court_docket = 'NoCourtDocket'
case_name = 'NoCaseName'

@@ -329,12 +324,12 @@ if API.EXTENSIONS.courtlistener:

if filepath_ia:
file_url = filepath_ia
-debug(f"Found IA file at {file_url}.")
+l.debug(f"Found IA file at {file_url}.")
elif filepath_local:
file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
-debug(f"Found local file at {file_url}.")
+l.debug(f"Found local file at {file_url}.")
else:
-debug(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
+l.debug(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
continue

document_number = document.get('document_number', 'NoDocumentNumber')

@@ -345,7 +340,7 @@ if API.EXTENSIONS.courtlistener:
target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name
target_path.parent.mkdir(parents=True, exist_ok=True)
await cl_download_file(file_url, target_path, session)
-debug(f"Downloaded {file_name} to {target_path}")
+l.debug(f"Downloaded {file_name} to {target_path}")


def cl_case_details(docket):

@@ -360,18 +355,18 @@ if API.EXTENSIONS.courtlistener:
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
}
async with aiohttp.ClientSession() as session:
-debug(f"Attempting to download {url} to {path}.")
+l.debug(f"Attempting to download {url} to {path}.")
try:
async with session.get(url, headers=headers, allow_redirects=True) as response:
if response.status == 403:
-err(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
+l.error(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
return
response.raise_for_status()

# Check if the response content type is a PDF
content_type = response.headers.get('Content-Type')
if content_type != 'application/pdf':
-err(f"Invalid content type: {content_type}. Skipping download.")
+l.error(f"Invalid content type: {content_type}. Skipping download.")
return

# Create an in-memory buffer to store the downloaded content

@@ -386,7 +381,7 @@ if API.EXTENSIONS.courtlistener:
try:
PdfReader(buffer)
except Exception as e:
-err(f"Invalid PDF content: {str(e)}. Skipping download.")
+l.error(f"Invalid PDF content: {str(e)}. Skipping download.")
return

# If the PDF is valid, write the content to the file on disk

@@ -395,7 +390,7 @@ if API.EXTENSIONS.courtlistener:
file.write(buffer.getvalue())

except Exception as e:
-err(f"Error downloading file: {str(e)}")
+l.error(f"Error downloading file: {str(e)}")


async def cl_search_process_result(result):

@@ -404,7 +399,7 @@ if API.EXTENSIONS.courtlistener:
court_id = result.get('court_id')
case_name_short = result.get('caseNameShort')
case_name = result.get('caseName')
-debug(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")
+l.debug(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")

court_folder = court_id


@@ -418,9 +413,9 @@ if API.EXTENSIONS.courtlistener:
target_path.parent.mkdir(parents=True, exist_ok=True)

await cl_download_file(download_url, target_path, session)
-debug(f"Downloaded {file_name} to {target_path}")
+l.debug(f"Downloaded {file_name} to {target_path}")

-if API.EXTENSIONS.url_shortener:
+if Sys.EXTENSIONS.url_shortener:
@serve.get("/s", response_class=HTMLResponse)
async def shortener_form(request: Request):
return templates.TemplateResponse("shortener.html", {"request": request})

@@ -433,7 +428,7 @@ if API.EXTENSIONS.url_shortener:
if len(custom_code) != 3 or not custom_code.isalnum():
return templates.TemplateResponse("shortener.html", {"request": request, "error": "Custom code must be 3 alphanumeric characters"})
|
return templates.TemplateResponse("shortener.html", {"request": request, "error": "Custom code must be 3 alphanumeric characters"})
|
||||||
|
|
||||||
existing = await API.execute_read_query('SELECT 1 FROM short_urls WHERE short_code = $1', custom_code, table_name="short_urls")
|
existing = await Db.execute_read('SELECT 1 FROM short_urls WHERE short_code = $1', custom_code, table_name="short_urls")
|
||||||
if existing:
|
if existing:
|
||||||
return templates.TemplateResponse("shortener.html", {"request": request, "error": "Custom code already in use"})
|
return templates.TemplateResponse("shortener.html", {"request": request, "error": "Custom code already in use"})
|
||||||
|
|
||||||
|
@ -441,13 +436,13 @@ if API.EXTENSIONS.url_shortener:
|
||||||
else:
|
else:
|
||||||
chars = string.ascii_letters + string.digits
|
chars = string.ascii_letters + string.digits
|
||||||
while True:
|
while True:
|
||||||
debug(f"FOUND THE ISSUE")
|
l.debug(f"FOUND THE ISSUE")
|
||||||
short_code = ''.join(random.choice(chars) for _ in range(3))
|
short_code = ''.join(random.choice(chars) for _ in range(3))
|
||||||
existing = await API.execute_read_query('SELECT 1 FROM short_urls WHERE short_code = $1', short_code, table_name="short_urls")
|
existing = await Db.execute_read('SELECT 1 FROM short_urls WHERE short_code = $1', short_code, table_name="short_urls")
|
||||||
if not existing:
|
if not existing:
|
||||||
break
|
break
|
||||||
|
|
||||||
await API.execute_write_query(
|
await Db.execute_write(
|
||||||
'INSERT INTO short_urls (short_code, long_url) VALUES ($1, $2)',
|
'INSERT INTO short_urls (short_code, long_url) VALUES ($1, $2)',
|
||||||
short_code, long_url,
|
short_code, long_url,
|
||||||
table_name="short_urls"
|
table_name="short_urls"
|
||||||
|
@ -459,7 +454,7 @@ if API.EXTENSIONS.url_shortener:
|
||||||
|
|
||||||
@serve.get("/{short_code}")
|
@serve.get("/{short_code}")
|
||||||
async def redirect_short_url(short_code: str):
|
async def redirect_short_url(short_code: str):
|
||||||
results = await API.execute_read_query(
|
results = await Db.execute_read(
|
||||||
'SELECT long_url FROM short_urls WHERE short_code = $1',
|
'SELECT long_url FROM short_urls WHERE short_code = $1',
|
||||||
short_code,
|
short_code,
|
||||||
table_name="short_urls"
|
table_name="short_urls"
|
||||||
|
@ -474,7 +469,7 @@ if API.EXTENSIONS.url_shortener:
|
||||||
raise HTTPException(status_code=404, detail="Long URL not found")
|
raise HTTPException(status_code=404, detail="Long URL not found")
|
||||||
|
|
||||||
# Increment click count (you may want to do this asynchronously)
|
# Increment click count (you may want to do this asynchronously)
|
||||||
await API.execute_write_query(
|
await Db.execute_write(
|
||||||
'INSERT INTO click_logs (short_code, clicked_at) VALUES ($1, $2)',
|
'INSERT INTO click_logs (short_code, clicked_at) VALUES ($1, $2)',
|
||||||
short_code, datetime.now(),
|
short_code, datetime.now(),
|
||||||
table_name="click_logs"
|
table_name="click_logs"
|
||||||
|
@ -485,7 +480,7 @@ if API.EXTENSIONS.url_shortener:
|
||||||
|
|
||||||
@serve.get("/analytics/{short_code}")
|
@serve.get("/analytics/{short_code}")
|
||||||
async def get_analytics(short_code: str):
|
async def get_analytics(short_code: str):
|
||||||
url_info = await API.execute_read_query(
|
url_info = await Db.execute_read(
|
||||||
'SELECT long_url, created_at FROM short_urls WHERE short_code = $1',
|
'SELECT long_url, created_at FROM short_urls WHERE short_code = $1',
|
||||||
short_code,
|
short_code,
|
||||||
table_name="short_urls"
|
table_name="short_urls"
|
||||||
|
@ -493,13 +488,13 @@ if API.EXTENSIONS.url_shortener:
|
||||||
if not url_info:
|
if not url_info:
|
||||||
raise HTTPException(status_code=404, detail="Short URL not found")
|
raise HTTPException(status_code=404, detail="Short URL not found")
|
||||||
|
|
||||||
click_count = await API.execute_read_query(
|
click_count = await Db.execute_read(
|
||||||
'SELECT COUNT(*) FROM click_logs WHERE short_code = $1',
|
'SELECT COUNT(*) FROM click_logs WHERE short_code = $1',
|
||||||
short_code,
|
short_code,
|
||||||
table_name="click_logs"
|
table_name="click_logs"
|
||||||
)
|
)
|
||||||
|
|
||||||
clicks = await API.execute_read_query(
|
clicks = await Db.execute_read(
|
||||||
'SELECT clicked_at, ip_address, user_agent FROM click_logs WHERE short_code = $1 ORDER BY clicked_at DESC LIMIT 100',
|
'SELECT clicked_at, ip_address, user_agent FROM click_logs WHERE short_code = $1 ORDER BY clicked_at DESC LIMIT 100',
|
||||||
short_code,
|
short_code,
|
||||||
table_name="click_logs"
|
table_name="click_logs"
|
||||||
|
|
|
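For reference, a minimal sketch of the query-helper migration shown in the shortener hunks above: `API.execute_read_query`/`API.execute_write_query` become `Db.execute_read`/`Db.execute_write`, keeping the positional `$n` binds and the `table_name` keyword. The wrapper function and the row access (`rows[0]['long_url']`) are illustrative assumptions, not code from this commit.

```python
from datetime import datetime
from typing import Optional

from sijapi import Db  # module-level Database instance used throughout this commit


async def resolve_short_code(short_code: str) -> Optional[str]:
    # Reads keep the positional $n placeholders plus the table_name keyword.
    rows = await Db.execute_read(
        'SELECT long_url FROM short_urls WHERE short_code = $1',
        short_code,
        table_name="short_urls",
    )
    if not rows:
        return None
    # Writes follow the same shape as the click-log insert above.
    await Db.execute_write(
        'INSERT INTO click_logs (short_code, clicked_at) VALUES ($1, $2)',
        short_code, datetime.now(),
        table_name="click_logs",
    )
    return rows[0]['long_url']  # record-style access is assumed here
```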
@ -8,15 +8,12 @@ import httpx
|
||||||
import socket
|
import socket
|
||||||
from fastapi import APIRouter
|
from fastapi import APIRouter
|
||||||
from tailscale import Tailscale
|
from tailscale import Tailscale
|
||||||
from sijapi import L, API, TS_ID
|
from sijapi import Sys, TS_ID
|
||||||
|
from sijapi.logs import get_logger
|
||||||
|
l = get_logger(__name__)
|
||||||
|
|
||||||
|
sys = APIRouter()
|
||||||
|
|
||||||
sys = APIRouter(tags=["public", "trusted", "private"])
|
|
||||||
logger = L.get_module_logger("health")
|
|
||||||
def debug(text: str): logger.debug(text)
|
|
||||||
def info(text: str): logger.info(text)
|
|
||||||
def warn(text: str): logger.warning(text)
|
|
||||||
def err(text: str): logger.error(text)
|
|
||||||
def crit(text: str): logger.critical(text)
|
|
||||||
|
|
||||||
@sys.get("/health")
|
@sys.get("/health")
|
||||||
def get_health():
|
def get_health():
|
||||||
|
@ -28,7 +25,7 @@ def get_health() -> str:
|
||||||
|
|
||||||
@sys.get("/routers")
|
@sys.get("/routers")
|
||||||
def get_routers() -> str:
|
def get_routers() -> str:
|
||||||
active_modules = [module for module, is_active in API.MODULES.__dict__.items() if is_active]
|
active_modules = [module for module, is_active in Sys.MODULES.__dict__.items() if is_active]
|
||||||
return active_modules
|
return active_modules
|
||||||
|
|
||||||
@sys.get("/ip")
|
@sys.get("/ip")
|
||||||
|
@ -36,7 +33,7 @@ def get_local_ip():
|
||||||
"""Get the server's local IP address."""
|
"""Get the server's local IP address."""
|
||||||
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||||
try:
|
try:
|
||||||
s.connect((f'{API.SUBNET_BROADCAST}', 1))
|
s.connect((f'{Sys.SUBNET_BROADCAST}', 1))
|
||||||
IP = s.getsockname()[0]
|
IP = s.getsockname()[0]
|
||||||
except Exception:
|
except Exception:
|
||||||
IP = '127.0.0.1'
|
IP = '127.0.0.1'
|
||||||
|
@ -54,7 +51,7 @@ async def get_wan_ip():
|
||||||
wan_info = response.json()
|
wan_info = response.json()
|
||||||
return wan_info.get('ip', 'Unavailable')
|
return wan_info.get('ip', 'Unavailable')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error fetching WAN IP: {e}")
|
l.error(f"Error fetching WAN IP: {e}")
|
||||||
return "Unavailable"
|
return "Unavailable"
|
||||||
|
|
||||||
@sys.get("/ts_ip")
|
@sys.get("/ts_ip")
|
||||||
|
|
|
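The hunk above captures the logging change applied to every router in this commit: the per-module `L.get_module_logger(...)` plus the five `debug/info/warn/err/crit` wrapper functions are replaced by a single module-level logger from `sijapi.logs`. A condensed sketch of the new pattern follows; the example route itself is hypothetical.

```python
from fastapi import APIRouter

from sijapi.logs import get_logger

l = get_logger(__name__)

example = APIRouter(tags=["private"])


@example.get("/example")
def get_example() -> str:
    l.debug("Handling /example")   # was: debug("Handling /example")
    l.warning("Something odd")     # was: warn(...)
    l.error("Something failed")    # was: err(...)
    return "ok"
```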
@ -27,17 +27,12 @@ from typing import Optional, List, Dict, Union, Tuple
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from traceback import format_exc
|
from traceback import format_exc
|
||||||
from sijapi import L, TIMING_API_KEY, TIMING_API_URL
|
from sijapi import TIMING_API_KEY, TIMING_API_URL
|
||||||
from sijapi.routers import gis
|
from sijapi.routers import gis
|
||||||
|
from sijapi.logs import get_logger
|
||||||
|
l = get_logger(__name__)
|
||||||
|
|
||||||
timing = APIRouter(tags=["private"])
|
timing = APIRouter(tags=["private"])
|
||||||
logger = L.get_module_logger("timing")
|
|
||||||
def debug(text: str): logger.debug(text)
|
|
||||||
def info(text: str): logger.info(text)
|
|
||||||
def warn(text: str): logger.warning(text)
|
|
||||||
def err(text: str): logger.error(text)
|
|
||||||
def crit(text: str): logger.critical(text)
|
|
||||||
|
|
||||||
|
|
||||||
script_directory = os.path.dirname(os.path.abspath(__file__))
|
script_directory = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
@ -67,17 +62,17 @@ async def post_time_entry_to_timing(entry: Dict):
|
||||||
'Accept': 'application/json',
|
'Accept': 'application/json',
|
||||||
'X-Time-Zone': 'America/Los_Angeles'
|
'X-Time-Zone': 'America/Los_Angeles'
|
||||||
}
|
}
|
||||||
debug(f"Received entry: {entry}")
|
l.debug(f"Received entry: {entry}")
|
||||||
response = None # Initialize response
|
response = None # Initialize response
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
response = await client.post(url, headers=headers, json=entry)
|
response = await client.post(url, headers=headers, json=entry)
|
||||||
response.raise_for_status() # This will only raise for 4xx and 5xx responses
|
response.raise_for_status() # This will only raise for 4xx and 5xx responses
|
||||||
except httpx.HTTPStatusError as exc:
|
except httpx.HTTPStatusError as exc:
|
||||||
debug(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}")
|
l.debug(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}")
|
||||||
raise HTTPException(status_code=exc.response.status_code, detail=str(exc.response.text))
|
raise HTTPException(status_code=exc.response.status_code, detail=str(exc.response.text))
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
debug(f"General exception caught: {exc}")
|
l.debug(f"General exception caught: {exc}")
|
||||||
raise HTTPException(status_code=500, detail="An unexpected error occurred")
|
raise HTTPException(status_code=500, detail="An unexpected error occurred")
|
||||||
|
|
||||||
if response:
|
if response:
|
||||||
|
|
|
@ -26,17 +26,12 @@ import tempfile
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
from sijapi import L, API, Dir, Tts, TTS_SEGMENTS_DIR, VOICE_DIR, TTS_OUTPUT_DIR
|
from sijapi import Sys, Dir, Tts, TTS_SEGMENTS_DIR, VOICE_DIR, TTS_OUTPUT_DIR
|
||||||
from sijapi.utilities import sanitize_filename
|
from sijapi.utilities import sanitize_filename
|
||||||
|
from sijapi.logs import get_logger
|
||||||
|
l = get_logger(__name__)
|
||||||
|
|
||||||
### INITIALIZATIONS ###
|
tts = APIRouter()
|
||||||
tts = APIRouter(tags=["trusted", "private"])
|
|
||||||
logger = L.get_module_logger("tts")
|
|
||||||
def debug(text: str): logger.debug(text)
|
|
||||||
def info(text: str): logger.info(text)
|
|
||||||
def warn(text: str): logger.warning(text)
|
|
||||||
def err(text: str): logger.error(text)
|
|
||||||
def crit(text: str): logger.critical(text)
|
|
||||||
|
|
||||||
DEVICE = torch.device('cpu')
|
DEVICE = torch.device('cpu')
|
||||||
|
|
||||||
|
@ -53,7 +48,7 @@ async def list_11l_voices():
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
try:
|
try:
|
||||||
response = await client.get(url, headers=headers)
|
response = await client.get(url, headers=headers)
|
||||||
debug(f"Response: {response}")
|
l.debug(f"Response: {response}")
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
voices_data = response.json().get("voices", [])
|
voices_data = response.json().get("voices", [])
|
||||||
formatted_list = ""
|
formatted_list = ""
|
||||||
|
@ -63,7 +58,7 @@ async def list_11l_voices():
|
||||||
formatted_list += f"{name}: `{id}`\n"
|
formatted_list += f"{name}: `{id}`\n"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error determining voice ID: {e}")
|
l.error(f"Error determining voice ID: {e}")
|
||||||
|
|
||||||
return PlainTextResponse(formatted_list, status_code=200)
|
return PlainTextResponse(formatted_list, status_code=200)
|
||||||
|
|
||||||
|
@ -73,18 +68,18 @@ async def select_voice(voice_name: str) -> str:
|
||||||
try:
|
try:
|
||||||
# Case Insensitive comparison
|
# Case Insensitive comparison
|
||||||
voice_name_lower = voice_name.lower()
|
voice_name_lower = voice_name.lower()
|
||||||
debug(f"Looking for {voice_name_lower}")
|
l.debug(f"Looking for {voice_name_lower}")
|
||||||
for item in VOICE_DIR.iterdir():
|
for item in VOICE_DIR.iterdir():
|
||||||
debug(f"Checking {item.name.lower()}")
|
l.debug(f"Checking {item.name.lower()}")
|
||||||
if item.name.lower() == f"{voice_name_lower}.wav":
|
if item.name.lower() == f"{voice_name_lower}.wav":
|
||||||
debug(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.")
|
l.debug(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.")
|
||||||
return item
|
return item
|
||||||
|
|
||||||
err(f"Voice file not found")
|
l.error(f"Voice file not found")
|
||||||
raise HTTPException(status_code=404, detail="Voice file not found")
|
raise HTTPException(status_code=404, detail="Voice file not found")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Voice file not found: {e}")
|
l.error(f"Voice file not found: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@ -119,51 +114,51 @@ async def generate_speech_endpoint(
|
||||||
else:
|
else:
|
||||||
return await generate_speech(bg_tasks, text_content, voice, voice_file, model, speed, podcast)
|
return await generate_speech(bg_tasks, text_content, voice, voice_file, model, speed, podcast)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error in TTS: {e}")
|
l.error(f"Error in TTS: {e}")
|
||||||
err(traceback.format_exc())
|
l.error(traceback.format_exc())
|
||||||
raise HTTPException(status_code=666, detail="error in TTS")
|
raise HTTPException(status_code=666, detail="error in TTS")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def determine_voice_id(voice_name: str) -> str:
|
async def determine_voice_id(voice_name: str) -> str:
|
||||||
debug(f"Searching for voice id for {voice_name}")
|
l.debug(f"Searching for voice id for {voice_name}")
|
||||||
debug(f"Tts.elevenlabs.voices: {Tts.elevenlabs.voices}")
|
l.debug(f"Tts.elevenlabs.voices: {Tts.elevenlabs.voices}")
|
||||||
|
|
||||||
# Check if the voice is in the configured voices
|
# Check if the voice is in the configured voices
|
||||||
if voice_name and Tts.has_key(f'elevenlabs.voices.{voice_name}'):
|
if voice_name and Tts.has_key(f'elevenlabs.voices.{voice_name}'):
|
||||||
voice_id = Tts.get_value(f'elevenlabs.voices.{voice_name}')
|
voice_id = Tts.get_value(f'elevenlabs.voices.{voice_name}')
|
||||||
debug(f"Found voice ID in config - {voice_id}")
|
l.debug(f"Found voice ID in config - {voice_id}")
|
||||||
return voice_id
|
return voice_id
|
||||||
|
|
||||||
debug(f"Requested voice not among the voices specified in config/tts.yaml. Checking with ElevenLabs API using api_key: {Tts.elevenlabs.key}.")
|
l.debug(f"Requested voice not among the voices specified in config/tts.yaml. Checking with ElevenLabs API using api_key: {Tts.elevenlabs.key}.")
|
||||||
url = "https://api.elevenlabs.io/v1/voices"
|
url = "https://api.elevenlabs.io/v1/voices"
|
||||||
headers = {"xi-api-key": Tts.elevenlabs.key}
|
headers = {"xi-api-key": Tts.elevenlabs.key}
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
try:
|
try:
|
||||||
response = await client.get(url, headers=headers)
|
response = await client.get(url, headers=headers)
|
||||||
debug(f"Response status: {response.status_code}")
|
l.debug(f"Response status: {response.status_code}")
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
voices_data = response.json().get("voices", [])
|
voices_data = response.json().get("voices", [])
|
||||||
for voice in voices_data:
|
for voice in voices_data:
|
||||||
if voice_name == voice["voice_id"] or (voice_name and voice_name.lower() == voice["name"].lower()):
|
if voice_name == voice["voice_id"] or (voice_name and voice_name.lower() == voice["name"].lower()):
|
||||||
debug(f"Found voice ID from API - {voice['voice_id']}")
|
l.debug(f"Found voice ID from API - {voice['voice_id']}")
|
||||||
return voice["voice_id"]
|
return voice["voice_id"]
|
||||||
else:
|
else:
|
||||||
err(f"Failed to get voices from ElevenLabs API. Status code: {response.status_code}")
|
l.error(f"Failed to get voices from ElevenLabs API. Status code: {response.status_code}")
|
||||||
err(f"Response content: {response.text}")
|
l.error(f"Response content: {response.text}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error determining voice ID: {e}")
|
l.error(f"Error determining voice ID: {e}")
|
||||||
|
|
||||||
warn(f"Voice '{voice_name}' not found; using the default specified in config/tts.yaml: {Tts.elevenlabs.default}")
|
l.warning(f"Voice '{voice_name}' not found; using the default specified in config/tts.yaml: {Tts.elevenlabs.default}")
|
||||||
if Tts.has_key(f'elevenlabs.voices.{Tts.elevenlabs.default}'):
|
if Tts.has_key(f'elevenlabs.voices.{Tts.elevenlabs.default}'):
|
||||||
return Tts.get_value(f'elevenlabs.voices.{Tts.elevenlabs.default}')
|
return Tts.get_value(f'elevenlabs.voices.{Tts.elevenlabs.default}')
|
||||||
else:
|
else:
|
||||||
err(f"Default voice '{Tts.elevenlabs.default}' not found in configuration. Using first available voice.")
|
l.error(f"Default voice '{Tts.elevenlabs.default}' not found in configuration. Using first available voice.")
|
||||||
first_voice = next(iter(vars(Tts.elevenlabs.voices)))
|
first_voice = next(iter(vars(Tts.elevenlabs.voices)))
|
||||||
return Tts.get_value(f'elevenlabs.voices.{first_voice}')
|
return Tts.get_value(f'elevenlabs.voices.{first_voice}')
|
||||||
|
|
||||||
async def elevenlabs_tts(model: str, input_text: str, voice: Optional[str], title: str = None, output_dir: str = None):
|
async def elevenlabs_tts(model: str, input_text: str, voice: Optional[str], title: str = None, output_dir: str = None):
|
||||||
if getattr(API.EXTENSIONS, 'elevenlabs', False):
|
if getattr(Sys.EXTENSIONS, 'elevenlabs', False):
|
||||||
voice_id = await determine_voice_id(voice)
|
voice_id = await determine_voice_id(voice)
|
||||||
|
|
||||||
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
|
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
|
||||||
|
@ -187,11 +182,11 @@ async def elevenlabs_tts(model: str, input_text: str, voice: Optional[str], titl
|
||||||
raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")
|
raise HTTPException(status_code=response.status_code, detail="Error from ElevenLabs API")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error from Elevenlabs API: {e}")
|
l.error(f"Error from Elevenlabs API: {e}")
|
||||||
raise HTTPException(status_code=500, detail=f"Error from ElevenLabs API: {e}")
|
raise HTTPException(status_code=500, detail=f"Error from ElevenLabs API: {e}")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
warn(f"elevenlabs_tts called but ElevenLabs module is not enabled in config.")
|
l.warning(f"elevenlabs_tts called but ElevenLabs module is not enabled in config.")
|
||||||
raise HTTPException(status_code=400, detail="ElevenLabs TTS is not enabled")
|
raise HTTPException(status_code=400, detail="ElevenLabs TTS is not enabled")
|
||||||
|
|
||||||
async def generate_speech(
|
async def generate_speech(
|
||||||
|
@ -205,13 +200,13 @@ async def generate_speech(
|
||||||
title: str = None,
|
title: str = None,
|
||||||
output_dir = None,
|
output_dir = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
debug(f"Entering generate_speech function")
|
l.debug(f"Entering generate_speech function")
|
||||||
debug(f"API.EXTENSIONS: {API.EXTENSIONS}")
|
l.debug(f"Sys.EXTENSIONS: {Sys.EXTENSIONS}")
|
||||||
debug(f"Type of API.EXTENSIONS: {type(API.EXTENSIONS)}")
|
l.debug(f"Type of Sys.EXTENSIONS: {type(Sys.EXTENSIONS)}")
|
||||||
debug(f"Dir of API.EXTENSIONS: {dir(API.EXTENSIONS)}")
|
l.debug(f"Dir of Sys.EXTENSIONS: {dir(Sys.EXTENSIONS)}")
|
||||||
debug(f"Tts config: {Tts}")
|
l.debug(f"Tts config: {Tts}")
|
||||||
debug(f"Type of Tts: {type(Tts)}")
|
l.debug(f"Type of Tts: {type(Tts)}")
|
||||||
debug(f"Dir of Tts: {dir(Tts)}")
|
l.debug(f"Dir of Tts: {dir(Tts)}")
|
||||||
|
|
||||||
|
|
||||||
use_output_dir = Path(output_dir) if output_dir else TTS_OUTPUT_DIR
|
use_output_dir = Path(output_dir) if output_dir else TTS_OUTPUT_DIR
|
||||||
|
@ -222,26 +217,26 @@ async def generate_speech(
|
||||||
title = title if title else "TTS audio"
|
title = title if title else "TTS audio"
|
||||||
output_path = use_output_dir / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')} {title}.wav"
|
output_path = use_output_dir / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')} {title}.wav"
|
||||||
|
|
||||||
debug(f"Model: {model}")
|
l.debug(f"Model: {model}")
|
||||||
debug(f"Voice: {voice}")
|
l.debug(f"Voice: {voice}")
|
||||||
debug(f"Tts.elevenlabs: {Tts.elevenlabs}")
|
l.debug(f"Tts.elevenlabs: {Tts.elevenlabs}")
|
||||||
|
|
||||||
if model == "eleven_turbo_v2" and getattr(API.EXTENSIONS, 'elevenlabs', False):
|
if model == "eleven_turbo_v2" and getattr(Sys.EXTENSIONS, 'elevenlabs', False):
|
||||||
info("Using ElevenLabs.")
|
l.info("Using ElevenLabs.")
|
||||||
audio_file_path = await elevenlabs_tts(model, text, voice, title, use_output_dir)
|
audio_file_path = await elevenlabs_tts(model, text, voice, title, use_output_dir)
|
||||||
elif getattr(API.EXTENSIONS, 'xtts', False):
|
elif getattr(Sys.EXTENSIONS, 'xtts', False):
|
||||||
info("Using XTTS2")
|
l.info("Using XTTS2")
|
||||||
audio_file_path = await local_tts(text, speed, voice, voice_file, podcast, bg_tasks, title, output_path)
|
audio_file_path = await local_tts(text, speed, voice, voice_file, podcast, bg_tasks, title, output_path)
|
||||||
else:
|
else:
|
||||||
err(f"No TTS module enabled!")
|
l.error(f"No TTS module enabled!")
|
||||||
raise ValueError("No TTS module enabled")
|
raise ValueError("No TTS module enabled")
|
||||||
|
|
||||||
if not audio_file_path:
|
if not audio_file_path:
|
||||||
raise ValueError("TTS generation failed: audio_file_path is empty or None")
|
raise ValueError("TTS generation failed: audio_file_path is empty or None")
|
||||||
elif audio_file_path.exists():
|
elif audio_file_path.exists():
|
||||||
info(f"Saved to {audio_file_path}")
|
l.info(f"Saved to {audio_file_path}")
|
||||||
else:
|
else:
|
||||||
warn(f"No file exists at {audio_file_path}")
|
l.warning(f"No file exists at {audio_file_path}")
|
||||||
|
|
||||||
if podcast:
|
if podcast:
|
||||||
podcast_path = Dir.PODCAST / audio_file_path.name
|
podcast_path = Dir.PODCAST / audio_file_path.name
|
||||||
|
@ -249,18 +244,18 @@ async def generate_speech(
|
||||||
if podcast_path != audio_file_path:
|
if podcast_path != audio_file_path:
|
||||||
shutil.copy(audio_file_path, podcast_path)
|
shutil.copy(audio_file_path, podcast_path)
|
||||||
if podcast_path.exists():
|
if podcast_path.exists():
|
||||||
info(f"Saved to podcast path: {podcast_path}")
|
l.info(f"Saved to podcast path: {podcast_path}")
|
||||||
else:
|
else:
|
||||||
warn(f"Podcast mode enabled, but failed to save to {podcast_path}")
|
l.warning(f"Podcast mode enabled, but failed to save to {podcast_path}")
|
||||||
|
|
||||||
if output_dir and Path(output_dir) == use_output_dir:
|
if output_dir and Path(output_dir) == use_output_dir:
|
||||||
debug(f"Keeping {audio_file_path} because it was specified")
|
l.debug(f"Keeping {audio_file_path} because it was specified")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
info(f"Podcast mode enabled and output_dir not specified so we will remove {audio_file_path}")
|
l.info(f"Podcast mode enabled and output_dir not specified so we will remove {audio_file_path}")
|
||||||
bg_tasks.add_task(os.remove, audio_file_path)
|
bg_tasks.add_task(os.remove, audio_file_path)
|
||||||
else:
|
else:
|
||||||
warn(f"Podcast path is the same as audio file path. Using existing file.")
|
l.warning(f"Podcast path is the same as audio file path. Using existing file.")
|
||||||
|
|
||||||
return podcast_path
|
return podcast_path
|
||||||
|
|
||||||
|
@ -268,20 +263,20 @@ async def generate_speech(
|
||||||
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Failed to generate speech: {e}")
|
l.error(f"Failed to generate speech: {e}")
|
||||||
err(f"Traceback: {traceback.format_exc()}")
|
l.error(f"Traceback: {traceback.format_exc()}")
|
||||||
raise HTTPException(status_code=500, detail=f"Failed to generate speech: {e}")
|
raise HTTPException(status_code=500, detail=f"Failed to generate speech: {e}")
|
||||||
|
|
||||||
|
|
||||||
async def get_model(voice: str = None, voice_file: UploadFile = None):
|
async def get_model(voice: str = None, voice_file: UploadFile = None):
|
||||||
if (voice_file or (voice and await select_voice(voice))) and API.EXTENSIONS.xtts:
|
if (voice_file or (voice and await select_voice(voice))) and Sys.EXTENSIONS.xtts:
|
||||||
return "xtts"
|
return "xtts"
|
||||||
|
|
||||||
elif voice and await determine_voice_id(voice) and API.EXTENSIONS.elevenlabs:
|
elif voice and await determine_voice_id(voice) and Sys.EXTENSIONS.elevenlabs:
|
||||||
return "eleven_turbo_v2"
|
return "eleven_turbo_v2"
|
||||||
|
|
||||||
else:
|
else:
|
||||||
err(f"No model or voice specified, or no TTS module loaded")
|
l.error(f"No model or voice specified, or no TTS module loaded")
|
||||||
raise HTTPException(status_code=400, detail="No model or voice specified, or no TTS module loaded")
|
raise HTTPException(status_code=400, detail="No model or voice specified, or no TTS module loaded")
|
||||||
|
|
||||||
|
|
||||||
|
@ -296,7 +291,7 @@ async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> s
|
||||||
|
|
||||||
async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str:
|
async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str:
|
||||||
if voice:
|
if voice:
|
||||||
debug(f"Looking for voice: {voice}")
|
l.debug(f"Looking for voice: {voice}")
|
||||||
selected_voice = await select_voice(voice)
|
selected_voice = await select_voice(voice)
|
||||||
return selected_voice
|
return selected_voice
|
||||||
|
|
||||||
|
@ -326,7 +321,7 @@ async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None)
|
||||||
return new_file
|
return new_file
|
||||||
|
|
||||||
else:
|
else:
|
||||||
debug(f"No voice specified or file provided, using default voice: {Tts.xtts.default}")
|
l.debug(f"No voice specified or file provided, using default voice: {Tts.xtts.default}")
|
||||||
selected_voice = await select_voice(Tts.xtts.default)
|
selected_voice = await select_voice(Tts.xtts.default)
|
||||||
return selected_voice
|
return selected_voice
|
||||||
|
|
||||||
|
@ -343,7 +338,7 @@ async def local_tts(
|
||||||
output_path: Optional[Path] = None
|
output_path: Optional[Path] = None
|
||||||
) -> str:
|
) -> str:
|
||||||
|
|
||||||
if API.EXTENSIONS.xtts:
|
if Sys.EXTENSIONS.xtts:
|
||||||
from TTS.api import TTS
|
from TTS.api import TTS
|
||||||
|
|
||||||
if output_path:
|
if output_path:
|
||||||
|
@ -368,7 +363,7 @@ async def local_tts(
|
||||||
|
|
||||||
for i, segment in enumerate(segments):
|
for i, segment in enumerate(segments):
|
||||||
segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav"
|
segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav"
|
||||||
debug(f"Segment file path: {segment_file_path}")
|
l.debug(f"Segment file path: {segment_file_path}")
|
||||||
|
|
||||||
# Run TTS in a separate thread
|
# Run TTS in a separate thread
|
||||||
await asyncio.to_thread(
|
await asyncio.to_thread(
|
||||||
|
@ -379,7 +374,7 @@ async def local_tts(
|
||||||
speaker_wav=[voice_file_path],
|
speaker_wav=[voice_file_path],
|
||||||
language="en"
|
language="en"
|
||||||
)
|
)
|
||||||
debug(f"Segment file generated: {segment_file_path}")
|
l.debug(f"Segment file generated: {segment_file_path}")
|
||||||
|
|
||||||
# Load and combine audio in a separate thread
|
# Load and combine audio in a separate thread
|
||||||
segment_audio = await asyncio.to_thread(AudioSegment.from_wav, segment_file_path)
|
segment_audio = await asyncio.to_thread(AudioSegment.from_wav, segment_file_path)
|
||||||
|
@ -398,7 +393,7 @@ async def local_tts(
|
||||||
return file_path
|
return file_path
|
||||||
|
|
||||||
else:
|
else:
|
||||||
warn(f"local_tts called but xtts module disabled!")
|
l.warning(f"local_tts called but xtts module disabled!")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@ -421,7 +416,7 @@ async def stream_tts(text_content: str, speed: float, voice: str, voice_file) ->
|
||||||
|
|
||||||
async def generate_tts(text: str, speed: float, voice_file_path: str) -> str:
|
async def generate_tts(text: str, speed: float, voice_file_path: str) -> str:
|
||||||
|
|
||||||
if API.EXTENSIONS.xtts:
|
if Sys.EXTENSIONS.xtts:
|
||||||
from TTS.api import TTS
|
from TTS.api import TTS
|
||||||
|
|
||||||
output_dir = tempfile.mktemp(suffix=".wav", dir=tempfile.gettempdir())
|
output_dir = tempfile.mktemp(suffix=".wav", dir=tempfile.gettempdir())
|
||||||
|
@ -432,7 +427,7 @@ async def generate_tts(text: str, speed: float, voice_file_path: str) -> str:
|
||||||
return output_dir
|
return output_dir
|
||||||
|
|
||||||
else:
|
else:
|
||||||
warn(f"generate_tts called but xtts module disabled!")
|
l.warning(f"generate_tts called but xtts module disabled!")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@ -466,7 +461,7 @@ def split_text(text, target_length=35, max_length=50):
|
||||||
|
|
||||||
if segment_length + len(sentence_words) > max_length:
|
if segment_length + len(sentence_words) > max_length:
|
||||||
segments.append(' '.join(current_segment))
|
segments.append(' '.join(current_segment))
|
||||||
debug(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}")
|
l.debug(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}")
|
||||||
|
|
||||||
current_segment = [sentence]
|
current_segment = [sentence]
|
||||||
else:
|
else:
|
||||||
|
@ -474,7 +469,7 @@ def split_text(text, target_length=35, max_length=50):
|
||||||
|
|
||||||
if current_segment:
|
if current_segment:
|
||||||
segments.append(' '.join(current_segment))
|
segments.append(' '.join(current_segment))
|
||||||
debug(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}")
|
l.debug(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}")
|
||||||
|
|
||||||
return segments
|
return segments
|
||||||
|
|
||||||
|
@ -486,7 +481,7 @@ def clean_text_for_tts(text: str) -> str:
|
||||||
text = re.sub(r'\s+', ' ', text).strip()
|
text = re.sub(r'\s+', ' ', text).strip()
|
||||||
return text
|
return text
|
||||||
else:
|
else:
|
||||||
debug(f"No text received.")
|
l.debug(f"No text received.")
|
||||||
|
|
||||||
|
|
||||||
def copy_to_podcast_dir(file_path):
|
def copy_to_podcast_dir(file_path):
|
||||||
|
|
|
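Throughout the TTS router above, backends are gated on `Sys.EXTENSIONS` flags rather than the old `API.EXTENSIONS`. A minimal sketch of that dispatch, simplified from the extension checks in `generate_speech` shown in the diff; the helper name is made up.

```python
from sijapi import Sys


def pick_tts_backend() -> str:
    # Mirrors the gating in generate_speech: ElevenLabs first, then local XTTS2.
    if getattr(Sys.EXTENSIONS, 'elevenlabs', False):
        return "eleven_turbo_v2"
    if getattr(Sys.EXTENSIONS, 'xtts', False):
        return "xtts"
    raise ValueError("No TTS module enabled")
```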
@ -15,17 +15,13 @@ from typing import Dict
|
||||||
from datetime import datetime as dt_datetime, date as dt_date
|
from datetime import datetime as dt_datetime, date as dt_date
|
||||||
from shapely.wkb import loads
|
from shapely.wkb import loads
|
||||||
from binascii import unhexlify
|
from binascii import unhexlify
|
||||||
from sijapi import L, VISUALCROSSING_API_KEY, TZ, API, GEO
|
from sijapi import VISUALCROSSING_API_KEY, TZ, Sys, GEO, Db
|
||||||
from sijapi.utilities import haversine
|
from sijapi.utilities import haversine
|
||||||
from sijapi.routers import gis
|
from sijapi.routers import gis
|
||||||
|
from sijapi.logs import get_logger
|
||||||
|
l = get_logger(__name__)
|
||||||
|
|
||||||
weather = APIRouter()
|
weather = APIRouter()
|
||||||
logger = L.get_module_logger("weather")
|
|
||||||
def debug(text: str): logger.debug(text)
|
|
||||||
def info(text: str): logger.info(text)
|
|
||||||
def warn(text: str): logger.warning(text)
|
|
||||||
def err(text: str): logger.error(text)
|
|
||||||
def crit(text: str): logger.critical(text)
|
|
||||||
|
|
||||||
|
|
||||||
@weather.get("/weather/refresh", response_class=JSONResponse)
|
@weather.get("/weather/refresh", response_class=JSONResponse)
|
||||||
|
@ -48,7 +44,7 @@ async def get_refreshed_weather(
|
||||||
tz = await GEO.tz_at(lat, lon)
|
tz = await GEO.tz_at(lat, lon)
|
||||||
date_time = await gis.dt(date, tz)
|
date_time = await gis.dt(date, tz)
|
||||||
|
|
||||||
debug(f"Passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
|
l.debug(f"Passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
|
||||||
day = await get_weather(date_time, lat, lon, force_refresh=True)
|
day = await get_weather(date_time, lat, lon, force_refresh=True)
|
||||||
|
|
||||||
if day is None:
|
if day is None:
|
||||||
|
@ -67,12 +63,12 @@ async def get_refreshed_weather(
|
||||||
return JSONResponse(content={"weather": day_dict}, status_code=200)
|
return JSONResponse(content={"weather": day_dict}, status_code=200)
|
||||||
|
|
||||||
except HTTPException as e:
|
except HTTPException as e:
|
||||||
err(f"HTTP Exception in get_refreshed_weather: {e.detail}")
|
l.error(f"HTTP Exception in get_refreshed_weather: {e.detail}")
|
||||||
return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code)
|
return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Unexpected error in get_refreshed_weather: {str(e)}")
|
l.error(f"Unexpected error in get_refreshed_weather: {str(e)}")
|
||||||
err(f"Traceback: {traceback.format_exc()}")
|
l.error(f"Traceback: {traceback.format_exc()}")
|
||||||
return JSONResponse(content={"detail": "An unexpected error occurred"}, status_code=500)
|
return JSONResponse(content={"detail": "An unexpected error occurred"}, status_code=500)
|
||||||
|
|
||||||
|
|
||||||
|
@ -84,7 +80,7 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
|
||||||
try:
|
try:
|
||||||
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
|
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
|
||||||
if daily_weather_data:
|
if daily_weather_data:
|
||||||
debug(f"Daily weather data from db: {daily_weather_data}")
|
l.debug(f"Daily weather data from db: {daily_weather_data}")
|
||||||
last_updated = str(daily_weather_data['DailyWeather'].get('last_updated'))
|
last_updated = str(daily_weather_data['DailyWeather'].get('last_updated'))
|
||||||
last_updated = await gis.dt(last_updated)
|
last_updated = await gis.dt(last_updated)
|
||||||
stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location'))
|
stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location'))
|
||||||
|
@ -93,19 +89,19 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
|
||||||
|
|
||||||
hourly_weather = daily_weather_data.get('HourlyWeather')
|
hourly_weather = daily_weather_data.get('HourlyWeather')
|
||||||
request_haversine = haversine(latitude, longitude, stored_lat, stored_lon)
|
request_haversine = haversine(latitude, longitude, stored_lat, stored_lon)
|
||||||
debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")
|
l.debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")
|
||||||
|
|
||||||
if last_updated and (date_time <= dt_datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0:
|
if last_updated and (date_time <= dt_datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0:
|
||||||
debug(f"Using existing data")
|
l.debug(f"Using existing data")
|
||||||
fetch_new_data = False
|
fetch_new_data = False
|
||||||
else:
|
else:
|
||||||
fetch_new_data = True
|
fetch_new_data = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error checking existing weather data: {e}")
|
l.error(f"Error checking existing weather data: {e}")
|
||||||
fetch_new_data = True
|
fetch_new_data = True
|
||||||
|
|
||||||
if fetch_new_data:
|
if fetch_new_data:
|
||||||
debug(f"Fetching new weather data")
|
l.debug(f"Fetching new weather data")
|
||||||
request_date_str = date_time.strftime("%Y-%m-%d")
|
request_date_str = date_time.strftime("%Y-%m-%d")
|
||||||
url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"
|
url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"
|
||||||
|
|
||||||
|
@ -114,9 +110,14 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
|
||||||
response = await client.get(url)
|
response = await client.get(url)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
weather_data = response.json()
|
weather_data = response.json()
|
||||||
store_result = await store_weather_to_db(date_time, weather_data)
|
|
||||||
if store_result != "SUCCESS":
|
try:
|
||||||
raise HTTPException(status_code=500, detail=f"Failed to store weather data: {store_result}")
|
store_result = await store_weather_to_db(date_time, weather_data)
|
||||||
|
if store_result != "SUCCESS":
|
||||||
|
raise HTTPException(status_code=500, detail=f"Failed to store weather data: {store_result}")
|
||||||
|
except Exception as e:
|
||||||
|
l.error(f"Error storing weather data: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"Error storing weather data: {str(e)}")
|
||||||
|
|
||||||
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
|
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
|
||||||
if daily_weather_data is None:
|
if daily_weather_data is None:
|
||||||
|
@ -126,8 +127,8 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Exception during API call or data storage: {e}")
|
l.error(f"Exception during API call or data storage: {e}")
|
||||||
err(f"Traceback: {traceback.format_exc()}")
|
l.error(f"Traceback: {traceback.format_exc()}")
|
||||||
raise HTTPException(status_code=500, detail=f"Error fetching or storing weather data: {str(e)}")
|
raise HTTPException(status_code=500, detail=f"Error fetching or storing weather data: {str(e)}")
|
||||||
|
|
||||||
if daily_weather_data is None:
|
if daily_weather_data is None:
|
||||||
|
@ -136,7 +137,7 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
|
||||||
return daily_weather_data
|
return daily_weather_data
|
||||||
|
|
||||||
|
|
||||||
|
# weather.py
|
||||||
|
|
||||||
async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
||||||
try:
|
try:
|
||||||
|
@ -154,46 +155,46 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
||||||
elevation = await GEO.elevation(latitude, longitude)
|
elevation = await GEO.elevation(latitude, longitude)
|
||||||
location_point = f"POINTZ({longitude} {latitude} {elevation})" if elevation else None
|
location_point = f"POINTZ({longitude} {latitude} {elevation})" if elevation else None
|
||||||
|
|
||||||
daily_weather_params = [
|
daily_weather_params = {
|
||||||
location_point,
|
'location': location_point,
|
||||||
await gis.dt(day_data.get('sunriseEpoch')),
|
'sunrise': await gis.dt(day_data.get('sunriseEpoch')),
|
||||||
day_data.get('sunriseEpoch'),
|
'sunriseepoch': day_data.get('sunriseEpoch'),
|
||||||
await gis.dt(day_data.get('sunsetEpoch')),
|
'sunset': await gis.dt(day_data.get('sunsetEpoch')),
|
||||||
day_data.get('sunsetEpoch'),
|
'sunsetepoch': day_data.get('sunsetEpoch'),
|
||||||
day_data.get('description'),
|
'description': day_data.get('description'),
|
||||||
day_data.get('tempmax'),
|
'tempmax': day_data.get('tempmax'),
|
||||||
day_data.get('tempmin'),
|
'tempmin': day_data.get('tempmin'),
|
||||||
day_data.get('uvindex'),
|
'uvindex': day_data.get('uvindex'),
|
||||||
day_data.get('winddir'),
|
'winddir': day_data.get('winddir'),
|
||||||
day_data.get('windspeed'),
|
'windspeed': day_data.get('windspeed'),
|
||||||
day_data.get('icon'),
|
'icon': day_data.get('icon'),
|
||||||
dt_datetime.now(tz),
|
'last_updated': dt_datetime.now(tz),
|
||||||
await gis.dt(day_data.get('datetimeEpoch')),
|
'datetime': await gis.dt(day_data.get('datetimeEpoch')),
|
||||||
day_data.get('datetimeEpoch'),
|
'datetimeepoch': day_data.get('datetimeEpoch'),
|
||||||
day_data.get('temp'),
|
'temp': day_data.get('temp'),
|
||||||
day_data.get('feelslikemax'),
|
'feelslikemax': day_data.get('feelslikemax'),
|
||||||
day_data.get('feelslikemin'),
|
'feelslikemin': day_data.get('feelslikemin'),
|
||||||
day_data.get('feelslike'),
|
'feelslike': day_data.get('feelslike'),
|
||||||
day_data.get('dew'),
|
'dew': day_data.get('dew'),
|
||||||
day_data.get('humidity'),
|
'humidity': day_data.get('humidity'),
|
||||||
day_data.get('precip'),
|
'precip': day_data.get('precip'),
|
||||||
day_data.get('precipprob'),
|
'precipprob': day_data.get('precipprob'),
|
||||||
day_data.get('precipcover'),
|
'precipcover': day_data.get('precipcover'),
|
||||||
preciptype_array,
|
'preciptype': preciptype_array,
|
||||||
day_data.get('snow'),
|
'snow': day_data.get('snow'),
|
||||||
day_data.get('snowdepth'),
|
'snowdepth': day_data.get('snowdepth'),
|
||||||
day_data.get('windgust'),
|
'windgust': day_data.get('windgust'),
|
||||||
day_data.get('pressure'),
|
'pressure': day_data.get('pressure'),
|
||||||
day_data.get('cloudcover'),
|
'cloudcover': day_data.get('cloudcover'),
|
||||||
day_data.get('visibility'),
|
'visibility': day_data.get('visibility'),
|
||||||
day_data.get('solarradiation'),
|
'solarradiation': day_data.get('solarradiation'),
|
||||||
day_data.get('solarenergy'),
|
'solarenergy': day_data.get('solarenergy'),
|
||||||
day_data.get('severerisk', 0),
|
'severerisk': day_data.get('severerisk', 0),
|
||||||
day_data.get('moonphase'),
|
'moonphase': day_data.get('moonphase'),
|
||||||
day_data.get('conditions'),
|
'conditions': day_data.get('conditions'),
|
||||||
stations_array,
|
'stations': stations_array,
|
||||||
day_data.get('source')
|
'source': day_data.get('source')
|
||||||
]
|
}
|
||||||
|
|
||||||
daily_weather_query = '''
|
daily_weather_query = '''
|
||||||
INSERT INTO dailyweather (
|
INSERT INTO dailyweather (
|
||||||
|
@ -205,54 +206,58 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
||||||
solarradiation, solarenergy, severerisk, moonphase, conditions,
|
solarradiation, solarenergy, severerisk, moonphase, conditions,
|
||||||
stations, source
|
stations, source
|
||||||
) VALUES (
|
) VALUES (
|
||||||
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
|
:location, :sunrise, :sunriseepoch, :sunset, :sunsetepoch, :description,
|
||||||
$16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28,
|
:tempmax, :tempmin, :uvindex, :winddir, :windspeed, :icon, :last_updated,
|
||||||
$29, $30, $31, $32, $33, $34, $35, $36, $37, $38
|
:datetime, :datetimeepoch, :temp, :feelslikemax, :feelslikemin, :feelslike,
|
||||||
|
:dew, :humidity, :precip, :precipprob, :precipcover, :preciptype,
|
||||||
|
:snow, :snowdepth, :windgust, :pressure, :cloudcover, :visibility,
|
||||||
|
:solarradiation, :solarenergy, :severerisk, :moonphase, :conditions,
|
||||||
|
:stations, :source
|
||||||
) RETURNING id
|
) RETURNING id
|
||||||
'''
|
'''
|
||||||
|
|
||||||
daily_weather_result = await API.execute_write_query(daily_weather_query, *daily_weather_params, table_name="dailyweather")
|
daily_weather_result = await Db.write(daily_weather_query, **daily_weather_params, table_name="dailyweather")
|
||||||
|
|
||||||
if not daily_weather_result:
|
if daily_weather_result is None:
|
||||||
raise ValueError("Failed to insert daily weather data: no result returned")
|
raise ValueError("Failed to insert daily weather data: no result returned")
|
||||||
|
|
||||||
daily_weather_id = daily_weather_result[0]['id']
|
daily_weather_id = daily_weather_result.fetchone()[0]
|
||||||
debug(f"Inserted daily weather data with id: {daily_weather_id}")
|
l.debug(f"Inserted daily weather data with id: {daily_weather_id}")
|
||||||
|
|
||||||
# Hourly weather insertion
|
# Hourly weather insertion
|
||||||
if 'hours' in day_data:
|
if 'hours' in day_data:
|
||||||
debug(f"Processing {len(day_data['hours'])} hourly records")
|
l.debug(f"Processing {len(day_data['hours'])} hourly records")
|
||||||
for hour_data in day_data['hours']:
|
for hour_data in day_data['hours']:
|
||||||
hour_preciptype_array = hour_data.get('preciptype', []) or []
|
hour_preciptype_array = hour_data.get('preciptype', []) or []
|
||||||
hour_stations_array = hour_data.get('stations', []) or []
|
hour_stations_array = hour_data.get('stations', []) or []
|
||||||
hourly_weather_params = [
|
hourly_weather_params = {
|
||||||
daily_weather_id,
|
'daily_weather_id': str(daily_weather_id), # Convert UUID to string
|
||||||
await gis.dt(hour_data.get('datetimeEpoch')),
|
'datetime': await gis.dt(hour_data.get('datetimeEpoch')),
|
||||||
hour_data.get('datetimeEpoch'),
|
'datetimeepoch': hour_data.get('datetimeEpoch'),
|
||||||
hour_data.get('temp'),
|
'temp': hour_data.get('temp'),
|
||||||
hour_data.get('feelslike'),
|
'feelslike': hour_data.get('feelslike'),
|
||||||
hour_data.get('humidity'),
|
'humidity': hour_data.get('humidity'),
|
||||||
hour_data.get('dew'),
|
'dew': hour_data.get('dew'),
|
||||||
hour_data.get('precip'),
|
'precip': hour_data.get('precip'),
|
||||||
hour_data.get('precipprob'),
|
'precipprob': hour_data.get('precipprob'),
|
||||||
hour_preciptype_array,
|
'preciptype': hour_preciptype_array,
|
||||||
hour_data.get('snow'),
|
'snow': hour_data.get('snow'),
|
||||||
hour_data.get('snowdepth'),
|
'snowdepth': hour_data.get('snowdepth'),
|
||||||
hour_data.get('windgust'),
|
'windgust': hour_data.get('windgust'),
|
||||||
hour_data.get('windspeed'),
|
'windspeed': hour_data.get('windspeed'),
|
||||||
hour_data.get('winddir'),
|
'winddir': hour_data.get('winddir'),
|
||||||
hour_data.get('pressure'),
|
'pressure': hour_data.get('pressure'),
|
||||||
hour_data.get('cloudcover'),
|
'cloudcover': hour_data.get('cloudcover'),
|
||||||
hour_data.get('visibility'),
|
'visibility': hour_data.get('visibility'),
|
||||||
hour_data.get('solarradiation'),
|
'solarradiation': hour_data.get('solarradiation'),
|
||||||
hour_data.get('solarenergy'),
|
'solarenergy': hour_data.get('solarenergy'),
|
||||||
hour_data.get('uvindex'),
|
'uvindex': hour_data.get('uvindex'),
|
||||||
hour_data.get('severerisk', 0),
|
'severerisk': hour_data.get('severerisk', 0),
|
||||||
hour_data.get('conditions'),
|
'conditions': hour_data.get('conditions'),
|
||||||
hour_data.get('icon'),
|
'icon': hour_data.get('icon'),
|
||||||
hour_stations_array,
|
'stations': hour_stations_array,
|
||||||
hour_data.get('source', '')
|
'source': hour_data.get('source', '')
|
||||||
]
|
}
|
||||||
|
|
||||||
hourly_weather_query = '''
|
hourly_weather_query = '''
|
||||||
INSERT INTO hourlyweather (
|
INSERT INTO hourlyweather (
|
||||||
|
@ -262,61 +267,68 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
||||||
solarradiation, solarenergy, uvindex, severerisk, conditions,
|
solarradiation, solarenergy, uvindex, severerisk, conditions,
|
||||||
icon, stations, source
|
icon, stations, source
|
||||||
) VALUES (
|
) VALUES (
|
||||||
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
|
:daily_weather_id, :datetime, :datetimeepoch, :temp, :feelslike,
|
||||||
$16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26
|
:humidity, :dew, :precip, :precipprob, :preciptype, :snow, :snowdepth,
|
||||||
|
:windgust, :windspeed, :winddir, :pressure, :cloudcover, :visibility,
|
||||||
|
:solarradiation, :solarenergy, :uvindex, :severerisk, :conditions,
|
||||||
|
:icon, :stations, :source
|
||||||
) RETURNING id
|
) RETURNING id
|
||||||
'''
|
'''
|
||||||
hourly_result = await API.execute_write_query(hourly_weather_query, *hourly_weather_params, table_name="hourlyweather")
|
hourly_result = await Db.write(hourly_weather_query, **hourly_weather_params, table_name="hourlyweather")
|
||||||
if not hourly_result:
|
if hourly_result is None:
|
||||||
warn(f"Failed to insert hourly weather data for {hour_data.get('datetimeEpoch')}")
|
l.warning(f"Failed to insert hourly weather data for {hour_data.get('datetimeEpoch')}")
|
||||||
else:
|
else:
|
||||||
debug(f"Inserted hourly weather data with id: {hourly_result[0]['id']}")
|
hourly_id = hourly_result.fetchone()[0]
|
||||||
|
l.debug(f"Inserted hourly weather data with id: {hourly_id}")
|
||||||
|
|
||||||
return "SUCCESS"
|
return "SUCCESS"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Error in weather storage: {e}")
|
l.error(f"Error in weather storage: {e}")
|
||||||
err(f"Traceback: {traceback.format_exc()}")
|
l.error(f"Traceback: {traceback.format_exc()}")
|
||||||
return "FAILURE"
|
return "FAILURE"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude: float):
|
async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude: float):
|
||||||
debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in get_weather_from_db.")
|
l.debug(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in get_weather_from_db.")
|
||||||
query_date = date_time.date()
|
query_date = date_time.date()
|
||||||
try:
|
try:
|
||||||
# Query to get daily weather data
|
# Query to get daily weather data
|
||||||
daily_query = '''
|
daily_query = '''
|
||||||
SELECT * FROM dailyweather
|
SELECT * FROM dailyweather
|
||||||
WHERE DATE(datetime) = $1
|
WHERE DATE(datetime) = :query_date
|
||||||
AND ST_DWithin(location::geography, ST_MakePoint($2,$3)::geography, 8046.72)
|
AND ST_DWithin(location::geography, ST_MakePoint(:longitude,:latitude)::geography, 8046.72)
|
||||||
ORDER BY ST_Distance(location, ST_MakePoint($4, $5)::geography) ASC
|
ORDER BY ST_Distance(location, ST_MakePoint(:longitude2, :latitude2)::geography) ASC
|
||||||
LIMIT 1
|
LIMIT 1
|
||||||
'''
|
'''
|
||||||
|
daily_weather_records = await Db.read(daily_query, query_date=query_date, longitude=longitude, latitude=latitude, longitude2=longitude, latitude2=latitude, table_name='dailyweather')
|
||||||
daily_weather_records = await API.execute_read_query(daily_query, query_date, longitude, latitude, longitude, latitude, table_name='dailyweather')
|
|
||||||
|
|
||||||
if not daily_weather_records:
|
if not daily_weather_records:
|
||||||
debug(f"No daily weather data retrieved from database.")
|
l.debug(f"No daily weather data retrieved from database.")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
daily_weather_data = daily_weather_records[0]
|
daily_weather_data = daily_weather_records[0]
|
||||||
|
|
||||||
# Query to get hourly weather data
|
|
||||||
hourly_query = '''
|
hourly_query = '''
|
||||||
SELECT * FROM hourlyweather
|
SELECT * FROM hourlyweather
|
||||||
WHERE daily_weather_id = $1
|
WHERE daily_weather_id::text = :daily_weather_id
|
||||||
ORDER BY datetime ASC
|
ORDER BY datetime ASC
|
||||||
'''
|
'''
|
||||||
|
hourly_weather_records = await API.execute_read_query(hourly_query, daily_weather_data['id'], table_name='hourlyweather')
|
hourly_weather_records = await Db.read(
    hourly_query,
    daily_weather_id=str(daily_weather_data['id']),
    table_name='hourlyweather'
)
|
||||||
|
|
||||||
day = {
|
day = {
|
||||||
'DailyWeather': daily_weather_data,
|
'DailyWeather': daily_weather_data,
|
||||||
'HourlyWeather': hourly_weather_records,
|
'HourlyWeather': hourly_weather_records,
|
||||||
}
|
}
|
||||||
|
|
||||||
debug(f"Retrieved weather data for {date_time.date()}")
|
l.debug(f"Retrieved weather data for {date_time.date()}")
|
||||||
return day
|
return day
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err(f"Unexpected error occurred in get_weather_from_db: {e}")
|
l.error(f"Unexpected error occurred in get_weather_from_db: {e}")
|
||||||
err(f"Traceback: {traceback.format_exc()}")
|
l.error(f"Traceback: {traceback.format_exc()}")
|
||||||
return None
|
return None
|
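Note: the queries above move from asyncpg-style positional placeholders ($1, $2, ...) to named :param placeholders consumed by the new Db.read() helper. The Database class itself is not part of this hunk, so the following is only a minimal sketch of how a named-parameter read method could be built with SQLAlchemy's async engine and text() construct; the class name, the DSN, and the handling of table_name are assumptions, not the project's actual implementation.

# Hypothetical sketch only -- the project's real Database class is defined
# elsewhere in this commit and may differ substantially.
from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine

class DatabaseSketch:
    def __init__(self, dsn: str):
        # Placeholder DSN, e.g. "postgresql+asyncpg://user:pass@localhost/db"
        self.engine = create_async_engine(dsn)

    async def read(self, query: str, table_name: str = None, **params):
        # table_name mirrors the call sites above; a real implementation might
        # use it for routing or replication bookkeeping.
        async with self.engine.connect() as conn:
            result = await conn.execute(text(query), params)
            return [dict(row._mapping) for row in result]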
sijapi/serialization.py (new file, +76 lines)
@@ -0,0 +1,76 @@

# serialization.py

import json
from typing import Any
from uuid import UUID
from decimal import Decimal
import numpy as np
import pandas as pd
from enum import Enum
from pathlib import Path
from datetime import datetime as dt_datetime, date, time
from .logs import get_logger

l = get_logger(__name__)

def serialize(obj: Any) -> Any:
    """Serializer for database inputs that keeps datetime objects intact"""
    if isinstance(obj, (dt_datetime, date, time)):
        return obj
    return json_serial(obj)


def json_serial(obj: Any) -> Any:
    """JSON serializer for objects not serializable by default json code"""
    if isinstance(obj, (dt_datetime, date)):
        return obj.isoformat()
    if isinstance(obj, time):
        return obj.isoformat()
    if isinstance(obj, Decimal):
        return float(obj)
    if isinstance(obj, UUID):
        return str(obj)
    if isinstance(obj, bytes):
        return obj.decode('utf-8')
    if isinstance(obj, Path):
        return str(obj)
    if isinstance(obj, (str, int, float, bool)):
        return obj
    if isinstance(obj, list):
        return [json_serial(item) for item in obj]
    if isinstance(obj, dict):
        return {json_serial(k): json_serial(v) for k, v in obj.items()}
    if isinstance(obj, (set, frozenset)):
        return [json_serial(item) for item in obj]
    if isinstance(obj, tuple):
        return list(json_serial(item) for item in obj)
    if isinstance(obj, np.ndarray):
        return json_serial(obj.tolist())
    if isinstance(obj, (pd.DataFrame, pd.Series)):
        return json_serial(obj.to_dict())
    if obj is None:
        return None
    if isinstance(obj, complex):
        return [obj.real, obj.imag]
    if isinstance(obj, Enum):
        return obj.value
    if isinstance(obj, range):
        return {'start': obj.start, 'stop': obj.stop, 'step': obj.step}
    if hasattr(obj, '__iter__'):
        return list(json_serial(item) for item in obj)
    if hasattr(obj, '__dict__'):
        return {k: json_serial(v) for k, v in obj.__dict__.items() if not k.startswith('_')}
    raise TypeError(f"Type {type(obj)} not serializable")


def json_dumps(obj: Any) -> str:
    """
    Serialize obj to a JSON formatted str using the custom serializer.
    """
    return json.dumps(obj, default=json_serial)

def json_loads(json_str: str) -> Any:
    """
    Deserialize json_str to a Python object.
    """
    return json.loads(json_str)
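For context, a quick illustration of how the new module behaves. Only json_dumps, json_loads, and serialize come from the file above; the record values here are invented for demonstration.

# Example usage of sijapi/serialization.py (illustrative values only)
from uuid import uuid4
from decimal import Decimal
from datetime import datetime
from pathlib import Path
from sijapi.serialization import json_dumps, json_loads, serialize

record = {
    'id': uuid4(),                      # UUID -> string
    'price': Decimal('19.99'),          # Decimal -> float
    'created': datetime(2024, 7, 1),    # datetime -> ISO-8601 string in JSON
    'path': Path('/tmp/example.txt'),   # Path -> string
    'tags': {'weather', 'daily'},       # set -> list
}

payload = json_dumps(record)            # JSON string via the json_serial default
round_tripped = json_loads(payload)     # plain dict of JSON-native types

# serialize() leaves datetime/date/time objects intact for database inserts,
# while other values are normalized the same way json_serial would.
assert serialize(record['created']) is record['created']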
@@ -1,56 +1,47 @@
# utilities.py
import re
import os
-from fastapi import Form
-import re
+import json
import io
-from io import BytesIO
import base64
import math
import paramiko
-from dateutil import parser
-from pathlib import Path
import filetype
import shutil
import uuid
import hashlib
import requests
-from requests.adapters import HTTPAdapter
-from urllib3.util.retry import Retry
+import asyncio
+import aiohttp
+import pandas as pd
+import ipaddress
+from io import BytesIO
+from pathlib import Path
+from dateutil import parser
from urllib.parse import urlparse
from PyPDF2 import PdfReader
from better_profanity import profanity
from adblockparser import AdblockRules
from pdfminer.high_level import extract_text as pdfminer_extract_text
-import pytesseract
-from readability import Document
+from readability import Document as ReadabilityDocument
from pdf2image import convert_from_path
from datetime import datetime as dt_datetime, date, time
from typing import Optional, Union, Tuple, List, Any
-import asyncio
from PIL import Image
-import pandas as pd
-import ipaddress
from scipy.spatial import cKDTree
from dateutil.parser import parse as dateutil_parse
from docx import Document
-import aiohttp
from bs4 import BeautifulSoup
-from readability import Document as ReadabilityDocument
from markdownify import markdownify as md
from sshtunnel import SSHTunnelForwarder
-from urllib.parse import urlparse
-from fastapi import Depends, HTTPException, Request, UploadFile
+from fastapi import Depends, HTTPException, Request, UploadFile, Form
from fastapi.security.api_key import APIKeyHeader
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
+import pytesseract
-from sijapi import L, API, Archivist, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_PATH_LENGTH, ARCHIVE_DIR
+from sijapi import Sys, Dir, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_PATH_LENGTH, ARCHIVE_DIR
+from sijapi.logs import get_logger
-
-logger = L.get_module_logger('utilities')
-def debug(text: str): logger.debug(text)
-def info(text: str): logger.info(text)
-def warn(text: str): logger.warning(text)
-def err(text: str): logger.error(text)
-def crit(text: str): logger.critical(text)
+l = get_logger(__name__)


def assemble_archive_path(filename: str, extension: str = None, date_time: dt_datetime = None, subdir: str = None) -> Tuple[Path, Path]:
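With this hunk the module-level debug/info/warn/err/crit wrappers around L.get_module_logger are gone; each module now obtains its own logger. For reference, the pattern as it appears throughout the rest of this diff looks like the sketch below, assuming get_logger returns a standard logging.Logger-style object (the logs module itself is not part of this hunk).

# Per-module logging pattern used throughout this refactor (sketch)
from sijapi.logs import get_logger

l = get_logger(__name__)

l.debug("fine-grained diagnostic detail")
l.info("normal operational message")
l.error("something went wrong")   # replaces the old err() wrapper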
@@ -79,7 +70,7 @@ def assemble_archive_path(filename: str, extension: str = None, date_time: dt_da
    filename = f"{day_short} {timestamp} {sanitized_base}{final_extension}"

    relative_path = Path(year) / month / day / filename
-    absolute_path = Archivist.dir / relative_path
+    absolute_path = Dir.ARCHIVE / relative_path

    # Ensure the total path length doesn't exceed MAX_PATH_LENGTH
    while len(str(absolute_path)) > MAX_PATH_LENGTH and len(sanitized_base) > 0:

@@ -138,7 +129,7 @@ def assemble_journal_path(date_time: dt_datetime, subdir: str = None, filename:
        relative_path = relative_path / filename

    else:
-        debug(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.")
+        l.debug(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.")
        return None, None

    absolute_path = OBSIDIAN_VAULT_DIR / relative_path

@@ -194,7 +185,7 @@ def contains_profanity(content: str, threshold: float = 0.01, custom_words: Opti
    content_profanity_count = sum(1 for word in word_list if profanity.contains_profanity(word))
    content_profanity_ratio = content_profanity_count / len(word_list) if word_list else 0

-    debug(f"Profanity ratio for content: {content_profanity_ratio}")
+    l.debug(f"Profanity ratio for content: {content_profanity_ratio}")
    return content_profanity_ratio >= threshold


@@ -204,15 +195,15 @@ def load_filter_lists(blocklists_dir: Path):
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                rules.extend(file.read().splitlines())
-                info(f"Loaded blocklist: {file_path.name}")
+                l.info(f"Loaded blocklist: {file_path.name}")
        except Exception as e:
-            err(f"Error loading blocklist {file_path.name}: {str(e)}")
+            l.error(f"Error loading blocklist {file_path.name}: {str(e)}")
    return rules


def initialize_adblock_rules(blocklists_dir: Path):
    rules = load_filter_lists(blocklists_dir)
-    info(f"Initialized AdblockRules with {len(rules)} rules")
+    l.info(f"Initialized AdblockRules with {len(rules)} rules")
    return AdblockRules(rules)


@@ -228,14 +219,14 @@ def get_extension(file):
        return file_extension

    except Exception as e:
-        err(f"Unable to get extension of {file}")
+        l.error(f"Unable to get extension of {file}")
        raise e


def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LENGTH):
    """Sanitize a string to be used as a safe filename while protecting the file extension."""
-    debug(f"Filename before sanitization: {text}")
+    l.debug(f"Filename before sanitization: {text}")

    # Ensure text is a string
    text = str(text)

@@ -253,7 +244,7 @@ def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LE
        base_name = base_name[:max_base_length - 5].rstrip()
        final_filename = base_name + extension

-    debug(f"Filename after sanitization: {final_filename}")
+    l.debug(f"Filename after sanitization: {final_filename}")
    return final_filename


@@ -264,16 +255,16 @@ def check_file_name(file_name, max_length=255):
    needs_sanitization = False

    if len(file_name) > max_length:
-        debug(f"Filename exceeds maximum length of {max_length}: {file_name}")
+        l.debug(f"Filename exceeds maximum length of {max_length}: {file_name}")
        needs_sanitization = True
    if re.search(ALLOWED_FILENAME_CHARS, file_name):
-        debug(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}")
+        l.debug(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}")
        needs_sanitization = True
    if re.search(r'\s{2,}', file_name):
-        debug(f"Filename contains multiple consecutive spaces: {file_name}")
+        l.debug(f"Filename contains multiple consecutive spaces: {file_name}")
        needs_sanitization = True
    if file_name != file_name.strip():
-        debug(f"Filename has leading or trailing spaces: {file_name}")
+        l.debug(f"Filename has leading or trailing spaces: {file_name}")
        needs_sanitization = True

    return needs_sanitization
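As a small illustration of how the two filename helpers above fit together: check_file_name flags names that need cleanup, and sanitize_filename produces the cleaned name. The input string and the ".pdf" extension argument below are invented for this example; whether the extension should include the leading dot is an assumption based on the concatenation shown in the hunk.

# Illustrative call only; example values are made up.
raw_name = "  Quarterly   report: Q3 *final*.pdf  "

if check_file_name(raw_name):                        # flags bad chars, space runs, padding
    safe_name = sanitize_filename(raw_name, extension=".pdf")
else:
    safe_name = raw_name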
@@ -316,13 +307,13 @@ async def ocr_pdf(file_path: str) -> str:
        texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, image) for image in images))
        return ' '.join(texts)
    except Exception as e:
-        err(f"Error during OCR: {str(e)}")
+        l.error(f"Error during OCR: {str(e)}")
        return ""


async def extract_text_from_pdf(file_path: str) -> str:
    if not await is_valid_pdf(file_path):
-        err(f"Invalid PDF file: {file_path}")
+        l.error(f"Invalid PDF file: {file_path}")
        return ""

    text = ''

@@ -340,7 +331,7 @@ async def extract_text_from_pdf(file_path: str) -> str:
        if text and not should_use_ocr(text, num_pages):
            return clean_text(text)
    except Exception as e:
-        err(f"Error extracting text with PyPDF2: {str(e)}")
+        l.error(f"Error extracting text with PyPDF2: {str(e)}")

    # If PyPDF2 extraction fails or is insufficient, fall back to pdfminer.six
    try:

@@ -348,10 +339,10 @@ async def extract_text_from_pdf(file_path: str) -> str:
        if text_pdfminer and not should_use_ocr(text_pdfminer, num_pages):
            return clean_text(text_pdfminer)
    except Exception as e:
-        err(f"Error extracting text with pdfminer.six: {e}")
+        l.error(f"Error extracting text with pdfminer.six: {e}")

    # If both methods fail or are deemed insufficient, use OCR as the last resort
-    debug("Falling back to OCR for text extraction...")
+    l.debug("Falling back to OCR for text extraction...")
    return await ocr_pdf(file_path)

async def is_valid_pdf(file_path: str) -> bool:

@@ -360,12 +351,12 @@ async def is_valid_pdf(file_path: str) -> bool:
        kind = filetype.guess(file_path)
        return kind.mime == 'application/pdf'
    except Exception as e:
-        err(f"Error checking file type: {e}")
+        l.error(f"Error checking file type: {e}")
        return False

async def extract_text_from_pdf(file_path: str) -> str:
    if not await is_valid_pdf(file_path):
-        err(f"Invalid PDF file: {file_path}")
+        l.error(f"Invalid PDF file: {file_path}")
        return ""

    text = ''

@@ -377,23 +368,23 @@ async def extract_text_from_pdf(file_path: str) -> str:
        if text.strip(): # Successfully extracted text
            return clean_text(text)
    except Exception as e:
-        err(f"Error extracting text with PyPDF2: {str(e)}")
+        l.error(f"Error extracting text with PyPDF2: {str(e)}")

    try:
        text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
        if text_pdfminer.strip(): # Successfully extracted text
            return clean_text(text_pdfminer)
    except Exception as e:
-        err(f"Error extracting text with pdfminer.six: {str(e)}")
+        l.error(f"Error extracting text with pdfminer.six: {str(e)}")

    # Fall back to OCR
-    debug("Falling back to OCR for text extraction...")
+    l.debug("Falling back to OCR for text extraction...")
    try:
        images = convert_from_path(file_path)
        ocr_texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, img) for img in images))
        return ' '.join(ocr_texts).strip()
    except Exception as e:
-        err(f"OCR failed: {str(e)}")
+        l.error(f"OCR failed: {str(e)}")
        return ""

async def extract_text_from_docx(file_path: str) -> str:
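The extraction path above tries PyPDF2 first, then pdfminer.six, then Tesseract OCR as the last resort. Since the helpers are coroutines, a caller drives them with asyncio; the snippet below is a usage sketch with a placeholder file path.

# Driving the async PDF helpers above; the path is a placeholder.
import asyncio

async def demo(path: str) -> None:
    if not await is_valid_pdf(path):
        print(f"{path} does not look like a PDF")
        return
    text = await extract_text_from_pdf(path)   # PyPDF2 -> pdfminer.six -> OCR fallback
    print(f"extracted {len(text)} characters")

asyncio.run(demo("/tmp/example.pdf"))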
@@ -496,7 +487,7 @@ def encode_image_to_base64(image_path):
        base64_str = base64.b64encode(byte_data).decode('utf-8')
        return base64_str
    else:
-        debug(f"Error: File does not exist at {image_path}")
+        l.debug(f"Error: File does not exist at {image_path}")

def resize_and_convert_image(image_path, max_size=2160, quality=80):
    with Image.open(image_path) as img:

@@ -534,13 +525,13 @@ def download_file(url, folder):
                with open(filepath, 'wb') as f:
                    f.write(response.content)
            else:
-                err(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}")
+                l.error(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}")
                return None
        else:
-            err(f"Failed to download image: {url}, status code: {response.status_code}")
+            l.error(f"Failed to download image: {url}, status code: {response.status_code}")
            return None
    except Exception as e:
-        err(f"Failed to download image: {url}, error: {str(e)}")
+        l.error(f"Failed to download image: {url}, error: {str(e)}")
        return None
    return filename


@@ -599,7 +590,7 @@ async def run_ssh_command(server, command):
        ssh.close()
        return output, error
    except Exception as e:
-        err(f"SSH command failed for server {server.id}: {str(e)}")
+        l.error(f"SSH command failed for server {server.id}: {str(e)}")
        raise


@@ -611,7 +602,7 @@ async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]
            async with session.get(url) as response:
                html_content = await response.text()
    else:
-        err(f"Unable to convert nothing to markdown.")
+        l.error(f"Unable to convert nothing to markdown.")
        return None

    # Use readability to extract the main content
|
||||||
|
|
||||||
return markdown_content
|
return markdown_content
|
||||||
|
|
||||||
|
|
||||||
def json_serial(obj: Any) -> Any:
|
|
||||||
"""JSON serializer for objects not serializable by default json code"""
|
|
||||||
if isinstance(obj, (datetime, date)):
|
|
||||||
return obj.isoformat()
|
|
||||||
if isinstance(obj, time):
|
|
||||||
return obj.isoformat()
|
|
||||||
if isinstance(obj, Decimal):
|
|
||||||
return float(obj)
|
|
||||||
if isinstance(obj, UUID):
|
|
||||||
return str(obj)
|
|
||||||
if isinstance(obj, bytes):
|
|
||||||
return obj.decode('utf-8')
|
|
||||||
if isinstance(obj, Path):
|
|
||||||
return str(obj)
|
|
||||||
if hasattr(obj, '__dict__'):
|
|
||||||
return obj.__dict__
|
|
||||||
raise TypeError(f"Type {type(obj)} not serializable")
|
|
||||||
|
|
||||||
def json_dumps(obj: Any) -> str:
|
|
||||||
"""
|
|
||||||
Serialize obj to a JSON formatted str using the custom serializer.
|
|
||||||
"""
|
|
||||||
return json.dumps(obj, default=json_serial)
|
|
||||||
|
|
||||||
def json_loads(json_str: str) -> Any:
|
|
||||||
"""
|
|
||||||
Deserialize json_str to a Python object.
|
|
||||||
"""
|
|
||||||
return json.loads(json_str)
|
|