Latest updates
parent 52032e4084, commit 3eea22172f
31 changed files with 2487 additions and 1231 deletions
data/calfire_thp_data.json (Normal file, 69 lines added)
@@ -0,0 +1,69 @@
[
  {
    "Land Owner": "SIERRA PACIFIC INDUSTRIES",
    "Location": "HBM: T2N R3E S17 ; HBM: \nT2N R3E S18 ; HBM: T2N \nR3E S7 ; HBM: T2N R3E S8",
    "PLSS Coordinates": [
      "HBM: T2N R3E S17",
      "HBM: T2N R3E S8"
    ]
  },
  {
    "Land Owner": "1/29/2024\n2/8/2024\n \n7/12/2024\n \n515.00\nBALLARD RESERVOIR\n(5526.510202);GRAVEN \nRESERVOIR\n(5526.510301);RALSTON \nGULCH(5526.510201)\nMDBM: T41N R10E S25 ; \nMDBM: T41N R10E S26 ; \nMDBM: T41N R10E S27 ; \nMDBM: T41N R10E S28 ; \nMDBM: T41N R10E S33 ; \nMDBM: T41N R10E S34 ; \nMDBM: T41N R10E S35 ; \nMDBM: T41N R10E S36 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY",
    "Location": "MDBM:",
    "PLSS Coordinates": []
  },
  {
    "Land Owner": "2/16/2024\n2/22/2024\n \n7/26/2024\n \n520.00\nARMENTROUT FLAT \n(5526.620003)\n(5526.620003);JIMMERSON \nSPRING\n(5526.610104);MOSQUITO \nLAKE(5526.420403)\nMDBM: T40N R5E S13 ; \nMDBM: T40N R5E S14 ; \nMDBM: T40N R5E S22 ; \nMDBM: T40N R5E S23 ; \nMDBM: T40N R5E S24 ; \nMDBM: T40N R5E S25 ; \nMDBM: T40N R5E S26 ; \nMDBM: T40N R5E S36 ; \nMDBM: T40N R6E S19 ; \nMDBM: T40N R6E S30 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY \nPage: \n7\n of \n10\n\nTimber Harvesting Plans\nHarvest Document\nReceived\nFiled\nApproval\nTentative \nEnd of \nPublic \nComments\nPublic \nComment \nPeriod \nClosed\nTotal\nAcres\nWatershed\nLocation\nLand Owner(s)",
    "Location": "MDBM:",
    "PLSS Coordinates": []
  },
  {
    "Land Owner": "3/5/2024\n3/14/2024\n6/25/2024\n6/21/2024\n6/21/2024\n968.00\nLOWER BUTTE CREEK\n(5526.360103);POISON LAKE\n(5526.360201)\nMDBM: T33N R7E S16 ; \nMDBM: T33N R7E S17 ; \nMDBM: T33N R7E S18 ; \nMDBM: T33N R7E S19 ; \nMDBM: T33N R7E S20 ; \nMDBM: T33N R7E S33 ; \nMDBM: T33N R7E S4 ; \nMDBM: T33N R7E S5 ; \nMDBM: T33N R7E S8 ; \nMDBM: T33N R7E S9 ; \nMDBM: T34N R7E S27 ; \nMDBM: T34N R7E S33 ; \nMDBM: T34N R7E S34 ; \nMDBM: T34N R7E S35 ; \nMDBM: T34N R7E S36 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY",
    "Location": "MDBM:",
    "PLSS Coordinates": []
  },
  {
    "Land Owner": "5/13/2024\n5/23/2024\n \n7/29/2024\n \n351.00\nCEDAR CREEK (1106.400710)\n(1106.400710);COPPER \nCREEK (1106.400704)\n(1106.400704);SQUIRREL \nGULCH(1106.400701)\nMDBM: T36N R7W S3 ; \nMDBM: T37N R7W S21 ; \nMDBM: T37N R7W S22 ; \nMDBM: T37N R7W S27 ; \nMDBM: T37N R7W S33 ; \nMDBM: T37N R7W S35 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY",
    "Location": "MDBM:",
    "PLSS Coordinates": []
  },
  {
    "Land Owner": "4/10/2024\n4/18/2024\n \n5/13/2024\n \n362.00\nCHASE CREEK (8638.000201)\n(8638.000201)\nMDBM: T34N R12E S21 ; \nMDBM: T34N R12E S22 ; \nMDBM: T34N R12E S27 ; \nMDBM: T34N R12E S28 ; \nMDBM: T34N R12E S33 ; \nMDBM: T34N R12E S34 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY \nPage: \n8\n of \n10\n\nTimber Harvesting Plans\nHarvest Document\nReceived\nFiled\nApproval\nTentative \nEnd of \nPublic \nComments\nPublic \nComment \nPeriod \nClosed\nTotal\nAcres\nWatershed\nLocation\nLand Owner(s)",
    "Location": "MDBM:",
    "PLSS Coordinates": []
  },
  {
    "Land Owner": "6/28/2024\n7/3/2024\n \n7/29/2024\n \n500.00\nMCCARTY CREEK\n(5509.630203);PANTHER \nSPRING\n(5509.630202);REFUGE\n(5509.630201)\nMDBM: T27N R2E S1 ; \nMDBM: T27N R2E S10 ; \nMDBM: T27N R2E S11 ; \nMDBM: T27N R2E S2 ; \nMDBM: T27N R2E S3 ; \nMDBM: T27N R3E S5 ; \nMDBM: T27N R3E S6 ; \nMDBM: T28N R2E S34 ; \nMDBM: T28N R2E S35 ; \nMDBM: T28N R2E S36 ; \nMDBM: T28N R3E S31 ; \nMDBM: T28N R3E S32 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY",
    "Location": "MDBM:",
    "PLSS Coordinates": []
  },
  {
    "Land Owner": "SIERRA PACIFIC INDUSTRIES",
    "Location": "MDBM: T32N R1E S12 ; \nMDBM: T32N R1E S13 ; \nMDBM: T32N R1E S14 ; \nMDBM: T32N R1E S23 ; \nMDBM: T32N R1E S24 ; \nMDBM: T32N R2E S18 ; \nMDBM: T32N R2E S19 ; \nMDBM: T32N R2E S7",
    "PLSS Coordinates": [
      "MDBM: T32N R1E S12",
      "MDBM: T32N R1E S13",
      "MDBM: T32N R1E S14",
      "MDBM: T32N R1E S23",
      "MDBM: T32N R1E S24",
      "MDBM: T32N R2E S18",
      "MDBM: T32N R2E S19",
      "MDBM: T32N R2E S7"
    ]
  },
  {
    "Land Owner": "7/19/2024\n \n \n8/5/2024\n \n562.00\nBILLIE GULCH\n(1106.400703);CLAWTON \nGULCH\n(1106.400804);HATCHET \nCREEK(1106.400705);HAY \nGULCH\n(1106.400808);NELSON \nCREEK (1106.400702)\n(1106.400702)\nMDBM: T36N R7W S13 ; \nMDBM: T36N R7W S23 ; \nMDBM: T36N R7W S25 ; \nMDBM: T36N R7W S27 ; \nMDBM: T36N R7W S33 ; \nMDBM: T36N R7W S34 ; \nMDBM: T36N R7W S35 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY \n2-24NTMP-00004-SHA\n7/19/2024\n \n \n9/2/2024\n \n480.00\nLOWER SODA CREEK\n(5525.210202)\nMDBM: T38N R4W S11 ; \nMDBM: T38N R4W S12 ; \nMDBM: T38N R4W S14 ; \nMDBM: T38N R4W S2 \nCASTLE CRAGS LLC",
    "Location": "MDBM: T38N R4W S11 ; \nMDBM: T38N R4W S12 ; \nMDBM: T38N R4W S14 ; \nMDBM: T38N R4W S2 \nCASTLE CRAGS LLC",
    "PLSS Coordinates": [
      "MDBM: T38N R4W S11",
      "MDBM: T38N R4W S12",
      "MDBM: T38N R4W S14",
      "MDBM: T38N R4W S2"
    ]
  },
  {
    "Land Owner": "5/31/2024\n6/6/2024\n \n7/19/2024\n \n497.00\nLITTLE SILVER CREEK\n(5514.330206);PEAVINE \nCREEK(5514.330101);UNION \nVALLEY RESERVOIR\n(5514.340301)\nMDBM: T12N R14E S28 ; \nMDBM: T12N R14E S29 ; \nMDBM: T12N R14E S32 \nSIERRA PACIFIC LAND & TIMBER \nCOMPANY \n81\n7/22/2024 12:18:13 PM\nPage: \n10\n of \n10",
    "Location": "MDBM:",
    "PLSS Coordinates": []
  }
]
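The new data file can be consumed directly. A minimal sketch, assuming the repository layout shown in this commit (path and key names taken from the file above; nothing else is implied):

```python
# Illustrative only: count how many scraped THP records carry parsed PLSS coordinates.
import json
from pathlib import Path

records = json.loads(Path("data/calfire_thp_data.json").read_text())
with_coords = [r for r in records if r["PLSS Coordinates"]]
print(f"{len(with_coords)} of {len(records)} records have parsed PLSS coordinates")
```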
@@ -1,16 +1,12 @@
 # __init__.py
 import os
-import json
-import yaml
 from pathlib import Path
 import ipaddress
 import multiprocessing
 from dotenv import load_dotenv
 from dateutil import tz
 from pathlib import Path
-from pydantic import BaseModel
-from typing import List, Optional
-from .classes import AutoResponder, IMAPConfig, SMTPConfig, EmailAccount, EmailContact, IncomingEmail, Database, Geocoder, APIConfig, Configuration
+from .classes import Database, Geocoder, APIConfig, Configuration
 from .logs import Logger
 
 # INITIALization

@@ -25,15 +21,15 @@ L = Logger("Central", LOGS_DIR)
 # API essentials
 API = APIConfig.load('api', 'secrets')
 Dir = Configuration.load('dirs')
 HOST = f"{API.BIND}:{API.PORT}"
 LOCAL_HOSTS = [ipaddress.ip_address(localhost.strip()) for localhost in os.getenv('LOCAL_HOSTS', '127.0.0.1').split(',')] + ['localhost']
 SUBNET_BROADCAST = os.getenv("SUBNET_BROADCAST", '10.255.255.255')
 MAX_CPU_CORES = min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count()/2))), multiprocessing.cpu_count())
 
 DB = Database.from_env()
 
-News = Configuration.load('news', 'secrets')
 IMG = Configuration.load('img', 'secrets')
+News = Configuration.load('news', 'secrets')
+Scrape = Configuration.load('scrape', 'secrets', Dir)
 
 # Directories & general paths
 ROUTER_DIR = BASE_DIR / "routers"

@@ -98,7 +94,6 @@ SUMMARY_TOKEN_LIMIT = int(os.getenv("SUMMARY_TOKEN_LIMIT", 16384))
 SUMMARY_INSTRUCT = os.getenv('SUMMARY_INSTRUCT', "You are an AI assistant that provides accurate summaries of text -- nothing more and nothing less. You must not include ANY extraneous text other than the sumary. Do not include comments apart from the summary, do not preface the summary, and do not provide any form of postscript. Do not add paragraph breaks. Do not add any kind of formatting. Your response should begin with, consist of, and end with an accurate plaintext summary.")
 SUMMARY_INSTRUCT_TTS = os.getenv('SUMMARY_INSTRUCT_TTS', "You are an AI assistant that provides email summaries for Sanjay. Your response will undergo Text-To-Speech conversion and added to Sanjay's private podcast. Providing adequate context (Sanjay did not send this question to you, he will only hear your response) but aiming for conciseness and precision, and bearing in mind the Text-To-Speech conversion (avoiding acronyms and formalities), summarize the following email.")
-
 
 # Stable diffusion
 IMG_DIR = DATA_DIR / "img" / "images"
 os.makedirs(IMG_DIR, exist_ok=True)

@@ -130,7 +125,6 @@ TTS_SEGMENTS_DIR = TTS_DIR / 'segments'
 os.makedirs(TTS_SEGMENTS_DIR, exist_ok=True)
 ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
-
 
 # Calendar & email account
 MS365_TOGGLE = True if os.getenv("MS365_TOGGLE") == "True" else False
 ICAL_TOGGLE = True if os.getenv("ICAL_TOGGLE") == "True" else False

@@ -185,19 +179,17 @@ CADDY_API_KEY = os.getenv("CADDY_API_KEY")
 # Microsoft Graph
 MS365_CLIENT_ID = os.getenv('MS365_CLIENT_ID')
 MS365_SECRET = os.getenv('MS365_SECRET')
 MS365_TENANT_ID = os.getenv('MS365_TENANT_ID')
 MS365_CERT_PATH = CONFIG_DIR / 'MS365' / '.cert.pem' # deprecated
 MS365_KEY_PATH = CONFIG_DIR / 'MS365' / '.cert.key' # deprecated
 MS365_KEY = MS365_KEY_PATH.read_text()
 MS365_TOKEN_PATH = CONFIG_DIR / 'MS365' / '.token.txt'
 MS365_THUMBPRINT = os.getenv('MS365_THUMBPRINT')
-
 MS365_LOGIN_URL = os.getenv("MS365_LOGIN_URL", "https://login.microsoftonline.com")
 MS365_AUTHORITY_URL = f"{MS365_LOGIN_URL}/{MS365_TENANT_ID}"
 MS365_REDIRECT_PATH = os.getenv("MS365_REDIRECT_PATH", "https://api.sij.ai/o365/oauth_redirect")
 MS365_SCOPE = os.getenv("MS365_SCOPE", 'Calendars.Read,Calendars.ReadWrite,offline_access').split(',')
 
-
 # Maintenance
 GARBAGE_COLLECTION_INTERVAL = 60 * 60 # Run cleanup every hour
 GARBAGE_TTL = 60 * 60 * 24 # Delete files older than 24 hours
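Most of the tuning above comes from the environment. A hedged sketch of how LOCAL_HOSTS and MAX_CPU_CORES resolve, using made-up values (the real values live in the deployment's environment, not in this commit):

```python
# Hypothetical environment, for illustration only.
import os
import ipaddress
import multiprocessing

os.environ["LOCAL_HOSTS"] = "127.0.0.1, 100.64.64.11"
os.environ["MAX_CPU_CORES"] = "6"

local_hosts = [ipaddress.ip_address(h.strip()) for h in os.getenv("LOCAL_HOSTS", "127.0.0.1").split(",")] + ["localhost"]
max_cores = min(int(os.getenv("MAX_CPU_CORES", int(multiprocessing.cpu_count() / 2))), multiprocessing.cpu_count())
print(local_hosts, max_cores)  # MAX_CPU_CORES is clamped to the machine's actual core count
```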
@@ -28,8 +28,18 @@ parser.add_argument('--test', type=str, help='Load only the specified module.')
 args = parser.parse_args()
 
 L.setup_from_args(args)
-print(f"Debug modules after setup: {L.debug_modules}") # Debug print
+print(f"Debug modules after setup: {L.debug_modules}")
+logger = L.get_module_logger("main")
+def debug(text: str): logger.debug(text)
+debug(f"Debug message.")
+def info(text: str): logger.info(text)
+info(f"Info message.")
+def warn(text: str): logger.warning(text)
+warn(f"Warning message.")
+def err(text: str): logger.error(text)
+err(f"Error message.")
+def crit(text: str): logger.critical(text)
+crit(f"Critical message.")
 
 app = FastAPI()
 app.add_middleware(

@@ -54,13 +64,13 @@ class SimpleAPIKeyMiddleware(BaseHTTPMiddleware):
         if api_key_header:
             api_key_header = api_key_header.lower().split("bearer ")[-1]
         if api_key_header not in API.KEYS and api_key_query not in API.KEYS:
-            L.ERR(f"Invalid API key provided by a requester.")
+            err(f"Invalid API key provided by a requester.")
             return JSONResponse(
                 status_code=401,
                 content={"detail": "Invalid or missing API key"}
             )
         response = await call_next(request)
-        # L.DEBUG(f"Request from {client_ip} is complete")
+        # debug(f"Request from {client_ip} is complete")
         return response
 
 # Add the middleware to your FastAPI app

@@ -68,8 +78,8 @@ app.add_middleware(SimpleAPIKeyMiddleware)
 
 @app.exception_handler(HTTPException)
 async def http_exception_handler(request: Request, exc: HTTPException):
-    L.ERR(f"HTTP Exception: {exc.status_code} - {exc.detail}")
-    L.ERR(f"Request: {request.method} {request.url}")
+    err(f"HTTP Exception: {exc.status_code} - {exc.detail}")
+    err(f"Request: {request.method} {request.url}")
     return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
 
 

@@ -106,8 +116,8 @@ def main(argv):
     if args.test:
         load_router(args.test)
     else:
-        L.logger.critical(f"sijapi launched")
-        L.logger.critical(f"Arguments: {args}")
+        crit(f"sijapi launched")
+        crit(f"Arguments: {args}")
         for module_name in API.MODULES.__fields__:
             if getattr(API.MODULES, module_name):
                 load_router(module_name)
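For context, a client-side sketch of a request that would satisfy SimpleAPIKeyMiddleware. The base URL, endpoint path, and key below are placeholders, not values taken from this commit:

```python
# Hypothetical client call; adjust host, port, path, and key for the actual deployment.
import requests

API_BASE = "http://localhost:4444"   # stands in for f"{API.BIND}:{API.PORT}"
API_KEY = "example-key"              # must be one of the entries in API.KEYS

resp = requests.get(f"{API_BASE}/some/route",
                    headers={"Authorization": f"Bearer {API_KEY}"})
print(resp.status_code)
```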
@@ -1,36 +1,27 @@
 # classes.py
 import asyncio
 import json
+import yaml
 import math
 import os
 import re
-from concurrent.futures import ThreadPoolExecutor
-from contextlib import asynccontextmanager
-from datetime import datetime, timedelta, timezone
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union, TypeVar, Type
-from zoneinfo import ZoneInfo
 import aiofiles
 import aiohttp
 import asyncpg
 import reverse_geocoder as rg
-import yaml
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Union, TypeVar
 from dotenv import load_dotenv
 from pydantic import BaseModel, Field, create_model
-from srtm import get_data
+from concurrent.futures import ThreadPoolExecutor
+from contextlib import asynccontextmanager
+from datetime import datetime, timedelta, timezone
 from timezonefinder import TimezoneFinder
+from zoneinfo import ZoneInfo
+from srtm import get_data
 
 T = TypeVar('T', bound='Configuration')
-
-
-import os
-from pathlib import Path
-from typing import Union, Optional, Any, Dict, List
-import yaml
-import re
-from pydantic import BaseModel, create_model
-from dotenv import load_dotenv
 
 class Configuration(BaseModel):
     HOME: Path = Path.home()
     _dir_config: Optional['Configuration'] = None
@@ -40,38 +31,50 @@ class Configuration(BaseModel):
         yaml_path = cls._resolve_path(yaml_path, 'config')
         if secrets_path:
             secrets_path = cls._resolve_path(secrets_path, 'config')
 
         try:
             with yaml_path.open('r') as file:
                 config_data = yaml.safe_load(file)
 
             print(f"Loaded configuration data from {yaml_path}")
 
             if secrets_path:
                 with secrets_path.open('r') as file:
                     secrets_data = yaml.safe_load(file)
                 print(f"Loaded secrets data from {secrets_path}")
-                config_data.update(secrets_data)
+                # If config_data is a list, apply secrets to each item
+                if isinstance(config_data, list):
+                    for item in config_data:
+                        if isinstance(item, dict):
+                            item.update(secrets_data)
+                else:
+                    config_data.update(secrets_data)
+
+            # If config_data is a list, create a dict with a single key
+            if isinstance(config_data, list):
+                config_data = {"configurations": config_data}
 
             # Ensure HOME is set
             if config_data.get('HOME') is None:
                 config_data['HOME'] = str(Path.home())
                 print(f"HOME was None in config, set to default: {config_data['HOME']}")
 
             load_dotenv()
 
             instance = cls.create_dynamic_model(**config_data)
             instance._dir_config = dir_config or instance
 
             resolved_data = instance.resolve_placeholders(config_data)
             instance = cls.create_dynamic_model(**resolved_data)
             instance._dir_config = dir_config or instance
 
             return instance
         except Exception as e:
             print(f"Error loading configuration: {str(e)}")
             raise
 
 
     @classmethod
     def _resolve_path(cls, path: Union[str, Path], default_dir: str) -> Path:
         base_path = Path(__file__).parent.parent # This will be two levels up from this file
@@ -92,28 +95,50 @@ class Configuration(BaseModel):
         else:
             return data
 
+    def resolve_placeholders(self, data: Any) -> Any:
+        if isinstance(data, dict):
+            resolved_data = {k: self.resolve_placeholders(v) for k, v in data.items()}
+
+            # Special handling for directory placeholders
+            home = Path(resolved_data.get('HOME', self.HOME)).expanduser()
+            sijapi = home / "workshop" / "sijapi"
+            data_dir = sijapi / "data"
+
+            resolved_data['HOME'] = str(home)
+            resolved_data['SIJAPI'] = str(sijapi)
+            resolved_data['DATA'] = str(data_dir)
+
+            return resolved_data
+        elif isinstance(data, list):
+            return [self.resolve_placeholders(v) for v in data]
+        elif isinstance(data, str):
+            return self.resolve_string_placeholders(data)
+        else:
+            return data
+
     def resolve_string_placeholders(self, value: str) -> Any:
         pattern = r'\{\{\s*([^}]+)\s*\}\}'
         matches = re.findall(pattern, value)
 
         for match in matches:
             parts = match.split('.')
             if len(parts) == 1: # Internal reference
-                replacement = getattr(self._dir_config, parts[0], str(Path.home() / parts[0].lower()))
+                replacement = getattr(self, parts[0], str(Path.home() / parts[0].lower()))
             elif len(parts) == 2 and parts[0] == 'Dir':
-                replacement = getattr(self._dir_config, parts[1], str(Path.home() / parts[1].lower()))
+                replacement = getattr(self, parts[1], str(Path.home() / parts[1].lower()))
             elif len(parts) == 2 and parts[0] == 'ENV':
                 replacement = os.getenv(parts[1], '')
             else:
                 replacement = value # Keep original if not recognized
 
             value = value.replace('{{' + match + '}}', str(replacement))
 
         # Convert to Path if it looks like a file path
         if isinstance(value, str) and (value.startswith(('/', '~')) or (':' in value and value[1] == ':')):
             return Path(value).expanduser()
         return value
 
 
     @classmethod
     def create_dynamic_model(cls, **data):
         for key, value in data.items():
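As an aside, a standalone sketch of the {{ ... }} placeholder syntax handled by resolve_string_placeholders above. The config value and environment variable are invented for illustration, and the internal-reference branch is stubbed with Path.home() where the real method uses getattr(self, ...):

```python
import os
import re
from pathlib import Path

pattern = r'\{\{\s*([^}]+)\s*\}\}'
os.environ["PORT"] = "4444"                       # hypothetical environment value
value = "{{HOME}}/workshop/sijapi/logs/{{ENV.PORT}}.log"

for match in re.findall(pattern, value):
    parts = match.split('.')
    if len(parts) == 1:                           # internal reference (HOME, DATA, ...)
        replacement = str(Path.home())
    elif len(parts) == 2 and parts[0] == 'ENV':   # environment lookup
        replacement = os.getenv(parts[1], '')
    else:
        replacement = match                       # leave unrecognized placeholders alone
    value = value.replace('{{' + match + '}}', str(replacement))

print(value)   # e.g. /Users/sij/workshop/sijapi/logs/4444.log
```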
@@ -121,7 +146,7 @@ class Configuration(BaseModel):
                 data[key] = cls.create_dynamic_model(**value)
             elif isinstance(value, list) and all(isinstance(item, dict) for item in value):
                 data[key] = [cls.create_dynamic_model(**item) for item in value]
 
         DynamicModel = create_model(
             f'Dynamic{cls.__name__}',
             __base__=cls,

@@ -133,7 +158,11 @@ class Configuration(BaseModel):
         extra = "allow"
         arbitrary_types_allowed = True
 
+from pydantic import BaseModel, create_model
+from typing import Any, Dict, List, Union
+from pathlib import Path
+import yaml
+import re
 
 class APIConfig(BaseModel):
     HOST: str

@@ -143,8 +172,10 @@ class APIConfig(BaseModel):
     PUBLIC: List[str]
     TRUSTED_SUBNETS: List[str]
     MODULES: Any # This will be replaced with a dynamic model
+    EXTENSIONS: Any # This will be replaced with a dynamic model
     TZ: str
     KEYS: List[str]
+    GARBAGE: Dict[str, Any]
 
     @classmethod
     def load(cls, config_path: Union[str, Path], secrets_path: Union[str, Path]):
@@ -154,9 +185,9 @@ class APIConfig(BaseModel):
         # Load main configuration
         with open(config_path, 'r') as file:
             config_data = yaml.safe_load(file)
 
         print(f"Loaded main config: {config_data}") # Debug print
 
         # Load secrets
         try:
             with open(secrets_path, 'r') as file:

@@ -168,12 +199,12 @@ class APIConfig(BaseModel):
         except yaml.YAMLError as e:
             print(f"Error parsing secrets YAML: {e}")
             secrets_data = {}
 
         # Resolve internal placeholders
         config_data = cls.resolve_placeholders(config_data)
 
         print(f"Resolved config: {config_data}") # Debug print
 
         # Handle KEYS placeholder
         if isinstance(config_data.get('KEYS'), list) and len(config_data['KEYS']) == 1:
             placeholder = config_data['KEYS'][0]

@@ -189,23 +220,29 @@ class APIConfig(BaseModel):
                 print(f"Secret key '{secret_key}' not found in secrets file")
             else:
                 print(f"Invalid secret placeholder format: {placeholder}")
 
         # Create dynamic ModulesConfig
-        modules_data = config_data.get('MODULES', {})
-        modules_fields = {}
-        for key, value in modules_data.items():
-            if isinstance(value, str):
-                modules_fields[key] = (bool, value.lower() == 'on')
-            elif isinstance(value, bool):
-                modules_fields[key] = (bool, value)
-            else:
-                raise ValueError(f"Invalid value for module {key}: {value}. Must be 'on', 'off', True, or False.")
-
-        DynamicModulesConfig = create_model('DynamicModulesConfig', **modules_fields)
-        config_data['MODULES'] = DynamicModulesConfig(**modules_data)
+        config_data['MODULES'] = cls._create_dynamic_config(config_data.get('MODULES', {}), 'DynamicModulesConfig')
+
+        # Create dynamic ExtensionsConfig
+        config_data['EXTENSIONS'] = cls._create_dynamic_config(config_data.get('EXTENSIONS', {}), 'DynamicExtensionsConfig')
 
         return cls(**config_data)
 
+    @classmethod
+    def _create_dynamic_config(cls, data: Dict[str, Any], model_name: str):
+        fields = {}
+        for key, value in data.items():
+            if isinstance(value, str):
+                fields[key] = (bool, value.lower() == 'on')
+            elif isinstance(value, bool):
+                fields[key] = (bool, value)
+            else:
+                raise ValueError(f"Invalid value for {key}: {value}. Must be 'on', 'off', True, or False.")
+
+        DynamicConfig = create_model(model_name, **fields)
+        return DynamicConfig(**data)
+
     @classmethod
     def _resolve_path(cls, path: Union[str, Path], default_dir: str) -> Path:
         base_path = Path(__file__).parent.parent # This will be two levels up from this file
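A minimal sketch of what the new _create_dynamic_config helper does with an 'on'/'off' mapping. The module names below are examples only, not the project's actual MODULES list:

```python
from pydantic import create_model

data = {"asr": "on", "news": "off"}   # hypothetical toggle map from the API config
fields = {}
for key, value in data.items():
    fields[key] = (bool, value.lower() == 'on') if isinstance(value, str) else (bool, value)

DynamicModulesConfig = create_model('DynamicModulesConfig', **fields)
modules = DynamicModulesConfig(**data)
print(modules.asr, modules.news)      # True False
```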
@@ -235,23 +272,26 @@ class APIConfig(BaseModel):
                 resolved_data[key] = [resolve_value(item) for item in value]
             else:
                 resolved_data[key] = resolve_value(value)
 
         # Resolve BIND separately to ensure HOST and PORT are used
         if 'BIND' in resolved_data:
             resolved_data['BIND'] = resolved_data['BIND'].replace('{{ HOST }}', str(resolved_data['HOST']))
             resolved_data['BIND'] = resolved_data['BIND'].replace('{{ PORT }}', str(resolved_data['PORT']))
 
         return resolved_data
 
     def __getattr__(self, name: str) -> Any:
-        if name == 'MODULES':
-            return self.__dict__['MODULES']
+        if name in ['MODULES', 'EXTENSIONS']:
+            return self.__dict__[name]
         return super().__getattr__(name)
 
     @property
     def active_modules(self) -> List[str]:
         return [module for module, is_active in self.MODULES.__dict__.items() if is_active]
 
+    @property
+    def active_extensions(self) -> List[str]:
+        return [extension for extension, is_active in self.EXTENSIONS.__dict__.items() if is_active]
+
 
 class Location(BaseModel):

@@ -265,7 +305,7 @@ class Location(BaseModel):
     city: Optional[str] = None
     state: Optional[str] = None
     country: Optional[str] = None
     context: Optional[Dict[str, Any]] = None
     class_: Optional[str] = None
     type: Optional[str] = None
     name: Optional[str] = None

@@ -286,6 +326,8 @@ class Location(BaseModel):
         }
 
+
+
 
 class Geocoder:
     def __init__(self, named_locs: Union[str, Path] = None, cache_file: Union[str, Path] = 'timezone_cache.json'):
         self.tf = TimezoneFinder()
@@ -319,20 +361,20 @@ class Geocoder:
     def find_override_location(self, lat: float, lon: float) -> Optional[str]:
         closest_location = None
         closest_distance = float('inf')
 
         for location in self.override_locations:
             loc_name = location.get("name")
             loc_lat = location.get("latitude")
             loc_lon = location.get("longitude")
             loc_radius = location.get("radius")
 
             distance = self.haversine(lat, lon, loc_lat, loc_lon)
 
             if distance <= loc_radius:
                 if distance < closest_distance:
                     closest_distance = distance
                     closest_location = loc_name
 
         return closest_location
 
     async def location(self, lat: float, lon: float):

@@ -346,7 +388,7 @@ class Geocoder:
     async def elevation(self, latitude: float, longitude: float, unit: str = "m") -> float:
         loop = asyncio.get_running_loop()
         elevation = await loop.run_in_executor(self.executor, self.srtm_data.get_elevation, latitude, longitude)
 
         if unit == "m":
             return elevation
         elif unit == "km":

@@ -362,12 +404,12 @@ class Geocoder:
         return ZoneInfo(timezone_str) if timezone_str else None
 
 
 
     async def lookup(self, lat: float, lon: float):
         city, state, country = (await self.location(lat, lon))[0]['name'], (await self.location(lat, lon))[0]['admin1'], (await self.location(lat, lon))[0]['cc']
         elevation = await self.elevation(lat, lon)
         timezone = await self.timezone(lat, lon)
 
         return {
             "city": city,
             "state": state,

@@ -379,12 +421,12 @@ class Geocoder:
     async def code(self, locations: Union[Location, Tuple[float, float], List[Union[Location, Tuple[float, float]]]]) -> Union[Location, List[Location]]:
         if isinstance(locations, (Location, tuple)):
             locations = [locations]
 
         processed_locations = []
         for loc in locations:
             if isinstance(loc, tuple):
                 processed_locations.append(Location(
                     latitude=loc[0],
                     longitude=loc[1],
                     datetime=datetime.now(timezone.utc)
                 ))

@@ -396,12 +438,11 @@ class Geocoder:
                 raise ValueError(f"Unsupported location type: {type(loc)}")
 
         coordinates = [(location.latitude, location.longitude) for location in processed_locations]
 
         geocode_results = await asyncio.gather(*[self.location(lat, lon) for lat, lon in coordinates])
         elevations = await asyncio.gather(*[self.elevation(lat, lon) for lat, lon in coordinates])
         timezone_results = await asyncio.gather(*[self.timezone(lat, lon) for lat, lon in coordinates])
-
 
         def create_display_name(override_name, result):
             parts = []
             if override_name:

@@ -446,13 +487,13 @@ class Geocoder:
     async def geocode_osm(self, latitude: float, longitude: float, email: str):
         url = f"https://nominatim.openstreetmap.org/reverse?format=json&lat={latitude}&lon={longitude}"
         headers = {
-            'User-Agent': f'sijapi/1.0 ({email})', # replace with your app name and email
+            'User-Agent': f'sijapi/1.0 ({email})',
         }
         async with aiohttp.ClientSession() as session:
             async with session.get(url, headers=headers) as response:
                 response.raise_for_status()
                 data = await response.json()
 
         address = data.get("address", {})
         elevation = await self.elevation(latitude, longitude)
         return Location(
@@ -465,7 +506,7 @@ class Geocoder:
             city=address.get("city"),
             state=address.get("state"),
             country=address.get("country"),
             context={},
             class_=data.get("class"),
             type=data.get("type"),
             name=data.get("name"),

@@ -481,7 +522,6 @@ class Geocoder:
             timezone=await self.timezone(latitude, longitude)
         )
 
-
     def round_coords(self, lat: float, lon: float, decimal_places: int = 2) -> Tuple[float, float]:
         return (round(lat, decimal_places), round(lon, decimal_places))
 

@@ -501,17 +541,17 @@ class Geocoder:
             not self.last_update or
             current_time - self.last_update > timedelta(hours=1) or
             not self.coords_equal(rounded_location, self.round_coords(*self.last_location) if self.last_location else (None, None))):
 
 
             new_timezone = await self.timezone(lat, lon)
             self.last_timezone = new_timezone
             self.last_update = current_time
             self.last_location = (lat, lon) # Store the original, non-rounded coordinates
             await self.tz_save()
 
         return self.last_timezone
 
 
     async def tz_save(self):
         cache_data = {
             'last_timezone': str(self.last_timezone) if self.last_timezone else None,

@@ -528,7 +568,7 @@ class Geocoder:
             self.last_timezone = ZoneInfo(cache_data['last_timezone']) if cache_data.get('last_timezone') else None
             self.last_update = datetime.fromisoformat(cache_data['last_update']) if cache_data.get('last_update') else None
             self.last_location = tuple(cache_data['last_location']) if cache_data.get('last_location') else None
 
         except (FileNotFoundError, json.JSONDecodeError):
             # If file doesn't exist or is invalid, we'll start fresh
             self.last_timezone = None

@@ -546,7 +586,7 @@ class Geocoder:
     async def tz_at(self, lat: float, lon: float) -> Optional[ZoneInfo]:
         """
         Get the timezone at a specific latitude and longitude without affecting the cache.
 
         :param lat: Latitude
         :param lon: Longitude
         :return: ZoneInfo object representing the timezone

@@ -556,7 +596,6 @@ class Geocoder:
     def __del__(self):
         self.executor.shutdown()
 
-
 class Database(BaseModel):
     host: str = Field(..., description="Database host")
     port: int = Field(5432, description="Database port")

@@ -596,7 +635,6 @@ class Database(BaseModel):
     def to_dict(self):
         return self.dict(exclude_none=True)
 
-
 class IMAPConfig(BaseModel):
     username: str
     password: str

@@ -621,7 +659,7 @@ class AutoResponder(BaseModel):
     image_prompt: Optional[str] = None
     image_scene: Optional[str] = None
     smtp: SMTPConfig
 
 class EmailAccount(BaseModel):
     name: str
     refresh: int

@@ -643,3 +681,12 @@ class IncomingEmail(BaseModel):
     subject: str
     body: str
     attachments: List[dict] = []
+
+class WidgetUpdate(BaseModel):
+    text: Optional[str] = None
+    progress: Optional[str] = None
+    icon: Optional[str] = None
+    color: Optional[str] = None
+    url: Optional[str] = None
+    shortcut: Optional[str] = None
+    graph: Optional[str] = None

@@ -1 +1 @@
-{"last_timezone": "America/Los_Angeles", "last_update": "2024-06-29T09:36:32.143487", "last_location": [44.04645364336354, -123.08688060439617]}
+{"last_timezone": "America/Los_Angeles", "last_update": "2024-07-22T12:00:14.193328", "last_location": [42.80982885281664, -123.0494316777397]}
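The updated cache line above comes from the Geocoder's TimezoneFinder lookups. A small illustrative sketch (not part of the commit) reproducing the lookup for the newly cached coordinates:

```python
from timezonefinder import TimezoneFinder
from zoneinfo import ZoneInfo

tf = TimezoneFinder()
tz_name = tf.timezone_at(lat=42.80982885281664, lng=-123.0494316777397)
print(ZoneInfo(tz_name))   # America/Los_Angeles, matching the cached value
```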
sijapi/helpers/CaPLSS_downloader_and_importer.py (Normal file, 117 lines added)
@@ -0,0 +1,117 @@
import requests
import json
import time
import os
import subprocess

def get_feature_count(url):
    params = {
        'where': '1=1',
        'returnCountOnly': 'true',
        'f': 'json'
    }
    response = requests.get(url, params=params)
    response.raise_for_status()
    data = response.json()
    return data.get('count', 0)

def fetch_features(url, offset, num):
    params = {
        'where': '1=1',
        'outFields': '*',
        'geometryPrecision': 6,
        'outSR': 4326,
        'f': 'json',
        'resultOffset': offset,
        'resultRecordCount': num
    }
    response = requests.get(url, params=params)
    response.raise_for_status()
    return response.json()

def download_layer(layer_num, layer_name):
    url = f"https://gis.blm.gov/arcgis/rest/services/Cadastral/BLM_Natl_PLSS_CadNSDI/MapServer/{layer_num}/query"

    total_count = get_feature_count(url)
    print(f"Total {layer_name} features: {total_count}")

    batch_size = 1000
    offset = 0
    all_features = []

    while offset < total_count:
        print(f"Fetching {layer_name} features {offset} to {offset + batch_size}...")
        data = fetch_features(url, offset, batch_size)

        new_features = data.get('features', [])
        if not new_features:
            break

        all_features.extend(new_features)
        offset += len(new_features)

        print(f"Progress: {len(all_features)}/{total_count} features")

        time.sleep(1)  # Be nice to the server

    print(f"Total {layer_name} features fetched: {len(all_features)}")

    # Convert to GeoJSON
    geojson_features = [
        {
            "type": "Feature",
            "properties": feature['attributes'],
            "geometry": feature['geometry']
        } for feature in all_features
    ]

    full_geojson = {
        "type": "FeatureCollection",
        "features": geojson_features
    }

    # Save to file
    file_path = f'/Users/sij/workshop/sijapi/sijapi/data/PLSS_{layer_name}.geojson'
    with open(file_path, 'w') as f:
        json.dump(full_geojson, f)

    print(f"GeoJSON file saved as '{file_path}'")

    return file_path

def import_to_postgis(file_path, table_name):
    db_name = 'sij'
    db_user = 'sij'
    db_password = 'Synchr0!'

    ogr2ogr_command = [
        'ogr2ogr',
        '-f', 'PostgreSQL',
        f'PG:dbname={db_name} user={db_user} password={db_password}',
        file_path,
        '-nln', table_name,
        '-overwrite'
    ]

    subprocess.run(ogr2ogr_command, check=True)
    print(f"Data successfully imported into PostGIS table: {table_name}")

def main():
    try:
        # Download and import Townships (Layer 1)
        township_file = download_layer(1, "Townships")
        import_to_postgis(township_file, "public.plss_townships")

        # Download and import Sections (Layer 2)
        section_file = download_layer(2, "Sections")
        import_to_postgis(section_file, "public.plss_sections")

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
    except subprocess.CalledProcessError as e:
        print(f"Error importing data into PostGIS: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

if __name__ == "__main__":
    main()
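A hedged usage sketch for the helpers above (it assumes the sijapi package is importable from the working directory); it only inspects the ArcGIS service rather than running the full download and import:

```python
from sijapi.helpers.CaPLSS_downloader_and_importer import get_feature_count, fetch_features

# Sections layer (layer 2), as used in main() above.
url = "https://gis.blm.gov/arcgis/rest/services/Cadastral/BLM_Natl_PLSS_CadNSDI/MapServer/2/query"
print("sections reported:", get_feature_count(url))

batch = fetch_features(url, offset=0, num=3)
for feature in batch.get('features', []):
    print(sorted(feature['attributes'].keys())[:5])   # peek at the attribute schema
```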
sijapi/helpers/CalFire_THP_scraper.py (Normal file, 73 lines added)
@@ -0,0 +1,73 @@
import requests
import PyPDF2
import io
import re

def scrape_data_from_pdf(url):
    response = requests.get(url)
    pdf_file = io.BytesIO(response.content)

    pdf_reader = PyPDF2.PdfReader(pdf_file)

    all_text = ""
    for page in pdf_reader.pages:
        all_text += page.extract_text() + "\n"

    return all_text

def parse_data(raw_data):
    lines = raw_data.split('\n')
    data = []
    current_entry = None

    for line in lines:
        line = line.strip()
        if re.match(r'\d+-\d+-\d+-\w+', line):
            if current_entry:
                data.append(current_entry)
            current_entry = {'Harvest Document': line, 'Raw Data': []}
        elif current_entry:
            current_entry['Raw Data'].append(line)

    if current_entry:
        data.append(current_entry)

    return data

def filter_data(data):
    return [entry for entry in data if any(owner.lower() in ' '.join(entry['Raw Data']).lower() for owner in ["Sierra Pacific", "SPI", "Land & Timber"])]

def extract_location(raw_data):
    location = []
    for line in raw_data:
        if 'MDBM:' in line or 'HBM:' in line:
            location.append(line)
    return ' '.join(location)

def extract_plss_coordinates(text):
    pattern = r'(\w+): T(\d+)([NSEW]) R(\d+)([NSEW]) S(\d+)'
    return re.findall(pattern, text)

# Main execution
url = "https://caltreesplans.resources.ca.gov/Caltrees/Report/ShowReport.aspx?module=TH_Document&reportID=492&reportType=LINK_REPORT_LIST"
raw_data = scrape_data_from_pdf(url)

parsed_data = parse_data(raw_data)
print(f"Total timber plans parsed: {len(parsed_data)}")

filtered_data = filter_data(parsed_data)
print(f"Found {len(filtered_data)} matching entries.")

for plan in filtered_data:
    print("\nHarvest Document:", plan['Harvest Document'])

    location = extract_location(plan['Raw Data'])
    print("Location:", location)

    plss_coordinates = extract_plss_coordinates(location)
    print("PLSS Coordinates:")
    for coord in plss_coordinates:
        meridian, township, township_dir, range_, range_dir, section = coord
        print(f" {meridian}: T{township}{township_dir} R{range_}{range_dir} S{section}")

    print("-" * 50)
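For reference, the PLSS pattern from extract_plss_coordinates applied to a location string of the kind stored in data/calfire_thp_data.json earlier in this commit:

```python
import re

pattern = r'(\w+): T(\d+)([NSEW]) R(\d+)([NSEW]) S(\d+)'
location = "MDBM: T32N R1E S12 ; MDBM: T32N R2E S7"

for meridian, township, township_dir, range_, range_dir, section in re.findall(pattern, location):
    print(f"{meridian}: T{township}{township_dir} R{range_}{range_dir} S{section}")
# MDBM: T32N R1E S12
# MDBM: T32N R2E S7
```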
sijapi/helpers/article.py (Executable file, 23 lines added)
@@ -0,0 +1,23 @@
#!/Users/sij/miniforge3/envs/sijapi/bin/python
import sys
import asyncio
from fastapi import BackgroundTasks
from sijapi.routers.news import process_and_save_article

async def main():
    if len(sys.argv) != 2:
        print("Usage: python script.py <article_url>")
        sys.exit(1)

    url = sys.argv[1]
    bg_tasks = BackgroundTasks()

    try:
        result = await process_and_save_article(bg_tasks, url)
        print(result)
    except Exception as e:
        print(f"Error processing article: {str(e)}")
        sys.exit(1)

if __name__ == "__main__":
    asyncio.run(main())
sijapi/helpers/cli.py (Normal file, 57 lines added)
@@ -0,0 +1,57 @@
# cli.py
import click
import asyncio
from datetime import datetime as dt_datetime, timedelta

# Import your async functions and dependencies
from sijapi import build_daily_note_range_endpoint, gis  # broken!

def async_command(f):
    @click.command()
    @click.pass_context
    def wrapper(ctx, *args, **kwargs):
        async def run():
            return await f(*args, **kwargs)
        return asyncio.run(run())
    return wrapper

@click.group()
def cli():
    """CLI for your application."""
    pass

@cli.command()
@click.argument('dt_start')
@click.argument('dt_end')
@async_command
async def bulk_note_range(dt_start: str, dt_end: str):
    """
    Build daily notes for a date range.

    DT_START and DT_END should be in YYYY-MM-DD format.
    """
    try:
        start_date = dt_datetime.strptime(dt_start, "%Y-%m-%d")
        end_date = dt_datetime.strptime(dt_end, "%Y-%m-%d")
    except ValueError:
        click.echo("Error: Dates must be in YYYY-MM-DD format.")
        return

    if start_date > end_date:
        click.echo("Error: Start date must be before or equal to end date.")
        return

    results = []
    current_date = start_date
    while current_date <= end_date:
        formatted_date = await gis.dt(current_date)
        result = await build_daily_note(formatted_date)
        results.append(result)
        current_date += timedelta(days=1)

    click.echo("Generated notes for the following dates:")
    for url in results:
        click.echo(url)

if __name__ == '__main__':
    cli()
@@ -43,7 +43,7 @@ async def process_all_emails(account: EmailAccount, summarized_log: Path, autore
 
         L.INFO(f"Processed {processed_count} non-unread emails for account {account.name}")
     except Exception as e:
-        L.ERR(f"An error occurred while processing emails for account {account.name}: {e}")
+        L.logger.error(f"An error occurred while processing emails for account {account.name}: {e}")
 
 async def main():
     email_accounts = email.load_email_accounts(EMAIL_CONFIG)
@ -18,6 +18,11 @@ from sijapi import L, ASR_DIR, WHISPER_CPP_MODELS, GARBAGE_COLLECTION_INTERVAL,

 asr = APIRouter()
 logger = L.get_module_logger("asr")
+def debug(text: str): logger.debug(text)
+def info(text: str): logger.info(text)
+def warn(text: str): logger.warning(text)
+def err(text: str): logger.error(text)
+def crit(text: str): logger.critical(text)

 class TranscribeParams(BaseModel):
     model: str = Field(default="small")
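This hunk introduces the module-level logging shim that the rest of the commit swaps in for direct logger.<level>() calls; the same five one-liners reappear in the cal, cf, dist, email, and gis routers below. A minimal self-contained sketch of the pattern, using the standard logging module in place of sijapi's L (an assumption made only so the sketch runs on its own):

# sketch: per-module logger with thin level wrappers (standard logging assumed)
import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("asr")

def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)

debug("Transcribing audio file...")  # replaces a logger.debug(...) call site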
@ -81,7 +86,7 @@ async def transcribe_endpoint(
         return JSONResponse(content={"status": "timeout", "message": "Transcription is taking longer than expected. Please check back later."}, status_code=202)

 async def transcribe_audio(file_path, params: TranscribeParams):
-    logger.debug(f"Transcribing audio file from {file_path}...")
+    debug(f"Transcribing audio file from {file_path}...")
     file_path = await convert_to_wav(file_path)
     model = params.model if params.model in WHISPER_CPP_MODELS else 'small'
     model_path = WHISPER_CPP_DIR / 'models' / f'ggml-{model}.bin'
@ -119,11 +124,11 @@ async def transcribe_audio(file_path, params: TranscribeParams):
         command.extend(['--dtw', params.dtw])

     command.extend(['-f', file_path])
-    logger.debug(f"Command: {command}")
+    debug(f"Command: {command}")

     # Create a unique ID for this transcription job
     job_id = str(uuid.uuid4())
-    logger.debug(f"Created job ID: {job_id}")
+    debug(f"Created job ID: {job_id}")

     # Store the job status
     transcription_results[job_id] = {"status": "processing", "result": None}
@ -135,20 +140,20 @@ async def transcribe_audio(file_path, params: TranscribeParams):
     poll_interval = 1 # 1 second
     start_time = asyncio.get_event_loop().time()

-    logger.debug(f"Starting to poll for job {job_id}")
+    debug(f"Starting to poll for job {job_id}")
     try:
         while asyncio.get_event_loop().time() - start_time < max_wait_time:
             job_status = transcription_results.get(job_id, {})
-            logger.debug(f"Current status for job {job_id}: {job_status['status']}")
+            debug(f"Current status for job {job_id}: {job_status['status']}")
             if job_status["status"] == "completed":
-                logger.info(f"Transcription completed for job {job_id}")
+                info(f"Transcription completed for job {job_id}")
                 return job_status["result"]
             elif job_status["status"] == "failed":
-                logger.error(f"Transcription failed for job {job_id}: {job_status.get('error', 'Unknown error')}")
+                err(f"Transcription failed for job {job_id}: {job_status.get('error', 'Unknown error')}")
                 raise Exception(f"Transcription failed: {job_status.get('error', 'Unknown error')}")
             await asyncio.sleep(poll_interval)

-        logger.error(f"Transcription timed out for job {job_id}")
+        err(f"Transcription timed out for job {job_id}")
         raise TimeoutError("Transcription timed out")
     finally:
         # Ensure the task is cancelled if we exit the loop
@ -159,20 +164,20 @@ async def transcribe_audio(file_path, params: TranscribeParams):

 async def process_transcription(command, file_path, job_id):
     try:
-        logger.debug(f"Starting transcription process for job {job_id}")
+        debug(f"Starting transcription process for job {job_id}")
         result = await run_transcription(command, file_path)
         transcription_results[job_id] = {"status": "completed", "result": result}
-        logger.debug(f"Transcription completed for job {job_id}")
+        debug(f"Transcription completed for job {job_id}")
     except Exception as e:
-        logger.error(f"Transcription failed for job {job_id}: {str(e)}")
+        err(f"Transcription failed for job {job_id}: {str(e)}")
         transcription_results[job_id] = {"status": "failed", "error": str(e)}
     finally:
         # Clean up the temporary file
         os.remove(file_path)
-        logger.debug(f"Cleaned up temporary file for job {job_id}")
+        debug(f"Cleaned up temporary file for job {job_id}")

 async def run_transcription(command, file_path):
-    logger.debug(f"Running transcription command: {' '.join(command)}")
+    debug(f"Running transcription command: {' '.join(command)}")
     proc = await asyncio.create_subprocess_exec(
         *command,
         stdout=asyncio.subprocess.PIPE,
@ -181,9 +186,9 @@ async def run_transcription(command, file_path):
     stdout, stderr = await proc.communicate()
     if proc.returncode != 0:
         error_message = f"Error running command: {stderr.decode()}"
-        logger.error(error_message)
+        err(error_message)
         raise Exception(error_message)
-    logger.debug("Transcription command completed successfully")
+    debug("Transcription command completed successfully")
     return stdout.decode().strip()

 async def convert_to_wav(file_path: str):
@ -14,42 +14,46 @@ from dateutil.parser import isoparse as parse_iso
 import threading
 from typing import Dict, List, Any
 from datetime import datetime, timedelta
-from Foundation import NSDate, NSRunLoop
-import EventKit as EK
 from sijapi import L, ICAL_TOGGLE, ICALENDARS, MS365_TOGGLE, MS365_CLIENT_ID, MS365_SECRET, MS365_AUTHORITY_URL, MS365_SCOPE, MS365_REDIRECT_PATH, MS365_TOKEN_PATH
-from sijapi.routers import loc
+from sijapi.routers import gis

 cal = APIRouter()
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token")
 timeout = httpx.Timeout(12)
 logger = L.get_module_logger("cal")
+def debug(text: str): logger.debug(text)
+def info(text: str): logger.info(text)
+def warn(text: str): logger.warning(text)
+def err(text: str): logger.error(text)
+def crit(text: str): logger.critical(text)

 if MS365_TOGGLE is True:
-    logger.critical(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.")
+    crit(f"Visit https://api.sij.ai/o365/login to obtain your Microsoft 365 authentication token.")

     @cal.get("/o365/login")
     async def login():
-        logger.debug(f"Received request to /o365/login")
+        debug(f"Received request to /o365/login")
-        logger.debug(f"SCOPE: {MS365_SCOPE}")
+        debug(f"SCOPE: {MS365_SCOPE}")
         if not MS365_SCOPE:
-            logger.error("No scopes defined for authorization.")
+            err("No scopes defined for authorization.")
             raise HTTPException(
                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                 detail="No scopes defined for authorization."
             )
         authorization_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/authorize?client_id={MS365_CLIENT_ID}&response_type=code&redirect_uri={MS365_REDIRECT_PATH}&scope={'+'.join(MS365_SCOPE)}"
-        logger.info(f"Redirecting to authorization URL: {authorization_url}")
+        info(f"Redirecting to authorization URL: {authorization_url}")
         return RedirectResponse(authorization_url)

     @cal.get("/o365/oauth_redirect")
     async def oauth_redirect(code: str = None, error: str = None):
-        logger.debug(f"Received request to /o365/oauth_redirect")
+        debug(f"Received request to /o365/oauth_redirect")
         if error:
-            logger.error(f"OAuth2 Error: {error}")
+            err(f"OAuth2 Error: {error}")
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST, detail="OAuth2 Error"
             )
-        logger.info(f"Requesting token with authorization code: {code}")
+        info(f"Requesting token with authorization code: {code}")
         token_url = f"{MS365_AUTHORITY_URL}/oauth2/v2.0/token"
         data = {
             "client_id": MS365_CLIENT_ID,
@ -60,15 +64,15 @@ if MS365_TOGGLE is True:
         }
         async with httpx.AsyncClient(timeout=timeout) as client:
             response = await client.post(token_url, data=data)
-        logger.debug(f"Token endpoint response status code: {response.status_code}")
+        debug(f"Token endpoint response status code: {response.status_code}")
-        logger.info(f"Token endpoint response text: {response.text}")
+        info(f"Token endpoint response text: {response.text}")
         result = response.json()
         if 'access_token' in result:
             await save_token(result)
-            logger.info("Access token obtained successfully")
+            info("Access token obtained successfully")
             return {"message": "Access token stored successfully"}
         else:
-            logger.critical(f"Failed to obtain access token. Response: {result}")
+            crit(f"Failed to obtain access token. Response: {result}")
             raise HTTPException(
                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                 detail="Failed to obtain access token"
@ -76,7 +80,7 @@ if MS365_TOGGLE is True:

     @cal.get("/o365/me")
     async def read_items():
-        logger.debug(f"Received request to /o365/me")
+        debug(f"Received request to /o365/me")
         token = await load_token()
         if not token:
             raise HTTPException(
@ -89,10 +93,10 @@ if MS365_TOGGLE is True:
             response = await client.get(graph_url, headers=headers)
             if response.status_code == 200:
                 user = response.json()
-                logger.info(f"User retrieved: {user}")
+                info(f"User retrieved: {user}")
                 return user
             else:
-                logger.error("Invalid or expired token")
+                err("Invalid or expired token")
                 raise HTTPException(
                     status_code=status.HTTP_401_UNAUTHORIZED,
                     detail="Invalid or expired token",
@ -100,14 +104,14 @@ if MS365_TOGGLE is True:
                 )

     async def save_token(token):
-        logger.debug(f"Saving token: {token}")
+        debug(f"Saving token: {token}")
         try:
             token["expires_at"] = int(time.time()) + token["expires_in"]
             with open(MS365_TOKEN_PATH, "w") as file:
                 json.dump(token, file)
-            logger.debug(f"Saved token to {MS365_TOKEN_PATH}")
+            debug(f"Saved token to {MS365_TOKEN_PATH}")
         except Exception as e:
-            logger.error(f"Failed to save token: {e}")
+            err(f"Failed to save token: {e}")

     async def load_token():
         if os.path.exists(MS365_TOKEN_PATH):
@ -115,21 +119,21 @@ if MS365_TOGGLE is True:
                 with open(MS365_TOKEN_PATH, "r") as file:
                     token = json.load(file)
             except FileNotFoundError:
-                logger.error("Token file not found.")
+                err("Token file not found.")
                 return None
             except json.JSONDecodeError:
-                logger.error("Failed to decode token JSON")
+                err("Failed to decode token JSON")
                 return None

             if token:
                 token["expires_at"] = int(time.time()) + token["expires_in"]
-                logger.debug(f"Loaded token: {token}") # Add this line to log the loaded token
+                debug(f"Loaded token: {token}") # Add this line to log the loaded token
                 return token
             else:
-                logger.debug("No token found.")
+                debug("No token found.")
                 return None
         else:
-            logger.error(f"No file found at {MS365_TOKEN_PATH}")
+            err(f"No file found at {MS365_TOKEN_PATH}")
             return None

@ -159,64 +163,137 @@ if MS365_TOGGLE is True:
             response = await client.post(token_url, data=data)
             result = response.json()
             if "access_token" in result:
-                logger.info("Access token refreshed successfully")
+                info("Access token refreshed successfully")
                 return result
             else:
-                logger.error("Failed to refresh access token")
+                err("Failed to refresh access token")
                 return None


     async def refresh_token():
         token = await load_token()
         if not token:
-            logger.error("No token found in storage")
+            err("No token found in storage")
             raise HTTPException(
                 status_code=status.HTTP_401_UNAUTHORIZED,
                 detail="No token found",
             )

         if 'refresh_token' not in token:
-            logger.error("Refresh token not found in the loaded token")
+            err("Refresh token not found in the loaded token")
             raise HTTPException(
                 status_code=status.HTTP_401_UNAUTHORIZED,
                 detail="Refresh token not found",
             )

         refresh_token = token['refresh_token']
-        logger.debug("Found refresh token, attempting to refresh access token")
+        debug("Found refresh token, attempting to refresh access token")

         new_token = await get_new_token_with_refresh_token(refresh_token)

         if new_token:
             await save_token(new_token)
-            logger.info("Token refreshed and saved successfully")
+            info("Token refreshed and saved successfully")
         else:
-            logger.error("Failed to refresh token")
+            err("Failed to refresh token")
             raise HTTPException(
                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                 detail="Failed to refresh token",
             )

-def get_calendar_ids() -> Dict[str, str]:
-    event_store = EK.EKEventStore.alloc().init()
-    all_calendars = event_store.calendarsForEntityType_(0) # 0 corresponds to EKEntityTypeEvent
-
-    calendar_identifiers = {
-        calendar.title() : calendar.calendarIdentifier() for calendar in all_calendars
-    }
-    logger.debug(f"{calendar_identifiers}")
-    return calendar_identifiers
-
-# Helper to convert datetime to NSDate
-def datetime_to_nsdate(dt: datetime) -> NSDate:
-    return NSDate.dateWithTimeIntervalSince1970_(dt.timestamp())
+if ICAL_TOGGLE is True:
+    from Foundation import NSDate, NSRunLoop
+    import EventKit as EK
+
+    # Helper to convert datetime to NSDate
+    def datetime_to_nsdate(dt: datetime) -> NSDate:
+        return NSDate.dateWithTimeIntervalSince1970_(dt.timestamp())
+
+    def get_calendar_ids() -> Dict[str, str]:
+        event_store = EK.EKEventStore.alloc().init()
+        all_calendars = event_store.calendarsForEntityType_(0) # 0 corresponds to EKEntityTypeEvent
+
+        calendar_identifiers = {
+            calendar.title() : calendar.calendarIdentifier() for calendar in all_calendars
+        }
+        debug(f"{calendar_identifiers}")
+        return calendar_identifiers
+
+    def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]:
+        event_store = EK.EKEventStore.alloc().init()
+
+        # Request access to EventKit
+        def request_access() -> bool:
+            access_granted = []
+
+            def completion_handler(granted, error):
+                if error is not None:
+                    err(f"Error: {error}")
+                access_granted.append(granted)
+                with access_granted_condition:
+                    access_granted_condition.notify()
+
+            access_granted_condition = threading.Condition()
+            with access_granted_condition:
+                event_store.requestAccessToEntityType_completion_(0, completion_handler) # 0 corresponds to EKEntityTypeEvent
+                access_granted_condition.wait(timeout=10)
+                if access_granted:
+                    return access_granted[0]
+                else:
+                    err("Request access timed out or failed")
+                    return False
+
+        if not request_access():
+            err("Access to calendar data was not granted")
+            return []
+
+        ns_start_date = datetime_to_nsdate(start_date)
+        ns_end_date = datetime_to_nsdate(end_date)
+
+        # Retrieve all calendars
+        all_calendars = event_store.calendarsForEntityType_(0) # 0 corresponds to EKEntityTypeEvent
+        if calendar_ids:
+            selected_calendars = [cal for cal in all_calendars if cal.calendarIdentifier() in calendar_ids]
+        else:
+            selected_calendars = all_calendars
+
+        # Filtering events by selected calendars
+        predicate = event_store.predicateForEventsWithStartDate_endDate_calendars_(ns_start_date, ns_end_date, selected_calendars)
+        events = event_store.eventsMatchingPredicate_(predicate)
+
+        event_list = []
+        for event in events:
+            # Check if event.attendees() returns None
+            if event.attendees():
+                attendees = [{'name': att.name(), 'email': att.emailAddress()} for att in event.attendees() if att.emailAddress()]
+            else:
+                attendees = []
+
+            # Format the start and end dates properly
+            start_date_str = event.startDate().descriptionWithLocale_(None)
+            end_date_str = event.endDate().descriptionWithLocale_(None)
+
+            event_data = {
+                "subject": event.title(),
+                "id": event.eventIdentifier(),
+                "start": start_date_str,
+                "end": end_date_str,
+                "bodyPreview": event.notes() if event.notes() else '',
+                "attendees": attendees,
+                "location": event.location() if event.location() else '',
+                "onlineMeetingUrl": '', # Defaulting to empty as macOS EventKit does not provide this
+                "showAs": 'busy', # Default to 'busy'
+                "isAllDay": event.isAllDay()
+            }
+
+            event_list.append(event_data)
+
+        return event_list

 @cal.get("/events")
 async def get_events_endpoint(start_date: str, end_date: str):
-    start_dt = await loc.dt(start_date)
+    start_dt = await gis.dt(start_date)
-    end_dt = await loc.dt(end_date)
+    end_dt = await gis.dt(end_date)
     datetime.strptime(start_date, "%Y-%m-%d") or datetime.now()
     end_dt = datetime.strptime(end_date, "%Y-%m-%d") or datetime.now()
     response = await get_events(start_dt, end_dt)
@ -237,80 +314,6 @@ async def get_events(start_dt: datetime, end_dt: datetime) -> List:
     return parsed_events


-def get_macos_calendar_events(start_date: datetime, end_date: datetime, calendar_ids: List[str] = None) -> List[Dict]:
-    event_store = EK.EKEventStore.alloc().init()
-
-    # Request access to EventKit
-    def request_access() -> bool:
-        access_granted = []
-
-        def completion_handler(granted, error):
-            if error is not None:
-                logger.error(f"Error: {error}")
-            access_granted.append(granted)
-            # Notify the main thread that the completion handler has executed
-            with access_granted_condition:
-                access_granted_condition.notify()
-
-        access_granted_condition = threading.Condition()
-        with access_granted_condition:
-            event_store.requestAccessToEntityType_completion_(0, completion_handler) # 0 corresponds to EKEntityTypeEvent
-            # Wait for the completion handler to be called
-            access_granted_condition.wait(timeout=10)
-            # Verify that the handler was called and access_granted is not empty
-            if access_granted:
-                return access_granted[0]
-            else:
-                logger.error("Request access timed out or failed")
-                return False
-
-    if not request_access():
-        logger.error("Access to calendar data was not granted")
-        return []
-
-    ns_start_date = datetime_to_nsdate(start_date)
-    ns_end_date = datetime_to_nsdate(end_date)
-
-    # Retrieve all calendars
-    all_calendars = event_store.calendarsForEntityType_(0) # 0 corresponds to EKEntityTypeEvent
-    if calendar_ids:
-        selected_calendars = [cal for cal in all_calendars if cal.calendarIdentifier() in calendar_ids]
-    else:
-        selected_calendars = all_calendars
-
-    # Filtering events by selected calendars
-    predicate = event_store.predicateForEventsWithStartDate_endDate_calendars_(ns_start_date, ns_end_date, selected_calendars)
-    events = event_store.eventsMatchingPredicate_(predicate)
-
-    event_list = []
-    for event in events:
-        # Check if event.attendees() returns None
-        if event.attendees():
-            attendees = [{'name': att.name(), 'email': att.emailAddress()} for att in event.attendees() if att.emailAddress()]
-        else:
-            attendees = []
-
-        # Format the start and end dates properly
-        start_date_str = event.startDate().descriptionWithLocale_(None)
-        end_date_str = event.endDate().descriptionWithLocale_(None)
-
-        event_data = {
-            "subject": event.title(),
-            "id": event.eventIdentifier(),
-            "start": start_date_str,
-            "end": end_date_str,
-            "bodyPreview": event.notes() if event.notes() else '',
-            "attendees": attendees,
-            "location": event.location() if event.location() else '',
-            "onlineMeetingUrl": '', # Defaulting to empty as macOS EventKit does not provide this
-            "showAs": 'busy', # Default to 'busy'
-            "isAllDay": event.isAllDay()
-        }
-
-        event_list.append(event_data)
-
-    return event_list
-
 async def get_ms365_events(start_date: datetime, end_date: datetime):
     token = await load_token()
     if token:
@ -331,7 +334,7 @@ async def get_ms365_events(start_date: datetime, end_date: datetime):
         response = await client.get(graph_url, headers=headers)

     if response.status_code != 200:
-        logger.error("Failed to retrieve events from Microsoft 365")
+        err("Failed to retrieve events from Microsoft 365")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             detail="Failed to retrieve events",
@ -342,48 +345,48 @@ async def get_ms365_events(start_date: datetime, end_date: datetime):


 async def parse_calendar_for_day(range_start: datetime, range_end: datetime, events: List[Dict[str, Any]]):
-    range_start = await loc.dt(range_start)
+    range_start = await gis.dt(range_start)
-    range_end = await loc.dt(range_end)
+    range_end = await gis.dt(range_end)
     event_list = []

     for event in events:
-        logger.info(f"Event: {event}")
+        info(f"Event: {event}")
         start_str = event.get('start')
         end_str = event.get('end')

         if isinstance(start_str, dict):
             start_str = start_str.get('dateTime')
         else:
-            logger.info(f"Start date string not a dict")
+            info(f"Start date string not a dict")

         if isinstance(end_str, dict):
             end_str = end_str.get('dateTime')
         else:
-            logger.info(f"End date string not a dict")
+            info(f"End date string not a dict")

         try:
-            start_date = await loc.dt(start_str) if start_str else None
+            start_date = await gis.dt(start_str) if start_str else None
         except (ValueError, TypeError) as e:
-            logger.error(f"Invalid start date format: {start_str}, error: {e}")
+            err(f"Invalid start date format: {start_str}, error: {e}")
             continue

         try:
-            end_date = await loc.dt(end_str) if end_str else None
+            end_date = await gis.dt(end_str) if end_str else None
         except (ValueError, TypeError) as e:
-            logger.error(f"Invalid end date format: {end_str}, error: {e}")
+            err(f"Invalid end date format: {end_str}, error: {e}")
             continue

-        logger.debug(f"Comparing {start_date} with range {range_start} to {range_end}")
+        debug(f"Comparing {start_date} with range {range_start} to {range_end}")

         if start_date:
             # Ensure start_date is timezone-aware
-            start_date = await loc.dt(start_date)
+            start_date = await gis.dt(start_date)

             # If end_date is not provided, assume it's the same as start_date
             if not end_date:
                 end_date = start_date
             else:
-                end_date = await loc.dt(end_date)
+                end_date = await gis.dt(end_date)

             # Check if the event overlaps with the given range
             if (start_date < range_end) and (end_date > range_start):
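The final line of this hunk is the standard interval-overlap test: two ranges intersect exactly when each one starts before the other ends. A small self-contained sketch of that test on its own (the sample datetimes are illustrative only):

# sketch: the overlap check used above, on plain timezone-aware datetimes
from datetime import datetime, timezone

def overlaps(start_a: datetime, end_a: datetime, start_b: datetime, end_b: datetime) -> bool:
    # True when the two ranges share any time at all
    return (start_a < end_b) and (end_a > start_b)

day_start = datetime(2024, 7, 1, 0, 0, tzinfo=timezone.utc)
day_end = datetime(2024, 7, 2, 0, 0, tzinfo=timezone.utc)
event_start = datetime(2024, 6, 30, 23, 0, tzinfo=timezone.utc)
event_end = datetime(2024, 7, 1, 1, 0, tzinfo=timezone.utc)

print(overlaps(event_start, event_end, day_start, day_end))  # True: the event spills into the day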
@ -405,11 +408,11 @@ async def parse_calendar_for_day(range_start: datetime, range_end: datetime, eve
                     "busy": event.get('showAs', '') in ['busy', 'tentative'],
                     "all_day": event.get('isAllDay', False)
                 }
-                logger.info(f"Event_data: {event_data}")
+                info(f"Event_data: {event_data}")
                 event_list.append(event_data)
             else:
-                logger.debug(f"Event outside of specified range: {start_date} to {end_date}")
+                debug(f"Event outside of specified range: {start_date} to {end_date}")
         else:
-            logger.error(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}")
+            err(f"Invalid or missing start date for event: {event.get('id', 'Unknown ID')}")

     return event_list
@ -13,6 +13,11 @@ import os

 cf = APIRouter()
 logger = L.get_module_logger("cal")
+def debug(text: str): logger.debug(text)
+def info(text: str): logger.info(text)
+def warn(text: str): logger.warning(text)
+def err(text: str): logger.error(text)
+def crit(text: str): logger.critical(text)

 class DNSRecordRequest(BaseModel):
     full_domain: str
@ -70,7 +75,7 @@ async def retry_request(url, headers, max_retries=5, backoff_factor=1):
                 response.raise_for_status()
                 return response
         except (httpx.HTTPError, httpx.ConnectTimeout) as e:
-            logger.error(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
+            err(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
             await sleep(backoff_factor * (2 ** retry))
     raise HTTPException(status_code=500, detail="Max retries exceeded for Cloudflare API request")

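retry_request sleeps backoff_factor * 2**retry seconds between attempts (1 s, 2 s, 4 s, ... with the defaults shown). A self-contained sketch of the same exponential-backoff loop, assuming only httpx and asyncio; the URL in the trailing comment is a placeholder, not a documented sijapi endpoint:

# sketch: GET with retries and exponential backoff, mirroring the loop above
import asyncio
import httpx

async def retry_request(url: str, headers: dict, max_retries: int = 5, backoff_factor: float = 1.0) -> httpx.Response:
    for retry in range(max_retries):
        try:
            async with httpx.AsyncClient(timeout=10) as client:
                response = await client.get(url, headers=headers)
                response.raise_for_status()
                return response
        except (httpx.HTTPError, httpx.ConnectTimeout) as e:
            print(f"Request failed: {e}. Retrying {retry + 1}/{max_retries}...")
            await asyncio.sleep(backoff_factor * (2 ** retry))  # 1s, 2s, 4s, 8s, 16s
    raise RuntimeError("Max retries exceeded")

# asyncio.run(retry_request("https://api.cloudflare.com/client/v4/user", headers={}))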
@ -9,6 +9,11 @@ from sijapi import L, REBOOT_SCRIPT_PATH, HOST_CONFIG, API_CONFIG

 dist = APIRouter()
 logger = L.get_module_logger("dist")
+def debug(text: str): logger.debug(text)
+def info(text: str): logger.info(text)
+def warn(text: str): logger.warning(text)
+def err(text: str): logger.error(text)
+def crit(text: str): logger.critical(text)

 @dist.get("/update-restart-others")
 async def update_and_restart_others():
@ -32,10 +37,10 @@ async def update_and_restart_self(safe: bool = True):
             stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
         )
         stdout, stderr = await process.communicate()
-        logger.info(f"Update and restart initiated for self. Stdout: {stdout.decode()}. Stderr: {stderr.decode()}")
+        info(f"Update and restart initiated for self. Stdout: {stdout.decode()}. Stderr: {stderr.decode()}")
         return {"message": "Update and restart process initiated for this server."}
     except Exception as e:
-        logger.error(f"Failed to initiate update and restart for self: {str(e)}")
+        err(f"Failed to initiate update and restart for self: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Failed to initiate update and restart: {str(e)}")

 @dist.get("/update-and-restart-all")
@ -56,5 +61,5 @@ async def ensure_redundancy():
                 redundancy = True
                 break
             except aiohttp.ClientError:
-                logger.warning(f"Failed to check health of server {server.id}")
+                warn(f"Failed to check health of server {server.id}")
     return redundancy
@ -21,25 +21,24 @@ import yaml
 from typing import List, Dict, Optional, Set
 from datetime import datetime as dt_datetime
 from sijapi import L, PODCAST_DIR, DEFAULT_VOICE, EMAIL_CONFIG, EMAIL_LOGS
-from sijapi.routers import img, loc, tts, llm
+from sijapi.routers import gis, img, tts, llm
 from sijapi.utilities import clean_text, assemble_journal_path, extract_text, prefix_lines
 from sijapi.classes import EmailAccount, IMAPConfig, SMTPConfig, IncomingEmail, EmailContact, AutoResponder

 email = APIRouter()

 logger = L.get_module_logger("email")
-print(f"Email logger level: {logger.level}") # Debug print
-
-logger.debug("This is a debug message from email module")
-logger.info("This is an info message from email module")
+def debug(text: str): logger.debug(text)
+def info(text: str): logger.info(text)
+def warn(text: str): logger.warning(text)
+def err(text: str): logger.error(text)
+def crit(text: str): logger.critical(text)

 def load_email_accounts(yaml_path: str) -> List[EmailAccount]:
     with open(yaml_path, 'r') as file:
         config = yaml.safe_load(file)
     return [EmailAccount(**account) for account in config['accounts']]


 def get_imap_connection(account: EmailAccount):
     return Imbox(account.imap.host,
                  username=account.imap.username,
@ -59,34 +58,34 @@ def get_smtp_connection(autoresponder: AutoResponder):

     if smtp_config.encryption == 'SSL':
         try:
-            logger.debug(f"Attempting SSL connection to {smtp_config.host}:{smtp_config.port}")
+            debug(f"Attempting SSL connection to {smtp_config.host}:{smtp_config.port}")
             return SMTP_SSL(smtp_config.host, smtp_config.port, context=context)
         except ssl.SSLError as e:
-            logger.error(f"SSL connection failed: {str(e)}")
+            err(f"SSL connection failed: {str(e)}")
             # If SSL fails, try TLS
             try:
-                logger.debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
+                debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
                 smtp = SMTP(smtp_config.host, smtp_config.port)
                 smtp.starttls(context=context)
                 return smtp
             except Exception as e:
-                logger.error(f"STARTTLS connection failed: {str(e)}")
+                err(f"STARTTLS connection failed: {str(e)}")
                 raise
     elif smtp_config.encryption == 'STARTTLS':
         try:
-            logger.debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
+            debug(f"Attempting STARTTLS connection to {smtp_config.host}:{smtp_config.port}")
             smtp = SMTP(smtp_config.host, smtp_config.port)
             smtp.starttls(context=context)
             return smtp
         except Exception as e:
-            logger.error(f"STARTTLS connection failed: {str(e)}")
+            err(f"STARTTLS connection failed: {str(e)}")
             raise
     else:
         try:
-            logger.debug(f"Attempting unencrypted connection to {smtp_config.host}:{smtp_config.port}")
+            debug(f"Attempting unencrypted connection to {smtp_config.host}:{smtp_config.port}")
             return SMTP(smtp_config.host, smtp_config.port)
         except Exception as e:
-            logger.error(f"Unencrypted connection failed: {str(e)}")
+            err(f"Unencrypted connection failed: {str(e)}")
             raise

 async def send_response(to_email: str, subject: str, body: str, profile: AutoResponder, image_attachment: Path = None) -> bool:
@ -103,20 +102,20 @@ async def send_response(to_email: str, subject: str, body: str, profile: AutoRes
                 img = MIMEImage(img_file.read(), name=os.path.basename(image_attachment))
                 message.attach(img)

-        logger.debug(f"Sending auto-response to {to_email} concerning {subject} from account {profile.name}...")
+        debug(f"Sending auto-response to {to_email} concerning {subject} from account {profile.name}...")

         server = get_smtp_connection(profile)
-        logger.debug(f"SMTP connection established: {type(server)}")
+        debug(f"SMTP connection established: {type(server)}")
         server.login(profile.smtp.username, profile.smtp.password)
         server.send_message(message)

-        logger.info(f"Auto-response sent to {to_email} concerning {subject} from account {profile.name}!")
+        info(f"Auto-response sent to {to_email} concerning {subject} from account {profile.name}!")
         return True

     except Exception as e:
-        logger.error(f"Error in preparing/sending auto-response from account {profile.name}: {str(e)}")
+        err(f"Error in preparing/sending auto-response from account {profile.name}: {str(e)}")
-        logger.error(f"SMTP details - Host: {profile.smtp.host}, Port: {profile.smtp.port}, Encryption: {profile.smtp.encryption}")
+        err(f"SMTP details - Host: {profile.smtp.host}, Port: {profile.smtp.port}, Encryption: {profile.smtp.encryption}")
-        logger.error(traceback.format_exc())
+        err(traceback.format_exc())
         return False

     finally:
@ -124,7 +123,7 @@ async def send_response(to_email: str, subject: str, body: str, profile: AutoRes
         try:
             server.quit()
         except Exception as e:
-            logger.error(f"Error closing SMTP connection: {str(e)}")
+            err(f"Error closing SMTP connection: {str(e)}")


@ -161,15 +160,15 @@ async def process_account_archival(account: EmailAccount):
     while True:
         try:
             processed_uids = await load_processed_uids(summarized_log)
-            logger.debug(f"{len(processed_uids)} emails marked as already summarized are being ignored.")
+            debug(f"{len(processed_uids)} emails marked as already summarized are being ignored.")
             with get_imap_connection(account) as inbox:
                 unread_messages = inbox.messages(unread=True)
-                logger.debug(f"There are {len(unread_messages)} unread messages.")
+                debug(f"There are {len(unread_messages)} unread messages.")
                 for uid, message in unread_messages:
                     uid_str = uid.decode() if isinstance(uid, bytes) else str(uid)
                     if uid_str not in processed_uids:
                         recipients = [EmailContact(email=recipient['email'], name=recipient.get('name', '')) for recipient in message.sent_to]
-                        localized_datetime = await loc.dt(message.date)
+                        localized_datetime = await gis.dt(message.date)
                         this_email = IncomingEmail(
                             sender=message.sent_from[0]['email'],
                             datetime_received=localized_datetime,
@ -184,13 +183,13 @@ async def process_account_archival(account: EmailAccount):
                         save_success = await save_email(md_path, md_content)
                         if save_success:
                             await save_processed_uid(summarized_log, account.name, uid_str)
-                            logger.info(f"Summarized email: {uid_str}")
+                            info(f"Summarized email: {uid_str}")
                         else:
-                            logger.warning(f"Failed to summarize {this_email.subject}")
+                            warn(f"Failed to summarize {this_email.subject}")
                     else:
-                        logger.debug(f"Skipping {uid_str} because it was already processed.")
+                        debug(f"Skipping {uid_str} because it was already processed.")
         except Exception as e:
-            logger.error(f"An error occurred during summarization for account {account.name}: {e}")
+            err(f"An error occurred during summarization for account {account.name}: {e}")

         await asyncio.sleep(account.refresh)

@ -236,7 +235,7 @@ tags:
         return markdown_content

     except Exception as e:
-        logger.error(f"Exception: {e}")
+        err(f"Exception: {e}")
         return False

 async def save_email(md_path, md_content):
@ -244,14 +243,14 @@ async def save_email(md_path, md_content):
         with open(md_path, 'w', encoding='utf-8') as md_file:
             md_file.write(md_content)

-        logger.debug(f"Saved markdown to {md_path}")
+        debug(f"Saved markdown to {md_path}")
         return True
     except Exception as e:
-        logger.error(f"Failed to save email: {e}")
+        err(f"Failed to save email: {e}")
         return False

 def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount) -> List[AutoResponder]:
-    logger.debug(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"")
+    debug(f"Called get_matching_autoresponders for email \"{this_email.subject},\" account name \"{account.name}\"")
     def matches_list(item: str, this_email: IncomingEmail) -> bool:
         if '@' in item:
             return item in this_email.sender
@ -262,12 +261,12 @@ def get_matching_autoresponders(this_email: IncomingEmail, account: EmailAccount
         whitelist_match = not profile.whitelist or any(matches_list(item, this_email) for item in profile.whitelist)
         blacklist_match = any(matches_list(item, this_email) for item in profile.blacklist)
         if whitelist_match and not blacklist_match:
-            logger.debug(f"We have a match for {whitelist_match} and no blacklist matches.")
+            debug(f"We have a match for {whitelist_match} and no blacklist matches.")
            matching_profiles.append(profile)
         elif whitelist_match and blacklist_match:
-            logger.debug(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}")
+            debug(f"Matched whitelist for {whitelist_match}, but also matched blacklist for {blacklist_match}")
         else:
-            logger.debug(f"No whitelist or blacklist matches.")
+            debug(f"No whitelist or blacklist matches.")
     return matching_profiles


@ -278,30 +277,30 @@ async def process_account_autoresponding(account: EmailAccount):
     while True:
         try:
             processed_uids = await load_processed_uids(EMAIL_AUTORESPONSE_LOG)
-            logger.debug(f"{len(processed_uids)} emails marked as already responded to are being ignored.")
+            debug(f"{len(processed_uids)} emails marked as already responded to are being ignored.")

             with get_imap_connection(account) as inbox:
                 unread_messages = inbox.messages(unread=True)
-                logger.debug(f"There are {len(unread_messages)} unread messages.")
+                debug(f"There are {len(unread_messages)} unread messages.")

                 for uid, message in unread_messages:
                     uid_str = uid.decode() if isinstance(uid, bytes) else str(uid)
                     if uid_str not in processed_uids:
                         await autorespond_single_email(message, uid_str, account, EMAIL_AUTORESPONSE_LOG)
                     else:
-                        logger.debug(f"Skipping {uid_str} because it was already processed.")
+                        debug(f"Skipping {uid_str} because it was already processed.")

         except Exception as e:
-            logger.error(f"An error occurred during auto-responding for account {account.name}: {e}")
+            err(f"An error occurred during auto-responding for account {account.name}: {e}")

         await asyncio.sleep(account.refresh)

 async def autorespond_single_email(message, uid_str: str, account: EmailAccount, log_file: Path):
     this_email = await create_incoming_email(message)
-    logger.debug(f"Evaluating {this_email.subject} for autoresponse-worthiness...")
+    debug(f"Evaluating {this_email.subject} for autoresponse-worthiness...")

     matching_profiles = get_matching_autoresponders(this_email, account)
-    logger.debug(f"Matching profiles: {matching_profiles}")
+    debug(f"Matching profiles: {matching_profiles}")

     for profile in matching_profiles:
         response_body = await generate_response(this_email, profile, account)
@ -311,18 +310,18 @@ async def autorespond_single_email(message, uid_str: str, account: EmailAccount,
         jpg_path = await img.workflow(profile.image_prompt, earlyout=False, downscale_to_fit=True) if profile.image_prompt else None
         success = await send_response(this_email.sender, subject, response_body, profile, jpg_path)
         if success:
-            logger.warning(f"Auto-responded to email: {this_email.subject}")
+            warn(f"Auto-responded to email: {this_email.subject}")
             await save_processed_uid(log_file, account.name, uid_str)
         else:
-            logger.warning(f"Failed to send auto-response to {this_email.subject}")
+            warn(f"Failed to send auto-response to {this_email.subject}")
     else:
-        logger.warning(f"Unable to generate auto-response for {this_email.subject}")
+        warn(f"Unable to generate auto-response for {this_email.subject}")

 async def generate_response(this_email: IncomingEmail, profile: AutoResponder, account: EmailAccount) -> Optional[str]:
-    logger.info(f"Generating auto-response to {this_email.subject} with profile: {profile.name}")
+    info(f"Generating auto-response to {this_email.subject} with profile: {profile.name}")

-    now = await loc.dt(dt_datetime.now())
+    now = await gis.dt(dt_datetime.now())
-    then = await loc.dt(this_email.datetime_received)
+    then = await gis.dt(this_email.datetime_received)
     age = now - then
     usr_prompt = f'''
 Generate a personalized auto-response to the following email:
@ -337,7 +336,7 @@ Respond on behalf of {account.fullname}, who is unable to respond personally bec

     try:
         response = await llm.query_ollama(usr_prompt, sys_prompt, profile.ollama_model, 400)
-        logger.debug(f"query_ollama response: {response}")
+        debug(f"query_ollama response: {response}")

         if isinstance(response, dict) and "message" in response and "content" in response["message"]:
             response = response["message"]["content"]
@ -345,14 +344,14 @@ Respond on behalf of {account.fullname}, who is unable to respond personally bec
             return response + "\n\n"

     except Exception as e:
-        logger.error(f"Error generating auto-response: {str(e)}")
+        err(f"Error generating auto-response: {str(e)}")
         return None



 async def create_incoming_email(message) -> IncomingEmail:
     recipients = [EmailContact(email=recipient['email'], name=recipient.get('name', '')) for recipient in message.sent_to]
-    localized_datetime = await loc.dt(message.date)
+    localized_datetime = await gis.dt(message.date)
     return IncomingEmail(
         sender=message.sent_from[0]['email'],
         datetime_received=localized_datetime,
@ -372,7 +371,6 @@ async def save_processed_uid(filename: Path, account_name: str, uid: str):
     async with aiofiles.open(filename, 'a') as f:
         await f.write(f"{account_name}:{uid}\n")

-
 async def process_all_accounts():
     email_accounts = load_email_accounts(EMAIL_CONFIG)
     summarization_tasks = [asyncio.create_task(process_account_archival(account)) for account in email_accounts]
568
sijapi/routers/gis.py
Normal file
@ -0,0 +1,568 @@
|
||||||
|
'''
|
||||||
|
Uses Postgres/PostGIS for location tracking (with data obtained via the companion mobile Pythonista scripts) and for geocoding.
|
||||||
|
'''
|
||||||
|
from fastapi import APIRouter, HTTPException, Query
|
||||||
|
from fastapi.responses import HTMLResponse, JSONResponse
|
||||||
|
import random
|
||||||
|
from pathlib import Path
|
||||||
|
import traceback
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Union, List
|
||||||
|
import folium
|
||||||
|
from folium.plugins import HeatMap, MarkerCluster, Search
|
||||||
|
from folium.plugins import Fullscreen, MiniMap, MousePosition, Geocoder, Draw, MeasureControl
|
||||||
|
from zoneinfo import ZoneInfo
|
||||||
|
from dateutil.parser import parse as dateutil_parse
|
||||||
|
from typing import Optional, List, Union
|
||||||
|
from sijapi import L, DB, TZ, GEO
|
||||||
|
from sijapi.classes import Location
|
||||||
|
from sijapi.utilities import haversine, assemble_journal_path
|
||||||
|
|
||||||
|
gis = APIRouter()
|
||||||
|
logger = L.get_module_logger("gis")
|
||||||
|
def debug(text: str): logger.debug(text)
|
||||||
|
def info(text: str): logger.info(text)
|
||||||
|
def warn(text: str): logger.warning(text)
|
||||||
|
def err(text: str): logger.error(text)
|
||||||
|
def crit(text: str): logger.critical(text)
|
||||||
|
|
||||||
|
async def dt(
|
||||||
|
date_time: Union[str, int, datetime],
|
||||||
|
tz: Union[str, ZoneInfo, None] = None
|
||||||
|
) -> datetime:
|
||||||
|
try:
|
||||||
|
# Convert integer (epoch time) to UTC datetime
|
||||||
|
if isinstance(date_time, int):
|
||||||
|
date_time = datetime.fromtimestamp(date_time, tz=timezone.utc)
|
||||||
|
debug(f"Converted epoch time {date_time} to UTC datetime object.")
|
||||||
|
|
||||||
|
# Convert string to datetime if necessary
|
||||||
|
elif isinstance(date_time, str):
|
||||||
|
date_time = dateutil_parse(date_time)
|
||||||
|
debug(f"Converted string '{date_time}' to datetime object.")
|
||||||
|
|
||||||
|
if not isinstance(date_time, datetime):
|
||||||
|
raise ValueError(f"Input must be a string, integer (epoch time), or datetime object. What we received: {date_time}, type {type(date_time)}")
|
||||||
|
|
||||||
|
# Ensure the datetime is timezone-aware (UTC if not specified)
|
||||||
|
if date_time.tzinfo is None:
|
||||||
|
date_time = date_time.replace(tzinfo=timezone.utc)
|
||||||
|
debug("Added UTC timezone to naive datetime.")
|
||||||
|
|
||||||
|
# Handle provided timezone
|
||||||
|
if tz is not None:
|
||||||
|
if isinstance(tz, str):
|
||||||
|
if tz == "local":
|
||||||
|
last_loc = await get_timezone_without_timezone(date_time)
|
||||||
|
tz = await GEO.tz_at(last_loc.latitude, last_loc.longitude)
|
||||||
|
debug(f"Using local timezone: {tz}")
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
tz = ZoneInfo(tz)
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Invalid timezone string '{tz}'. Error: {e}")
|
||||||
|
raise ValueError(f"Invalid timezone string: {tz}")
|
||||||
|
elif isinstance(tz, ZoneInfo):
|
||||||
|
pass # tz is already a ZoneInfo object
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Expected tz to be 'local', a timezone string, or a ZoneInfo object; got {tz} (type {type(tz)})")
|
||||||
|
|
||||||
|
# Convert to the provided or determined timezone
|
||||||
|
date_time = date_time.astimezone(tz)
|
||||||
|
debug(f"Converted datetime to timezone: {tz}")
|
||||||
|
|
||||||
|
return date_time
|
||||||
|
except ValueError as e:
|
||||||
|
err(f"Error in dt: {e}")
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Unexpected error in dt: {e}")
|
||||||
|
raise ValueError(f"Failed to process datetime: {e}")
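For orientation, a minimal usage sketch of the dt() helper above (not part of the commit; it assumes the sijapi package is installed and configured so the router module imports cleanly, and the timestamps are invented):

# Illustrative only: exercises gis.dt() with an epoch integer and a naive ISO string.
import asyncio
from sijapi.routers import gis

async def demo():
    as_utc = await gis.dt(1720000000)  # epoch seconds -> timezone-aware datetime
    in_la = await gis.dt("2024-07-03 12:00:00", tz="America/Los_Angeles")  # naive string treated as UTC, then converted
    print(as_utc.isoformat())
    print(in_la.isoformat())

asyncio.run(demo())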
|
||||||
|
|
||||||
|
|
||||||
|
async def get_timezone_without_timezone(date_time):
|
||||||
|
# This is a bit convoluted because we're trying to solve the paradox of needing to
|
||||||
|
# know the location in order to determine the timezone, but needing the timezone to be
|
||||||
|
# certain we've chosen the correct location for a provided timezone-naive datetime
|
||||||
|
# (relevant, e.g., if this datetime coincided with inter-timezone travel).
|
||||||
|
# Our imperfect solution is to use UTC for an initial location query to determine
|
||||||
|
# roughly where we were at the time, get that timezone, then check the location again
|
||||||
|
# applying that timezone to the provided datetime. If the location changed between the
|
||||||
|
# datetime in UTC and the localized datetime, we'll use the new location's timezone;
|
||||||
|
# otherwise we'll use the timezone we sourced from the UTC timezone query. But at the
|
||||||
|
# end of the day it's entirely possible to spend the end of the day twice in two different
|
||||||
|
# timezones (or none!), so this is a best-effort solution.
|
||||||
|
|
||||||
|
# Step 1: Use UTC as an interim timezone to query location
|
||||||
|
interim_dt = date_time.replace(tzinfo=ZoneInfo("UTC"))
|
||||||
|
interim_loc = await fetch_last_location_before(interim_dt)
|
||||||
|
|
||||||
|
# Step 2: Get a preliminary timezone based on the interim location
|
||||||
|
interim_tz = await GEO.tz_current((interim_loc.latitude, interim_loc.longitude))
|
||||||
|
|
||||||
|
# Step 3: Apply this preliminary timezone and query location again
|
||||||
|
query_dt = date_time.replace(tzinfo=ZoneInfo(interim_tz))
|
||||||
|
query_loc = await fetch_last_location_before(query_dt)
|
||||||
|
|
||||||
|
# Step 4: Get the final timezone, reusing interim_tz if location hasn't changed
|
||||||
|
return interim_tz if query_loc == interim_loc else await GEO.tz_current(query_loc.latitude, query_loc.longitude)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_last_location() -> Optional[Location]:
|
||||||
|
query_datetime = datetime.now(TZ)
|
||||||
|
debug(f"Query_datetime: {query_datetime}")
|
||||||
|
|
||||||
|
this_location = await fetch_last_location_before(query_datetime)
|
||||||
|
|
||||||
|
if this_location:
|
||||||
|
debug(f"location: {this_location}")
|
||||||
|
return this_location
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_locations(start: Union[str, int, datetime], end: Union[str, int, datetime, None] = None) -> List[Location]:
|
||||||
|
start_datetime = await dt(start)
|
||||||
|
if end is None:
|
||||||
|
end_datetime = await dt(start_datetime.replace(hour=23, minute=59, second=59))
|
||||||
|
else:
|
||||||
|
end_datetime = await dt(end) if not isinstance(end, datetime) else end
|
||||||
|
|
||||||
|
if start_datetime.time() == datetime.min.time() and end_datetime.time() == datetime.min.time():
|
||||||
|
end_datetime = await dt(end_datetime.replace(hour=23, minute=59, second=59))
|
||||||
|
|
||||||
|
debug(f"Fetching locations between {start_datetime} and {end_datetime}")
|
||||||
|
|
||||||
|
async with DB.get_connection() as conn:
|
||||||
|
locations = []
|
||||||
|
# Check for records within the specified datetime range
|
||||||
|
range_locations = await conn.fetch('''
|
||||||
|
SELECT id, datetime,
|
||||||
|
ST_X(ST_AsText(location)::geometry) AS longitude,
|
||||||
|
ST_Y(ST_AsText(location)::geometry) AS latitude,
|
||||||
|
ST_Z(ST_AsText(location)::geometry) AS elevation,
|
||||||
|
city, state, zip, street,
|
||||||
|
action, device_type, device_model, device_name, device_os
|
||||||
|
FROM locations
|
||||||
|
WHERE datetime >= $1 AND datetime <= $2
|
||||||
|
ORDER BY datetime DESC
|
||||||
|
''', start_datetime.replace(tzinfo=None), end_datetime.replace(tzinfo=None))
|
||||||
|
|
||||||
|
debug(f"Range locations query returned: {range_locations}")
|
||||||
|
locations.extend(range_locations)
|
||||||
|
|
||||||
|
if not locations and (end is None or start_datetime.date() == end_datetime.date()):
|
||||||
|
location_data = await conn.fetchrow('''
|
||||||
|
SELECT id, datetime,
|
||||||
|
ST_X(ST_AsText(location)::geometry) AS longitude,
|
||||||
|
ST_Y(ST_AsText(location)::geometry) AS latitude,
|
||||||
|
ST_Z(ST_AsText(location)::geometry) AS elevation,
|
||||||
|
city, state, zip, street,
|
||||||
|
action, device_type, device_model, device_name, device_os
|
||||||
|
FROM locations
|
||||||
|
WHERE datetime < $1
|
||||||
|
ORDER BY datetime DESC
|
||||||
|
LIMIT 1
|
||||||
|
''', start_datetime.replace(tzinfo=None))
|
||||||
|
|
||||||
|
debug(f"Fallback query returned: {location_data}")
|
||||||
|
if location_data:
|
||||||
|
locations.append(location_data)
|
||||||
|
|
||||||
|
debug(f"Locations found: {locations}")
|
||||||
|
|
||||||
|
# Sort location_data based on the datetime field in descending order
|
||||||
|
sorted_locations = sorted(locations, key=lambda x: x['datetime'], reverse=True)
|
||||||
|
|
||||||
|
# Create Location objects directly from the location data
|
||||||
|
location_objects = [
|
||||||
|
Location(
|
||||||
|
latitude=location['latitude'],
|
||||||
|
longitude=location['longitude'],
|
||||||
|
datetime=location['datetime'],
|
||||||
|
elevation=location.get('elevation'),
|
||||||
|
city=location.get('city'),
|
||||||
|
state=location.get('state'),
|
||||||
|
zip=location.get('zip'),
|
||||||
|
street=location.get('street'),
|
||||||
|
context={
|
||||||
|
'action': location.get('action'),
|
||||||
|
'device_type': location.get('device_type'),
|
||||||
|
'device_model': location.get('device_model'),
|
||||||
|
'device_name': location.get('device_name'),
|
||||||
|
'device_os': location.get('device_os')
|
||||||
|
}
|
||||||
|
) for location in sorted_locations if location['latitude'] is not None and location['longitude'] is not None
|
||||||
|
]
|
||||||
|
|
||||||
|
return location_objects if location_objects else []
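A hypothetical call pattern for fetch_locations(), only to show how a bare date expands to a full-day window (dates are invented and a configured PostGIS database is assumed):

# Sketch: meant to run inside another coroutine in this module.
async def example_queries():
    day_locs = await fetch_locations("2024-07-03")                 # expands to 2024-07-03 00:00:00 through 23:59:59
    span_locs = await fetch_locations("2024-07-01", "2024-07-04")  # explicit start and end dates
    return len(day_locs), len(span_locs)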
|
||||||
|
|
||||||
|
# Function to fetch the last location before the specified datetime
|
||||||
|
async def fetch_last_location_before(datetime: datetime) -> Optional[Location]:
|
||||||
|
datetime = await dt(datetime)
|
||||||
|
|
||||||
|
debug(f"Fetching last location before {datetime}")
|
||||||
|
|
||||||
|
async with DB.get_connection() as conn:
|
||||||
|
|
||||||
|
location_data = await conn.fetchrow('''
|
||||||
|
SELECT id, datetime,
|
||||||
|
ST_X(ST_AsText(location)::geometry) AS longitude,
|
||||||
|
ST_Y(ST_AsText(location)::geometry) AS latitude,
|
||||||
|
ST_Z(ST_AsText(location)::geometry) AS elevation,
|
||||||
|
city, state, zip, street, country,
|
||||||
|
action
|
||||||
|
FROM locations
|
||||||
|
WHERE datetime < $1
|
||||||
|
ORDER BY datetime DESC
|
||||||
|
LIMIT 1
|
||||||
|
''', datetime.replace(tzinfo=None))
|
||||||
|
|
||||||
|
await conn.close()
|
||||||
|
|
||||||
|
if location_data:
|
||||||
|
debug(f"Last location found: {location_data}")
|
||||||
|
return Location(**location_data)
|
||||||
|
else:
|
||||||
|
debug("No location found before the specified datetime")
|
||||||
|
return None
|
||||||
|
|
||||||
|
@gis.get("/map", response_class=HTMLResponse)
|
||||||
|
async def generate_map_endpoint(
|
||||||
|
start_date: Optional[str] = Query(None),
|
||||||
|
end_date: Optional[str] = Query(None),
|
||||||
|
max_points: int = Query(32767, description="Maximum number of points to display")
|
||||||
|
):
|
||||||
|
try:
|
||||||
|
if start_date and end_date:
|
||||||
|
start_date = await dt(start_date)
|
||||||
|
end_date = await dt(end_date)
|
||||||
|
else:
|
||||||
|
start_date, end_date = await get_date_range()
|
||||||
|
except ValueError:
|
||||||
|
raise HTTPException(status_code=400, detail="Invalid date format")
|
||||||
|
|
||||||
|
info(f"Generating map for {start_date} to {end_date}")
|
||||||
|
html_content = await generate_map(start_date, end_date, max_points)
|
||||||
|
return HTMLResponse(content=html_content)
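A hedged client-side example of calling this endpoint (the host, port, and mount path are assumptions about the deployment, and the requests library is used purely for illustration):

# Hypothetical request; adjust the base URL to wherever the gis router is mounted.
import requests

resp = requests.get(
    "http://localhost:8000/map",
    params={"start_date": "2024-07-01", "end_date": "2024-07-31", "max_points": 5000},
    timeout=60,
)
with open("map.html", "w") as f:
    f.write(resp.text)  # the endpoint returns a self-contained folium HTML page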
|
||||||
|
|
||||||
|
async def get_date_range():
|
||||||
|
async with DB.get_connection() as conn:
|
||||||
|
query = "SELECT MIN(datetime) as min_date, MAX(datetime) as max_date FROM locations"
|
||||||
|
row = await conn.fetchrow(query)
|
||||||
|
if row and row['min_date'] and row['max_date']:
|
||||||
|
return row['min_date'], row['max_date']
|
||||||
|
else:
|
||||||
|
return datetime(2022, 1, 1), datetime.now()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
async def generate_and_save_heatmap(
|
||||||
|
start_date: Union[str, int, datetime],
|
||||||
|
end_date: Optional[Union[str, int, datetime]] = None,
|
||||||
|
output_path: Optional[Path] = None
|
||||||
|
) -> Path:
|
||||||
|
"""
|
||||||
|
Generate a heatmap for the given date range and save it as a PNG file using Folium.
|
||||||
|
|
||||||
|
:param start_date: The start date for the map (or the only date if end_date is not provided)
|
||||||
|
:param end_date: The end date for the map (optional)
|
||||||
|
:param output_path: The path to save the PNG file (optional)
|
||||||
|
:return: The path where the PNG file was saved
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
start_date = await dt(start_date)
|
||||||
|
if end_date:
|
||||||
|
end_date = await dt(end_date)
|
||||||
|
else:
|
||||||
|
end_date = start_date.replace(hour=23, minute=59, second=59)
|
||||||
|
|
||||||
|
# Fetch locations
|
||||||
|
locations = await fetch_locations(start_date, end_date)
|
||||||
|
if not locations:
|
||||||
|
raise ValueError("No locations found for the given date range")
|
||||||
|
|
||||||
|
# Create map
|
||||||
|
m = folium.Map()
|
||||||
|
|
||||||
|
# Prepare heatmap data
|
||||||
|
heat_data = [[loc.latitude, loc.longitude] for loc in locations]
|
||||||
|
|
||||||
|
# Add heatmap layer
|
||||||
|
HeatMap(heat_data).add_to(m)
|
||||||
|
|
||||||
|
# Fit the map to the bounds of all locations
|
||||||
|
bounds = [
|
||||||
|
[min(loc.latitude for loc in locations), min(loc.longitude for loc in locations)],
|
||||||
|
[max(loc.latitude for loc in locations), max(loc.longitude for loc in locations)]
|
||||||
|
]
|
||||||
|
m.fit_bounds(bounds)
|
||||||
|
|
||||||
|
# Generate output path if not provided
|
||||||
|
if output_path is None:
|
||||||
|
output_path, relative_path = assemble_journal_path(end_date, filename="map", extension=".png", no_timestamp=True)
|
||||||
|
|
||||||
|
# Save the map as PNG
|
||||||
|
m.save(str(output_path))
|
||||||
|
|
||||||
|
info(f"Heatmap saved as PNG: {output_path}")
|
||||||
|
return output_path
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Error generating and saving heatmap: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
async def generate_map(start_date: datetime, end_date: datetime, max_points: int):
|
||||||
|
locations = await fetch_locations(start_date, end_date)
|
||||||
|
if not locations:
|
||||||
|
raise HTTPException(status_code=404, detail="No locations found for the given date range")
|
||||||
|
|
||||||
|
info(f"Found {len(locations)} locations for the given date range")
|
||||||
|
|
||||||
|
if len(locations) > max_points:
|
||||||
|
locations = random.sample(locations, max_points)
|
||||||
|
|
||||||
|
map_center = [sum(loc.latitude for loc in locations) / len(locations),
|
||||||
|
sum(loc.longitude for loc in locations) / len(locations)]
|
||||||
|
m = folium.Map(location=map_center, zoom_start=5)
|
||||||
|
|
||||||
|
folium.TileLayer('openstreetmap', name='OpenStreetMap').add_to(m)
|
||||||
|
folium.TileLayer(
|
||||||
|
tiles='https://basemap.nationalmap.gov/arcgis/rest/services/USGSTopo/MapServer/tile/{z}/{y}/{x}',
|
||||||
|
attr='USGS The National Map',
|
||||||
|
name='USGS Topo'
|
||||||
|
).add_to(m)
|
||||||
|
folium.TileLayer(
|
||||||
|
tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Topo_Map/MapServer/tile/{z}/{y}/{x}',
|
||||||
|
attr='Esri',
|
||||||
|
name='Esri World Topo'
|
||||||
|
).add_to(m)
|
||||||
|
|
||||||
|
folium.TileLayer('cartodbdark_matter', name='Dark Mode').add_to(m)
|
||||||
|
|
||||||
|
|
||||||
|
# In the generate_map function:
|
||||||
|
draw = Draw(
|
||||||
|
draw_options={
|
||||||
|
'polygon': True,
|
||||||
|
'rectangle': True,
|
||||||
|
'circle': True,
|
||||||
|
'marker': True,
|
||||||
|
'circlemarker': False,
|
||||||
|
},
|
||||||
|
edit_options={'edit': False}
|
||||||
|
)
|
||||||
|
draw.add_to(m)
|
||||||
|
|
||||||
|
MeasureControl(
|
||||||
|
position='topright',
|
||||||
|
primary_length_unit='kilometers',
|
||||||
|
secondary_length_unit='miles',
|
||||||
|
primary_area_unit='sqmeters',
|
||||||
|
secondary_area_unit='acres'
|
||||||
|
).add_to(m)
|
||||||
|
|
||||||
|
m.get_root().html.add_child(folium.Element("""
|
||||||
|
<script>
|
||||||
|
var drawnItems = new L.FeatureGroup();
|
||||||
|
map.addLayer(drawnItems);
|
||||||
|
map.on(L.Draw.Event.CREATED, function (event) {
|
||||||
|
var layer = event.layer;
|
||||||
|
drawnItems.addLayer(layer);
|
||||||
|
var shape = layer.toGeoJSON();
|
||||||
|
var points = [];
|
||||||
|
markerCluster.eachLayer(function (marker) {
|
||||||
|
if (turf.booleanPointInPolygon(marker.toGeoJSON(), shape)) {
|
||||||
|
points.push(marker.getLatLng());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if (points.length > 0) {
|
||||||
|
alert('Selected ' + points.length + ' points');
|
||||||
|
console.log(points);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
"""))
|
||||||
|
|
||||||
|
# Add marker cluster
|
||||||
|
marker_cluster = MarkerCluster(name="Markers").add_to(m)
|
||||||
|
|
||||||
|
# Prepare data for heatmap
|
||||||
|
heat_data = [[loc.latitude, loc.longitude] for loc in locations]
|
||||||
|
|
||||||
|
# Add heatmap
|
||||||
|
HeatMap(heat_data, name="Heatmap").add_to(m)
|
||||||
|
|
||||||
|
# Add markers to cluster
|
||||||
|
for location in locations:
|
||||||
|
popup_content = f"""
|
||||||
|
{location.city}, {location.state}<br>
|
||||||
|
Elevation: {location.elevation}m<br>
|
||||||
|
Date: {location.datetime}<br>
|
||||||
|
Action: {location.context.get('action', 'N/A')}<br>
|
||||||
|
Device: {location.context.get('device_name', 'N/A')} ({location.context.get('device_model', 'N/A')})
|
||||||
|
"""
|
||||||
|
folium.Marker(
|
||||||
|
location=[location.latitude, location.longitude],
|
||||||
|
popup=popup_content,
|
||||||
|
tooltip=f"{location.city}, {location.state}"
|
||||||
|
).add_to(marker_cluster)
|
||||||
|
|
||||||
|
# Add controls
|
||||||
|
Fullscreen().add_to(m)
|
||||||
|
MiniMap().add_to(m)
|
||||||
|
MousePosition().add_to(m)
|
||||||
|
Geocoder().add_to(m)
|
||||||
|
Draw().add_to(m)
|
||||||
|
|
||||||
|
# Add search functionality
|
||||||
|
Search(
|
||||||
|
layer=marker_cluster,
|
||||||
|
geom_type='Point',
|
||||||
|
placeholder='Search for a location',
|
||||||
|
collapsed=False,
|
||||||
|
search_label='city'
|
||||||
|
).add_to(m)
|
||||||
|
|
||||||
|
# Add layer control
|
||||||
|
folium.LayerControl().add_to(m)
|
||||||
|
|
||||||
|
return m.get_root().render()
|
||||||
|
|
||||||
|
async def post_location(location: Location):
|
||||||
|
# if not location.datetime:
|
||||||
|
# info(f"location appears to be missing datetime: {location}")
|
||||||
|
# else:
|
||||||
|
# debug(f"post_location called with {location.datetime}")
|
||||||
|
async with DB.get_connection() as conn:
|
||||||
|
try:
|
||||||
|
context = location.context or {}
|
||||||
|
action = context.get('action', 'manual')
|
||||||
|
device_type = context.get('device_type', 'Unknown')
|
||||||
|
device_model = context.get('device_model', 'Unknown')
|
||||||
|
device_name = context.get('device_name', 'Unknown')
|
||||||
|
device_os = context.get('device_os', 'Unknown')
|
||||||
|
|
||||||
|
# Parse and localize the datetime
|
||||||
|
localized_datetime = await dt(location.datetime)
|
||||||
|
|
||||||
|
await conn.execute('''
|
||||||
|
INSERT INTO locations (
|
||||||
|
datetime, location, city, state, zip, street, action, device_type, device_model, device_name, device_os,
|
||||||
|
class_, type, name, display_name, amenity, house_number, road, quarter, neighbourhood,
|
||||||
|
suburb, county, country_code, country
|
||||||
|
)
|
||||||
|
VALUES ($1, ST_SetSRID(ST_MakePoint($2, $3, $4), 4326), $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15,
|
||||||
|
$16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26)
|
||||||
|
''', localized_datetime, location.longitude, location.latitude, location.elevation, location.city, location.state,
|
||||||
|
location.zip, location.street, action, device_type, device_model, device_name, device_os,
|
||||||
|
location.class_, location.type, location.name, location.display_name,
|
||||||
|
location.amenity, location.house_number, location.road, location.quarter, location.neighbourhood,
|
||||||
|
location.suburb, location.county, location.country_code, location.country)
|
||||||
|
|
||||||
|
await conn.close()
|
||||||
|
info(f"Successfully posted location: {location.latitude}, {location.longitude}, {location.elevation} on {localized_datetime}")
|
||||||
|
return {
|
||||||
|
'datetime': localized_datetime,
|
||||||
|
'latitude': location.latitude,
|
||||||
|
'longitude': location.longitude,
|
||||||
|
'elevation': location.elevation,
|
||||||
|
'city': location.city,
|
||||||
|
'state': location.state,
|
||||||
|
'zip': location.zip,
|
||||||
|
'street': location.street,
|
||||||
|
'action': action,
|
||||||
|
'device_type': device_type,
|
||||||
|
'device_model': device_model,
|
||||||
|
'device_name': device_name,
|
||||||
|
'device_os': device_os,
|
||||||
|
'class_': location.class_,
|
||||||
|
'type': location.type,
|
||||||
|
'name': location.name,
|
||||||
|
'display_name': location.display_name,
|
||||||
|
'amenity': location.amenity,
|
||||||
|
'house_number': location.house_number,
|
||||||
|
'road': location.road,
|
||||||
|
'quarter': location.quarter,
|
||||||
|
'neighbourhood': location.neighbourhood,
|
||||||
|
'suburb': location.suburb,
|
||||||
|
'county': location.county,
|
||||||
|
'country_code': location.country_code,
|
||||||
|
'country': location.country
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Error posting location {e}")
|
||||||
|
err(traceback.format_exc())
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@gis.post("/locate")
|
||||||
|
async def post_locate_endpoint(locations: Union[Location, List[Location]]):
|
||||||
|
if isinstance(locations, Location):
|
||||||
|
locations = [locations]
|
||||||
|
|
||||||
|
# Prepare locations
|
||||||
|
for lcn in locations:
|
||||||
|
if not lcn.datetime:
|
||||||
|
tz = await GEO.tz_at(lcn.latitude, lcn.longitude)
|
||||||
|
lcn.datetime = datetime.now(ZoneInfo(tz)).isoformat()
|
||||||
|
|
||||||
|
if not lcn.context:
|
||||||
|
lcn.context = {
|
||||||
|
"action": "missing",
|
||||||
|
"device_type": "API",
|
||||||
|
"device_model": "Unknown",
|
||||||
|
"device_name": "Unknown",
|
||||||
|
"device_os": "Unknown"
|
||||||
|
}
|
||||||
|
debug(f"Location received for processing: {lcn}")
|
||||||
|
|
||||||
|
geocoded_locations = await GEO.code(locations)
|
||||||
|
|
||||||
|
responses = []
|
||||||
|
if isinstance(geocoded_locations, List):
|
||||||
|
for location in geocoded_locations:
|
||||||
|
debug(f"Final location to be submitted to database: {location}")
|
||||||
|
location_entry = await post_location(location)
|
||||||
|
if location_entry:
|
||||||
|
responses.append({"location_data": location_entry})
|
||||||
|
else:
|
||||||
|
warn(f"Posting location to database appears to have failed.")
|
||||||
|
else:
|
||||||
|
debug(f"Final location to be submitted to database: {geocoded_locations}")
|
||||||
|
location_entry = await post_location(geocoded_locations)
|
||||||
|
if location_entry:
|
||||||
|
responses.append({"location_data": location_entry})
|
||||||
|
else:
|
||||||
|
warn(f"Posting location to database appears to have failed.")
|
||||||
|
|
||||||
|
return {"message": "Locations and weather updated", "results": responses}
|
||||||
|
|
||||||
|
|
||||||
|
@gis.get("/locate", response_model=Location)
|
||||||
|
async def get_last_location_endpoint() -> JSONResponse:
|
||||||
|
this_location = await get_last_location()
|
||||||
|
|
||||||
|
if this_location:
|
||||||
|
location_dict = this_location.model_dump()
|
||||||
|
location_dict["datetime"] = this_location.datetime.isoformat()
|
||||||
|
return JSONResponse(content=location_dict)
|
||||||
|
else:
|
||||||
|
raise HTTPException(status_code=404, detail="No location found before the specified datetime")
|
||||||
|
|
||||||
|
@gis.get("/locate/{datetime_str}", response_model=List[Location])
|
||||||
|
async def get_locate(datetime_str: str, all: bool = False):
|
||||||
|
try:
|
||||||
|
date_time = await dt(datetime_str)
|
||||||
|
except ValueError as e:
|
||||||
|
err(f"Invalid datetime string provided: {datetime_str}")
|
||||||
|
return ["ERROR: INVALID DATETIME PROVIDED. USE YYYYMMDDHHmmss or YYYYMMDD format."]
|
||||||
|
|
||||||
|
locations = await fetch_locations(date_time)
|
||||||
|
if not locations:
|
||||||
|
raise HTTPException(status_code=404, detail="No nearby data found for this date and time")
|
||||||
|
|
||||||
|
return locations if all else [locations[0]]
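To round out the new router, a hypothetical end-to-end client sketch for the /locate endpoints (base URL and coordinates are invented; per the code above, a missing datetime is filled in server-side using the point's local timezone):

# Illustrative only; field names follow the Location model used in this router.
import requests

base = "http://localhost:8000"  # assumption: gis router mounted at the application root

# Submit a single point; the server geocodes it and stores it in PostGIS.
requests.post(f"{base}/locate", json={"latitude": 45.52, "longitude": -122.68})

# Fetch the most recently stored location.
last = requests.get(f"{base}/locate").json()
print(last["datetime"], last["city"])

# Fetch the location(s) nearest a given date; all=true returns every match for that day.
day = requests.get(f"{base}/locate/20240703", params={"all": "true"}).json()
print(len(day))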
|
|
@ -12,6 +12,11 @@ from sijapi import L, API, TS_ID, SUBNET_BROADCAST
|
||||||
|
|
||||||
health = APIRouter(tags=["public", "trusted", "private"])
|
health = APIRouter(tags=["public", "trusted", "private"])
|
||||||
logger = L.get_module_logger("health")
|
logger = L.get_module_logger("health")
|
||||||
|
def debug(text: str): logger.debug(text)
|
||||||
|
def info(text: str): logger.info(text)
|
||||||
|
def warn(text: str): logger.warning(text)
|
||||||
|
def err(text: str): logger.error(text)
|
||||||
|
def crit(text: str): logger.critical(text)
|
||||||
|
|
||||||
@health.get("/health")
|
@health.get("/health")
|
||||||
def get_health():
|
def get_health():
|
||||||
|
@ -49,7 +54,7 @@ async def get_wan_ip():
|
||||||
wan_info = response.json()
|
wan_info = response.json()
|
||||||
return wan_info.get('ip', 'Unavailable')
|
return wan_info.get('ip', 'Unavailable')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error fetching WAN IP: {e}")
|
err(f"Error fetching WAN IP: {e}")
|
||||||
return "Unavailable"
|
return "Unavailable"
|
||||||
|
|
||||||
@health.get("/ts_ip")
|
@health.get("/ts_ip")
|
||||||
|
|
|
@ -42,6 +42,11 @@ import base64
|
||||||
|
|
||||||
ig = APIRouter()
|
ig = APIRouter()
|
||||||
logger = L.get_module_logger("ig")
|
logger = L.get_module_logger("ig")
|
||||||
|
def debug(text: str): logger.debug(text)
|
||||||
|
def info(text: str): logger.info(text)
|
||||||
|
def warn(text: str): logger.warning(text)
|
||||||
|
def err(text: str): logger.error(text)
|
||||||
|
def crit(text: str): logger.critical(text)
|
||||||
|
|
||||||
class IG_Request(BaseModel):
|
class IG_Request(BaseModel):
|
||||||
file: Optional[UploadFile] = None # upload a particular file to Instagram
|
file: Optional[UploadFile] = None # upload a particular file to Instagram
|
||||||
|
|
|
@ -34,6 +34,12 @@ from sijapi import API, L, COMFYUI_URL, COMFYUI_OUTPUT_DIR, IMG_CONFIG_PATH, IMG
|
||||||
|
|
||||||
img = APIRouter()
|
img = APIRouter()
|
||||||
logger = L.get_module_logger("img")
|
logger = L.get_module_logger("img")
|
||||||
|
def debug(text: str): logger.debug(text)
|
||||||
|
def info(text: str): logger.info(text)
|
||||||
|
def warn(text: str): logger.warning(text)
|
||||||
|
def err(text: str): logger.error(text)
|
||||||
|
def crit(text: str): logger.critical(text)
|
||||||
|
|
||||||
CLIENT_ID = str(uuid.uuid4())
|
CLIENT_ID = str(uuid.uuid4())
|
||||||
|
|
||||||
@img.post("/img")
|
@img.post("/img")
|
||||||
|
@ -79,12 +85,12 @@ async def workflow(prompt: str, scene: str = None, size: str = None, earlyout: s
|
||||||
|
|
||||||
scene_workflow = random.choice(scene_data['workflows'])
|
scene_workflow = random.choice(scene_data['workflows'])
|
||||||
if size:
|
if size:
|
||||||
logger.debug(f"Specified size: {size}")
|
debug(f"Specified size: {size}")
|
||||||
|
|
||||||
size = size if size else scene_workflow.get('size', '1024x1024')
|
size = size if size else scene_workflow.get('size', '1024x1024')
|
||||||
|
|
||||||
width, height = map(int, size.split('x'))
|
width, height = map(int, size.split('x'))
|
||||||
logger.debug(f"Parsed width: {width}; parsed height: {height}")
|
debug(f"Parsed width: {width}; parsed height: {height}")
|
||||||
|
|
||||||
workflow_path = Path(IMG_WORKFLOWS_DIR) / scene_workflow['workflow']
|
workflow_path = Path(IMG_WORKFLOWS_DIR) / scene_workflow['workflow']
|
||||||
workflow_data = json.loads(workflow_path.read_text())
|
workflow_data = json.loads(workflow_path.read_text())
|
||||||
|
@ -98,22 +104,22 @@ async def workflow(prompt: str, scene: str = None, size: str = None, earlyout: s
|
||||||
}
|
}
|
||||||
|
|
||||||
saved_file_key = await update_prompt_and_get_key(workflow=workflow_data, post=post, positive=image_concept)
|
saved_file_key = await update_prompt_and_get_key(workflow=workflow_data, post=post, positive=image_concept)
|
||||||
print(f"Saved file key: {saved_file_key}")
|
info(f"Saved file key: {saved_file_key}")
|
||||||
|
|
||||||
prompt_id = await queue_prompt(workflow_data)
|
prompt_id = await queue_prompt(workflow_data)
|
||||||
print(f"Prompt ID: {prompt_id}")
|
info(f"Prompt ID: {prompt_id}")
|
||||||
|
|
||||||
max_size = max(width, height) if downscale_to_fit else None
|
max_size = max(width, height) if downscale_to_fit else None
|
||||||
destination_path = Path(destination_path).with_suffix(".jpg") if destination_path else IMG_DIR / f"{prompt_id}.jpg"
|
destination_path = Path(destination_path).with_suffix(".jpg") if destination_path else IMG_DIR / f"{prompt_id}.jpg"
|
||||||
|
|
||||||
if earlyout:
|
if earlyout:
|
||||||
asyncio.create_task(generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path))
|
asyncio.create_task(generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path))
|
||||||
logger.debug(f"Returning {destination_path}")
|
debug(f"Returning {destination_path}")
|
||||||
return destination_path
|
return destination_path
|
||||||
|
|
||||||
else:
|
else:
|
||||||
await generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path)
|
await generate_and_save_image(prompt_id, saved_file_key, max_size, destination_path)
|
||||||
logger.debug(f"Returning {destination_path}")
|
debug(f"Returning {destination_path}")
|
||||||
return destination_path
|
return destination_path
|
||||||
|
|
||||||
|
|
||||||
|
@ -124,10 +130,10 @@ async def generate_and_save_image(prompt_id, saved_file_key, max_size, destinati
|
||||||
jpg_file_path = await save_as_jpg(image_data, prompt_id, quality=90, max_size=max_size, destination_path=destination_path)
|
jpg_file_path = await save_as_jpg(image_data, prompt_id, quality=90, max_size=max_size, destination_path=destination_path)
|
||||||
|
|
||||||
if Path(jpg_file_path) != Path(destination_path):
|
if Path(jpg_file_path) != Path(destination_path):
|
||||||
logger.error(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}")
|
err(f"Mismatch between jpg_file_path, {jpg_file_path}, and detination_path, {destination_path}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error in generate_and_save_image: {e}")
|
err(f"Error in generate_and_save_image: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@ -149,7 +155,7 @@ async def poll_status(prompt_id):
|
||||||
status_data = await response.json()
|
status_data = await response.json()
|
||||||
job_data = status_data.get(prompt_id, {})
|
job_data = status_data.get(prompt_id, {})
|
||||||
if job_data.get("status", {}).get("completed", False):
|
if job_data.get("status", {}).get("completed", False):
|
||||||
print(f"{prompt_id} completed in {elapsed_time} seconds.")
|
info(f"{prompt_id} completed in {elapsed_time} seconds.")
|
||||||
return job_data
|
return job_data
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
|
|
||||||
|
@ -200,7 +206,7 @@ async def save_as_jpg(image_data, prompt_id, max_size = None, quality = 100, des
|
||||||
return str(destination_path_jpg)
|
return str(destination_path_jpg)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error processing image: {e}")
|
err(f"Error processing image: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@ -216,11 +222,11 @@ def set_presets(workflow_data, preset_values):
|
||||||
if 'inputs' in workflow_data.get(preset_node, {}):
|
if 'inputs' in workflow_data.get(preset_node, {}):
|
||||||
workflow_data[preset_node]['inputs'][preset_key] = preset_value
|
workflow_data[preset_node]['inputs'][preset_key] = preset_value
|
||||||
else:
|
else:
|
||||||
logger.debug("Node not found in workflow_data")
|
debug("Node not found in workflow_data")
|
||||||
else:
|
else:
|
||||||
logger.debug("Required data missing in preset_values")
|
debug("Required data missing in preset_values")
|
||||||
else:
|
else:
|
||||||
logger.debug("No preset_values found")
|
debug("No preset_values found")
|
||||||
|
|
||||||
|
|
||||||
def get_return_path(destination_path):
|
def get_return_path(destination_path):
|
||||||
|
@ -235,7 +241,7 @@ def get_scene(scene):
|
||||||
IMG_CONFIG = yaml.safe_load(IMG_CONFIG_file)
|
IMG_CONFIG = yaml.safe_load(IMG_CONFIG_file)
|
||||||
for scene_data in IMG_CONFIG['scenes']:
|
for scene_data in IMG_CONFIG['scenes']:
|
||||||
if scene_data['scene'] == scene:
|
if scene_data['scene'] == scene:
|
||||||
logger.debug(f"Found scene for \"{scene}\".")
|
debug(f"Found scene for \"{scene}\".")
|
||||||
return scene_data
|
return scene_data
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -254,11 +260,11 @@ def get_matching_scene(prompt):
|
||||||
max_count = count
|
max_count = count
|
||||||
scene_data = sc
|
scene_data = sc
|
||||||
if scene_data:
|
if scene_data:
|
||||||
logger.debug(f"Found better-matching scene: the prompt contains {max_count} words that match triggers for {scene_data.get('name')}!")
|
debug(f"Found better-matching scene: the prompt contains {max_count} words that match triggers for {scene_data.get('name')}!")
|
||||||
if scene_data:
|
if scene_data:
|
||||||
return scene_data
|
return scene_data
|
||||||
else:
|
else:
|
||||||
logger.debug(f"No matching scenes found, falling back to default scene.")
|
debug(f"No matching scenes found, falling back to default scene.")
|
||||||
return IMG_CONFIG['scenes'][0]
|
return IMG_CONFIG['scenes'][0]
|
||||||
|
|
||||||
|
|
||||||
|
@ -282,11 +288,11 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0):
|
||||||
for attempt in range(retries):
|
for attempt in range(retries):
|
||||||
try:
|
try:
|
||||||
with socket.create_connection(("127.0.0.1", 8188), timeout=2):
|
with socket.create_connection(("127.0.0.1", 8188), timeout=2):
|
||||||
print("ComfyUI is already running.")
|
info("ComfyUI is already running.")
|
||||||
return
|
return
|
||||||
except (socket.timeout, ConnectionRefusedError):
|
except (socket.timeout, ConnectionRefusedError):
|
||||||
if attempt == 0: # Only try to start ComfyUI on the first failed attempt
|
if attempt == 0: # Only try to start ComfyUI on the first failed attempt
|
||||||
print("ComfyUI is not running. Starting it now...")
|
warn("ComfyUI is not running. Starting it now...")
|
||||||
try:
|
try:
|
||||||
tmux_command = (
|
tmux_command = (
|
||||||
"tmux split-window -h "
|
"tmux split-window -h "
|
||||||
|
@ -295,13 +301,14 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0):
|
||||||
"python main.py; exec $SHELL\""
|
"python main.py; exec $SHELL\""
|
||||||
)
|
)
|
||||||
subprocess.Popen(tmux_command, shell=True)
|
subprocess.Popen(tmux_command, shell=True)
|
||||||
print("ComfyUI started in a new tmux session.")
|
info("ComfyUI started in a new tmux session.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise RuntimeError(f"Error starting ComfyUI: {e}")
|
raise RuntimeError(f"Error starting ComfyUI: {e}")
|
||||||
|
|
||||||
print(f"Attempt {attempt + 1}/{retries} failed. Waiting {timeout} seconds before retrying...")
|
warn(f"Attempt {attempt + 1}/{retries} failed. Waiting {timeout} seconds before retrying...")
|
||||||
await asyncio.sleep(timeout)
|
await asyncio.sleep(timeout)
|
||||||
|
|
||||||
|
crit(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
|
||||||
raise RuntimeError(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
|
raise RuntimeError(f"Failed to ensure ComfyUI is running after {retries} attempts with {timeout} second intervals.")
|
||||||
|
|
||||||
# async def upload_and_get_shareable_link(image_path):
|
# async def upload_and_get_shareable_link(image_path):
|
||||||
|
@ -326,10 +333,10 @@ async def ensure_comfy(retries: int = 4, timeout: float = 6.0):
|
||||||
# shareable_link = f"https://{PHOTOPRISM_URL}/p/{photo_uuid}"
|
# shareable_link = f"https://{PHOTOPRISM_URL}/p/{photo_uuid}"
|
||||||
# return shareable_link
|
# return shareable_link
|
||||||
# else:
|
# else:
|
||||||
# logger.error("Could not find the uploaded photo details.")
|
# err("Could not find the uploaded photo details.")
|
||||||
# return None
|
# return None
|
||||||
# except Exception as e:
|
# except Exception as e:
|
||||||
# logger.error(f"Error in upload_and_get_shareable_link: {e}")
|
# err(f"Error in upload_and_get_shareable_link: {e}")
|
||||||
# return None
|
# return None
|
||||||
|
|
||||||
|
|
||||||
|
@ -405,7 +412,7 @@ async def load_workflow(workflow_path: str, workflow:str):
|
||||||
return json.load(file)
|
return json.load(file)
|
||||||
|
|
||||||
|
|
||||||
async def update_prompt_and_get_key(workf0ow: dict, post: dict, positive: str):
|
async def update_prompt_and_get_key(workflow: dict, post: dict, positive: str):
|
||||||
'''
|
'''
|
||||||
Recurses through the workflow searching for and substituting the dynamic values for API_PrePrompt, API_StylePrompt, API_NegativePrompt, width, height, and seed (random integer).
|
Recurses through the workflow searching for and substituting the dynamic values for API_PrePrompt, API_StylePrompt, API_NegativePrompt, width, height, and seed (random integer).
|
||||||
Even more important, it finds and returns the key to the filepath where the file is saved, which we need to decipher status when generation is complete.
|
Even more important, it finds and returns the key to the filepath where the file is saved, which we need to decipher status when generation is complete.
|
||||||
|
@ -436,13 +443,13 @@ Even more important, it finds and returns the key to the filepath where the file
|
||||||
workflow[key] = random.randint(1000000000000, 9999999999999)
|
workflow[key] = random.randint(1000000000000, 9999999999999)
|
||||||
|
|
||||||
elif key in ["width", "max_width", "scaled_width", "height", "max_height", "scaled_height", "side_length", "size", "value", "dimension", "dimensions", "long", "long_side", "short", "short_side", "length"]:
|
elif key in ["width", "max_width", "scaled_width", "height", "max_height", "scaled_height", "side_length", "size", "value", "dimension", "dimensions", "long", "long_side", "short", "short_side", "length"]:
|
||||||
logger.debug(f"Got a hit for a dimension: {key} {value}")
|
debug(f"Got a hit for a dimension: {key} {value}")
|
||||||
if value == 1023:
|
if value == 1023:
|
||||||
workflow[key] = post.get("width", 1024)
|
workflow[key] = post.get("width", 1024)
|
||||||
logger.debug(f"Set {key} to {workflow[key]}.")
|
debug(f"Set {key} to {workflow[key]}.")
|
||||||
elif value == 1025:
|
elif value == 1025:
|
||||||
workflow[key] = post.get("height", 1024)
|
workflow[key] = post.get("height", 1024)
|
||||||
logger.debug(f"Set {key} to {workflow[key]}.")
|
debug(f"Set {key} to {workflow[key]}.")
|
||||||
|
|
||||||
update_recursive(workflow)
|
update_recursive(workflow)
|
||||||
return found_key[0]
|
return found_key[0]
|
||||||
|
|
|
@ -33,10 +33,15 @@ from sijapi.routers.asr import transcribe_audio
|
||||||
|
|
||||||
llm = APIRouter()
|
llm = APIRouter()
|
||||||
logger = L.get_module_logger("llm")
|
logger = L.get_module_logger("llm")
|
||||||
|
def debug(text: str): logger.debug(text)
|
||||||
|
def info(text: str): logger.info(text)
|
||||||
|
def warn(text: str): logger.warning(text)
|
||||||
|
def err(text: str): logger.error(text)
|
||||||
|
def crit(text: str): logger.critical(text)
|
||||||
|
|
||||||
# Initialize chromadb client
|
# Initialize chromadb client
|
||||||
client = chromadb.Client()
|
client = chromadb.Client()
|
||||||
OBSIDIAN_CHROMADB_COLLECTION = client.create_collection("obsidian")
|
# OBSIDIAN_CHROMADB_COLLECTION = client.create_collection("obsidian")
|
||||||
VISION_MODELS = ["llava-phi3", "moondream", "llava", "llava-llama3", "llava:34b", "llava:13b-v1.5-q8_0"]
|
VISION_MODELS = ["llava-phi3", "moondream", "llava", "llava-llama3", "llava:34b", "llava:13b-v1.5-q8_0"]
|
||||||
|
|
||||||
# Function to read all markdown files in the folder
|
# Function to read all markdown files in the folder
|
||||||
|
@ -48,6 +53,7 @@ def read_markdown_files(folder: Path):
|
||||||
documents.append(file.read())
|
documents.append(file.read())
|
||||||
return documents, file_paths
|
return documents, file_paths
|
||||||
|
|
||||||
|
reimplement='''
|
||||||
# Read markdown files and generate embeddings
|
# Read markdown files and generate embeddings
|
||||||
documents, file_paths = read_markdown_files(DOC_DIR)
|
documents, file_paths = read_markdown_files(DOC_DIR)
|
||||||
for i, doc in enumerate(documents):
|
for i, doc in enumerate(documents):
|
||||||
|
@ -57,7 +63,7 @@ for i, doc in enumerate(documents):
|
||||||
ids=[file_paths[i]],
|
ids=[file_paths[i]],
|
||||||
embeddings=[embedding],
|
embeddings=[embedding],
|
||||||
documents=[doc]
|
documents=[doc]
|
||||||
)
|
)'''
|
||||||
|
|
||||||
# Function to retrieve the most relevant document given a prompt
|
# Function to retrieve the most relevant document given a prompt
|
||||||
@llm.get("/retrieve_document/{prompt}")
|
@llm.get("/retrieve_document/{prompt}")
|
||||||
|
@ -89,13 +95,13 @@ async def query_ollama(usr: str, sys: str = LLM_SYS_MSG, model: str = DEFAULT_LL
|
||||||
LLM = Ollama()
|
LLM = Ollama()
|
||||||
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
|
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
|
||||||
|
|
||||||
logger.debug(response)
|
debug(response)
|
||||||
if "message" in response:
|
if "message" in response:
|
||||||
if "content" in response["message"]:
|
if "content" in response["message"]:
|
||||||
content = response["message"]["content"]
|
content = response["message"]["content"]
|
||||||
return content
|
return content
|
||||||
else:
|
else:
|
||||||
logger.debug("No choices found in response")
|
debug("No choices found in response")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def query_ollama_multishot(
|
async def query_ollama_multishot(
|
||||||
|
@ -116,12 +122,12 @@ async def query_ollama_multishot(
|
||||||
|
|
||||||
LLM = Ollama()
|
LLM = Ollama()
|
||||||
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
|
response = await LLM.chat(model=model, messages=messages, options={"num_predict": max_tokens})
|
||||||
logger.debug(response)
|
debug(response)
|
||||||
|
|
||||||
if "message" in response and "content" in response["message"]:
|
if "message" in response and "content" in response["message"]:
|
||||||
return response["message"]["content"]
|
return response["message"]["content"]
|
||||||
else:
|
else:
|
||||||
logger.debug("No content found in response")
|
debug("No content found in response")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@ -140,21 +146,21 @@ async def chat_completions(request: Request):
|
||||||
raise HTTPException(status_code=400, detail="Message data is required in the request body.")
|
raise HTTPException(status_code=400, detail="Message data is required in the request body.")
|
||||||
|
|
||||||
requested_model = body.get('model', 'default-model')
|
requested_model = body.get('model', 'default-model')
|
||||||
logger.debug(f"Requested model: {requested_model}")
|
debug(f"Requested model: {requested_model}")
|
||||||
stream = body.get('stream')
|
stream = body.get('stream')
|
||||||
token_limit = body.get('max_tokens') or body.get('num_predict')
|
token_limit = body.get('max_tokens') or body.get('num_predict')
|
||||||
|
|
||||||
# Check if the most recent message contains an image_url
|
# Check if the most recent message contains an image_url
|
||||||
recent_message = messages[-1]
|
recent_message = messages[-1]
|
||||||
if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')):
|
if recent_message.get('role') == 'user' and is_vision_request(recent_message.get('content')):
|
||||||
logger.debug("Processing as a vision request")
|
debug("Processing as a vision request")
|
||||||
model = "llava"
|
model = "llava"
|
||||||
logger.debug(f"Using model: {model}")
|
debug(f"Using model: {model}")
|
||||||
return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json")
|
return StreamingResponse(stream_messages_with_vision(recent_message, model, token_limit), media_type="application/json")
|
||||||
else:
|
else:
|
||||||
logger.debug("Processing as a standard request")
|
debug("Processing as a standard request")
|
||||||
model = requested_model
|
model = requested_model
|
||||||
logger.debug(f"Using model: {model}")
|
debug(f"Using model: {model}")
|
||||||
if stream:
|
if stream:
|
||||||
return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json")
|
return StreamingResponse(stream_messages(messages, model, token_limit), media_type="application/json")
|
||||||
else:
|
else:
|
||||||
|
@ -279,17 +285,17 @@ async def generate_messages(messages: list, model: str = "llama3"):
|
||||||
def is_model_available(model_name):
|
def is_model_available(model_name):
|
||||||
model_data = OllamaList()
|
model_data = OllamaList()
|
||||||
available_models = [model['name'] for model in model_data['models']]
|
available_models = [model['name'] for model in model_data['models']]
|
||||||
logger.debug(f"Available models: {available_models}") # Log using the configured LOGGER
|
debug(f"Available models: {available_models}") # Log using the configured LOGGER
|
||||||
|
|
||||||
matching_models = [model for model in available_models if model.startswith(model_name + ':') or model == model_name]
|
matching_models = [model for model in available_models if model.startswith(model_name + ':') or model == model_name]
|
||||||
if len(matching_models) == 1:
|
if len(matching_models) == 1:
|
||||||
logger.debug(f"Unique match found: {matching_models[0]}")
|
debug(f"Unique match found: {matching_models[0]}")
|
||||||
return True
|
return True
|
||||||
elif len(matching_models) > 1:
|
elif len(matching_models) > 1:
|
||||||
logger.error(f"Ambiguous match found, models: {matching_models}")
|
err(f"Ambiguous match found, models: {matching_models}")
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
logger.error(f"No match found for model: {model_name}")
|
err(f"No match found for model: {model_name}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
@ -412,12 +418,12 @@ def query_gpt4(llmPrompt: List = [], system_msg: str = "", user_msg: str = "", m
|
||||||
if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
|
if hasattr(first_choice, "message") and hasattr(first_choice.message, "content"):
|
||||||
return first_choice.message.content
|
return first_choice.message.content
|
||||||
else:
|
else:
|
||||||
logger.debug("No content attribute in the first choice's message")
|
debug("No content attribute in the first choice's message")
|
||||||
logger.debug(f"No content found in message string: {response.choices}")
|
debug(f"No content found in message string: {response.choices}")
|
||||||
logger.debug("Trying again!")
|
debug("Trying again!")
|
||||||
query_gpt4(messages, max_tokens)
|
query_gpt4(messages, max_tokens)
|
||||||
else:
|
else:
|
||||||
logger.debug(f"No content found in message string: {response}")
|
debug(f"No content found in message string: {response}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def llava(image_base64, prompt):
|
def llava(image_base64, prompt):
|
||||||
|
@ -427,7 +433,7 @@ def llava(image_base64, prompt):
|
||||||
prompt = f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}",
|
prompt = f"This is a chat between a user and an assistant. The assistant is helping the user to describe an image. {prompt}",
|
||||||
images = [image_base64]
|
images = [image_base64]
|
||||||
)
|
)
|
||||||
logger.debug(response)
|
debug(response)
|
||||||
return "" if "pass" in response["response"].lower() else response["response"]
|
return "" if "pass" in response["response"].lower() else response["response"]
|
||||||
|
|
||||||
def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150):
|
def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150):
|
||||||
|
@ -458,7 +464,7 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
|
||||||
comment_content = first_choice.message.content
|
comment_content = first_choice.message.content
|
||||||
if "PASS" in comment_content:
|
if "PASS" in comment_content:
|
||||||
return ""
|
return ""
|
||||||
logger.debug(f"Generated comment: {comment_content}")
|
debug(f"Generated comment: {comment_content}")
|
||||||
|
|
||||||
response_2 = VISION_LLM.chat.completions.create(
|
response_2 = VISION_LLM.chat.completions.create(
|
||||||
model="gpt-4-vision-preview",
|
model="gpt-4-vision-preview",
|
||||||
|
@ -496,15 +502,15 @@ def gpt4v(image_base64, prompt_sys: str, prompt_usr: str, max_tokens: int = 150)
|
||||||
first_choice = response_2.choices[0]
|
first_choice = response_2.choices[0]
|
||||||
if first_choice.message and first_choice.message.content:
|
if first_choice.message and first_choice.message.content:
|
||||||
final_content = first_choice.message.content
|
final_content = first_choice.message.content
|
||||||
logger.debug(f"Generated comment: {final_content}")
|
debug(f"Generated comment: {final_content}")
|
||||||
if "PASS" in final_content:
|
if "PASS" in final_content:
|
||||||
return ""
|
return ""
|
||||||
else:
|
else:
|
||||||
return final_content
|
return final_content
|
||||||
|
|
||||||
|
|
||||||
logger.debug("Vision response did not contain expected data.")
|
debug("Vision response did not contain expected data.")
|
||||||
logger.debug(f"Vision response: {response_1}")
|
debug(f"Vision response: {response_1}")
|
||||||
asyncio.sleep(15)
|
asyncio.sleep(15)
|
||||||
|
|
||||||
try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens)
|
try_again = gpt4v(image_base64, prompt_sys, prompt_usr, max_tokens)
|
||||||
|
@ -562,7 +568,7 @@ async def summarize_tts_endpoint(
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in summarize_tts_endpoint: {str(e)}")
|
err(f"Error in summarize_tts_endpoint: {str(e)}")
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
status_code=400,
|
status_code=400,
|
||||||
content={"error": str(e)}
|
content={"error": str(e)}
|
||||||
|
@ -589,11 +595,11 @@ async def summarize_tts(
|
||||||
bg_tasks = BackgroundTasks()
|
bg_tasks = BackgroundTasks()
|
||||||
model = await tts.get_model(voice)
|
model = await tts.get_model(voice)
|
||||||
final_output_path = await tts.generate_speech(bg_tasks, summarized_text, voice, model=model, speed=speed, podcast=podcast, title=filename)
|
final_output_path = await tts.generate_speech(bg_tasks, summarized_text, voice, model=model, speed=speed, podcast=podcast, title=filename)
|
||||||
logger.debug(f"summary_tts completed with final_output_path: {final_output_path}")
|
debug(f"summary_tts completed with final_output_path: {final_output_path}")
|
||||||
return final_output_path
|
return final_output_path
|
||||||
|
|
||||||
|
|
||||||
async def get_title(text: str, LLM: Ollama() = None):
|
async def get_title(text: str, LLM = None):
|
||||||
LLM = LLM if LLM else Ollama()
|
LLM = LLM if LLM else Ollama()
|
||||||
title = await process_chunk("Generate a title for this text", text, 1, 1, 12, LLM)
|
title = await process_chunk("Generate a title for this text", text, 1, 1, 12, LLM)
|
||||||
title = sanitize_filename(title)
|
title = sanitize_filename(title)
|
||||||
|
@ -605,10 +611,10 @@ def split_text_into_chunks(text: str) -> List[str]:
|
||||||
sentences = re.split(r'(?<=[.!?])\s+', text)
|
sentences = re.split(r'(?<=[.!?])\s+', text)
|
||||||
words = text.split()
|
words = text.split()
|
||||||
total_words = len(words)
|
total_words = len(words)
|
||||||
logger.debug(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.")
|
debug(f"Total words: {total_words}. SUMMARY_CHUNK_SIZE: {SUMMARY_CHUNK_SIZE}. SUMMARY_TPW: {SUMMARY_TPW}.")
|
||||||
|
|
||||||
max_words_per_chunk = int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW)
|
max_words_per_chunk = int(SUMMARY_CHUNK_SIZE / SUMMARY_TPW)
|
||||||
logger.debug(f"Maximum words per chunk: {max_words_per_chunk}")
|
debug(f"Maximum words per chunk: {max_words_per_chunk}")
|
||||||
|
|
||||||
chunks = []
|
chunks = []
|
||||||
current_chunk = []
|
current_chunk = []
|
||||||
|
@ -628,7 +634,7 @@ def split_text_into_chunks(text: str) -> List[str]:
|
||||||
if current_chunk:
|
if current_chunk:
|
||||||
chunks.append(' '.join(current_chunk))
|
chunks.append(' '.join(current_chunk))
|
||||||
|
|
||||||
logger.debug(f"Split text into {len(chunks)} chunks.")
|
debug(f"Split text into {len(chunks)} chunks.")
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
|
|
||||||
|
@ -640,11 +646,11 @@ def calculate_max_tokens(text: str) -> int:
|
||||||
|
|
||||||
|
|
||||||
async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_tasks: BackgroundTasks = None) -> str:
|
async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_tasks: BackgroundTasks = None) -> str:
|
||||||
logger.info(f"Attempting to extract text from file: {file}")
|
info(f"Attempting to extract text from file: {file}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if isinstance(file, UploadFile):
|
if isinstance(file, UploadFile):
|
||||||
logger.info("File is an UploadFile object")
|
info("File is an UploadFile object")
|
||||||
file_extension = os.path.splitext(file.filename)[1]
|
file_extension = os.path.splitext(file.filename)[1]
|
||||||
temp_file_path = tempfile.mktemp(suffix=file_extension)
|
temp_file_path = tempfile.mktemp(suffix=file_extension)
|
||||||
with open(temp_file_path, 'wb') as buffer:
|
with open(temp_file_path, 'wb') as buffer:
|
||||||
|
@ -663,7 +669,7 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
|
||||||
|
|
||||||
_, file_ext = os.path.splitext(file_path)
|
_, file_ext = os.path.splitext(file_path)
|
||||||
file_ext = file_ext.lower()
|
file_ext = file_ext.lower()
|
||||||
logger.info(f"File extension: {file_ext}")
|
info(f"File extension: {file_ext}")
|
||||||
|
|
||||||
if file_ext == '.pdf':
|
if file_ext == '.pdf':
|
||||||
text_content = await extract_text_from_pdf(file_path)
|
text_content = await extract_text_from_pdf(file_path)
|
||||||
|
@ -690,7 +696,7 @@ async def extract_text(file: Union[UploadFile, bytes, bytearray, str, Path], bg_
|
||||||
return text_content
|
return text_content
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error extracting text: {str(e)}")
|
err(f"Error extracting text: {str(e)}")
|
||||||
raise ValueError(f"Error extracting text: {str(e)}")
|
raise ValueError(f"Error extracting text: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
@ -699,17 +705,17 @@ async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_
|
||||||
|
|
||||||
chunked_text = split_text_into_chunks(text)
|
chunked_text = split_text_into_chunks(text)
|
||||||
total_parts = len(chunked_text)
|
total_parts = len(chunked_text)
|
||||||
logger.debug(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}")
|
debug(f"Total parts: {total_parts}. Length of chunked text: {len(chunked_text)}")
|
||||||
|
|
||||||
total_words_count = sum(len(chunk.split()) for chunk in chunked_text)
|
total_words_count = sum(len(chunk.split()) for chunk in chunked_text)
|
||||||
logger.debug(f"Total words count: {total_words_count}")
|
debug(f"Total words count: {total_words_count}")
|
||||||
total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW))
|
total_tokens_count = max(1, int(total_words_count * SUMMARY_TPW))
|
||||||
logger.debug(f"Total tokens count: {total_tokens_count}")
|
debug(f"Total tokens count: {total_tokens_count}")
|
||||||
|
|
||||||
total_summary_length = length_override if length_override else total_tokens_count // length_quotient
|
total_summary_length = length_override if length_override else total_tokens_count // length_quotient
|
||||||
logger.debug(f"Total summary length: {total_summary_length}")
|
debug(f"Total summary length: {total_summary_length}")
|
||||||
corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT)
|
corrected_total_summary_length = min(total_summary_length, SUMMARY_TOKEN_LIMIT)
|
||||||
logger.debug(f"Corrected total summary length: {corrected_total_summary_length}")
|
debug(f"Corrected total summary length: {corrected_total_summary_length}")
|
||||||
|
|
||||||
summaries = await asyncio.gather(*[
|
summaries = await asyncio.gather(*[
|
||||||
process_chunk(instruction, chunk, i+1, total_parts, LLM=LLM)
|
process_chunk(instruction, chunk, i+1, total_parts, LLM=LLM)
|
||||||
|
@ -720,21 +726,21 @@ async def summarize_text(text: str, instruction: str = SUMMARY_INSTRUCT, length_
|
||||||
summaries = [f"\n\n\nPART {i+1} of {total_parts}:\n\n{summary}" for i, summary in enumerate(summaries)]
|
summaries = [f"\n\n\nPART {i+1} of {total_parts}:\n\n{summary}" for i, summary in enumerate(summaries)]
|
||||||
|
|
||||||
concatenated_summary = ' '.join(summaries)
|
concatenated_summary = ' '.join(summaries)
|
||||||
logger.debug(f"Concatenated summary: {concatenated_summary}")
|
debug(f"Concatenated summary: {concatenated_summary}")
|
||||||
logger.debug(f"Concatenated summary length: {len(concatenated_summary.split())}")
|
debug(f"Concatenated summary length: {len(concatenated_summary.split())}")
|
||||||
|
|
||||||
if total_parts > 1:
|
if total_parts > 1:
|
||||||
logger.debug(f"Processing the concatenated_summary to smooth the edges...")
|
debug(f"Processing the concatenated_summary to smooth the edges...")
|
||||||
concatenated_instruct = f"The following text consists of the concatenated {total_parts} summaries of {total_parts} parts of a single document that had to be split for processing. Reword it for clarity and flow as a single cohesive summary, understanding that it all relates to a single document, but that document likely consists of multiple parts potentially from multiple authors. Do not shorten it and do not omit content, simply smooth out the edges between the parts."
|
concatenated_instruct = f"The following text consists of the concatenated {total_parts} summaries of {total_parts} parts of a single document that had to be split for processing. Reword it for clarity and flow as a single cohesive summary, understanding that it all relates to a single document, but that document likely consists of multiple parts potentially from multiple authors. Do not shorten it and do not omit content, simply smooth out the edges between the parts."
|
||||||
final_summary = await process_chunk(concatenated_instruct, concatenated_summary, 1, 1, length_ratio=1, LLM=LLM)
|
final_summary = await process_chunk(concatenated_instruct, concatenated_summary, 1, 1, length_ratio=1, LLM=LLM)
|
||||||
logger.debug(f"Final summary length: {len(final_summary.split())}")
|
debug(f"Final summary length: {len(final_summary.split())}")
|
||||||
return final_summary
|
return final_summary
|
||||||
else:
|
else:
|
||||||
return concatenated_summary
|
return concatenated_summary
|
||||||
|
|
||||||
|
|
||||||
async def process_chunk(instruction: str, text: str, part: int, total_parts: int, length_ratio: float = None, LLM: Ollama = None) -> str:
|
async def process_chunk(instruction: str, text: str, part: int, total_parts: int, length_ratio: float = None, LLM: Ollama = None) -> str:
|
||||||
# logger.debug(f"Processing chunk: {text}")
|
# debug(f"Processing chunk: {text}")
|
||||||
LLM = LLM if LLM else Ollama()
|
LLM = LLM if LLM else Ollama()
|
||||||
|
|
||||||
words_count = len(text.split())
|
words_count = len(text.split())
|
||||||
|
@ -744,14 +750,14 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int
|
||||||
max_tokens = min(tokens_count // summary_length_ratio, SUMMARY_CHUNK_SIZE)
|
max_tokens = min(tokens_count // summary_length_ratio, SUMMARY_CHUNK_SIZE)
|
||||||
max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH)
|
max_tokens = max(max_tokens, SUMMARY_MIN_LENGTH)
|
||||||
|
|
||||||
logger.debug(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}")
|
debug(f"Processing part {part} of {total_parts}: Words: {words_count}, Estimated tokens: {tokens_count}, Max output tokens: {max_tokens}")
|
||||||
|
|
||||||
if part and total_parts > 1:
|
if part and total_parts > 1:
|
||||||
prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}"
|
prompt = f"{instruction}. Part {part} of {total_parts}:\n{text}"
|
||||||
else:
|
else:
|
||||||
prompt = f"{instruction}:\n\n{text}"
|
prompt = f"{instruction}:\n\n{text}"
|
||||||
|
|
||||||
logger.debug(f"Starting LLM.generate for part {part} of {total_parts}")
|
info(f"Starting LLM.generate for part {part} of {total_parts}")
|
||||||
response = await LLM.generate(
|
response = await LLM.generate(
|
||||||
model=SUMMARY_MODEL,
|
model=SUMMARY_MODEL,
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
|
@ -760,8 +766,8 @@ async def process_chunk(instruction: str, text: str, part: int, total_parts: int
|
||||||
)
|
)
|
||||||
|
|
||||||
text_response = response['response']
|
text_response = response['response']
|
||||||
logger.debug(f"Completed LLM.generate for part {part} of {total_parts}")
|
info(f"Completed LLM.generate for part {part} of {total_parts}")
|
||||||
logger.debug(f"Result: {text_response}")
|
debug(f"Result: {text_response}")
|
||||||
return text_response
|
return text_response
|
||||||
|
|
||||||
async def title_and_summary(extracted_text: str):
|
async def title_and_summary(extracted_text: str):
|
||||||
|
|
|
@ -261,11 +261,10 @@ async def generate_map(start_date: datetime, end_date: datetime):
|
||||||
return html_content
|
return html_content
|
||||||
|
|
||||||
async def post_location(location: Location):
|
async def post_location(location: Location):
|
||||||
if not location.datetime:
|
# if not location.datetime:
|
||||||
logger.debug(f"location appears to be missing datetime: {location}")
|
# logger.debug(f"location appears to be missing datetime: {location}")
|
||||||
else:
|
# else:
|
||||||
logger.debug(f"post_location called with {location.datetime}")
|
# logger.debug(f"post_location called with {location.datetime}")
|
||||||
|
|
||||||
async with DB.get_connection() as conn:
|
async with DB.get_connection() as conn:
|
||||||
try:
|
try:
|
||||||
context = location.context or {}
|
context = location.context or {}
|
||||||
|
|
|
@ -5,159 +5,178 @@ import asyncio
|
||||||
import shutil
|
import shutil
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from zoneinfo import ZoneInfo
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from datetime import datetime as dt_datetime, timedelta
|
from datetime import datetime as dt_datetime, timedelta
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import aiofiles
|
import aiofiles
|
||||||
import newspaper
|
import newspaper
|
||||||
import trafilatura
|
import trafilatura
|
||||||
|
from newspaper import Article
|
||||||
from readability import Document
|
from readability import Document
|
||||||
from markdownify import markdownify as md
|
from markdownify import markdownify as md
|
||||||
from requests.adapters import HTTPAdapter
|
from requests.adapters import HTTPAdapter
|
||||||
from urllib3.util.retry import Retry
|
from urllib3.util.retry import Retry
|
||||||
|
from fastapi import APIRouter, BackgroundTasks, UploadFile, Form, HTTPException, Query, Path as FastAPIPath
|
||||||
from fastapi import APIRouter, BackgroundTasks, File, UploadFile, Form, HTTPException, Response, Query, Path as FastAPIPath
|
|
||||||
from fastapi.responses import JSONResponse
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from sijapi import API, L, Dir, News, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, DEFAULT_11L_VOICE, DEFAULT_VOICE, GEO
|
from sijapi import L, News, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, DEFAULT_11L_VOICE, DEFAULT_VOICE
|
||||||
from sijapi.utilities import sanitize_filename, assemble_journal_path, assemble_archive_path
|
from sijapi.utilities import sanitize_filename, assemble_journal_path, assemble_archive_path
|
||||||
from sijapi.routers import llm, tts, asr, loc, note
|
from sijapi.routers import gis, llm, tts, note
|
||||||
|
|
||||||
from newspaper import Article
|
|
||||||
|
|
||||||
news = APIRouter()
|
news = APIRouter()
|
||||||
logger = L.get_module_logger("news")
|
logger = L.get_module_logger("news")
|
||||||
|
def debug(text: str): logger.debug(text)
|
||||||
|
def info(text: str): logger.info(text)
|
||||||
|
def warn(text: str): logger.warning(text)
|
||||||
|
def err(text: str): logger.error(text)
|
||||||
|
def crit(text: str): logger.critical(text)
|
||||||
|
|
||||||
async def download_and_save_article(article, site_name, earliest_date, bg_tasks: BackgroundTasks, tts_mode: str = "summary", voice: str = DEFAULT_11L_VOICE):
|
async def process_and_save_article(
|
||||||
|
bg_tasks: BackgroundTasks,
|
||||||
|
url: str,
|
||||||
|
title: Optional[str] = None,
|
||||||
|
tts_mode: str = "summary",
|
||||||
|
voice: str = DEFAULT_VOICE,
|
||||||
|
site_name: Optional[str] = None
|
||||||
|
) -> str:
|
||||||
try:
|
try:
|
||||||
url = article.url
|
# Fetch and parse article
|
||||||
source = trafilatura.fetch_url(url)
|
article = await fetch_and_parse_article(url)
|
||||||
|
|
||||||
if source is None:
|
# Generate title and file paths
|
||||||
# Fallback to newspaper3k if trafilatura fails
|
title = sanitize_filename(title or article.title or f"Untitled - {dt_datetime.now().strftime('%Y-%m-%d')}")
|
||||||
article.download()
|
markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=title, extension=".md")
|
||||||
article.parse()
|
|
||||||
traf = None
|
|
||||||
else:
|
|
||||||
traf = trafilatura.extract_metadata(filecontent=source, default_url=url)
|
|
||||||
article.download()
|
|
||||||
article.parse()
|
|
||||||
|
|
||||||
# Update article properties, preferring trafilatura data when available
|
# Generate summary
|
||||||
article.title = traf.title if traf and traf.title else article.title or url
|
summary = await generate_summary(article.text)
|
||||||
article.authors = traf.author if traf and traf.author else article.authors or []
|
|
||||||
article.publish_date = traf.date if traf and traf.date else article.publish_date
|
|
||||||
try:
|
|
||||||
article.publish_date = await loc.dt(article.publish_date, "UTC")
|
|
||||||
except:
|
|
||||||
logger.debug(f"Failed to localize {article.publish_date}")
|
|
||||||
article.publish_date = await loc.dt(dt_datetime.now(), "UTC")
|
|
||||||
article.meta_description = traf.description if traf and traf.description else article.meta_description
|
|
||||||
article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) if source else article.text
|
|
||||||
article.top_image = traf.image if traf and traf.image else article.top_image
|
|
||||||
article.source_url = traf.sitename if traf and traf.sitename else urlparse(url).netloc.replace('www.', '').title()
|
|
||||||
article.meta_keywords = traf.categories or traf.tags if traf else article.meta_keywords or []
|
|
||||||
article.meta_keywords = article.meta_keywords if isinstance(article.meta_keywords, list) else [article.meta_keywords]
|
|
||||||
|
|
||||||
if not is_article_within_date_range(article, earliest_date):
|
# Handle TTS
|
||||||
return False
|
audio_link = await handle_tts(bg_tasks, article, title, tts_mode, voice, summary)
|
||||||
|
|
||||||
|
# Generate markdown content
|
||||||
|
markdown_content = generate_markdown_content(article, title, summary, audio_link, site_name)
|
||||||
|
|
||||||
timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M')
|
# Save markdown file
|
||||||
readable_title = sanitize_filename(article.title or timestamp)
|
await save_markdown_file(markdown_filename, markdown_content)
|
||||||
markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md")
|
|
||||||
|
|
||||||
summary = await llm.summarize_text(article.text, "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
|
# Add to daily note
|
||||||
summary = summary.replace('\n', ' ') # Remove line breaks
|
await note.add_to_daily_note(relative_path)
|
||||||
|
|
||||||
if tts_mode == "full" or tts_mode == "content":
|
|
||||||
tts_text = article.text
|
|
||||||
elif tts_mode == "summary" or tts_mode == "excerpt":
|
|
||||||
tts_text = summary
|
|
||||||
else:
|
|
||||||
tts_text = None
|
|
||||||
|
|
||||||
banner_markdown = ''
|
|
||||||
try:
|
|
||||||
banner_url = article.top_image
|
|
||||||
if banner_url:
|
|
||||||
banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')}.jpg"))
|
|
||||||
if banner_image:
|
|
||||||
banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]"
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"No image found in article")
|
|
||||||
|
|
||||||
|
|
||||||
authors = ', '.join(['[[{}]]'.format(author.strip()) for author in article.authors if author.strip()])
|
|
||||||
if not authors:
|
|
||||||
authors = '[[Unknown Author]]'
|
|
||||||
|
|
||||||
frontmatter = f"""---
|
|
||||||
title: {readable_title}
|
|
||||||
authors: {authors}
|
|
||||||
published: {article.publish_date}
|
|
||||||
added: {timestamp}
|
|
||||||
banner: "{banner_markdown}"
|
|
||||||
tags:
|
|
||||||
"""
|
|
||||||
frontmatter += '\n'.join(f" - {tag}" for tag in article.meta_keywords)
|
|
||||||
frontmatter += '\n---\n'
|
|
||||||
|
|
||||||
body = f"# {readable_title}\n\n"
|
|
||||||
if tts_text:
|
|
||||||
audio_filename = f"{article.publish_date.strftime('%Y-%m-%d')} {readable_title}"
|
|
||||||
try:
|
|
||||||
audio_path = await tts.generate_speech(
|
|
||||||
bg_tasks=bg_tasks,
|
|
||||||
text=tts_text,
|
|
||||||
voice=voice,
|
|
||||||
model="xtts2",
|
|
||||||
podcast=True,
|
|
||||||
title=audio_filename,
|
|
||||||
output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR
|
|
||||||
)
|
|
||||||
if isinstance(audio_path, Path):
|
|
||||||
audio_ext = audio_path.suffix
|
|
||||||
obsidian_link = f"![[{audio_path.name}]]"
|
|
||||||
body += f"{obsidian_link}\n\n"
|
|
||||||
else:
|
|
||||||
logger.warning(f"Unexpected audio_path type: {type(audio_path)}. Value: {audio_path}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to generate TTS for {audio_filename}. Error: {str(e)}")
|
|
||||||
logger.error(f"TTS error details - voice: {voice}, model: eleven_turbo_v2, podcast: True")
|
|
||||||
logger.error(f"Output directory: {Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR}")
|
|
||||||
|
|
||||||
body += f"by {authors} in {article.source_url}\n\n"
|
|
||||||
body += f"> [!summary]+\n"
|
|
||||||
body += f"> {summary}\n\n"
|
|
||||||
body += article.text
|
|
||||||
|
|
||||||
markdown_content = frontmatter + body
|
|
||||||
|
|
||||||
with open(markdown_filename, 'w') as md_file:
|
|
||||||
md_file.write(markdown_content)
|
|
||||||
|
|
||||||
logger.info(f"Successfully saved to {markdown_filename}")
|
|
||||||
note.add_to_daily_note(relative_path)
|
|
||||||
print(f"Saved article: {relative_path}")
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
return f"Successfully saved: {relative_path}"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error processing article from {article.url}: {str(e)}")
|
err(f"Failed to process article {url}: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_and_parse_article(url: str) -> Article:
|
||||||
|
source = trafilatura.fetch_url(url)
|
||||||
|
traf = trafilatura.extract_metadata(filecontent=source, default_url=url)
|
||||||
|
|
||||||
|
article = Article(url)
|
||||||
|
article.set_html(source)
|
||||||
|
article.parse()
|
||||||
|
|
||||||
|
# Update article properties with trafilatura data
|
||||||
|
article.title = article.title or traf.title or url
|
||||||
|
article.authors = article.authors or (traf.author if isinstance(traf.author, list) else [traf.author])
|
||||||
|
article.publish_date = await gis.dt(article.publish_date or traf.date or dt_datetime.now(), "UTC")
|
||||||
|
article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) or article.text
|
||||||
|
article.top_image = article.top_image or traf.image
|
||||||
|
article.source_url = traf.sitename or urlparse(url).netloc.replace('www.', '').title()
|
||||||
|
article.meta_keywords = list(set(article.meta_keywords or traf.categories or traf.tags or []))
|
||||||
|
|
||||||
|
return article
|
||||||
|
|
||||||
|
def is_article_within_date_range(article: Article, days_back: int) -> bool:
|
||||||
|
earliest_date = dt_datetime.now().date() - timedelta(days=days_back)
|
||||||
|
return article.publish_date.date() >= earliest_date
|
||||||
|
|
||||||
|
async def generate_summary(text: str) -> str:
|
||||||
|
summary = await llm.summarize_text(text, "Summarize the provided text. Respond with the summary and nothing else.")
|
||||||
|
return summary.replace('\n', ' ')
|
||||||
|
|
||||||
|
async def handle_tts(bg_tasks: BackgroundTasks, article: Article, title: str, tts_mode: str, voice: str, summary: str) -> Optional[str]:
|
||||||
|
if tts_mode in ["full", "content"]:
|
||||||
|
tts_text = article.text
|
||||||
|
elif tts_mode in ["summary", "excerpt"]:
|
||||||
|
tts_text = summary
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
audio_filename = f"{article.publish_date.strftime('%Y-%m-%d')} {title}"
|
||||||
|
try:
|
||||||
|
audio_path = await tts.generate_speech(
|
||||||
|
bg_tasks=bg_tasks,
|
||||||
|
text=tts_text,
|
||||||
|
voice=voice,
|
||||||
|
model="xtts",
|
||||||
|
podcast=True,
|
||||||
|
title=audio_filename,
|
||||||
|
output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR
|
||||||
|
)
|
||||||
|
return f"![[{Path(audio_path).name}]]"
|
||||||
|
except HTTPException as e:
|
||||||
|
err(f"Failed to generate TTS: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def generate_markdown_content(article: Article, title: str, summary: str, audio_link: Optional[str], site_name: Optional[str] = None) -> str:
|
||||||
|
frontmatter = f"""---
|
||||||
|
title: {title}
|
||||||
|
authors: {', '.join(f'[[{author}]]' for author in article.authors)}
|
||||||
|
published: {article.publish_date}
|
||||||
|
added: {dt_datetime.now().strftime('%b %d, %Y at %H:%M')}
|
||||||
|
banner: "{get_banner_markdown(article.top_image)}"
|
||||||
|
tags:
|
||||||
|
{chr(10).join(f' - {tag}' for tag in article.meta_keywords)}
|
||||||
|
"""
|
||||||
|
if site_name:
|
||||||
|
frontmatter += f"site: {site_name}\n"
|
||||||
|
frontmatter += "---\n\n"
|
||||||
|
|
||||||
|
body = f"# {title}\n\n"
|
||||||
|
if audio_link:
|
||||||
|
body += f"{audio_link}\n\n"
|
||||||
|
body += f"by {', '.join(article.authors)} in [{article.source_url}]({article.url})\n\n"
|
||||||
|
body += f"> [!summary]+\n> {summary}\n\n"
|
||||||
|
body += article.text
|
||||||
|
|
||||||
|
return frontmatter + body
|
||||||
|
|
||||||
|
|
||||||
|
def get_banner_markdown(image_url: str) -> str:
|
||||||
|
if not image_url:
|
||||||
|
return ''
|
||||||
|
try:
|
||||||
|
banner_image = download_file(image_url, Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR)
|
||||||
|
return f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]" if banner_image else ''
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Failed to download banner image: {str(e)}")
|
||||||
|
return ''
|
||||||
|
|
||||||
|
async def save_markdown_file(filename: str, content: str):
|
||||||
|
async with aiofiles.open(filename, 'w', encoding='utf-8') as f:
|
||||||
|
await f.write(content)
|
||||||
|
|
||||||
|
|
||||||
|
async def download_and_save_article(article, site_name, earliest_date, bg_tasks: BackgroundTasks, tts_mode: str = "off", voice: str = DEFAULT_11L_VOICE):
|
||||||
|
try:
|
||||||
|
url = article.url
|
||||||
|
parsed_article = await fetch_and_parse_article(url)
|
||||||
|
|
||||||
|
if not is_article_within_date_range(parsed_article, earliest_date):
|
||||||
|
return False
|
||||||
|
|
||||||
|
return await process_and_save_article(bg_tasks, url, None, tts_mode, voice, site_name=site_name)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Error processing article from {article.url}: {str(e)}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# You'll need to update your is_article_within_date_range function:
|
|
||||||
def is_article_within_date_range(article, earliest_date):
|
|
||||||
return article.publish_date is not None and article.publish_date.date() >= earliest_date
|
|
||||||
|
|
||||||
async def process_news_site(site, bg_tasks: BackgroundTasks):
|
async def process_news_site(site, bg_tasks: BackgroundTasks):
|
||||||
print(f"Downloading articles from {site.name}...")
|
info(f"Downloading articles from {site.name}...")
|
||||||
|
|
||||||
earliest_date = dt_datetime.now().date() - timedelta(days=site.days_back)
|
earliest_date = dt_datetime.now().date() - timedelta(days=site.days_back)
|
||||||
|
|
||||||
|
@ -179,11 +198,11 @@ async def process_news_site(site, bg_tasks: BackgroundTasks):
|
||||||
results = await asyncio.gather(*tasks)
|
results = await asyncio.gather(*tasks)
|
||||||
articles_downloaded = sum(results)
|
articles_downloaded = sum(results)
|
||||||
|
|
||||||
print(f"Downloaded {articles_downloaded} articles from {site.name}")
|
info(f"Downloaded {articles_downloaded} articles from {site.name}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error processing {site.name}: {str(e)}")
|
err(f"Error processing {site.name}: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
# Update your news_refresh_endpoint function:
|
|
||||||
@news.get("/news/refresh")
|
@news.get("/news/refresh")
|
||||||
async def news_refresh_endpoint(bg_tasks: BackgroundTasks):
|
async def news_refresh_endpoint(bg_tasks: BackgroundTasks):
|
||||||
tasks = [process_news_site(site, bg_tasks) for site in News.sites]
|
tasks = [process_news_site(site, bg_tasks) for site in News.sites]
|
||||||
|
@ -192,32 +211,41 @@ async def news_refresh_endpoint(bg_tasks: BackgroundTasks):
|
||||||
|
|
||||||
|
|
||||||
async def generate_path(article, site_name):
|
async def generate_path(article, site_name):
|
||||||
publish_date = await loc.dt(article.publish_date, 'UTC') if article.publish_date else await loc.dt(dt_datetime.now(), 'UTC')
|
publish_date = await gis.dt(article.publish_date, 'UTC') if article.publish_date else await gis.dt(dt_datetime.now(), 'UTC')
|
||||||
title_slug = "".join(c if c.isalnum() else "_" for c in article.title)
|
title_slug = "".join(c if c.isalnum() else "_" for c in article.title)
|
||||||
filename = f"{site_name} - {title_slug[:50]}.md"
|
filename = f"{site_name} - {title_slug[:50]}.md"
|
||||||
absolute_path, relative_path = assemble_journal_path(publish_date, 'Articles', filename, extension='.md', no_timestamp=True)
|
absolute_path, relative_path = assemble_journal_path(publish_date, 'Articles', filename, extension='.md', no_timestamp=True)
|
||||||
return absolute_path, relative_path
|
return absolute_path, relative_path
|
||||||
|
|
||||||
|
|
||||||
async def save_article_to_file(content, output_path):
|
async def save_article_to_file(content, output_path):
|
||||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
async with aiofiles.open(output_path, 'w', encoding='utf-8') as file:
|
async with aiofiles.open(output_path, 'w', encoding='utf-8') as file:
|
||||||
await file.write(content)
|
await file.write(content)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### CLIPPER ###
|
|
||||||
@news.post("/clip")
|
@news.post("/clip")
|
||||||
async def clip_post(
|
async def clip_post(
|
||||||
bg_tasks: BackgroundTasks,
|
bg_tasks: BackgroundTasks,
|
||||||
url: Optional[str] = Form(None),
|
url: str = Form(...),
|
||||||
source: Optional[str] = Form(None),
|
|
||||||
title: Optional[str] = Form(None),
|
title: Optional[str] = Form(None),
|
||||||
tts: str = Form('summary'),
|
tts: str = Form('summary'),
|
||||||
voice: str = Form(DEFAULT_VOICE),
|
voice: str = Form(DEFAULT_VOICE),
|
||||||
encoding: str = Form('utf-8')
|
|
||||||
):
|
):
|
||||||
markdown_filename = await process_article(bg_tasks, url, title, encoding, source, tts, voice)
|
result = await process_and_save_article(bg_tasks, url, title, tts, voice)
|
||||||
return {"message": "Clip saved successfully", "markdown_filename": markdown_filename}
|
return {"message": "Clip saved successfully", "result": result}
|
||||||
|
|
||||||
|
@news.get("/clip")
|
||||||
|
async def clip_get(
|
||||||
|
bg_tasks: BackgroundTasks,
|
||||||
|
url: str,
|
||||||
|
tts: str = Query('summary'),
|
||||||
|
voice: str = Query(DEFAULT_VOICE)
|
||||||
|
):
|
||||||
|
result = await process_and_save_article(bg_tasks, url, None, tts, voice)
|
||||||
|
return {"message": "Clip saved successfully", "result": result}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@news.post("/archive")
|
@news.post("/archive")
|
||||||
async def archive_post(
|
async def archive_post(
|
||||||
|
@ -229,203 +257,6 @@ async def archive_post(
|
||||||
markdown_filename = await process_archive(url, title, encoding, source)
|
markdown_filename = await process_archive(url, title, encoding, source)
|
||||||
return {"message": "Clip saved successfully", "markdown_filename": markdown_filename}
|
return {"message": "Clip saved successfully", "markdown_filename": markdown_filename}
|
||||||
|
|
||||||
@news.get("/clip")
|
|
||||||
async def clip_get(
|
|
||||||
bg_tasks: BackgroundTasks,
|
|
||||||
url: str,
|
|
||||||
tts: str = Query('summary'),
|
|
||||||
voice: str = Query(DEFAULT_VOICE)
|
|
||||||
):
|
|
||||||
parsed_content = await parse_article(url)
|
|
||||||
markdown_filename = await process_article2(bg_tasks, parsed_content, tts, voice)
|
|
||||||
return {"message": "Clip saved successfully", "markdown_filename": markdown_filename}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def process_article2(
|
|
||||||
bg_tasks: BackgroundTasks,
|
|
||||||
parsed_content: Article,
|
|
||||||
tts_mode: str = "summary",
|
|
||||||
voice: str = DEFAULT_11L_VOICE
|
|
||||||
):
|
|
||||||
timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M')
|
|
||||||
|
|
||||||
readable_title = sanitize_filename(parsed_content.title or timestamp)
|
|
||||||
markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md")
|
|
||||||
|
|
||||||
try:
|
|
||||||
summary = await llm.summarize_text(parsed_content.clean_doc, "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
|
|
||||||
summary = summary.replace('\n', ' ') # Remove line breaks
|
|
||||||
|
|
||||||
if tts_mode == "full" or tts_mode == "content":
|
|
||||||
tts_text = parsed_content.clean_doc
|
|
||||||
elif tts_mode == "summary" or tts_mode == "excerpt":
|
|
||||||
tts_text = summary
|
|
||||||
else:
|
|
||||||
tts_text = None
|
|
||||||
|
|
||||||
banner_markdown = ''
|
|
||||||
try:
|
|
||||||
banner_url = parsed_content.top_image
|
|
||||||
if banner_url != '':
|
|
||||||
banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR))
|
|
||||||
if banner_image:
|
|
||||||
banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]"
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"No image found in article")
|
|
||||||
|
|
||||||
authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.authors)
|
|
||||||
published_date = parsed_content.publish_date
|
|
||||||
frontmatter = f"""---
|
|
||||||
title: {readable_title}
|
|
||||||
authors: {authors}
|
|
||||||
published: {published_date}
|
|
||||||
added: {timestamp}
|
|
||||||
banner: "{banner_markdown}"
|
|
||||||
tags:
|
|
||||||
|
|
||||||
"""
|
|
||||||
frontmatter += '\n'.join(f" - {tag}" for tag in parsed_content.tags)
|
|
||||||
frontmatter += '\n---\n'
|
|
||||||
|
|
||||||
body = f"# {readable_title}\n\n"
|
|
||||||
if tts_text:
|
|
||||||
audio_filename = f"{published_date} {readable_title}"
|
|
||||||
try:
|
|
||||||
audio_path = await tts.generate_speech(bg_tasks=bg_tasks, text=tts_text, voice=voice, model="eleven_turbo_v2", podcast=True, title=audio_filename,
|
|
||||||
output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR)
|
|
||||||
audio_ext = Path(audio_path).suffix
|
|
||||||
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
|
|
||||||
body += f"{obsidian_link}\n\n"
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to generate TTS for np3k. {e}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
body += f"by {authors} in {parsed_content.canonical_link}" # update with method for getting the newspaper name
|
|
||||||
body += f"> [!summary]+\n"
|
|
||||||
body += f"> {summary}\n\n"
|
|
||||||
body += parsed_content["content"]
|
|
||||||
markdown_content = frontmatter + body
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to combine elements of article markdown.")
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(markdown_filename, 'w') as md_file:
|
|
||||||
md_file.write(markdown_content)
|
|
||||||
|
|
||||||
logger.info(f"Successfully saved to {markdown_filename}")
|
|
||||||
note.add_to_daily_note(relative_path)
|
|
||||||
return markdown_filename
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to write markdown file")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to clip: {str(e)}")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
|
|
||||||
async def process_article(
|
|
||||||
bg_tasks: BackgroundTasks,
|
|
||||||
url: str,
|
|
||||||
title: Optional[str] = None,
|
|
||||||
encoding: str = 'utf-8',
|
|
||||||
source: Optional[str] = None,
|
|
||||||
tts_mode: str = "summary",
|
|
||||||
voice: str = DEFAULT_11L_VOICE
|
|
||||||
):
|
|
||||||
|
|
||||||
timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M')
|
|
||||||
|
|
||||||
parsed_content = await parse_article(url, source)
|
|
||||||
if parsed_content is None:
|
|
||||||
return {"error": "Failed to retrieve content"}
|
|
||||||
|
|
||||||
readable_title = sanitize_filename(title or parsed_content.get("title") or timestamp)
|
|
||||||
markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md")
|
|
||||||
|
|
||||||
try:
|
|
||||||
summary = await llm.summarize_text(parsed_content["content"], "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
|
|
||||||
summary = summary.replace('\n', ' ') # Remove line breaks
|
|
||||||
|
|
||||||
if tts_mode == "full" or tts_mode == "content":
|
|
||||||
tts_text = parsed_content["content"]
|
|
||||||
elif tts_mode == "summary" or tts_mode == "excerpt":
|
|
||||||
tts_text = summary
|
|
||||||
else:
|
|
||||||
tts_text = None
|
|
||||||
|
|
||||||
banner_markdown = ''
|
|
||||||
try:
|
|
||||||
banner_url = parsed_content.get('image', '')
|
|
||||||
if banner_url != '':
|
|
||||||
banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR))
|
|
||||||
if banner_image:
|
|
||||||
banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]"
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"No image found in article")
|
|
||||||
|
|
||||||
authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.get('authors', ['Unknown']))
|
|
||||||
|
|
||||||
frontmatter = f"""---
|
|
||||||
title: {readable_title}
|
|
||||||
authors: {', '.join('[[{}]]'.format(author) for author in parsed_content.get('authors', ['Unknown']))}
|
|
||||||
published: {parsed_content.get('date_published', 'Unknown')}
|
|
||||||
added: {timestamp}
|
|
||||||
excerpt: {parsed_content.get('excerpt', '')}
|
|
||||||
banner: "{banner_markdown}"
|
|
||||||
tags:
|
|
||||||
|
|
||||||
"""
|
|
||||||
frontmatter += '\n'.join(f" - {tag}" for tag in parsed_content.get('tags', []))
|
|
||||||
frontmatter += '\n---\n'
|
|
||||||
|
|
||||||
body = f"# {readable_title}\n\n"
|
|
||||||
|
|
||||||
if tts_text:
|
|
||||||
datetime_str = dt_datetime.now().strftime("%Y%m%d%H%M%S")
|
|
||||||
audio_filename = f"{datetime_str} {readable_title}"
|
|
||||||
try:
|
|
||||||
audio_path = await tts.generate_speech(bg_tasks=bg_tasks, text=tts_text, voice=voice, model="eleven_turbo_v2", podcast=True, title=audio_filename,
|
|
||||||
output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR)
|
|
||||||
audio_ext = Path(audio_path).suffix
|
|
||||||
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
|
|
||||||
body += f"{obsidian_link}\n\n"
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to generate TTS for np3k. {e}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
body += f"by {authors} in [{parsed_content.get('domain', urlparse(url).netloc.replace('www.', ''))}]({url}).\n\n"
|
|
||||||
body += f"> [!summary]+\n"
|
|
||||||
body += f"> {summary}\n\n"
|
|
||||||
body += parsed_content["content"]
|
|
||||||
markdown_content = frontmatter + body
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to combine elements of article markdown.")
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(markdown_filename, 'w', encoding=encoding) as md_file:
|
|
||||||
md_file.write(markdown_content)
|
|
||||||
|
|
||||||
logger.info(f"Successfully saved to {markdown_filename}")
|
|
||||||
note.add_to_daily_note(relative_path)
|
|
||||||
return markdown_filename
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to write markdown file")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to clip {url}: {str(e)}")
|
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def parse_article(url: str, source: Optional[str] = None) -> Article:
|
async def parse_article(url: str, source: Optional[str] = None) -> Article:
|
||||||
source = source if source else trafilatura.fetch_url(url)
|
source = source if source else trafilatura.fetch_url(url)
|
||||||
|
@ -436,7 +267,7 @@ async def parse_article(url: str, source: Optional[str] = None) -> Article:
|
||||||
article.set_html(source)
|
article.set_html(source)
|
||||||
article.parse()
|
article.parse()
|
||||||
|
|
||||||
logger.info(f"Parsed {article.title}")
|
info(f"Parsed {article.title}")
|
||||||
|
|
||||||
# Update or set properties based on trafilatura and additional processing
|
# Update or set properties based on trafilatura and additional processing
|
||||||
article.title = article.title or traf.title or url
|
article.title = article.title or traf.title or url
|
||||||
|
@ -444,10 +275,10 @@ async def parse_article(url: str, source: Optional[str] = None) -> Article:
|
||||||
|
|
||||||
article.publish_date = article.publish_date or traf.date
|
article.publish_date = article.publish_date or traf.date
|
||||||
try:
|
try:
|
||||||
article.publish_date = await loc.dt(article.publish_date, "UTC")
|
article.publish_date = await gis.dt(article.publish_date, "UTC")
|
||||||
except:
|
except:
|
||||||
logger.debug(f"Failed to localize {article.publish_date}")
|
debug(f"Failed to localize {article.publish_date}")
|
||||||
article.publish_date = await loc.dt(dt_datetime.now(), "UTC")
|
article.publish_date = await gis.dt(dt_datetime.now(), "UTC")
|
||||||
|
|
||||||
article.meta_description = article.meta_description or traf.description
|
article.meta_description = article.meta_description or traf.description
|
||||||
article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) or article.text
|
article.text = trafilatura.extract(source, output_format="markdown", include_comments=False) or article.text
|
||||||
|
@ -467,7 +298,6 @@ async def parse_article(url: str, source: Optional[str] = None) -> Article:
|
||||||
return article
|
return article
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]:
|
async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]:
|
||||||
if source:
|
if source:
|
||||||
html_content = source
|
html_content = source
|
||||||
|
@ -476,7 +306,7 @@ async def html_to_markdown(url: str = None, source: str = None) -> Optional[str]
|
||||||
async with session.get(url) as response:
|
async with session.get(url) as response:
|
||||||
html_content = await response.text()
|
html_content = await response.text()
|
||||||
else:
|
else:
|
||||||
logger.error(f"Unable to convert nothing to markdown.")
|
err(f"Unable to convert nothing to markdown.")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Use readability to extract the main content
|
# Use readability to extract the main content
|
||||||
|
@ -525,12 +355,13 @@ async def process_archive(
|
||||||
markdown_path.parent.mkdir(parents=True, exist_ok=True)
|
markdown_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
with open(markdown_path, 'w', encoding=encoding) as md_file:
|
with open(markdown_path, 'w', encoding=encoding) as md_file:
|
||||||
md_file.write(markdown_content)
|
md_file.write(markdown_content)
|
||||||
logger.debug(f"Successfully saved to {markdown_path}")
|
debug(f"Successfully saved to {markdown_path}")
|
||||||
return markdown_path
|
return markdown_path
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed to write markdown file: {str(e)}")
|
warn(f"Failed to write markdown file: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def download_file(url, folder):
|
def download_file(url, folder):
|
||||||
os.makedirs(folder, exist_ok=True)
|
os.makedirs(folder, exist_ok=True)
|
||||||
filename = str(uuid.uuid4()) + os.path.splitext(urlparse(url).path)[-1]
|
filename = str(uuid.uuid4()) + os.path.splitext(urlparse(url).path)[-1]
|
||||||
|
@ -552,16 +383,17 @@ def download_file(url, folder):
|
||||||
with open(filepath, 'wb') as f:
|
with open(filepath, 'wb') as f:
|
||||||
f.write(response.content)
|
f.write(response.content)
|
||||||
else:
|
else:
|
||||||
logger.error(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}")
|
err(f"Failed to download image: {url}, invalid content type: {response.headers.get('Content-Type')}")
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
logger.error(f"Failed to download image: {url}, status code: {response.status_code}")
|
err(f"Failed to download image: {url}, status code: {response.status_code}")
|
||||||
return None
|
return None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to download image: {url}, error: {str(e)}")
|
err(f"Failed to download image: {url}, error: {str(e)}")
|
||||||
return None
|
return None
|
||||||
return filename
|
return filename
|
||||||
|
|
||||||
|
|
||||||
def copy_file(local_path, folder):
|
def copy_file(local_path, folder):
|
||||||
os.makedirs(folder, exist_ok=True)
|
os.makedirs(folder, exist_ok=True)
|
||||||
filename = os.path.basename(local_path)
|
filename = os.path.basename(local_path)
|
||||||
|
@ -575,3 +407,188 @@ async def save_file(file: UploadFile, folder: Path) -> Path:
|
||||||
with open(file_path, 'wb') as f:
|
with open(file_path, 'wb') as f:
|
||||||
shutil.copyfileobj(file.file, f)
|
shutil.copyfileobj(file.file, f)
|
||||||
return file_path
|
return file_path
|
||||||
|
|
||||||
|
|
||||||
|
deprecated = '''
|
||||||
|
async def process_article2(
|
||||||
|
bg_tasks: BackgroundTasks,
|
||||||
|
parsed_content: Article,
|
||||||
|
tts_mode: str = "summary",
|
||||||
|
voice: str = DEFAULT_11L_VOICE
|
||||||
|
):
|
||||||
|
timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M')
|
||||||
|
|
||||||
|
readable_title = sanitize_filename(parsed_content.title or timestamp)
|
||||||
|
markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md")
|
||||||
|
|
||||||
|
try:
|
||||||
|
summary = await llm.summarize_text(parsed_content.clean_doc, "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
|
||||||
|
summary = summary.replace('\n', ' ') # Remove line breaks
|
||||||
|
|
||||||
|
if tts_mode == "full" or tts_mode == "content":
|
||||||
|
tts_text = parsed_content.clean_doc
|
||||||
|
elif tts_mode == "summary" or tts_mode == "excerpt":
|
||||||
|
tts_text = summary
|
||||||
|
else:
|
||||||
|
tts_text = None
|
||||||
|
|
||||||
|
banner_markdown = ''
|
||||||
|
try:
|
||||||
|
banner_url = parsed_content.top_image
|
||||||
|
if banner_url != '':
|
||||||
|
banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR))
|
||||||
|
if banner_image:
|
||||||
|
banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
err(f"No image found in article")
|
||||||
|
|
||||||
|
authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.authors)
|
||||||
|
published_date = parsed_content.publish_date
|
||||||
|
frontmatter = f"""---
|
||||||
|
title: {readable_title}
|
||||||
|
authors: {authors}
|
||||||
|
published: {published_date}
|
||||||
|
added: {timestamp}
|
||||||
|
banner: "{banner_markdown}"
|
||||||
|
tags:
|
||||||
|
|
||||||
|
"""
|
||||||
|
frontmatter += '\n'.join(f" - {tag}" for tag in parsed_content.tags)
|
||||||
|
frontmatter += '\n---\n'
|
||||||
|
|
||||||
|
body = f"# {readable_title}\n\n"
|
||||||
|
if tts_text:
|
||||||
|
audio_filename = f"{published_date} {readable_title}"
|
||||||
|
try:
|
||||||
|
audio_path = await tts.generate_speech(bg_tasks=bg_tasks, text=tts_text, voice=voice, model="eleven_turbo_v2", podcast=True, title=audio_filename,
|
||||||
|
output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR)
|
||||||
|
audio_ext = Path(audio_path).suffix
|
||||||
|
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
|
||||||
|
body += f"{obsidian_link}\n\n"
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Failed to generate TTS for np3k. {e}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
body += f"by {authors} in {parsed_content.canonical_link}" # update with method for getting the newspaper name
|
||||||
|
body += f"> [!summary]+\n"
|
||||||
|
body += f"> {summary}\n\n"
|
||||||
|
body += parsed_content["content"]
|
||||||
|
markdown_content = frontmatter + body
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Failed to combine elements of article markdown.")
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(markdown_filename, 'w') as md_file:
|
||||||
|
md_file.write(markdown_content)
|
||||||
|
|
||||||
|
info(f"Successfully saved to {markdown_filename}")
|
||||||
|
await note.add_to_daily_note(relative_path)
|
||||||
|
return markdown_filename
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Failed to write markdown file")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Failed to clip: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
async def process_article(
|
||||||
|
bg_tasks: BackgroundTasks,
|
||||||
|
url: str,
|
||||||
|
title: Optional[str] = None,
|
||||||
|
encoding: str = 'utf-8',
|
||||||
|
source: Optional[str] = None,
|
||||||
|
tts_mode: str = "summary",
|
||||||
|
voice: str = DEFAULT_11L_VOICE
|
||||||
|
):
|
||||||
|
|
||||||
|
timestamp = dt_datetime.now().strftime('%b %d, %Y at %H:%M')
|
||||||
|
|
||||||
|
parsed_content = await parse_article(url, source)
|
||||||
|
if parsed_content is None:
|
||||||
|
return {"error": "Failed to retrieve content"}
|
||||||
|
|
||||||
|
readable_title = sanitize_filename(title or parsed_content.get("title") or timestamp)
|
||||||
|
markdown_filename, relative_path = assemble_journal_path(dt_datetime.now(), subdir="Articles", filename=readable_title, extension=".md")
|
||||||
|
|
||||||
|
try:
|
||||||
|
summary = await llm.summarize_text(parsed_content["content"], "Summarize the provided text. Respond with the summary and nothing else. Do not otherwise acknowledge the request. Just provide the requested summary.")
|
||||||
|
summary = summary.replace('\n', ' ') # Remove line breaks
|
||||||
|
|
||||||
|
if tts_mode == "full" or tts_mode == "content":
|
||||||
|
tts_text = parsed_content["content"]
|
||||||
|
elif tts_mode == "summary" or tts_mode == "excerpt":
|
||||||
|
tts_text = summary
|
||||||
|
else:
|
||||||
|
tts_text = None
|
||||||
|
|
||||||
|
banner_markdown = ''
|
||||||
|
try:
|
||||||
|
banner_url = parsed_content.get('image', '')
|
||||||
|
if banner_url != '':
|
||||||
|
banner_image = download_file(banner_url, Path(OBSIDIAN_VAULT_DIR / OBSIDIAN_RESOURCES_DIR))
|
||||||
|
if banner_image:
|
||||||
|
banner_markdown = f"![[{OBSIDIAN_RESOURCES_DIR}/{banner_image}]]"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
err(f"No image found in article")
|
||||||
|
|
||||||
|
authors = ', '.join('[[{}]]'.format(author) for author in parsed_content.get('authors', ['Unknown']))
|
||||||
|
|
||||||
|
frontmatter = f"""---
|
||||||
|
title: {readable_title}
|
||||||
|
authors: {', '.join('[[{}]]'.format(author) for author in parsed_content.get('authors', ['Unknown']))}
|
||||||
|
published: {parsed_content.get('date_published', 'Unknown')}
|
||||||
|
added: {timestamp}
|
||||||
|
excerpt: {parsed_content.get('excerpt', '')}
|
||||||
|
banner: "{banner_markdown}"
|
||||||
|
tags:
|
||||||
|
|
||||||
|
"""
|
||||||
|
frontmatter += '\n'.join(f" - {tag}" for tag in parsed_content.get('tags', []))
|
||||||
|
frontmatter += '\n---\n'
|
||||||
|
|
||||||
|
body = f"# {readable_title}\n\n"
|
||||||
|
|
||||||
|
if tts_text:
|
||||||
|
datetime_str = dt_datetime.now().strftime("%Y%m%d%H%M%S")
|
||||||
|
audio_filename = f"{datetime_str} {readable_title}"
|
||||||
|
try:
|
||||||
|
audio_path = await tts.generate_speech(bg_tasks=bg_tasks, text=tts_text, voice=voice, model="eleven_turbo_v2", podcast=True, title=audio_filename,
|
||||||
|
output_dir=Path(OBSIDIAN_VAULT_DIR) / OBSIDIAN_RESOURCES_DIR)
|
||||||
|
audio_ext = Path(audio_path).suffix
|
||||||
|
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
|
||||||
|
body += f"{obsidian_link}\n\n"
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Failed to generate TTS for np3k. {e}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
body += f"by {authors} in [{parsed_content.get('domain', urlparse(url).netloc.replace('www.', ''))}]({url}).\n\n"
|
||||||
|
body += f"> [!summary]+\n"
|
||||||
|
body += f"> {summary}\n\n"
|
||||||
|
body += parsed_content["content"]
|
||||||
|
markdown_content = frontmatter + body
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Failed to combine elements of article markdown.")
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(markdown_filename, 'w', encoding=encoding) as md_file:
|
||||||
|
md_file.write(markdown_content)
|
||||||
|
|
||||||
|
info(f"Successfully saved to {markdown_filename}")
|
||||||
|
await note.add_to_daily_note(relative_path)
|
||||||
|
return markdown_filename
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Failed to write markdown file")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
err(f"Failed to clip {url}: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
'''
|
|
@ -17,24 +17,28 @@ from fastapi import HTTPException, status
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from fastapi import APIRouter, Query, HTTPException
|
from fastapi import APIRouter, Query, HTTPException
|
||||||
from sijapi import API, L, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, DEFAULT_11L_VOICE, DEFAULT_VOICE, GEO
|
from sijapi import API, L, OBSIDIAN_VAULT_DIR, OBSIDIAN_RESOURCES_DIR, OBSIDIAN_BANNER_SCENE, DEFAULT_11L_VOICE, DEFAULT_VOICE, GEO
|
||||||
from sijapi.routers import cal, img, loc, tts, llm, time, weather, asr
|
from sijapi.routers import asr, cal, gis, img, llm, serve, time, tts, weather
|
||||||
from sijapi.utilities import assemble_journal_path, assemble_archive_path, convert_to_12_hour_format, sanitize_filename, convert_degrees_to_cardinal, check_file_name, HOURLY_COLUMNS_MAPPING
|
from sijapi.utilities import assemble_journal_path, convert_to_12_hour_format, sanitize_filename, convert_degrees_to_cardinal, check_file_name, HOURLY_COLUMNS_MAPPING
|
||||||
from sijapi.classes import Location
|
from sijapi.classes import Location
|
||||||
|
|
||||||
|
|
||||||
note = APIRouter()
|
note = APIRouter()
|
||||||
logger = L.get_module_logger("note")
|
logger = L.get_module_logger("note")
|
||||||
|
def debug(text: str): logger.debug(text)
|
||||||
|
def info(text: str): logger.info(text)
|
||||||
|
def warn(text: str): logger.warning(text)
|
||||||
|
def err(text: str): logger.error(text)
|
||||||
|
def crit(text: str): logger.critical(text)
|
||||||
|
|
||||||
@note.post("/note/add")
|
@note.post("/note/add")
|
||||||
async def note_add_endpoint(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), source: Optional[str] = Form(None), bg_tasks: BackgroundTasks = None):
|
async def note_add_endpoint(file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), source: Optional[str] = Form(None), bg_tasks: BackgroundTasks = None):
|
||||||
logger.debug(f"Received request on /note/add...")
|
debug(f"Received request on /note/add...")
|
||||||
if not file and not text:
|
if not file and not text:
|
||||||
logger.warning(f"... without any file or text!")
|
warn(f"... without any file or text!")
|
||||||
raise HTTPException(status_code=400, detail="Either text or a file must be provided")
|
raise HTTPException(status_code=400, detail="Either text or a file must be provided")
|
||||||
else:
|
else:
|
||||||
result = await process_for_daily_note(file, text, source, bg_tasks)
|
result = await process_for_daily_note(file, text, source, bg_tasks)
|
||||||
logger.info(f"Result on /note/add: {result}")
|
info(f"Result on /note/add: {result}")
|
||||||
return JSONResponse({"message": "Note added successfully", "entry": result}, status_code=201)
|
return JSONResponse({"message": "Note added successfully", "entry": result}, status_code=201)
|
||||||
|
|
||||||
|
|
||||||
|
@ -44,7 +48,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
|
||||||
transcription_entry = ""
|
transcription_entry = ""
|
||||||
file_entry = ""
|
file_entry = ""
|
||||||
if file:
|
if file:
|
||||||
logger.debug("File received...")
|
debug("File received...")
|
||||||
file_content = await file.read()
|
file_content = await file.read()
|
||||||
audio_io = BytesIO(file_content)
|
audio_io = BytesIO(file_content)
|
||||||
|
|
||||||
|
@ -52,18 +56,18 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
|
||||||
guessed_type = mimetypes.guess_type(file.filename)
|
guessed_type = mimetypes.guess_type(file.filename)
|
||||||
file_type = guessed_type[0] if guessed_type[0] else "application/octet-stream"
|
file_type = guessed_type[0] if guessed_type[0] else "application/octet-stream"
|
||||||
|
|
||||||
logger.debug(f"Processing as {file_type}...")
|
debug(f"Processing as {file_type}...")
|
||||||
|
|
||||||
# Extract the main type (e.g., 'audio', 'image', 'video')
|
# Extract the main type (e.g., 'audio', 'image', 'video')
|
||||||
main_type = file_type.split('/')[0]
|
main_type = file_type.split('/')[0]
|
||||||
subdir = main_type.title() if main_type else "Documents"
|
subdir = main_type.title() if main_type else "Documents"
|
||||||
|
|
||||||
absolute_path, relative_path = assemble_journal_path(now, subdir=subdir, filename=file.filename)
|
absolute_path, relative_path = assemble_journal_path(now, subdir=subdir, filename=file.filename)
|
||||||
logger.debug(f"Destination path: {absolute_path}")
|
debug(f"Destination path: {absolute_path}")
|
||||||
|
|
||||||
with open(absolute_path, 'wb') as f:
|
with open(absolute_path, 'wb') as f:
|
||||||
f.write(file_content)
|
f.write(file_content)
|
||||||
logger.debug(f"Processing {f.name}...")
|
debug(f"Processing {f.name}...")
|
||||||
|
|
||||||
if main_type == 'audio':
|
if main_type == 'audio':
|
||||||
transcription = await asr.transcribe_audio(file_path=absolute_path, params=asr.TranscribeParams(model="small-en", language="en", threads=6))
|
transcription = await asr.transcribe_audio(file_path=absolute_path, params=asr.TranscribeParams(model="small-en", language="en", threads=6))
|
||||||
|
@ -74,7 +78,7 @@ async def process_for_daily_note(file: Optional[UploadFile] = File(None), text:
|
||||||
file_entry = f"[Source]({relative_path})"
|
file_entry = f"[Source]({relative_path})"
|
||||||
|
|
||||||
text_entry = text if text else ""
|
text_entry = text if text else ""
|
||||||
logger.debug(f"transcription: {transcription_entry}\nfile_entry: {file_entry}\ntext_entry: {text_entry}")
|
debug(f"transcription: {transcription_entry}\nfile_entry: {file_entry}\ntext_entry: {text_entry}")
|
||||||
return await add_to_daily_note(transcription_entry, file_entry, text_entry, now)
|
return await add_to_daily_note(transcription_entry, file_entry, text_entry, now)
|
||||||
|
|
||||||
|
|
||||||
|
@@ -169,7 +173,7 @@ added: {timestamp}
obsidian_link = f"![[{OBSIDIAN_RESOURCES_DIR}/{audio_filename}{audio_ext}]]"
body += f"{obsidian_link}\n\n"
except Exception as e:
-logger.error(f"Failed in the TTS portion of clipping: {e}")
+err(f"Failed in the TTS portion of clipping: {e}")

body += f"> [!summary]+\n"
body += f"> {summary}\n\n"
@@ -182,12 +186,12 @@ added: {timestamp}
with open(markdown_filename, 'w', encoding=encoding) as md_file:
md_file.write(markdown_content)

-logger.info(f"Successfully saved to {markdown_filename}")
+info(f"Successfully saved to {markdown_filename}")

return markdown_filename

except Exception as e:
-logger.error(f"Failed to clip: {str(e)}")
+err(f"Failed to clip: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))

def list_and_correct_impermissible_files(root_dir, rename: bool = False):
@@ -198,7 +202,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False):
if check_file_name(filename):
file_path = Path(dirpath) / filename
impermissible_files.append(file_path)
-logger.debug(f"Impermissible file found: {file_path}")
+debug(f"Impermissible file found: {file_path}")

# Sanitize the file name
new_filename = sanitize_filename(filename)
@@ -216,7 +220,7 @@ def list_and_correct_impermissible_files(root_dir, rename: bool = False):
# Rename the file
if rename:
os.rename(file_path, new_file_path)
-logger.debug(f"Renamed: {file_path} -> {new_file_path}")
+debug(f"Renamed: {file_path} -> {new_file_path}")

return impermissible_files

@@ -233,7 +237,7 @@ async def build_daily_note_range_endpoint(dt_start: str, dt_end: str):
results = []
current_date = start_date
while current_date <= end_date:
-formatted_date = await loc.dt(current_date)
+formatted_date = await gis.dt(current_date)
result = await build_daily_note(formatted_date)
results.append(result)
current_date += timedelta(days=1)
@@ -242,6 +246,37 @@ async def build_daily_note_range_endpoint(dt_start: str, dt_end: str):



+@note.get("/note/create")
+async def build_daily_note_getpoint():
+try:
+loc = await gis.get_last_location()
+if not loc:
+raise ValueError("Unable to retrieve last location")
+
+tz = await GEO.tz_current(loc)
+if not tz:
+raise ValueError(f"Unable to determine timezone for location: {loc}")
+
+date_time = dt_datetime.now(tz)
+path = await build_daily_note(date_time, loc.latitude, loc.longitude)
+path_str = str(path)
+
+info(f"Successfully created daily note at {path_str}")
+return JSONResponse(content={"path": path_str}, status_code=200)
+
+except ValueError as ve:
+error_msg = f"Value Error in build_daily_note_getpoint: {str(ve)}"
+err(error_msg)
+raise HTTPException(status_code=400, detail=error_msg)
+
+except Exception as e:
+error_msg = f"Unexpected error in build_daily_note_getpoint: {str(e)}"
+err(error_msg)
+err(f"Traceback: {traceback.format_exc()}")
+raise HTTPException(status_code=500, detail="An unexpected error occurred")



@note.post("/note/create")
async def build_daily_note_endpoint(
date_str: Optional[str] = Form(dt_datetime.now().strftime("%Y-%m-%d")),
@@ -258,10 +293,10 @@ async def build_daily_note_endpoint(
else:
raise ValueError("Location is not provided or invalid.")
except (ValueError, AttributeError, TypeError) as e:
-logger.warning(f"Falling back to localized datetime due to error: {e}")
+warn(f"Falling back to localized datetime due to error: {e}")
try:
-date_time = await loc.dt(date_str)
+date_time = await gis.dt(date_str)
-places = await loc.fetch_locations(date_time)
+places = await gis.fetch_locations(date_time)
lat, lon = places[0].latitude, places[0].longitude
except Exception as e:
return JSONResponse(content={"error": str(e)}, status_code=400)
@@ -278,14 +313,14 @@ async def build_daily_note(date_time: dt_datetime, lat: float = None, lon: float
Obsidian helper. Takes a datetime and creates a new daily note. Note: it uses the sijapi configuration file to place the daily note and does NOT presently interface with Obsidian's daily note or periodic notes extensions. It is your responsibility to ensure they match.
'''
absolute_path, _ = assemble_journal_path(date_time)
-logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in build_daily_note.")
+warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in build_daily_note.")
formatted_day = date_time.strftime("%A %B %d, %Y") # Monday May 27, 2024 formatting
day_before = (date_time - timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-26 Sunday formatting
day_after = (date_time + timedelta(days=1)).strftime("%Y-%m-%d %A") # 2024-05-28 Tuesday formatting
header = f"# [[{day_before}|← ]] {formatted_day} [[{day_after}| →]]\n\n"

if not lat or not lon:
-places = await loc.fetch_locations(date_time)
+places = await gis.fetch_locations(date_time)
lat, lon = places[0].latitude, places[0].longitude

location = await GEO.code((lat, lon))
@@ -308,6 +343,10 @@ Obsidian helper. Takes a datetime and creates a new daily note. Note: it uses th
_, note_path = assemble_journal_path(date_time, filename="Notes", extension=".md", no_timestamp = True)
note_embed = f"![[{note_path}]]"

+_, map_path = assemble_journal_path(date_time, filename="Map", extension=".png", no_timestamp = True)
+map = await gis.generate_and_save_heatmap(date_time, output_path=map_path)
+map_embed = f"![[{map_path}]]"

_, banner_path = assemble_journal_path(date_time, filename="Banner", extension=".jpg", no_timestamp = True)

body = f"""---
@@ -320,6 +359,7 @@ created: "{dt_datetime.now().strftime("%Y-%m-%d %H:%M:%S")}"

{header}
{weather_embed}
+{map_path}

## Events
{event_embed}
@@ -369,7 +409,7 @@ async def update_frontmatter(date_time: dt_datetime, key: str, value: str):

# Check if the file exists
if not file_path.exists():
-logger.critical(f"Markdown file not found at {file_path}")
+crit(f"Markdown file not found at {file_path}")
raise HTTPException(status_code=404, detail="Markdown file not found.")

# Read the file
@@ -416,32 +456,29 @@ async def banner_endpoint(dt: str, location: str = None, forecast: str = None, m
'''
Endpoint (POST) that generates a new banner image for the Obsidian daily note for a specified date, taking into account optional additional information, then updates the frontmatter if necessary.
'''
-logger.debug(f"banner_endpoint requested with date: {dt} ({type(dt)})")
+debug(f"banner_endpoint requested with date: {dt} ({type(dt)})")
-date_time = await loc.dt(dt)
+date_time = await gis.dt(dt)
-logger.debug(f"date_time after localization: {date_time} ({type(date_time)})")
+debug(f"date_time after localization: {date_time} ({type(date_time)})")
context = await generate_context(dt, location, forecast, mood, other_context)
jpg_path = await generate_banner(date_time, location, mood=mood, other_context=other_context)
return jpg_path


async def generate_banner(dt, location: Location = None, forecast: str = None, mood: str = None, other_context: str = None):
-# logger.debug(f"Location: {location}, forecast: {forecast}, mood: {mood}, other_context: {other_context}")
+date_time = await gis.dt(dt)
-date_time = await loc.dt(dt)
-logger.debug(f"generate_banner called with date_time: {date_time}")
destination_path, local_path = assemble_journal_path(date_time, filename="Banner", extension=".jpg", no_timestamp = True)
-logger.debug(f"destination path generated: {destination_path}")
if not location or not isinstance(location, Location):
-locations = await loc.fetch_locations(date_time)
+locations = await gis.fetch_locations(date_time)
if locations:
location = locations[0]
if not forecast:
forecast = await update_dn_weather(date_time, False, location.latitude, location.longitude)

prompt = await generate_context(date_time, location, forecast, mood, other_context)
-logger.debug(f"Prompt: {prompt}")
+debug(f"Prompt: {prompt}")
final_path = await img.workflow(prompt, scene=OBSIDIAN_BANNER_SCENE, destination_path=destination_path)
if not str(local_path) in str(final_path):
-logger.info(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}")
+info(f"Apparent mismatch between local path, {local_path}, and final_path, {final_path}")
jpg_embed = f"\"![[{local_path}]]\""
await update_frontmatter(date_time, "banner", jpg_embed)
return local_path
@@ -469,7 +506,7 @@ async def generate_context(date_time, location: Location, forecast: str, mood: s
if geocoded_location.display_name or geocoded_location.city or geocoded_location.country:
return await generate_context(date_time, geocoded_location, forecast, mood, other_context)
else:
-logger.warning(f"Failed to get a useable location for purposes of generating a banner, but we'll generate one anyway.")
+warn(f"Failed to get a useable location for purposes of generating a banner, but we'll generate one anyway.")
elif location and isinstance(location, str):
display_name = f"Location: {location}\n"
else:
@@ -507,7 +544,7 @@ async def generate_context(date_time, location: Location, forecast: str, mood: s


async def get_note(date_time: dt_datetime):
-date_time = await loc.dt(date_time);
+date_time = await gis.dt(date_time);
absolute_path, local_path = assemble_journal_path(date_time, filename = "Notes", extension = ".md", no_timestamp = True)

if absolute_path.is_file():
@@ -536,9 +573,9 @@ async def note_weather_get(
):
force_refresh_weather = refresh == "True"
try:
-date_time = dt_datetime.now() if date == "0" else await loc.dt(date)
+date_time = dt_datetime.now() if date == "0" else await gis.dt(date)
-logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in note_weather_get.")
+warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in note_weather_get.")
-logger.debug(f"date: {date} .. date_time: {date_time}")
+debug(f"date: {date} .. date_time: {date_time}")
content = await update_dn_weather(date_time, force_refresh_weather) #, lat, lon)
return JSONResponse(content={"forecast": content}, status_code=200)

@@ -546,14 +583,14 @@ async def note_weather_get(
return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code)

except Exception as e:
-logger.error(f"Error in note_weather_get: {str(e)}")
+err(f"Error in note_weather_get: {str(e)}")
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")


@note.post("/update/note/{date}")
async def post_update_daily_weather_and_calendar_and_timeslips(date: str, refresh: str="False") -> PlainTextResponse:
-date_time = await loc.dt(date)
+date_time = await gis.dt(date)
-logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in post_update_daily_weather_and_calendar_and_timeslips.")
+warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our dt_datetime in post_update_daily_weather_and_calendar_and_timeslips.")
force_refresh_weather = refresh == "True"
await update_dn_weather(date_time, force_refresh_weather)
await update_daily_note_events(date_time)
@@ -561,52 +598,52 @@ async def post_update_daily_weather_and_calendar_and_timeslips(date: str, refres
return f"[Refresh]({API.URL}/update/note/{date_time.strftime('%Y-%m-%d')}"

async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False, lat: float = None, lon: float = None):
-logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.")
+warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in update_dn_weather.")
try:
if lat and lon:
place = await GEO.code((lat, lon))

else:
-logger.debug(f"Updating weather for {date_time}")
+debug(f"Updating weather for {date_time}")
-places = await loc.fetch_locations(date_time)
+places = await gis.fetch_locations(date_time)
place = places[0]
lat = place.latitude
lon = place.longitude

-logger.debug(f"lat: {lat}, lon: {lon}, place: {place}")
+debug(f"lat: {lat}, lon: {lon}, place: {place}")
city = GEO.find_override_location(lat, lon)
if city:
-logger.info(f"Using override location: {city}")
+info(f"Using override location: {city}")

else:
if place.city and place.city != "":
city = place.city
-logger.info(f"City in data: {city}")
+info(f"City in data: {city}")

else:
location = await GEO.code((lat, lon))
-logger.debug(f"location: {location}")
+debug(f"location: {location}")
city = location.name
city = city if city else location.city
city = city if city else location.house_number + ' ' + location.road

-logger.debug(f"City geocoded: {city}")
+debug(f"City geocoded: {city}")

# Assemble journal path
absolute_path, relative_path = assemble_journal_path(date_time, filename="Weather", extension=".md", no_timestamp = True)
-logger.debug(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}")
+debug(f"Journal path: absolute_path={absolute_path}, relative_path={relative_path}")

try:
-logger.debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
+debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
day = await weather.get_weather(date_time, lat, lon, force_refresh)
-logger.debug(f"day information obtained from get_weather: {day}")
+debug(f"day information obtained from get_weather: {day}")
if day:
DailyWeather = day.get('DailyWeather')
HourlyWeather = day.get('HourlyWeather')
if DailyWeather:
-# logger.debug(f"Day: {DailyWeather}")
+# debug(f"Day: {DailyWeather}")
icon = DailyWeather.get('icon')
-logger.debug(f"Icon: {icon}")
+debug(f"Icon: {icon}")

weather_icon, admonition = get_icon_and_admonition(icon) if icon else (":LiSunMoon:", "ad-weather")

@@ -675,38 +712,38 @@ async def update_dn_weather(date_time: dt_datetime, force_refresh: bool = False,
detailed_forecast += assemble_hourly_data_table(times, condition_symbols, temps, winds)
detailed_forecast += f"```\n\n"

-logger.debug(f"Detailed forecast: {detailed_forecast}.")
+debug(f"Detailed forecast: {detailed_forecast}.")

with open(absolute_path, 'w', encoding='utf-8') as note_file:
note_file.write(detailed_forecast)

-logger.debug(f"Operation complete.")
+debug(f"Operation complete.")

return narrative
else:
-logger.error(f"Failed to get DailyWeather from day: {day}")
+err(f"Failed to get DailyWeather from day: {day}")
else:
-logger.error(f"Failed to get day")
+err(f"Failed to get day")
raise HTTPException(status_code=500, detail="Failed to retrieve weather data")

except HTTPException as e:
-logger.error(f"HTTP error: {e}")
+err(f"HTTP error: {e}")
-logger.error(traceback.format_exc())
+err(traceback.format_exc())
raise e

except Exception as e:
-logger.error(f"Error: {e}")
+err(f"Error: {e}")
-logger.error(traceback.format_exc())
+err(traceback.format_exc())
raise HTTPException(status_code=999, detail=f"Error: {e}")

except ValueError as ve:
-logger.error(f"Value error in update_dn_weather: {str(ve)}")
+err(f"Value error in update_dn_weather: {str(ve)}")
-logger.error(traceback.format_exc())
+err(traceback.format_exc())
raise HTTPException(status_code=400, detail=f"Value error: {str(ve)}")

except Exception as e:
-logger.error(f"Error in update_dn_weather: {str(e)}")
+err(f"Error in update_dn_weather: {str(e)}")
-logger.error(traceback.format_exc())
+err(traceback.format_exc())
raise HTTPException(status_code=500, detail=f"Error in update_dn_weather: {str(e)}")

def format_hourly_time(hour):
@@ -714,8 +751,8 @@ def format_hourly_time(hour):
hour_12 = convert_to_12_hour_format(hour.get("datetime"))
return hour_12
except Exception as e:
-logger.error(f"Error in format_hourly_time: {str(e)}")
+err(f"Error in format_hourly_time: {str(e)}")
-logger.error(traceback.format_exc())
+err(traceback.format_exc())
return ""

def format_hourly_icon(hour, sunrise, sunset):
@@ -725,7 +762,7 @@ def format_hourly_icon(hour, sunrise, sunset):

precip = hour.get('precip', float(0.0))
precip_prob = hour.get('precipprob', float(0.0))
-logger.debug(f"precip: {precip}, prob: {precip_prob}")
+debug(f"precip: {precip}, prob: {precip_prob}")

sp_str = None

@@ -749,8 +786,8 @@ def format_hourly_icon(hour, sunrise, sunset):
return formatted

except Exception as e:
-logger.error(f"Error in format_hourly_special: {str(e)}")
+err(f"Error in format_hourly_special: {str(e)}")
-logger.error(traceback.format_exc())
+err(traceback.format_exc())
return ""

def format_hourly_temperature(hour):
@@ -758,8 +795,8 @@ def format_hourly_temperature(hour):
temp_str = f"{hour.get('temp', '')}˚ F"
return temp_str
except Exception as e:
-logger.error(f"Error in format_hourly_temperature: {str(e)}")
+err(f"Error in format_hourly_temperature: {str(e)}")
-logger.error(traceback.format_exc())
+err(traceback.format_exc())
return ""

def format_hourly_wind(hour):
@@ -769,8 +806,8 @@ def format_hourly_wind(hour):
wind_str = f"{str(windspeed)}:LiWind: {winddir}"
return wind_str
except Exception as e:
-logger.error(f"Error in format_hourly_wind: {str(e)}")
+err(f"Error in format_hourly_wind: {str(e)}")
-logger.error(traceback.format_exc())
+err(traceback.format_exc())
return ""

def assemble_hourly_data_table(times, condition_symbols, temps, winds):
@@ -783,7 +820,7 @@ def assemble_hourly_data_table(times, condition_symbols, temps, winds):


def get_icon_and_admonition(icon_str) -> Tuple:
-logger.debug(f"Received request for emoji {icon_str}")
+debug(f"Received request for emoji {icon_str}")
if icon_str.startswith(":") and icon_str.endswith(":"):
return icon_str

@@ -884,7 +921,7 @@ async def format_events_as_markdown(event_data: Dict[str, Union[str, List[Dict[s
total_events = len(event_data["events"])
event_markdown = f"```ad-events"
for event in event_data["events"]:
-logger.debug(f"event busy status: {event['busy']}; all day status: {event['all_day']}")
+debug(f"event busy status: {event['busy']}; all day status: {event['all_day']}")
if not event['name'].startswith('TC '):
url = f"hook://ical/eventID={event['uid']}calendarID=17"
if event['url']:
@@ -957,23 +994,23 @@ async def format_events_as_markdown(event_data: Dict[str, Union[str, List[Dict[s
@note.get("/note/events", response_class=PlainTextResponse)
async def note_events_endpoint(date: str = Query(None)):

-date_time = await loc.dt(date) if date else await loc.dt(dt_datetime.now())
+date_time = await gis.dt(date) if date else await gis.dt(dt_datetime.now())
response = await update_daily_note_events(date_time)
return PlainTextResponse(content=response, status_code=200)

async def update_daily_note_events(date_time: dt_datetime):
-logger.debug(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}")
+debug(f"Looking up events on date: {date_time.strftime('%Y-%m-%d')}")
try:
events = await cal.get_events(date_time, date_time)
-logger.debug(f"Raw events: {events}")
+debug(f"Raw events: {events}")
event_data = {
"date": date_time.strftime('%Y-%m-%d'),
"events": events
}
events_markdown = await format_events_as_markdown(event_data)
-logger.debug(f"Markdown events: {events_markdown}")
+debug(f"Markdown events: {events_markdown}")
absolute_path, _ = assemble_journal_path(date_time, filename="Events", extension=".md", no_timestamp = True)
-logger.debug(f"Writing events to file: {absolute_path}")
+debug(f"Writing events to file: {absolute_path}")

with open(absolute_path, 'w', encoding='utf-8') as note_file:
note_file.write(events_markdown)
@@ -981,7 +1018,7 @@ async def update_daily_note_events(date_time: dt_datetime):
return events_markdown

except Exception as e:
-logger.error(f"Error processing events: {e}")
+err(f"Error processing events: {e}")
raise HTTPException(status_code=500, detail=str(e))


@@ -8,6 +8,11 @@ from sijapi import L

rag = APIRouter()
logger = L.get_module_logger("rag")
+def debug(text: str): logger.debug(text)
+def info(text: str): logger.info(text)
+def warn(text: str): logger.warning(text)
+def err(text: str): logger.error(text)
+def crit(text: str): logger.critical(text)

rag.get("/rag/search")
async def rag_search_endpoint(query: str, scope: str):
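The same logging shim appears in each router touched by this commit: a module logger from `L.get_module_logger()` wrapped in one-line `debug`/`info`/`warn`/`err`/`crit` helpers, after which call sites shrink from `logger.debug(...)` to `debug(...)`. A minimal sketch of the pattern in isolation — the module name and the sample function are hypothetical, and `L.get_module_logger()` is assumed to return a standard logging/loguru-style logger:

```python
from sijapi import L

# Assumed: L.get_module_logger() returns a logging/loguru-style logger.
logger = L.get_module_logger("example")  # "example" is a hypothetical module name

def debug(text: str): logger.debug(text)
def info(text: str): logger.info(text)
def warn(text: str): logger.warning(text)
def err(text: str): logger.error(text)
def crit(text: str): logger.critical(text)

def halve(value: int) -> int:
    # Hypothetical call site: the wrappers keep log calls terse.
    debug(f"halving {value}")
    if value % 2:
        warn(f"{value} is odd; result will be truncated")
    return value // 2
```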
177
sijapi/routers/scrape.py
Normal file
@@ -0,0 +1,177 @@
+import asyncio
+import json
+import re
+from fastapi import APIRouter, HTTPException
+from typing import Dict, List, Any
+import aiohttp
+import PyPDF2
+import io
+from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from pathlib import Path
+from sijapi import Scrape, L, Dir

+logger = L.get_module_logger('scrape')
+def debug(text: str): logger.debug(text)
+def info(text: str): logger.info(text)
+def warn(text: str): logger.warning(text)
+def err(text: str): logger.error(text)
+def crit(text: str): logger.critical(text)

+scrape = APIRouter()

+# Ensure Dir.DATA is a Path object
+Dir.DATA = Path(Dir.DATA).expanduser()

+def save_to_json(data: List[Dict], output_file: str):
+output_path = Dir.DATA / output_file
+info(f"Saving data to {output_path}")
+output_path.parent.mkdir(parents=True, exist_ok=True)
+with open(output_path, 'w') as f:
+json.dump(data, f, indent=2)
+info(f"Data saved successfully to {output_path}")

+def load_from_json(output_file: str) -> List[Dict]:
+output_path = Dir.DATA / output_file
+info(f"Loading data from {output_path}")
+try:
+with open(output_path, 'r') as f:
+return json.load(f)
+except FileNotFoundError:
+warn(f"File {output_path} not found")
+return []

+async def fetch_content(config: Any) -> str:
+info(f"Fetching content from {config.url}")
+if config.content.js_render:
+return await fetch_with_selenium(config.url)

+async with aiohttp.ClientSession() as session:
+async with session.get(config.url) as response:
+if config.content.type == 'pdf':
+return await handle_pdf(response)
+elif config.content.type in ['html', 'xml']:
+return await handle_html_xml(response, config.content.selector)
+elif config.content.type == 'json':
+return await handle_json(response)
+elif config.content.type == 'txt':
+return await response.text()
+else:
+warn(f"Unsupported content type: {config.content.type}")
+return await response.text()

+async def fetch_with_selenium(url: str) -> str:
+options = Options()
+options.add_argument("--headless")
+driver = webdriver.Chrome(options=options)
+driver.get(url)
+content = driver.page_source
+driver.quit()
+return content

+async def handle_pdf(response):
+pdf_content = await response.read()
+pdf_file = io.BytesIO(pdf_content)
+pdf_reader = PyPDF2.PdfReader(pdf_file)
+return "\n".join(page.extract_text() for page in pdf_reader.pages)

+async def handle_html_xml(response, selector):
+content = await response.text()
+soup = BeautifulSoup(content, 'html.parser')
+if selector:
+return soup.select_one(selector).get_text()
+return soup.get_text()

+async def handle_json(response):
+return await response.json()

+def apply_processing_step(data: Any, step: Any) -> Any:
+info(f"Applying processing step: {step.type}")
+if step.type == 'regex_split':
+return re.split(step.pattern, data)[1:]
+elif step.type == 'keyword_filter':
+return [item for item in data if any(keyword.lower() in str(item).lower() for keyword in step.keywords)]
+elif step.type == 'regex_extract':
+if isinstance(data, list):
+return [apply_regex_extract(item, step.extractions) for item in data]
+return apply_regex_extract(data, step.extractions)
+debug(f"Unknown processing step type: {step.type}")
+return data

+def apply_regex_extract(text: str, extractions: List[Any]) -> Dict:
+debug(f"Applying regex extraction on text of length {len(text)}")
+result = {}
+for extraction in extractions:
+extraction_dict = extraction.dict() if hasattr(extraction, 'dict') else extraction
+flags = sum(getattr(re, flag.upper()) for flag in extraction_dict.get('flags', []))

+pattern = extraction_dict['pattern']
+matches = re.findall(pattern, text, flags=flags)
+if matches:
+if extraction_dict.get('all_matches', False):
+if extraction_dict.get('group_names'):
+result[extraction_dict['name']] = [dict(zip(extraction_dict['group_names'], match)) for match in matches]
+else:
+result[extraction_dict['name']] = matches
+else:
+result[extraction_dict['name']] = matches[-1].strip() # Take the last match

+debug(f"Extracted {len(result)} items")
+return result

+def apply_post_processing(data: List[Dict], post_processing: List[Any]) -> List[Dict]:
+info("Applying post-processing steps")
+for step in post_processing:
+if step.type == 'custom':
+data = globals()[step.function](data)
+return data

+def data_has_changed(new_data: List[Dict], old_data: List[Dict]) -> bool:
+return new_data != old_data

+@scrape.get("/scrape/{config_name}")
+async def scrape_site(config_name: str):
+info(f"Starting scrape operation for {config_name}")

+if not hasattr(Scrape, 'configurations'):
+# If 'configurations' doesn't exist, assume the entire Scrape object is the configuration
+config = Scrape if Scrape.name == config_name else None
+else:
+config = next((c for c in Scrape.configurations if c.name == config_name), None)

+if not config:
+raise HTTPException(status_code=404, detail=f"Configuration '{config_name}' not found")

+raw_data = await fetch_content(config)
+processed_data = raw_data

+for step in config.processing:
+processed_data = apply_processing_step(processed_data, step)

+processed_data = apply_post_processing(processed_data, config.post_processing)

+# Resolve Dir.DATA in the output file path
+output_file = config.output_file.replace('{{ Dir.DATA }}', str(Dir.DATA))
+previous_data = load_from_json(output_file)

+if data_has_changed(processed_data, previous_data):
+save_to_json(processed_data, output_file)
+info("Scrape completed with updates")
+return {"message": "Site updated", "data": processed_data}
+else:
+info("Scrape completed with no updates")
+return {"message": "No updates", "data": processed_data}

+def apply_post_processing(data: List[Dict], post_processing: List[Any]) -> List[Dict]:
+info("Applying post-processing steps")
+for step in post_processing:
+if step.type == 'regex_extract':
+for entry in data:
+if step.field in entry:
+matches = re.findall(step.pattern, entry[step.field])
+if step.all_matches:
+entry[step.output_field] = [step.format.format(*match) for match in matches]
+elif matches:
+entry[step.output_field] = step.format.format(*matches[0])
+return data
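The new scraper is driven entirely by its configuration object: `fetch_content()` reads `config.url`, `config.content.type`, `config.content.selector`, and `config.content.js_render`, while `scrape_site()` walks `config.processing` and `config.post_processing` and writes to `config.output_file` under `Dir.DATA`. The configuration file itself is not shown in this commit, so the following is only a hypothetical sketch of a config shape those functions could consume; every name and value in it is invented for illustration:

```python
# Hypothetical configuration sketch; field names mirror what scrape_site() reads,
# but the values are illustrative only and not taken from the repository config.
from types import SimpleNamespace as NS

example_config = NS(
    name="example_site",                        # selected via GET /scrape/example_site
    url="https://example.com/reports/latest.txt",
    content=NS(type="txt", js_render=False, selector=None),
    processing=[
        NS(type="regex_split", pattern=r"Report No\.\s*\d+"),       # -> list of report chunks
        NS(type="keyword_filter", keywords=["timber", "harvest"]),  # keep matching chunks
        NS(type="regex_extract", extractions=[
            # Plain dicts work here: apply_regex_extract() falls back to the mapping
            # when the extraction object has no .dict() method.
            {"name": "Acres", "pattern": r"(\d+\.\d+)\s*acres",
             "flags": ["IGNORECASE"], "all_matches": False},
        ]),
    ],
    post_processing=[],
    output_file="example_site.json",             # resolved relative to Dir.DATA
)
```

Run through the pipeline, a config like this would yield a list of `{"Acres": ...}` records that `save_to_json()` persists only when `data_has_changed()` reports a difference from the previous run.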
@@ -28,16 +28,22 @@ from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from sijapi import (
-L, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY,
+L, API, LOGS_DIR, TS_ID, CASETABLE_PATH, COURTLISTENER_DOCKETS_URL, COURTLISTENER_API_KEY,
COURTLISTENER_BASE_URL, COURTLISTENER_DOCKETS_DIR, COURTLISTENER_SEARCH_DIR, ALERTS_DIR,
-MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, DATA_DIR, IMG_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR
+MAC_UN, MAC_PW, MAC_ID, TS_TAILNET, IMG_DIR, PUBLIC_KEY, OBSIDIAN_VAULT_DIR
)
+from sijapi.classes import WidgetUpdate
from sijapi.utilities import bool_convert, sanitize_filename, assemble_journal_path
-from sijapi.routers import loc, note
+from sijapi.routers import gis


serve = APIRouter(tags=["public"])

logger = L.get_module_logger("serve")
+def debug(text: str): logger.debug(text)
+def info(text: str): logger.info(text)
+def warn(text: str): logger.warning(text)
+def err(text: str): logger.err(text)
+def crit(text: str): logger.critical(text)

@serve.get("/pgp")
async def get_pgp():
@@ -51,7 +57,6 @@ def serve_image(image_name: str):
else:
return {"error": "Image not found"}


def construct_journal_path(date_str: str) -> Path:
try:
date_obj = datetime.strptime(date_str, '%Y-%m-%d')
@@ -67,19 +72,20 @@ def is_valid_date(date_str: str) -> bool:
except ValueError:
return False


@serve.get("/notes/{file_path:path}")
async def get_file_endpoint(file_path: str):
try:
-date_time = await loc.dt(file_path);
+date_time = await gis.dt(file_path);
absolute_path, local_path = assemble_journal_path(date_time, no_timestamp = True)
except ValueError as e:
-logger.debug(f"Unable to parse {file_path} as a date, now trying to use it as a local path")
+debug(f"Unable to parse {file_path} as a date, now trying to use it as a local path")
absolute_path = OBSIDIAN_VAULT_DIR / file_path
if not absolute_path.suffix:
absolute_path = Path(absolute_path.with_suffix(".md"))

if not absolute_path.is_file():
-logger.warning(f"{absolute_path} is not a valid file it seems.")
+warn(f"{absolute_path} is not a valid file it seems.")
elif absolute_path.suffix == '.md':
try:
with open(absolute_path, 'r', encoding='utf-8') as file:
@@ -93,19 +99,6 @@ async def get_file_endpoint(file_path: str):
raise HTTPException(status_code=400, detail="Unsupported file type")


-with open(CASETABLE_PATH, 'r') as file:
-CASETABLE = json.load(file)

-class WidgetUpdate(BaseModel):
-text: Optional[str] = None
-progress: Optional[str] = None
-icon: Optional[str] = None
-color: Optional[str] = None
-url: Optional[str] = None
-shortcut: Optional[str] = None
-graph: Optional[str] = None


@serve.get("/health_check")
def hook_health():
shellfish_health_check()
@@ -130,50 +123,33 @@ async def hook_changedetection(webhook_data: dict):
if message and any(word in message.split() for word in ["SPI", "sierra", "pacific"]):
filename = ALERTS_DIR / f"alert_{int(time.time())}.json"
filename.write_text(json.dumps(webhook_data, indent=4))

notify(message)

return {"status": "received"}


-@serve.post("/cl/search")
-async def hook_cl_search(request: Request, bg_tasks: BackgroundTasks):
-client_ip = request.client.host
-logger.debug(f"Received request from IP: {client_ip}")
-data = await request.json()
-payload = data['payload']
-results = data['payload']['results']

-# Save the payload data
-timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
-payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_search.json"
-with open(payload_file, 'w') as file:
-json.dump(payload, file, indent=2)

-for result in results:
-bg_tasks.add_task(cl_search_process_result, result)
-return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)

-@serve.post("/cl/docket")
-async def hook_cl_docket(request: Request):
-client_ip = request.client.host
-logger.debug(f"Received request from IP: {client_ip}")
-data = await request.json()
-await cl_docket(data, client_ip)

async def notify(alert: str):
+fail = True
try:
-await notify_shellfish(alert)
+if API.EXTENSIONS.shellfish == "on" or API.EXTENSIONS.shellfish == True:
+await notify_shellfish(alert)
+fail = False

-if TS_ID == MAC_ID:
+if API.EXTENSIONS.macnotify == "on" or API.EXTENSIONS.macnotify == True:
-await notify_local(alert)
+if TS_ID == MAC_ID:
-else:
+await notify_local(alert)
-await notify_remote(f"{MAC_ID}.{TS_TAILNET}.net", alert, MAC_UN, MAC_PW)
+fail = False
+else:
-except Exception as e:
+await notify_remote(f"{MAC_ID}.{TS_TAILNET}.net", alert, MAC_UN, MAC_PW)
-raise HTTPException(status_code=500, detail=f"Failed to send alert: {str(e)}")
+fail = False
+except:
+fail = True

-return {"message": alert}
+if fail == False:
+info(f"Delivered alert: {alert}")
+return {"message": alert}
+else:
+crit(f"Failed to deliver alert: {alert}")
+return {"message": f"Failed to deliver alert: {alert}"}

async def notify_local(message: str):
await asyncio.to_thread(os.system, f'osascript -e \'display notification "{message}" with title "Notification Title"\'')
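The reworked `notify()` above gates each delivery channel on an `API.EXTENSIONS` flag and only reports success when at least one channel fires; because each flag is compared against both the string `"on"` and the boolean `True`, the same two-way check recurs for every extension. A small helper could make that test uniform — this is only a sketch of the pattern, not code from the repository, and the helper name is hypothetical:

```python
# Hypothetical helper for flags that may be True, "on", or anything else (treated as off).
def extension_enabled(value) -> bool:
    return value is True or (isinstance(value, str) and value.strip().lower() == "on")

# Example, mirroring the gating in notify():
# if extension_enabled(API.EXTENSIONS.shellfish):
#     await notify_shellfish(alert)
```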
@ -194,228 +170,256 @@ async def notify_remote(host: str, message: str, username: str = None, password:
|
||||||
ssh.close()
|
ssh.close()
|
||||||
|
|
||||||
|
|
||||||
async def notify_shellfish(alert: str):
|
if API.EXTENSIONS.shellfish == "on" or API.EXTENSIONS.shellfish == True:
|
||||||
key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b"
|
async def notify_shellfish(alert: str):
|
||||||
user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm"
|
key = "d7e810e7601cd296a05776c169b4fe97a6a5ee1fd46abe38de54f415732b3f4b"
|
||||||
iv = "ab5bbeb426015da7eedcee8bee3dffb7"
|
user = "WuqPwm1VpGijF4U5AnIKzqNMVWGioANTRjJoonPm"
|
||||||
|
iv = "ab5bbeb426015da7eedcee8bee3dffb7"
|
||||||
plain = "Secure ShellFish Notify 2.0\n" + base64.b64encode(alert.encode()).decode() + "\n"
|
|
||||||
|
plain = "Secure ShellFish Notify 2.0\n" + base64.b64encode(alert.encode()).decode() + "\n"
|
||||||
|
|
||||||
openssl_command = [
|
openssl_command = [
|
||||||
"openssl", "enc", "-aes-256-cbc", "-base64", "-K", key, "-iv", iv
|
"openssl", "enc", "-aes-256-cbc", "-base64", "-K", key, "-iv", iv
|
||||||
]
|
]
|
||||||
|
|
||||||
process = await asyncio.to_thread(subprocess.Popen, openssl_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
process = await asyncio.to_thread(subprocess.Popen, openssl_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
stdout, stderr = await asyncio.to_thread(process.communicate, plain.encode())
|
stdout, stderr = await asyncio.to_thread(process.communicate, plain.encode())
|
||||||
|
|
||||||
if process.returncode != 0:
|
if process.returncode != 0:
|
||||||
raise Exception(f"OpenSSL encryption failed: {stderr.decode()}")
|
raise Exception(f"OpenSSL encryption failed: {stderr.decode()}")
|
||||||
|
|
||||||
base64_encoded = stdout.decode().strip()
|
base64_encoded = stdout.decode().strip()
|
||||||
|
|
||||||
url = f"https://secureshellfish.app/push/?user={user}&mutable"
|
url = f"https://secureshellfish.app/push/?user={user}&mutable"
|
||||||
headers = {"Content-Type": "text/plain"}
|
headers = {"Content-Type": "text/plain"}
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
async with session.post(url, headers=headers, data=base64_encoded) as response:
|
async with session.post(url, headers=headers, data=base64_encoded) as response:
|
||||||
if response.status != 200:
|
if response.status != 200:
|
||||||
raise Exception(f"Failed to send notification: {response.status_code}")
|
raise Exception(f"Failed to send notification: {response.status_code}")
|
||||||
|
|
||||||
## SHELLFISH ##
|
def shellfish_health_check():
|
||||||
def shellfish_health_check():
|
addresses = [
|
||||||
addresses = [
|
"https://api.sij.ai/health",
|
||||||
"https://api.sij.ai/health",
|
"http://100.64.64.20:4444/health",
|
||||||
"http://100.64.64.20:4444/health",
|
"http://100.64.64.30:4444/health",
|
||||||
"http://100.64.64.30:4444/health",
|
"http://100.64.64.11:4444/health",
|
||||||
"http://100.64.64.11:4444/health",
|
"http://100.64.64.15:4444/health"
|
||||||
"http://100.64.64.15:4444/health"
|
]
|
||||||
]
|
|
||||||
|
results = []
|
||||||
results = []
|
up_count = 0
|
||||||
up_count = 0
|
for address in addresses:
|
||||||
for address in addresses:
|
try:
|
||||||
|
response = requests.get(address)
|
||||||
|
if response.status_code == 200:
|
||||||
|
results.append(f"{address} is up")
|
||||||
|
up_count += 1
|
||||||
|
else:
|
||||||
|
results.append(f"{address} returned status code {response.status_code}")
|
||||||
|
except requests.exceptions.RequestException:
|
||||||
|
results.append(f"{address} is down")
|
||||||
|
|
||||||
|
# Generate a simple text-based graph
|
||||||
|
graph = '|' * up_count + '.' * (len(addresses) - up_count)
|
||||||
|
text_update = "\n".join(results)
|
||||||
|
|
||||||
|
widget_command = ["widget", "--text", text_update, "--text", f"Graph: {graph}", "--icon", "network"]
|
||||||
|
output = shellfish_run_widget_command(widget_command)
|
||||||
|
return {"output": output, "graph": graph}
|
||||||
|
|
||||||
|
|
||||||
|
def shellfish_update_widget(update: WidgetUpdate):
|
||||||
|
widget_command = ["widget"]
|
||||||
|
|
||||||
|
if update.text:
|
||||||
|
widget_command.extend(["--text", update.text])
|
||||||
|
if update.progress:
|
||||||
|
widget_command.extend(["--progress", update.progress])
|
||||||
|
if update.icon:
|
||||||
|
widget_command.extend(["--icon", update.icon])
|
||||||
|
if update.color:
|
||||||
|
widget_command.extend(["--color", update.color])
|
||||||
|
if update.url:
|
||||||
|
widget_command.extend(["--url", update.url])
|
||||||
|
if update.shortcut:
|
||||||
|
widget_command.extend(["--shortcut", update.shortcut])
|
||||||
|
if update.graph:
|
||||||
|
widget_command.extend(["--text", update.graph])
|
||||||
|
|
||||||
|
output = shellfish_run_widget_command(widget_command)
|
||||||
|
return {"output": output}
|
||||||
|
|
||||||
|
|
||||||
|
def shellfish_run_widget_command(args: List[str]):
|
||||||
|
result = subprocess.run(args, capture_output=True, text=True, shell=True)
|
||||||
|
if result.returncode != 0:
|
||||||
|
raise HTTPException(status_code=500, detail=result.stderr)
|
||||||
|
return result.stdout
|
||||||
|
|
||||||
|
|
||||||
|
if API.EXTENSIONS.courtlistener == "on" or API.EXTENSIONS.courtlistener == True:
|
||||||
|
with open(CASETABLE_PATH, 'r') as file:
|
||||||
|
CASETABLE = json.load(file)
|
||||||
|
|
||||||
|
@serve.post("/cl/search")
|
||||||
|
async def hook_cl_search(request: Request, bg_tasks: BackgroundTasks):
|
||||||
|
client_ip = request.client.host
|
||||||
|
debug(f"Received request from IP: {client_ip}")
|
||||||
|
data = await request.json()
|
||||||
|
payload = data['payload']
|
||||||
|
results = data['payload']['results']
|
||||||
|
|
||||||
|
# Save the payload data
|
||||||
|
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||||
|
payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_search.json"
|
||||||
|
with open(payload_file, 'w') as file:
|
||||||
|
json.dump(payload, file, indent=2)
|
||||||
|
|
||||||
|
for result in results:
|
||||||
|
bg_tasks.add_task(cl_search_process_result, result)
|
||||||
|
return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)
|
||||||
|
|
||||||
|
@serve.post("/cl/docket")
|
||||||
|
async def hook_cl_docket(request: Request):
|
||||||
|
client_ip = request.client.host
|
||||||
|
debug(f"Received request from IP: {client_ip}")
|
||||||
|
data = await request.json()
|
||||||
|
await cl_docket(data, client_ip)
|
||||||
|
|
||||||
|
async def cl_docket(data, client_ip, bg_tasks: BackgroundTasks):
|
||||||
|
payload = data['payload']
|
||||||
|
results = data['payload']['results']
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_docket.json"
    with open(payload_file, 'w') as file:
        json.dump(payload, file, indent=2)

    for result in results:
        bg_tasks.add_task(cl_docket_process, result)
    return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)


async def cl_docket_process(result):
    async with httpx.AsyncClient() as session:
        await cl_docket_process_result(result, session)


async def cl_docket_process_result(result, session):
    docket = str(result.get('docket'))
    case_code, case_shortname = cl_case_details(docket)
    date_filed = result.get('date_filed', 'No Date Filed')

    try:
        date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d')
    except ValueError:
        date_filed_formatted = 'NoDateFiled'

    # Fetching court docket information from the API
    url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}"
    headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            if response.status == 200:
                debug(f"Fetching CourtListener docket information for {docket}...")
                data = await response.json()
                court_docket = data['results'][0]['docket_number_core']
                court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}"  # Formatting the docket number
                case_name = data['results'][0]['case_name']
                debug(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
            else:
                debug("Failed to fetch data from CourtListener API.")
                court_docket = 'NoCourtDocket'
                case_name = 'NoCaseName'

    for document in result.get('recap_documents', []):
        filepath_ia = document.get('filepath_ia')
        filepath_local = document.get('filepath_local')

        if filepath_ia:
            file_url = filepath_ia
            debug(f"Found IA file at {file_url}.")
        elif filepath_local:
            file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
            debug(f"Found local file at {file_url}.")
        else:
            debug(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
            continue

        document_number = document.get('document_number', 'NoDocumentNumber')
        description = document.get('description', 'NoDescription').replace(" ", "_").replace("/", "_")
        description = description[:50]  # Truncate description
        # case_shortname = case_name # TEMPORARY OVERRIDE
        file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf"
        target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name
        target_path.parent.mkdir(parents=True, exist_ok=True)
        await cl_download_file(file_url, target_path, session)
        debug(f"Downloaded {file_name} to {target_path}")


def cl_case_details(docket):
    case_info = CASETABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"})
    case_code = case_info.get("code")
    short_name = case_info.get("shortname")
    return case_code, short_name


async def cl_download_file(url: str, path: Path, session: aiohttp.ClientSession = None):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
    }
    async with aiohttp.ClientSession() as session:
        debug(f"Attempting to download {url} to {path}.")
        try:
            async with session.get(url, headers=headers, allow_redirects=True) as response:
                if response.status == 403:
                    err(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
                    return
                response.raise_for_status()

                # Check if the response content type is a PDF
                content_type = response.headers.get('Content-Type')
                if content_type != 'application/pdf':
                    err(f"Invalid content type: {content_type}. Skipping download.")
                    return

                # Create an in-memory buffer to store the downloaded content
                buffer = io.BytesIO()
                async for chunk in response.content.iter_chunked(1024):
                    buffer.write(chunk)

                # Reset the buffer position to the beginning
                buffer.seek(0)

                # Validate the downloaded PDF content
                try:
                    PdfReader(buffer)
                except Exception as e:
                    err(f"Invalid PDF content: {str(e)}. Skipping download.")
                    return

                # If the PDF is valid, write the content to the file on disk
                path.parent.mkdir(parents=True, exist_ok=True)
                with path.open('wb') as file:
                    file.write(buffer.getvalue())

        except Exception as e:
            err(f"Error downloading file: {str(e)}")
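The docket-to-case mapping used above comes from CASETABLE, which is loaded elsewhere in sijapi and is not part of this diff. A minimal sketch of the shape cl_case_details expects, with invented docket IDs, codes, and case names standing in for the real table:

# Hypothetical entries; the real CASETABLE is loaded from sijapi's configuration.
CASETABLE = {
    "68649118": {"code": "124", "shortname": "ACME_v_DOE"},
    "69012345": {"code": "125", "shortname": "SMITH_v_JONES"},
}

def cl_case_details(docket):
    # Unknown dockets fall back to a generic bucket, as in the router above.
    case_info = CASETABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"})
    return case_info.get("code"), case_info.get("shortname")

case_code, case_shortname = cl_case_details(68649118)
# Same naming scheme the docket processor uses for downloaded PDFs.
file_name = f"{case_code}_3_20240612_Order_Granting_Motion.pdf"
print(case_shortname, file_name)  # ACME_v_DOE 124_3_20240612_Order_Granting_Motion.pdf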
async def cl_search_process_result(result):
    async with httpx.AsyncClient() as session:
        download_url = result.get('download_url')
        court_id = result.get('court_id')
        case_name_short = result.get('caseNameShort')
        case_name = result.get('caseName')
        debug(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")

        court_folder = court_id

        if case_name_short:
            case_folder = case_name_short
        else:
            case_folder = case_name

        file_name = download_url.split('/')[-1]
        target_path = Path(COURTLISTENER_SEARCH_DIR) / court_folder / case_folder / file_name
        target_path.parent.mkdir(parents=True, exist_ok=True)

        await cl_download_file(download_url, target_path, session)
        debug(f"Downloaded {file_name} to {target_path}")


# (continuation of the address-uptime check that feeds the Shellfish widget)
    try:
        response = requests.get(address)
        if response.status_code == 200:
            results.append(f"{address} is up")
            up_count += 1
        else:
            results.append(f"{address} returned status code {response.status_code}")
    except requests.exceptions.RequestException:
        results.append(f"{address} is down")

    # Generate a simple text-based graph
    graph = '|' * up_count + '.' * (len(addresses) - up_count)
    text_update = "\n".join(results)

    widget_command = ["widget", "--text", text_update, "--text", f"Graph: {graph}", "--icon", "network"]
    output = shellfish_run_widget_command(widget_command)
    return {"output": output, "graph": graph}


def shellfish_update_widget(update: WidgetUpdate):
    widget_command = ["widget"]

    if update.text:
        widget_command.extend(["--text", update.text])
    if update.progress:
        widget_command.extend(["--progress", update.progress])
    if update.icon:
        widget_command.extend(["--icon", update.icon])
    if update.color:
        widget_command.extend(["--color", update.color])
    if update.url:
        widget_command.extend(["--url", update.url])
    if update.shortcut:
        widget_command.extend(["--shortcut", update.shortcut])
    if update.graph:
        widget_command.extend(["--text", update.graph])

    output = shellfish_run_widget_command(widget_command)
    return {"output": output}


def shellfish_run_widget_command(args: List[str]):
    result = subprocess.run(args, capture_output=True, text=True, shell=True)
    if result.returncode != 0:
        raise HTTPException(status_code=500, detail=result.stderr)
    return result.stdout
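shellfish_run_widget_command above passes an argument list to subprocess.run together with shell=True; on POSIX that hands only the first list element to the shell, so the widget flags may never reach the command. For comparison, a hedged sketch of the shell-free form, assuming a hypothetical `widget` executable on PATH rather than the project's actual setup:

import subprocess
from typing import List

def run_widget_command(args: List[str]) -> str:
    # Without shell=True, every element of args reaches the program as its own argument.
    result = subprocess.run(args, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(result.stderr)
    return result.stdout

# Usage sketch mirroring the payload built above; commented out because the
# assumed `widget` CLI is unlikely to exist wherever this snippet is run.
# run_widget_command(["widget", "--text", "3 of 4 hosts up", "--icon", "network"])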
### COURTLISTENER FUNCTIONS ###
async def cl_docket(data, client_ip, bg_tasks: BackgroundTasks):
    payload = data['payload']
    results = data['payload']['results']

    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    payload_file = LOGS_DIR / f"{timestamp}-{client_ip}_docket.json"
    with open(payload_file, 'w') as file:
        json.dump(payload, file, indent=2)

    for result in results:
        bg_tasks.add_task(cl_docket_process, result)
    return JSONResponse(content={"message": "Received"}, status_code=status.HTTP_200_OK)


async def cl_docket_process(result):
    async with httpx.AsyncClient() as session:
        await cl_docket_process_result(result, session)


async def cl_docket_process_result(result, session):
    docket = str(result.get('docket'))
    case_code, case_shortname = cl_case_details(docket)
    date_filed = result.get('date_filed', 'No Date Filed')

    try:
        date_filed_formatted = datetime.strptime(date_filed, '%Y-%m-%d').strftime('%Y%m%d')
    except ValueError:
        date_filed_formatted = 'NoDateFiled'

    # Fetching court docket information from the API
    url = f"{COURTLISTENER_DOCKETS_URL}?id={docket}"
    headers = {'Authorization': f'Token {COURTLISTENER_API_KEY}'}
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            if response.status == 200:
                logger.debug(f"Fetching CourtListener docket information for {docket}...")
                data = await response.json()
                court_docket = data['results'][0]['docket_number_core']
                court_docket = f"{court_docket[:2]}-cv-{court_docket[2:]}"  # Formatting the docket number
                case_name = data['results'][0]['case_name']
                logger.debug(f"Obtained from CourtListener: docket {court_docket}, case name {case_name}.")
            else:
                logger.debug("Failed to fetch data from CourtListener API.")
                court_docket = 'NoCourtDocket'
                case_name = 'NoCaseName'

    for document in result.get('recap_documents', []):
        filepath_ia = document.get('filepath_ia')
        filepath_local = document.get('filepath_local')

        if filepath_ia:
            file_url = filepath_ia
            logger.debug(f"Found IA file at {file_url}.")
        elif filepath_local:
            file_url = f"{COURTLISTENER_BASE_URL}/{filepath_local}"
            logger.debug(f"Found local file at {file_url}.")
        else:
            logger.debug(f"No file URL found in filepath_ia or filepath_local for one of the documents.")
            continue

        document_number = document.get('document_number', 'NoDocumentNumber')
        description = document.get('description', 'NoDescription').replace(" ", "_").replace("/", "_")
        description = description[:50]  # Truncate description
        # case_shortname = case_name # TEMPORARY OVERRIDE
        file_name = f"{case_code}_{document_number}_{date_filed_formatted}_{description}.pdf"
        target_path = Path(COURTLISTENER_DOCKETS_DIR) / case_shortname / "Docket" / file_name
        target_path.parent.mkdir(parents=True, exist_ok=True)
        await cl_download_file(file_url, target_path, session)
        logger.debug(f"Downloaded {file_name} to {target_path}")


def cl_case_details(docket):
    case_info = CASETABLE.get(str(docket), {"code": "000", "shortname": "UNKNOWN"})
    case_code = case_info.get("code")
    short_name = case_info.get("shortname")
    return case_code, short_name


async def cl_download_file(url: str, path: Path, session: aiohttp.ClientSession = None):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
    }
    async with aiohttp.ClientSession() as session:
        logger.debug(f"Attempting to download {url} to {path}.")
        try:
            async with session.get(url, headers=headers, allow_redirects=True) as response:
                if response.status == 403:
                    logger.error(f"Access denied (403 Forbidden) for URL: {url}. Skipping download.")
                    return
                response.raise_for_status()

                # Check if the response content type is a PDF
                content_type = response.headers.get('Content-Type')
                if content_type != 'application/pdf':
                    logger.error(f"Invalid content type: {content_type}. Skipping download.")
                    return

                # Create an in-memory buffer to store the downloaded content
                buffer = io.BytesIO()
                async for chunk in response.content.iter_chunked(1024):
                    buffer.write(chunk)

                # Reset the buffer position to the beginning
                buffer.seek(0)

                # Validate the downloaded PDF content
                try:
                    PdfReader(buffer)
                except Exception as e:
                    logger.error(f"Invalid PDF content: {str(e)}. Skipping download.")
                    return

                # If the PDF is valid, write the content to the file on disk
                path.parent.mkdir(parents=True, exist_ok=True)
                with path.open('wb') as file:
                    file.write(buffer.getvalue())

        except Exception as e:
            logger.error(f"Error downloading file: {str(e)}")


async def cl_search_process_result(result):
    async with httpx.AsyncClient() as session:
        download_url = result.get('download_url')
        court_id = result.get('court_id')
        case_name_short = result.get('caseNameShort')
        case_name = result.get('caseName')
        logger.debug(f"Received payload for case {case_name} ({court_id}) and download url {download_url}")

        court_folder = court_id

        if case_name_short:
            case_folder = case_name_short
        else:
            case_folder = case_name

        file_name = download_url.split('/')[-1]
        target_path = Path(COURTLISTENER_SEARCH_DIR) / court_folder / case_folder / file_name
        target_path.parent.mkdir(parents=True, exist_ok=True)

        await cl_download_file(download_url, target_path, session)
        logger.debug(f"Downloaded {file_name} to {target_path}")
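The diffs that follow repeat the same five one-line wrappers (debug, info, warn, err, crit) in every router so call sites can drop the logger. prefix. If that duplication ever becomes a burden, one possible consolidation, sketched here as an assumption rather than anything this commit or sijapi provides, is a tiny factory bound to the module logger:

import logging
from typing import Callable, Tuple

def make_log_helpers(logger: logging.Logger) -> Tuple[Callable[[str], None], ...]:
    """Return (debug, info, warn, err, crit) wrappers bound to one logger."""
    return (logger.debug, logger.info, logger.warning, logger.error, logger.critical)

# Usage sketch mirroring the per-module pattern in the routers below.
logger = logging.getLogger("signal")
debug, info, warn, err, crit = make_log_helpers(logger)
debug("log helpers ready")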
@@ -7,25 +7,27 @@ from semaphore import Bot, ChatContext
 from sijapi import L

 signal = APIRouter()

 logger = L.get_module_logger("signal")
+def debug(text: str): logger.debug(text)
+def info(text: str): logger.info(text)
+def warn(text: str): logger.warning(text)
+def err(text: str): logger.error(text)
+def crit(text: str): logger.critical(text)

 async def echo(ctx: ChatContext) -> None:
     if not ctx.message.empty():
         await ctx.message.typing_started()
         await ctx.message.reply(ctx.message.get_body())
         await ctx.message.typing_stopped()


 async def main() -> None:
     """Start the bot."""
-    # Connect the bot to number.
     async with Bot(os.environ["SIGNAL_PHONE_NUMBER"]) as bot:
         bot.register_handler("", echo)

-        # Run the bot until you press Ctrl-C.
         await bot.start()


 if __name__ == '__main__':
     import anyio
     anyio.run(main)
@@ -26,11 +26,18 @@ from collections import defaultdict
 from dotenv import load_dotenv
 from traceback import format_exc
 from sijapi import L, TIMING_API_KEY, TIMING_API_URL
-from sijapi.routers import loc
+from sijapi.routers import gis


 time = APIRouter(tags=["private"])
 logger = L.get_module_logger("time")
+def debug(text: str): logger.debug(text)
+def info(text: str): logger.info(text)
+def warn(text: str): logger.warning(text)
+def err(text: str): logger.error(text)
+def crit(text: str): logger.critical(text)


 script_directory = os.path.dirname(os.path.abspath(__file__))

 # Configuration constants

@@ -58,17 +65,17 @@ async def post_time_entry_to_timing(entry: Dict):
         'Accept': 'application/json',
         'X-Time-Zone': 'America/Los_Angeles'
     }
-    logger.debug(f"Received entry: {entry}")
+    debug(f"Received entry: {entry}")
     response = None  # Initialize response
     try:
         async with httpx.AsyncClient() as client:
             response = await client.post(url, headers=headers, json=entry)
             response.raise_for_status()  # This will only raise for 4xx and 5xx responses
     except httpx.HTTPStatusError as exc:
-        logger.debug(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}")
+        debug(f"HTTPStatusError caught: Status code: {exc.response.status_code}, Detail: {exc.response.text}")
         raise HTTPException(status_code=exc.response.status_code, detail=str(exc.response.text))
     except Exception as exc:
-        logger.debug(f"General exception caught: {exc}")
+        debug(f"General exception caught: {exc}")
         raise HTTPException(status_code=500, detail="An unexpected error occurred")

     if response:

@@ -97,8 +104,8 @@ def truncate_project_title(title):


 async def fetch_and_prepare_timing_data(start: datetime, end: Optional[datetime] = None) -> List[Dict]:
-    # start_date = await loc.dt(start)
-    # end_date = await loc.dt(end) if end else None
+    # start_date = await gis.dt(start)
+    # end_date = await gis.dt(end) if end else None
     # Adjust the start date to include the day before and format the end date
     start_date_adjusted = (start - timedelta(days=1)).strftime("%Y-%m-%dT00:00:00")
     end_date_formatted = f"{datetime.strftime(end, '%Y-%m-%d')}T23:59:59" if end else f"{datetime.strftime(start, '%Y-%m-%d')}T23:59:59"

@@ -312,8 +319,8 @@ async def get_timing_markdown3(
 ):

     # Fetch and process timing data
-    start = await loc.dt(start_date)
-    end = await loc.dt(end_date) if end_date else None
+    start = await gis.dt(start_date)
+    end = await gis.dt(end_date) if end_date else None
     timing_data = await fetch_and_prepare_timing_data(start, end)

     # Retain these for processing Markdown data with the correct timezone

@@ -372,8 +379,8 @@ async def get_timing_markdown(
     start: str = Query(..., regex=r"\d{4}-\d{2}-\d{2}"),
     end: Optional[str] = Query(None, regex=r"\d{4}-\d{2}-\d{2}")
 ):
-    start_date = await loc.dt(start)
-    end_date = await loc.dt(end)
+    start_date = await gis.dt(start)
+    end_date = await gis.dt(end)
     markdown_formatted_data = await process_timing_markdown(start_date, end_date)

     return Response(content=markdown_formatted_data, media_type="text/markdown")

@@ -441,8 +448,8 @@ async def get_timing_json(
 ):

     # Fetch and process timing data
-    start = await loc.dt(start_date)
-    end = await loc.dt(end_date)
+    start = await gis.dt(start_date)
+    end = await gis.dt(end_date)
     timing_data = await fetch_and_prepare_timing_data(start, end)

     # Convert processed data to the required JSON structure
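The Timing hunks above widen each query to start at midnight of the previous day and pin the end of the window to 23:59:59. A standalone sketch of that window math, using only the standard library (no Timing API call is made here):

from datetime import datetime, timedelta
from typing import Optional, Tuple

def timing_window(start: datetime, end: Optional[datetime] = None) -> Tuple[str, str]:
    # Include the day before the requested start, at midnight.
    start_date_adjusted = (start - timedelta(days=1)).strftime("%Y-%m-%dT00:00:00")
    # Clamp the end of the window to the last second of the end (or start) day.
    end_day = end if end else start
    end_date_formatted = f"{end_day.strftime('%Y-%m-%d')}T23:59:59"
    return start_date_adjusted, end_date_formatted

print(timing_window(datetime(2024, 7, 12)))
# ('2024-07-11T00:00:00', '2024-07-12T23:59:59')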
|
@ -31,6 +31,12 @@ from sijapi.utilities import sanitize_filename
|
||||||
### INITIALIZATIONS ###
|
### INITIALIZATIONS ###
|
||||||
tts = APIRouter(tags=["trusted", "private"])
|
tts = APIRouter(tags=["trusted", "private"])
|
||||||
logger = L.get_module_logger("tts")
|
logger = L.get_module_logger("tts")
|
||||||
|
def debug(text: str): logger.debug(text)
|
||||||
|
def info(text: str): logger.info(text)
|
||||||
|
def warn(text: str): logger.warning(text)
|
||||||
|
def err(text: str): logger.error(text)
|
||||||
|
def crit(text: str): logger.critical(text)
|
||||||
|
|
||||||
DEVICE = torch.device('cpu')
|
DEVICE = torch.device('cpu')
|
||||||
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
|
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
|
||||||
|
|
||||||
|
@ -47,7 +53,7 @@ async def list_11l_voices():
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
try:
|
try:
|
||||||
response = await client.get(url, headers=headers)
|
response = await client.get(url, headers=headers)
|
||||||
logger.debug(f"Response: {response}")
|
debug(f"Response: {response}")
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
voices_data = response.json().get("voices", [])
|
voices_data = response.json().get("voices", [])
|
||||||
formatted_list = ""
|
formatted_list = ""
|
||||||
|
@ -57,7 +63,7 @@ async def list_11l_voices():
|
||||||
formatted_list += f"{name}: `{id}`\n"
|
formatted_list += f"{name}: `{id}`\n"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error determining voice ID: {str(e)}")
|
err(f"Error determining voice ID: {str(e)}")
|
||||||
|
|
||||||
return PlainTextResponse(formatted_list, status_code=200)
|
return PlainTextResponse(formatted_list, status_code=200)
|
||||||
|
|
||||||
|
@ -67,18 +73,18 @@ async def select_voice(voice_name: str) -> str:
|
||||||
try:
|
try:
|
||||||
# Case Insensitive comparison
|
# Case Insensitive comparison
|
||||||
voice_name_lower = voice_name.lower()
|
voice_name_lower = voice_name.lower()
|
||||||
logger.debug(f"Looking for {voice_name_lower}")
|
debug(f"Looking for {voice_name_lower}")
|
||||||
for item in VOICE_DIR.iterdir():
|
for item in VOICE_DIR.iterdir():
|
||||||
logger.debug(f"Checking {item.name.lower()}")
|
debug(f"Checking {item.name.lower()}")
|
||||||
if item.name.lower() == f"{voice_name_lower}.wav":
|
if item.name.lower() == f"{voice_name_lower}.wav":
|
||||||
logger.debug(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.")
|
debug(f"select_voice received query to use voice: {voice_name}. Found {item} inside {VOICE_DIR}.")
|
||||||
return str(item)
|
return str(item)
|
||||||
|
|
||||||
logger.error(f"Voice file not found")
|
err(f"Voice file not found")
|
||||||
raise HTTPException(status_code=404, detail="Voice file not found")
|
raise HTTPException(status_code=404, detail="Voice file not found")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Voice file not found: {str(e)}")
|
err(f"Voice file not found: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@ -113,8 +119,8 @@ async def generate_speech_endpoint(
|
||||||
else:
|
else:
|
||||||
return await generate_speech(bg_tasks, text_content, voice, voice_file, model, speed, podcast)
|
return await generate_speech(bg_tasks, text_content, voice, voice_file, model, speed, podcast)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in TTS: {str(e)}")
|
err(f"Error in TTS: {str(e)}")
|
||||||
logger.error(traceback.format_exc())
|
err(traceback.format_exc())
|
||||||
raise HTTPException(status_code=666, detail="error in TTS")
|
raise HTTPException(status_code=666, detail="error in TTS")
|
||||||
|
|
||||||
async def generate_speech(
|
async def generate_speech(
|
||||||
|
@ -136,18 +142,20 @@ async def generate_speech(
|
||||||
model = model if model else await get_model(voice, voice_file)
|
model = model if model else await get_model(voice, voice_file)
|
||||||
title = title if title else "TTS audio"
|
title = title if title else "TTS audio"
|
||||||
output_path = output_dir / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')} {title}.wav"
|
output_path = output_dir / f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')} {title}.wav"
|
||||||
|
|
||||||
if model == "eleven_turbo_v2":
|
if model == "eleven_turbo_v2":
|
||||||
logger.info("Using ElevenLabs.")
|
info("Using ElevenLabs.")
|
||||||
audio_file_path = await elevenlabs_tts(model, text, voice, title, output_dir)
|
audio_file_path = await elevenlabs_tts(model, text, voice, title, output_dir)
|
||||||
else: # if model == "xtts":
|
else: # if model == "xtts":
|
||||||
logger.info("Using XTTS2")
|
info("Using XTTS2")
|
||||||
audio_file_path = await local_tts(text, speed, voice, voice_file, podcast, bg_tasks, title, output_path)
|
audio_file_path = await local_tts(text, speed, voice, voice_file, podcast, bg_tasks, title, output_path)
|
||||||
#else:
|
|
||||||
# raise HTTPException(status_code=400, detail="Invalid model specified")
|
|
||||||
|
|
||||||
if podcast == True:
|
if not audio_file_path:
|
||||||
podcast_path = Path(PODCAST_DIR) / audio_file_path.name
|
raise ValueError("TTS generation failed: audio_file_path is empty or None")
|
||||||
logger.debug(f"Podcast path: {podcast_path}")
|
|
||||||
|
if podcast:
|
||||||
|
podcast_path = Path(PODCAST_DIR) / Path(audio_file_path).name
|
||||||
|
debug(f"Podcast path: {podcast_path}")
|
||||||
shutil.copy(str(audio_file_path), str(podcast_path))
|
shutil.copy(str(audio_file_path), str(podcast_path))
|
||||||
bg_tasks.add_task(os.remove, str(audio_file_path))
|
bg_tasks.add_task(os.remove, str(audio_file_path))
|
||||||
return str(podcast_path)
|
return str(podcast_path)
|
||||||
|
@ -155,11 +163,12 @@ async def generate_speech(
|
||||||
return str(audio_file_path)
|
return str(audio_file_path)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to generate speech: {str(e)}")
|
err(f"Failed to generate speech: {str(e)}")
|
||||||
raise HTTPException(status_code=500, detail=f"Failed to generate speech: {str(e)}")
|
raise HTTPException(status_code=500, detail=f"Failed to generate speech: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def get_model(voice: str = None, voice_file: UploadFile = None):
|
async def get_model(voice: str = None, voice_file: UploadFile = None):
|
||||||
if voice_file or (voice and await select_voice(voice)):
|
if voice_file or (voice and await select_voice(voice)):
|
||||||
return "xtts"
|
return "xtts"
|
||||||
|
@ -171,7 +180,7 @@ async def get_model(voice: str = None, voice_file: UploadFile = None):
|
||||||
raise HTTPException(status_code=400, detail="No model or voice specified")
|
raise HTTPException(status_code=400, detail="No model or voice specified")
|
||||||
|
|
||||||
async def determine_voice_id(voice_name: str) -> str:
|
async def determine_voice_id(voice_name: str) -> str:
|
||||||
logger.debug(f"Searching for voice id for {voice_name}")
|
debug(f"Searching for voice id for {voice_name}")
|
||||||
|
|
||||||
hardcoded_voices = {
|
hardcoded_voices = {
|
||||||
"alloy": "E3A1KVbKoWSIKSZwSUsW",
|
"alloy": "E3A1KVbKoWSIKSZwSUsW",
|
||||||
|
@ -188,23 +197,23 @@ async def determine_voice_id(voice_name: str) -> str:
|
||||||
|
|
||||||
if voice_name in hardcoded_voices:
|
if voice_name in hardcoded_voices:
|
||||||
voice_id = hardcoded_voices[voice_name]
|
voice_id = hardcoded_voices[voice_name]
|
||||||
logger.debug(f"Found voice ID - {voice_id}")
|
debug(f"Found voice ID - {voice_id}")
|
||||||
return voice_id
|
return voice_id
|
||||||
|
|
||||||
logger.debug(f"Requested voice not among the hardcoded options.. checking with 11L next.")
|
debug(f"Requested voice not among the hardcoded options.. checking with 11L next.")
|
||||||
url = "https://api.elevenlabs.io/v1/voices"
|
url = "https://api.elevenlabs.io/v1/voices"
|
||||||
headers = {"xi-api-key": ELEVENLABS_API_KEY}
|
headers = {"xi-api-key": ELEVENLABS_API_KEY}
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
try:
|
try:
|
||||||
response = await client.get(url, headers=headers)
|
response = await client.get(url, headers=headers)
|
||||||
logger.debug(f"Response: {response}")
|
debug(f"Response: {response}")
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
voices_data = response.json().get("voices", [])
|
voices_data = response.json().get("voices", [])
|
||||||
for voice in voices_data:
|
for voice in voices_data:
|
||||||
if voice_name == voice["voice_id"] or voice_name == voice["name"]:
|
if voice_name == voice["voice_id"] or voice_name == voice["name"]:
|
||||||
return voice["voice_id"]
|
return voice["voice_id"]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error determining voice ID: {str(e)}")
|
err(f"Error determining voice ID: {str(e)}")
|
||||||
|
|
||||||
# as a last fallback, rely on David Attenborough
|
# as a last fallback, rely on David Attenborough
|
||||||
return "b42GBisbu9r5m5n6pHF7"
|
return "b42GBisbu9r5m5n6pHF7"
|
||||||
|
@ -248,7 +257,7 @@ async def get_text_content(text: Optional[str], file: Optional[UploadFile]) -> s
|
||||||
|
|
||||||
async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str:
|
async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None) -> str:
|
||||||
if voice:
|
if voice:
|
||||||
logger.debug(f"Looking for voice: {voice}")
|
debug(f"Looking for voice: {voice}")
|
||||||
selected_voice = await select_voice(voice)
|
selected_voice = await select_voice(voice)
|
||||||
return selected_voice
|
return selected_voice
|
||||||
elif voice_file and isinstance(voice_file, UploadFile):
|
elif voice_file and isinstance(voice_file, UploadFile):
|
||||||
|
@ -277,7 +286,7 @@ async def get_voice_file_path(voice: str = None, voice_file: UploadFile = None)
|
||||||
return str(new_file)
|
return str(new_file)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
logger.debug(f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}")
|
debug(f"{dt_datetime.now().strftime('%Y%m%d%H%M%S')}: No voice specified or file provided, using default voice: {DEFAULT_VOICE}")
|
||||||
selected_voice = await select_voice(DEFAULT_VOICE)
|
selected_voice = await select_voice(DEFAULT_VOICE)
|
||||||
return selected_voice
|
return selected_voice
|
||||||
|
|
||||||
|
@ -315,7 +324,7 @@ async def local_tts(
|
||||||
|
|
||||||
for i, segment in enumerate(segments):
|
for i, segment in enumerate(segments):
|
||||||
segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav"
|
segment_file_path = TTS_SEGMENTS_DIR / f"segment_{i}.wav"
|
||||||
logger.debug(f"Segment file path: {segment_file_path}")
|
debug(f"Segment file path: {segment_file_path}")
|
||||||
|
|
||||||
# Run TTS in a separate thread
|
# Run TTS in a separate thread
|
||||||
await asyncio.to_thread(
|
await asyncio.to_thread(
|
||||||
|
@ -326,7 +335,7 @@ async def local_tts(
|
||||||
speaker_wav=[voice_file_path],
|
speaker_wav=[voice_file_path],
|
||||||
language="en"
|
language="en"
|
||||||
)
|
)
|
||||||
logger.debug(f"Segment file generated: {segment_file_path}")
|
debug(f"Segment file generated: {segment_file_path}")
|
||||||
|
|
||||||
# Load and combine audio in a separate thread
|
# Load and combine audio in a separate thread
|
||||||
segment_audio = await asyncio.to_thread(AudioSegment.from_wav, str(segment_file_path))
|
segment_audio = await asyncio.to_thread(AudioSegment.from_wav, str(segment_file_path))
|
||||||
|
@ -401,7 +410,7 @@ def split_text(text, target_length=35, max_length=50):
|
||||||
|
|
||||||
if segment_length + len(sentence_words) > max_length:
|
if segment_length + len(sentence_words) > max_length:
|
||||||
segments.append(' '.join(current_segment))
|
segments.append(' '.join(current_segment))
|
||||||
logger.debug(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}")
|
debug(f"split_text - segment: {' '.join(current_segment)}, word count: {segment_length}")
|
||||||
|
|
||||||
current_segment = [sentence]
|
current_segment = [sentence]
|
||||||
else:
|
else:
|
||||||
|
@ -409,7 +418,7 @@ def split_text(text, target_length=35, max_length=50):
|
||||||
|
|
||||||
if current_segment:
|
if current_segment:
|
||||||
segments.append(' '.join(current_segment))
|
segments.append(' '.join(current_segment))
|
||||||
logger.debug(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}")
|
debug(f"split_text - segment: {' '.join(current_segment)}, word count: {len(current_segment)}")
|
||||||
|
|
||||||
return segments
|
return segments
|
||||||
|
|
||||||
|
@ -421,7 +430,7 @@ def clean_text_for_tts(text: str) -> str:
|
||||||
text = re.sub(r'\s+', ' ', text).strip()
|
text = re.sub(r'\s+', ' ', text).strip()
|
||||||
return text
|
return text
|
||||||
else:
|
else:
|
||||||
logger.debug(f"No text received.")
|
debug(f"No text received.")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -13,10 +13,15 @@ from shapely.wkb import loads
|
||||||
from binascii import unhexlify
|
from binascii import unhexlify
|
||||||
from sijapi import L, VISUALCROSSING_API_KEY, TZ, DB, GEO
|
from sijapi import L, VISUALCROSSING_API_KEY, TZ, DB, GEO
|
||||||
from sijapi.utilities import haversine
|
from sijapi.utilities import haversine
|
||||||
from sijapi.routers import loc
|
from sijapi.routers import gis
|
||||||
|
|
||||||
weather = APIRouter()
|
weather = APIRouter()
|
||||||
logger = L.get_module_logger("weather")
|
logger = L.get_module_logger("weather")
|
||||||
|
def debug(text: str): logger.debug(text)
|
||||||
|
def info(text: str): logger.info(text)
|
||||||
|
def warn(text: str): logger.warning(text)
|
||||||
|
def err(text: str): logger.error(text)
|
||||||
|
def crit(text: str): logger.critical(text)
|
||||||
|
|
||||||
@weather.get("/weather/refresh", response_class=JSONResponse)
|
@weather.get("/weather/refresh", response_class=JSONResponse)
|
||||||
async def get_refreshed_weather(
|
async def get_refreshed_weather(
|
||||||
|
@ -26,16 +31,16 @@ async def get_refreshed_weather(
|
||||||
# date = await date
|
# date = await date
|
||||||
try:
|
try:
|
||||||
if latlon == "None":
|
if latlon == "None":
|
||||||
date_time = await loc.dt(date)
|
date_time = await gis.dt(date)
|
||||||
place = await loc.fetch_last_location_before(date_time)
|
place = await gis.fetch_last_location_before(date_time)
|
||||||
lat = place.latitude
|
lat = place.latitude
|
||||||
lon = place.longitude
|
lon = place.longitude
|
||||||
else:
|
else:
|
||||||
lat, lon = latlon.split(',')
|
lat, lon = latlon.split(',')
|
||||||
tz = await GEO.tz_at(lat, lon)
|
tz = await GEO.tz_at(lat, lon)
|
||||||
date_time = await loc.dt(date, tz)
|
date_time = await gis.dt(date, tz)
|
||||||
|
|
||||||
logger.debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
|
debug(f"passing date_time {date_time.strftime('%Y-%m-%d %H:%M:%S')}, {lat}/{lon} into get_weather")
|
||||||
day = await get_weather(date_time, lat, lon, force_refresh=True)
|
day = await get_weather(date_time, lat, lon, force_refresh=True)
|
||||||
day_str = str(day)
|
day_str = str(day)
|
||||||
return JSONResponse(content={"weather": day_str}, status_code=200)
|
return JSONResponse(content={"weather": day_str}, status_code=200)
|
||||||
|
@ -44,20 +49,18 @@ async def get_refreshed_weather(
|
||||||
return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code)
|
return JSONResponse(content={"detail": str(e.detail)}, status_code=e.status_code)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in note_weather_get: {str(e)}")
|
err(f"Error in note_weather_get: {str(e)}")
|
||||||
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
|
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
|
||||||
|
|
||||||
async def get_weather(date_time: dt_datetime, latitude: float, longitude: float, force_refresh: bool = False):
|
async def get_weather(date_time: dt_datetime, latitude: float, longitude: float, force_refresh: bool = False):
|
||||||
logger.debug(f"Called get_weather with lat: {latitude}, lon: {longitude}, date_time: {date_time}")
|
|
||||||
logger.warning(f"Using {date_time} as our datetime in get_weather.")
|
|
||||||
fetch_new_data = True
|
fetch_new_data = True
|
||||||
if force_refresh == False:
|
if force_refresh == False:
|
||||||
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
|
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
|
||||||
if daily_weather_data:
|
if daily_weather_data:
|
||||||
try:
|
try:
|
||||||
logger.debug(f"Daily weather data from db: {daily_weather_data}")
|
debug(f"Daily weather data from db: {daily_weather_data}")
|
||||||
last_updated = str(daily_weather_data['DailyWeather'].get('last_updated'))
|
last_updated = str(daily_weather_data['DailyWeather'].get('last_updated'))
|
||||||
last_updated = await loc.dt(last_updated)
|
last_updated = await gis.dt(last_updated)
|
||||||
stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location'))
|
stored_loc_data = unhexlify(daily_weather_data['DailyWeather'].get('location'))
|
||||||
stored_loc = loads(stored_loc_data)
|
stored_loc = loads(stored_loc_data)
|
||||||
stored_lat = stored_loc.y
|
stored_lat = stored_loc.y
|
||||||
|
@ -65,68 +68,64 @@ async def get_weather(date_time: dt_datetime, latitude: float, longitude: float,
|
||||||
stored_ele = stored_loc.z
|
stored_ele = stored_loc.z
|
||||||
|
|
||||||
hourly_weather = daily_weather_data.get('HourlyWeather')
|
hourly_weather = daily_weather_data.get('HourlyWeather')
|
||||||
|
# debug(f"Hourly: {hourly_weather}")
|
||||||
logger.debug(f"Hourly: {hourly_weather}")
|
|
||||||
|
|
||||||
logger.debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\n")
|
|
||||||
|
|
||||||
request_haversine = haversine(latitude, longitude, stored_lat, stored_lon)
|
request_haversine = haversine(latitude, longitude, stored_lat, stored_lon)
|
||||||
logger.debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")
|
debug(f"\nINFO:\nlast updated {last_updated}\nstored lat: {stored_lat} - requested lat: {latitude}\nstored lon: {stored_lon} - requested lon: {longitude}\nHaversine: {request_haversine}")
|
||||||
|
|
||||||
if last_updated and (date_time <= dt_datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0:
|
if last_updated and (date_time <= dt_datetime.now(TZ) and last_updated > date_time and request_haversine < 8) and hourly_weather and len(hourly_weather) > 0:
|
||||||
logger.debug(f"We can use existing data... :')")
|
debug(f"We can use existing data... :')")
|
||||||
fetch_new_data = False
|
fetch_new_data = False
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in get_weather: {e}")
|
err(f"Error in get_weather: {e}")
|
||||||
|
|
||||||
if fetch_new_data:
|
if fetch_new_data:
|
||||||
logger.debug(f"We require new data!")
|
debug(f"We require new data!")
|
||||||
request_date_str = date_time.strftime("%Y-%m-%d")
|
request_date_str = date_time.strftime("%Y-%m-%d")
|
||||||
logger.warning(f"Using {date_time.strftime('%Y-%m-%d')} as our datetime for fetching new data.")
|
debug(f"Using {date_time.strftime('%Y-%m-%d')} as our datetime for fetching new data.")
|
||||||
url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"
|
url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{latitude},{longitude}/{request_date_str}/{request_date_str}?unitGroup=us&key={VISUALCROSSING_API_KEY}"
|
||||||
try:
|
try:
|
||||||
async with AsyncClient() as client:
|
async with AsyncClient() as client:
|
||||||
response = await client.get(url)
|
response = await client.get(url)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
logger.debug(f"Successfully obtained data from VC...")
|
debug(f"Successfully obtained data from VC...")
|
||||||
try:
|
try:
|
||||||
weather_data = response.json()
|
weather_data = response.json()
|
||||||
store_result = await store_weather_to_db(date_time, weather_data)
|
store_result = await store_weather_to_db(date_time, weather_data)
|
||||||
if store_result == "SUCCESS":
|
if store_result == "SUCCESS":
|
||||||
logger.debug(f"New weather data for {request_date_str} stored in database...")
|
debug(f"New weather data for {request_date_str} stored in database...")
|
||||||
else:
|
else:
|
||||||
logger.error(f"Failed to store weather data for {request_date_str} in database! {store_result}")
|
err(f"Failed to store weather data for {request_date_str} in database! {store_result}")
|
||||||
|
|
||||||
logger.debug(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}")
|
debug(f"Attempting to retrieve data for {date_time}, {latitude}, {longitude}")
|
||||||
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
|
daily_weather_data = await get_weather_from_db(date_time, latitude, longitude)
|
||||||
if daily_weather_data is not None:
|
if daily_weather_data is not None:
|
||||||
return daily_weather_data
|
return daily_weather_data
|
||||||
else:
|
else:
|
||||||
raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
|
raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Problem parsing VC response or storing data: {e}")
|
err(f"Problem parsing VC response or storing data: {e}")
|
||||||
raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
|
raise HTTPException(status_code=500, detail="Weather data was not properly stored.")
|
||||||
else:
|
else:
|
||||||
logger.error(f"Failed to fetch weather data: {response.status_code}, {response.text}")
|
err(f"Failed to fetch weather data: {response.status_code}, {response.text}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Exception during API call: {e}")
|
err(f"Exception during API call: {e}")
|
||||||
|
|
||||||
return daily_weather_data
|
return daily_weather_data
|
||||||
|
|
||||||
|
|
||||||
async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
||||||
logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in store_weather_to_db")
|
warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in store_weather_to_db")
|
||||||
async with DB.get_connection() as conn:
|
async with DB.get_connection() as conn:
|
||||||
try:
|
try:
|
||||||
day_data = weather_data.get('days')[0]
|
day_data = weather_data.get('days')[0]
|
||||||
logger.debug(f"RAW DAY_DATA: {day_data}")
|
debug(f"RAW DAY_DATA: {day_data}")
|
||||||
# Handle preciptype and stations as PostgreSQL arrays
|
# Handle preciptype and stations as PostgreSQL arrays
|
||||||
preciptype_array = day_data.get('preciptype', []) or []
|
preciptype_array = day_data.get('preciptype', []) or []
|
||||||
stations_array = day_data.get('stations', []) or []
|
stations_array = day_data.get('stations', []) or []
|
||||||
|
|
||||||
date_str = date_time.strftime("%Y-%m-%d")
|
date_str = date_time.strftime("%Y-%m-%d")
|
||||||
logger.warning(f"Using {date_str} in our query in store_weather_to_db.")
|
warn(f"Using {date_str} in our query in store_weather_to_db.")
|
||||||
|
|
||||||
# Get location details from weather data if available
|
# Get location details from weather data if available
|
||||||
longitude = weather_data.get('longitude')
|
longitude = weather_data.get('longitude')
|
||||||
|
@ -135,11 +134,11 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
||||||
elevation = await GEO.elevation(latitude, longitude)
|
elevation = await GEO.elevation(latitude, longitude)
|
||||||
location_point = f"POINTZ({longitude} {latitude} {elevation})" if longitude and latitude and elevation else None
|
location_point = f"POINTZ({longitude} {latitude} {elevation})" if longitude and latitude and elevation else None
|
||||||
|
|
||||||
logger.warning(f"Uncorrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}")
|
warn(f"Uncorrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}")
|
||||||
day_data['datetime'] = await loc.dt(day_data.get('datetimeEpoch'))
|
day_data['datetime'] = await gis.dt(day_data.get('datetimeEpoch'))
|
||||||
day_data['sunrise'] = await loc.dt(day_data.get('sunriseEpoch'))
|
day_data['sunrise'] = await gis.dt(day_data.get('sunriseEpoch'))
|
||||||
day_data['sunset'] = await loc.dt(day_data.get('sunsetEpoch'))
|
day_data['sunset'] = await gis.dt(day_data.get('sunsetEpoch'))
|
||||||
logger.warning(f"Corrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}")
|
warn(f"Corrected datetimes in store_weather_to_db: {day_data['datetime']}, sunrise: {day_data['sunrise']}, sunset: {day_data['sunset']}")
|
||||||
|
|
||||||
daily_weather_params = (
|
daily_weather_params = (
|
||||||
day_data.get('sunrise'), day_data.get('sunriseEpoch'),
|
day_data.get('sunrise'), day_data.get('sunriseEpoch'),
|
||||||
|
@ -163,7 +162,7 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
||||||
location_point
|
location_point
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to prepare database query in store_weather_to_db! {e}")
|
err(f"Failed to prepare database query in store_weather_to_db! {e}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
daily_weather_query = '''
|
daily_weather_query = '''
|
||||||
|
@ -183,11 +182,11 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
||||||
daily_weather_id = await conn.fetchval(daily_weather_query, *daily_weather_params)
|
daily_weather_id = await conn.fetchval(daily_weather_query, *daily_weather_params)
|
||||||
|
|
||||||
if 'hours' in day_data:
|
if 'hours' in day_data:
|
||||||
logger.debug(f"Processing hours now...")
|
debug(f"Processing hours now...")
|
||||||
for hour_data in day_data['hours']:
|
for hour_data in day_data['hours']:
|
||||||
try:
|
try:
|
||||||
await asyncio.sleep(0.01)
|
await asyncio.sleep(0.01)
|
||||||
hour_data['datetime'] = await loc.dt(hour_data.get('datetimeEpoch'))
|
hour_data['datetime'] = await gis.dt(hour_data.get('datetimeEpoch'))
|
||||||
hour_preciptype_array = hour_data.get('preciptype', []) or []
|
hour_preciptype_array = hour_data.get('preciptype', []) or []
|
||||||
hour_stations_array = hour_data.get('stations', []) or []
|
hour_stations_array = hour_data.get('stations', []) or []
|
||||||
hourly_weather_params = (
|
hourly_weather_params = (
|
||||||
|
@ -229,22 +228,22 @@ async def store_weather_to_db(date_time: dt_datetime, weather_data: dict):
|
||||||
'''
|
'''
|
||||||
async with conn.transaction():
|
async with conn.transaction():
|
||||||
hourly_weather_id = await conn.fetchval(hourly_weather_query, *hourly_weather_params)
|
hourly_weather_id = await conn.fetchval(hourly_weather_query, *hourly_weather_params)
|
||||||
logger.debug(f"Done processing hourly_weather_id {hourly_weather_id}")
|
debug(f"Done processing hourly_weather_id {hourly_weather_id}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"EXCEPTION: {e}")
|
err(f"EXCEPTION: {e}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"EXCEPTION: {e}")
|
err(f"EXCEPTION: {e}")
|
||||||
|
|
||||||
return "SUCCESS"
|
return "SUCCESS"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in dailyweather storage: {e}")
|
err(f"Error in dailyweather storage: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude: float):
|
async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude: float):
|
||||||
logger.warning(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in get_weather_from_db.")
|
warn(f"Using {date_time.strftime('%Y-%m-%d %H:%M:%S')} as our datetime in get_weather_from_db.")
|
||||||
async with DB.get_connection() as conn:
|
async with DB.get_connection() as conn:
|
||||||
query_date = date_time.date()
|
query_date = date_time.date()
|
||||||
try:
|
try:
|
||||||
|
@ -260,19 +259,19 @@ async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude
|
||||||
daily_weather_record = await conn.fetchrow(query, query_date, longitude, latitude, longitude, latitude)
|
daily_weather_record = await conn.fetchrow(query, query_date, longitude, latitude, longitude, latitude)
|
||||||
|
|
||||||
if daily_weather_record is None:
|
if daily_weather_record is None:
|
||||||
logger.debug(f"No daily weather data retrieved from database.")
|
debug(f"No daily weather data retrieved from database.")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Convert asyncpg.Record to a mutable dictionary
|
# Convert asyncpg.Record to a mutable dictionary
|
||||||
daily_weather_data = dict(daily_weather_record)
|
daily_weather_data = dict(daily_weather_record)
|
||||||
# logger.debug(f"Daily weather data prior to tz corrections: {daily_weather_data}")
|
# debug(f"Daily weather data prior to tz corrections: {daily_weather_data}")
|
||||||
# Now we can modify the dictionary
|
# Now we can modify the dictionary
|
||||||
# tz = await GEO.tz_at(latitude, longitude)
|
# tz = await GEO.tz_at(latitude, longitude)
|
||||||
# daily_weather_data['datetime'] = await loc.dt(daily_weather_data.get('datetime'), tz)
|
# daily_weather_data['datetime'] = await gis.dt(daily_weather_data.get('datetime'), tz)
|
||||||
# daily_weather_data['sunrise'] = await loc.dt(daily_weather_data.get('sunrise'), tz)
|
# daily_weather_data['sunrise'] = await gis.dt(daily_weather_data.get('sunrise'), tz)
|
||||||
# daily_weather_data['sunset'] = await loc.dt(daily_weather_data.get('sunset'), tz)
|
# daily_weather_data['sunset'] = await gis.dt(daily_weather_data.get('sunset'), tz)
|
||||||
|
|
||||||
# logger.debug(f"Daily weather data after tz corrections: {daily_weather_data}")
|
# debug(f"Daily weather data after tz corrections: {daily_weather_data}")
|
||||||
|
|
||||||
# Query to get hourly weather data
|
# Query to get hourly weather data
|
||||||
query = '''
|
query = '''
|
||||||
|
@ -285,17 +284,17 @@ async def get_weather_from_db(date_time: dt_datetime, latitude: float, longitude
|
||||||
hourly_weather_data = []
|
hourly_weather_data = []
|
||||||
for record in hourly_weather_records:
|
for record in hourly_weather_records:
|
||||||
hour_data = dict(record)
|
hour_data = dict(record)
|
||||||
# hour_data['datetime'] = await loc.dt(hour_data.get('datetime'), tz)
|
# hour_data['datetime'] = await gis.dt(hour_data.get('datetime'), tz)
|
||||||
hourly_weather_data.append(hour_data)
|
hourly_weather_data.append(hour_data)
|
||||||
|
|
||||||
# logger.debug(f"Hourly weather data after tz corrections: {hourly_weather_data}")
|
# debug(f"Hourly weather data after tz corrections: {hourly_weather_data}")
|
||||||
day = {
|
day = {
|
||||||
'DailyWeather': daily_weather_data,
|
'DailyWeather': daily_weather_data,
|
||||||
'HourlyWeather': hourly_weather_data,
|
'HourlyWeather': hourly_weather_data,
|
||||||
}
|
}
|
||||||
# logger.debug(f"day: {day}")
|
# debug(f"day: {day}")
|
||||||
return day
|
return day
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Unexpected error occurred: {e}")
|
err(f"Unexpected error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -30,17 +30,22 @@ from fastapi.security.api_key import APIKeyHeader
|
||||||
|
|
||||||
from sijapi import L, API, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_PATH_LENGTH, ARCHIVE_DIR
|
from sijapi import L, API, YEAR_FMT, MONTH_FMT, DAY_FMT, DAY_SHORT_FMT, OBSIDIAN_VAULT_DIR, ALLOWED_FILENAME_CHARS, MAX_PATH_LENGTH, ARCHIVE_DIR
|
||||||
logger = L.get_module_logger('utilities')
|
logger = L.get_module_logger('utilities')
|
||||||
|
def debug(text: str): logger.debug(text)
|
||||||
|
def info(text: str): logger.info(text)
|
||||||
|
def warn(text: str): logger.warning(text)
|
||||||
|
def err(text: str): logger.error(text)
|
||||||
|
def crit(text: str): logger.critical(text)
|
||||||
|
|
||||||
api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
|
api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
|
||||||
|
|
||||||
def validate_api_key(request: Request, api_key: str = Depends(api_key_header)):
|
def validate_api_key(request: Request, api_key: str = Depends(api_key_header)):
|
||||||
if request.url.path in API.PUBLIC:
|
if request.url.path in API.PUBLIC:
|
||||||
return
|
return
|
||||||
|
|
||||||
client_ip = ipaddress.ip_address(request.client.host)
|
client_ip = ipaddress.ip_address(request.client.host)
|
||||||
trusted_subnets = [ipaddress.ip_network(subnet) for subnet in API.TRUSTED_SUBNETS]
|
trusted_subnets = [ipaddress.ip_network(subnet) for subnet in API.TRUSTED_SUBNETS]
|
||||||
if any(client_ip in subnet for subnet in trusted_subnets):
|
if any(client_ip in subnet for subnet in trusted_subnets):
|
||||||
return
|
return
|
||||||
|
|
||||||
# Check header-based API key
|
# Check header-based API key
|
||||||
if api_key:
|
if api_key:
|
||||||
|
@ -63,18 +68,18 @@ def assemble_archive_path(filename: str, extension: str = ".md", date_time: date
|
||||||
day = date_time.strftime(DAY_FMT)
|
day = date_time.strftime(DAY_FMT)
|
||||||
day_short = date_time.strftime(DAY_SHORT_FMT)
|
day_short = date_time.strftime(DAY_SHORT_FMT)
|
||||||
timestamp = date_time.strftime("%H%M%S")
|
timestamp = date_time.strftime("%H%M%S")
|
||||||
|
|
||||||
# Ensure the extension is preserved
|
# Ensure the extension is preserved
|
||||||
base_name, ext = os.path.splitext(filename)
|
base_name, ext = os.path.splitext(filename)
|
||||||
extension = ext if ext else extension
|
extension = ext if ext else extension
|
||||||
|
|
||||||
# Initial sanitization
|
# Initial sanitization
|
||||||
sanitized_base = sanitize_filename(base_name, '')
|
sanitized_base = sanitize_filename(base_name, '')
|
||||||
filename = f"{day_short} {timestamp} {sanitized_base}{extension}"
|
filename = f"{day_short} {timestamp} {sanitized_base}{extension}"
|
||||||
|
|
||||||
relative_path = Path(year) / month / day / filename
|
relative_path = Path(year) / month / day / filename
|
||||||
absolute_path = ARCHIVE_DIR / relative_path
|
absolute_path = ARCHIVE_DIR / relative_path
|
||||||
|
|
||||||
# Ensure the total path length doesn't exceed MAX_PATH_LENGTH
|
# Ensure the total path length doesn't exceed MAX_PATH_LENGTH
|
||||||
while len(str(absolute_path)) > MAX_PATH_LENGTH:
|
while len(str(absolute_path)) > MAX_PATH_LENGTH:
|
||||||
# Truncate the sanitized_base, not the full filename
|
# Truncate the sanitized_base, not the full filename
|
||||||
|
@ -82,7 +87,7 @@ def assemble_archive_path(filename: str, extension: str = ".md", date_time: date
|
||||||
filename = f"{day_short} {timestamp} {sanitized_base}{extension}"
|
filename = f"{day_short} {timestamp} {sanitized_base}{extension}"
|
||||||
relative_path = Path(year) / month / day / filename
|
relative_path = Path(year) / month / day / filename
|
||||||
absolute_path = ARCHIVE_DIR / relative_path
|
absolute_path = ARCHIVE_DIR / relative_path
|
||||||
|
|
||||||
return absolute_path, relative_path
|
return absolute_path, relative_path
|
||||||
|
|
||||||
|
|
||||||
|
@@ -111,17 +116,17 @@ def assemble_journal_path(date_time: datetime, subdir: str = None, filename: str
extension = extension if extension.startswith(".") else f".{extension}"
else:
extension = validate_extension(filename, [".md", ".m4a", ".wav", ".aiff", ".flac", ".mp3", ".mp4", ".pdf", ".js", ".json", ".yaml", ".py"]) or ".md"

filename = sanitize_filename(filename)
filename = f"{day_short} {filename}" if no_timestamp else f"{day_short} {timestamp} {filename}"
filename = f"{filename}{extension}" if not filename.endswith(extension) else filename
relative_path = relative_path / filename

else:
-logger.debug(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.")
+debug(f"This only happens, theoretically, when no filename nor subdirectory are provided, but an extension is. Which is kinda silly.")
return None, None

absolute_path = OBSIDIAN_VAULT_DIR / relative_path
os.makedirs(absolute_path.parent, exist_ok=True)
return absolute_path, relative_path

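The journal filename is assembled in three steps: normalize a supplied extension to a leading dot (otherwise derive one from the filename against a whitelist, defaulting to .md), prefix the sanitized name with the short day name and optionally a timestamp, and append the extension only if it is not already there. A rough sketch of that assembly with simplified sanitization and invented example values:

from datetime import datetime
import re

def build_journal_filename(name: str, extension: str = "md",
                           dt: datetime = datetime(2024, 7, 29, 14, 15, 3),
                           no_timestamp: bool = False) -> str:
    # Normalize the extension so "md" and ".md" behave the same.
    extension = extension if extension.startswith(".") else f".{extension}"
    day_short = dt.strftime("%a")             # stand-in for DAY_SHORT_FMT
    timestamp = dt.strftime("%H%M%S")
    name = re.sub(r"\s+", " ", name).strip()  # simplified stand-in for sanitize_filename
    name = f"{day_short} {name}" if no_timestamp else f"{day_short} {timestamp} {name}"
    return name if name.endswith(extension) else f"{name}{extension}"

print(build_journal_filename("Meeting notes.md"))                       # extension not doubled
print(build_journal_filename("Meeting notes", "md", no_timestamp=True))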
@@ -142,8 +147,8 @@ def f(file):
if hasattr(file, 'read') and callable(file.read):
return file
if isinstance(file, (bytes, bytearray)):
return file

if isinstance(file, Path):
file_path = file
elif isinstance(file, str):
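This helper accepts several input shapes — an already-open file-like object, raw bytes, a Path, or a string path — and normalizes them before any reading happens. A small sketch of that kind of coercion; the function name and the bytes-returning behaviour here are illustrative, not the module's:

from pathlib import Path

def coerce_to_bytes(file) -> bytes:
    """Return raw bytes for file-like objects, bytes, Path objects, or path strings."""
    if hasattr(file, "read") and callable(file.read):
        data = file.read()
        return data if isinstance(data, bytes) else data.encode("utf-8")
    if isinstance(file, (bytes, bytearray)):
        return bytes(file)
    if isinstance(file, (str, Path)):
        return Path(file).read_bytes()
    raise TypeError(f"Unsupported input type: {type(file)!r}")

print(coerce_to_bytes(b"already bytes"))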
@@ -165,16 +170,16 @@ def get_extension(file):
file_path = Path(file.filename)
file_extension = file_path.suffix
return file_extension

except Exception as e:
-logger.error(f"Unable to get extension of {file}")
+err(f"Unable to get extension of {file}")
raise e



def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LENGTH):
"""Sanitize a string to be used as a safe filename while protecting the file extension."""
-logger.debug(f"Filename before sanitization: {text}")
+debug(f"Filename before sanitization: {text}")

text = re.sub(r'\s+', ' ', text)
sanitized = re.sub(ALLOWED_FILENAME_CHARS, '', text)
@@ -186,7 +191,7 @@ def sanitize_filename(text, extension: str = None, max_length: int = MAX_PATH_LE
base_name = base_name[:max_base_length - 5].rstrip()
final_filename = base_name + extension

-logger.debug(f"Filename after sanitization: {final_filename}")
+debug(f"Filename after sanitization: {final_filename}")
return final_filename

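sanitize_filename collapses runs of whitespace, strips the characters matched by ALLOWED_FILENAME_CHARS (the pattern is passed to re.sub(..., '', text), so it matches what gets removed), and truncates the base while keeping the extension intact. A standalone sketch with an assumed pattern — the real ALLOWED_FILENAME_CHARS may differ:

import re

# Assumed pattern: anything that is not a word character, space, dot, or hyphen.
DISALLOWED = r"[^\w \.-]"

def sanitize(text: str, extension: str = ".md", max_length: int = 255) -> str:
    text = re.sub(r"\s+", " ", text)             # collapse whitespace
    base = re.sub(DISALLOWED, "", text).strip()  # drop disallowed characters
    max_base = max_length - len(extension)
    if len(base) > max_base:
        base = base[:max_base].rstrip()
    return base + extension

print(sanitize('Project: "Q3/Q4" plan??', ".md"))  # -> Project Q3Q4 plan.md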
@@ -196,16 +201,16 @@ def check_file_name(file_name, max_length=255):
needs_sanitization = False

if len(file_name) > max_length:
-logger.debug(f"Filename exceeds maximum length of {max_length}: {file_name}")
+debug(f"Filename exceeds maximum length of {max_length}: {file_name}")
needs_sanitization = True
if re.search(ALLOWED_FILENAME_CHARS, file_name):
-logger.debug(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}")
+debug(f"Filename contains non-word characters (except space, dot, and hyphen): {file_name}")
needs_sanitization = True
if re.search(r'\s{2,}', file_name):
-logger.debug(f"Filename contains multiple consecutive spaces: {file_name}")
+debug(f"Filename contains multiple consecutive spaces: {file_name}")
needs_sanitization = True
if file_name != file_name.strip():
-logger.debug(f"Filename has leading or trailing spaces: {file_name}")
+debug(f"Filename has leading or trailing spaces: {file_name}")
needs_sanitization = True

return needs_sanitization
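check_file_name is a cheap pre-check: it only reports whether a name would need rewriting (too long, disallowed characters, doubled spaces, or leading/trailing whitespace), so callers can skip sanitization when nothing is wrong. A usage sketch against a simplified stand-in with an assumed disallowed-character pattern:

import re

def needs_sanitization(file_name: str, max_length: int = 255) -> bool:
    """Mirror of the checks above; the character pattern is an assumption."""
    return (
        len(file_name) > max_length
        or re.search(r"[^\w \.-]", file_name) is not None
        or re.search(r"\s{2,}", file_name) is not None
        or file_name != file_name.strip()
    )

for name in ["Mon 141503 notes.md", "bad:name.md", "double  space.md", " padded.md"]:
    print(name, "->", needs_sanitization(name))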
@@ -230,7 +235,7 @@ async def extract_text(file_path: str) -> str:
"""Extract text from file."""
if file_path.endswith('.pdf'):
return await extract_text_from_pdf(file_path)

elif file_path.endswith('.docx'):
return await extract_text_from_docx(file_path)

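extract_text dispatches on the file suffix to a format-specific extractor. The endswith chain works fine; an equivalent pattern (illustrative, not the module's) is a suffix-to-handler table, which keeps the dispatch in one place as more formats are added:

import asyncio
from pathlib import Path

async def extract_pdf(path: str) -> str:    # placeholder for the real PDF extractor
    return f"pdf text from {path}"

async def extract_docx(path: str) -> str:   # placeholder for the real DOCX extractor
    return f"docx text from {path}"

EXTRACTORS = {".pdf": extract_pdf, ".docx": extract_docx}

async def extract_text(path: str) -> str:
    handler = EXTRACTORS.get(Path(path).suffix.lower())
    if handler is None:
        raise ValueError(f"Unsupported file type: {path}")
    return await handler(path)

print(asyncio.run(extract_text("report.pdf")))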
@@ -248,13 +253,13 @@ async def ocr_pdf(file_path: str) -> str:
texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, image) for image in images))
return ' '.join(texts)
except Exception as e:
-logger.error(f"Error during OCR: {str(e)}")
+err(f"Error during OCR: {str(e)}")
return ""


async def extract_text_from_pdf(file_path: str) -> str:
if not await is_valid_pdf(file_path):
-logger.error(f"Invalid PDF file: {file_path}")
+err(f"Invalid PDF file: {file_path}")
return ""

text = ''
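ocr_pdf rasterizes the PDF pages and then runs the blocking pytesseract call for every page concurrently by pushing each call onto a worker thread with asyncio.to_thread and gathering the results in order. The same offloading pattern, shown with a generic blocking function so it runs without the OCR dependencies:

import asyncio
import time

def blocking_ocr(page_number: int) -> str:
    """Stand-in for pytesseract.image_to_string on one page image."""
    time.sleep(0.1)  # pretend this is heavy OCR work
    return f"text of page {page_number}"

async def ocr_all_pages(num_pages: int) -> str:
    # Each blocking call runs in the default thread pool; gather preserves page order.
    texts = await asyncio.gather(
        *(asyncio.to_thread(blocking_ocr, i) for i in range(num_pages))
    )
    return " ".join(texts)

print(asyncio.run(ocr_all_pages(3)))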
@@ -267,12 +272,12 @@ async def extract_text_from_pdf(file_path: str) -> str:
text_content = page.extract_text() + ' ' if page.extract_text() else ''
text += text_content
num_pages = len(reader.pages)

# If text was extracted successfully and it's deemed sufficient, return it
if text and not should_use_ocr(text, num_pages):
return clean_text(text)
except Exception as e:
-logger.error(f"Error extracting text with PyPDF2: {str(e)}")
+err(f"Error extracting text with PyPDF2: {str(e)}")

# If PyPDF2 extraction fails or is insufficient, fall back to pdfminer.six
try:
@@ -280,10 +285,10 @@ async def extract_text_from_pdf(file_path: str) -> str:
if text_pdfminer and not should_use_ocr(text_pdfminer, num_pages):
return clean_text(text_pdfminer)
except Exception as e:
-logger.error(f"Error extracting text with pdfminer.six: {e}")
+err(f"Error extracting text with pdfminer.six: {e}")

# If both methods fail or are deemed insufficient, use OCR as the last resort
-logger.debug("Falling back to OCR for text extraction...")
+debug("Falling back to OCR for text extraction...")
return await ocr_pdf(file_path)

async def is_valid_pdf(file_path: str) -> bool:
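The extraction strategy in these two hunks is a fallback chain: try the cheap extractor first (PyPDF2), check whether the result looks sufficient (should_use_ocr), then try pdfminer.six, and reach for OCR only as a last resort. A compact sketch of that control flow with the extractors stubbed out:

import asyncio

async def try_pypdf2(path: str) -> str:     # stub standing in for the PyPDF2 pass
    return ""                               # pretend nothing useful was embedded

async def try_pdfminer(path: str) -> str:   # stub standing in for the pdfminer.six pass
    return "recovered text from pdfminer"

async def try_ocr(path: str) -> str:        # stub standing in for the OCR pass
    return "recovered text from OCR"

def looks_sufficient(text: str) -> bool:
    return bool(text) and len(text) > 20    # toy stand-in for the should_use_ocr heuristic

async def extract_with_fallbacks(path: str) -> str:
    for extractor in (try_pypdf2, try_pdfminer):
        try:
            text = await extractor(path)
            if looks_sufficient(text):
                return text
        except Exception:
            continue                        # a failed extractor just means "try the next one"
    return await try_ocr(path)              # last resort

print(asyncio.run(extract_with_fallbacks("scanned.pdf")))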
@@ -292,12 +297,12 @@ async def is_valid_pdf(file_path: str) -> bool:
kind = filetype.guess(file_path)
return kind.mime == 'application/pdf'
except Exception as e:
-logger.error(f"Error checking file type: {e}")
+err(f"Error checking file type: {e}")
return False

async def extract_text_from_pdf(file_path: str) -> str:
if not await is_valid_pdf(file_path):
-logger.error(f"Invalid PDF file: {file_path}")
+err(f"Invalid PDF file: {file_path}")
return ""

text = ''
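is_valid_pdf leans on the filetype package, which sniffs magic bytes. filetype.guess returns None for content it cannot identify, so in the code above that case falls through to the except clause; a guard makes it explicit. A short sketch of that variation (illustrative, not the module's version):

import filetype

def is_valid_pdf(file_path: str) -> bool:
    """Magic-byte check that returns False, rather than raising, for unknown types."""
    kind = filetype.guess(file_path)
    return kind is not None and kind.mime == "application/pdf"

# Example (needs a file on disk):
# print(is_valid_pdf("document.pdf"))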
@@ -309,23 +314,23 @@ async def extract_text_from_pdf(file_path: str) -> str:
if text.strip(): # Successfully extracted text
return clean_text(text)
except Exception as e:
-logger.error(f"Error extracting text with PyPDF2: {str(e)}")
+err(f"Error extracting text with PyPDF2: {str(e)}")

try:
text_pdfminer = await asyncio.to_thread(pdfminer_extract_text, file_path)
if text_pdfminer.strip(): # Successfully extracted text
return clean_text(text_pdfminer)
except Exception as e:
-logger.error(f"Error extracting text with pdfminer.six: {str(e)}")
+err(f"Error extracting text with pdfminer.six: {str(e)}")

# Fall back to OCR
-logger.debug("Falling back to OCR for text extraction...")
+debug("Falling back to OCR for text extraction...")
try:
images = convert_from_path(file_path)
ocr_texts = await asyncio.gather(*(asyncio.to_thread(pytesseract.image_to_string, img) for img in images))
return ' '.join(ocr_texts).strip()
except Exception as e:
-logger.error(f"OCR failed: {str(e)}")
+err(f"OCR failed: {str(e)}")
return ""

async def extract_text_from_docx(file_path: str) -> str:
@@ -333,7 +338,7 @@ async def extract_text_from_docx(file_path: str) -> str:
doc = Document(file_path)
full_text = [paragraph.text for paragraph in doc.paragraphs]
return '\n'.join(full_text)

return await asyncio.to_thread(read_docx, file_path)

# Correcting read_text_file to be asynchronous
@@ -345,7 +350,7 @@ def _sync_read_text_file(file_path: str) -> str:
# Actual synchronous file reading operation
with open(file_path, 'r', encoding='utf-8') as file:
return file.read()

def should_use_ocr(text, num_pages) -> bool:
if not text:
return True # No text was extracted, use OCR
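should_use_ocr decides whether the embedded text that was extracted is good enough to skip OCR; the visible branch handles the empty case, and the rest of the function (outside this hunk) weighs the amount of text against the page count. One plausible characters-per-page heuristic, purely illustrative — the real threshold and logic may differ:

def should_use_ocr(text: str, num_pages: int, min_chars_per_page: int = 100) -> bool:
    """Return True when the extracted text is too thin to trust (illustrative threshold)."""
    if not text:
        return True                     # nothing extracted at all
    if num_pages <= 0:
        return False
    return len(text) / num_pages < min_chars_per_page

print(should_use_ocr("", 5))            # True: no embedded text
print(should_use_ocr("x" * 50, 5))      # True: ~10 chars/page, likely a scanned PDF
print(should_use_ocr("x" * 5000, 5))    # False: plenty of embedded text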
@@ -377,7 +382,7 @@ def convert_degrees_to_cardinal(d):
"""
dirs = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"]
ix = round(d / (360. / len(dirs)))
return dirs[ix % len(dirs)]


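With 16 compass points each sector spans 360 / 16 = 22.5 degrees, so rounding d / 22.5 picks the nearest point and the modulo wraps 360° back to north. For example, 100 / 22.5 ≈ 4.44 rounds to index 4 ("E"), while 355 / 22.5 ≈ 15.78 rounds to 16 and 16 % 16 = 0 gives "N". A quick check of the same formula:

dirs = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE",
        "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"]

def to_cardinal(d: float) -> str:
    ix = round(d / (360. / len(dirs)))
    return dirs[ix % len(dirs)]

for degrees in (0, 100, 355):
    print(degrees, "->", to_cardinal(degrees))   # N, E, N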
@@ -409,7 +414,7 @@ def convert_to_12_hour_format(datetime_obj_or_str):
datetime_obj_or_str = datetime_obj_or_str.strftime("%H:%M:%S")
else:
datetime_obj = datetime_obj_or_str

if isinstance(datetime_obj_or_str, str):
time24 = datetime_obj_or_str
else:
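convert_to_12_hour_format accepts either a datetime or an "HH:MM:SS" string and normalizes to a 24-hour string before converting; the conversion itself is a strptime/strftime round-trip. A minimal sketch of that round-trip (names here are illustrative):

from datetime import datetime

def to_12_hour(value) -> str:
    """Accept a datetime or an 'HH:MM:SS' string and return e.g. '02:15 PM'."""
    time24 = value.strftime("%H:%M:%S") if isinstance(value, datetime) else value
    return datetime.strptime(time24, "%H:%M:%S").strftime("%I:%M %p")

print(to_12_hour("14:15:03"))                      # 02:15 PM
print(to_12_hour(datetime(2024, 7, 29, 9, 5, 0)))  # 09:05 AM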
@@ -427,8 +432,8 @@ def encode_image_to_base64(image_path):
byte_data = output_buffer.getvalue()
base64_str = base64.b64encode(byte_data).decode('utf-8')
return base64_str
else:
-logger.debug(f"Error: File does not exist at {image_path}")
+debug(f"Error: File does not exist at {image_path}")

def resize_and_convert_image(image_path, max_size=2160, quality=80):
with Image.open(image_path) as img:
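encode_image_to_base64 renders the (resized) image into an in-memory buffer and base64-encodes the resulting bytes, which is the usual way to inline an image into JSON or a data: URI. A self-contained sketch using Pillow — it takes an Image object instead of a path, and the format and quality values are examples rather than the module's settings:

import base64
from io import BytesIO
from PIL import Image

def image_to_base64(image: Image.Image, max_size: int = 2160, quality: int = 80) -> str:
    """Resize in memory, save as JPEG into a buffer, and return a base64 string."""
    img = image.convert("RGB")
    img.thumbnail((max_size, max_size))   # preserves aspect ratio, only ever shrinks
    buffer = BytesIO()
    img.save(buffer, format="JPEG", quality=quality)
    return base64.b64encode(buffer.getvalue()).decode("utf-8")

# Demo with a generated image so no file on disk is needed.
print(image_to_base64(Image.new("RGB", (4000, 3000), "red"))[:60], "...")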
@@ -458,7 +463,7 @@ def load_geonames_data(path: str):
names=columns,
low_memory=False
)

return data

async def run_ssh_command(server, command):
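load_geonames_data reads a headerless, tab-separated GeoNames dump into a DataFrame, supplying the column names itself and turning off low_memory so dtypes are inferred consistently. A self-contained sketch with a tiny inline table and an illustrative subset of column names (the real file and columns list are much larger):

from io import StringIO
import pandas as pd

columns = ["geonameid", "name", "latitude", "longitude"]  # illustrative subset

sample = StringIO(
    "5391959\tSan Francisco\t37.77493\t-122.41942\n"
    "5368361\tLos Angeles\t34.05223\t-118.24368\n"
)

data = pd.read_csv(
    sample,           # the real function passes the path to the dump file
    sep="\t",
    header=None,
    names=columns,
    low_memory=False,
)
print(data.head())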
@@ -472,5 +477,5 @@ async def run_ssh_command(server, command):
ssh.close()
return output, error
except Exception as e:
-logger.error(f"SSH command failed for server {server.id}: {str(e)}")
+err(f"SSH command failed for server {server.id}: {str(e)}")
raise
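run_ssh_command executes one remote command, returns its output and error streams, closes the connection, and logs before re-raising on failure. The calls visible here (exec_command, ssh.close) look like paramiko's SSHClient; a hedged sketch along those lines, with the host, credentials, and thread offloading as assumptions rather than the module's actual shape:

import asyncio
import paramiko

async def run_ssh_command(host: str, username: str, command: str) -> tuple[str, str]:
    """Run one command over SSH in a worker thread and return (stdout, stderr)."""
    def _run() -> tuple[str, str]:
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # lenient; tighten for production
        ssh.connect(host, username=username)   # assumes key-based auth is already configured
        try:
            _stdin, stdout, stderr = ssh.exec_command(command)
            return stdout.read().decode(), stderr.read().decode()
        finally:
            ssh.close()
    return await asyncio.to_thread(_run)

# Example (requires a reachable host and configured keys):
# output, error = asyncio.run(run_ssh_command("example.com", "user", "uptime"))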